From 1bfd7be2080f97d15b57ec1cac0dba1f1f2ca23d Mon Sep 17 00:00:00 2001 From: Jatin Matani Date: Mon, 13 Apr 2015 15:26:48 -0700 Subject: [PATCH] Store raw strings for personal dictionary The raw strings would be sent to personal LM for decoding. Earlier lowercased strings were being used with the purpose of isValid checks (spelling does not consider casing for spell checking calls). But for showing these in suggestion, we need the raw strings. Note: PersonalDictionaryLookup#getWordsForLocale is used to feed the personal LM in PersonalLanguageModelHelper. Bug:20152986 Change-Id: I9d796fa57bf2073036bf11d86b143ff205a6199c --- .../latin/PersonalDictionaryLookup.java | 54 ++++++++++--------- .../latin/PersonalDictionaryLookupTest.java | 40 +++++++++++++- 2 files changed, 67 insertions(+), 27 deletions(-) diff --git a/java/src/com/android/inputmethod/latin/PersonalDictionaryLookup.java b/java/src/com/android/inputmethod/latin/PersonalDictionaryLookup.java index 1ba075c54..eed4ec1a0 100644 --- a/java/src/com/android/inputmethod/latin/PersonalDictionaryLookup.java +++ b/java/src/com/android/inputmethod/latin/PersonalDictionaryLookup.java @@ -40,9 +40,9 @@ import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Set; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -196,11 +196,10 @@ public class PersonalDictionaryLookup implements Closeable { private AtomicBoolean mIsClosed = new AtomicBoolean(false); /** - * We store a map from a dictionary word to the set of locales it belongs - * in. We then iterate over the set of locales to find a match using - * LocaleUtils. + * We store a map from a dictionary word to the set of locales & raw string(as it appears) + * We then iterate over the set of locales to find a match using LocaleUtils. 
*/ - private volatile HashMap> mDictWords; + private volatile HashMap> mDictWords; /** * We store a map from a shortcut to a word for each locale. @@ -317,7 +316,7 @@ public class PersonalDictionaryLookup implements Closeable { * @return set of words that apply to the given locale. */ public Set getWordsForLocale(@Nonnull final Locale inputLocale) { - final HashMap> dictWords = mDictWords; + final HashMap> dictWords = mDictWords; if (CollectionUtils.isNullOrEmpty(dictWords)) { return Collections.emptySet(); } @@ -325,12 +324,15 @@ public class PersonalDictionaryLookup implements Closeable { final Set words = new HashSet<>(); final String inputLocaleString = inputLocale.toString(); for (String word : dictWords.keySet()) { - for (Locale wordLocale : dictWords.get(word)) { - final String wordLocaleString = wordLocale.toString(); - final int match = LocaleUtils.getMatchLevel(wordLocaleString, inputLocaleString); - if (LocaleUtils.isMatch(match)) { - words.add(word); - } + HashMap localeStringMap = dictWords.get(word); + if (!CollectionUtils.isNullOrEmpty(localeStringMap)) { + for (Locale wordLocale : localeStringMap.keySet()) { + final String wordLocaleString = wordLocale.toString(); + final int match = LocaleUtils.getMatchLevel(wordLocaleString, inputLocaleString); + if (LocaleUtils.isMatch(match)) { + words.add(localeStringMap.get(wordLocale)); + } + } } } return words; @@ -399,29 +401,29 @@ public class PersonalDictionaryLookup implements Closeable { return false; } - // Atomically obtain the current copy of mDictWords; - final HashMap> dictWords = mDictWords; - if (DebugFlags.DEBUG_ENABLED) { Log.d(mTag, "isValidWord() : Word [" + word + "] in Locale [" + inputLocale + "]"); } + // Atomically obtain the current copy of mDictWords; + final HashMap> dictWords = mDictWords; // Lowercase the word using the given locale. Note, that dictionary // words are lowercased using their locale, and theoretically the // lowercasing between two matching locales may differ. 
For simplicity // we ignore that possibility. final String lowercased = word.toLowerCase(inputLocale); - final ArrayList dictLocales = dictWords.get(lowercased); - if (null == dictLocales) { + final HashMap dictLocales = dictWords.get(lowercased); + + if (CollectionUtils.isNullOrEmpty(dictLocales)) { if (DebugFlags.DEBUG_ENABLED) { - Log.d(mTag, "isValidWord() : No entry for lowercased word [" + lowercased + "]"); + Log.d(mTag, "isValidWord() : No entry for word [" + word + "]"); } return false; } else { if (DebugFlags.DEBUG_ENABLED) { - Log.d(mTag, "isValidWord() : Found entry for lowercased word [" + lowercased + "]"); + Log.d(mTag, "isValidWord() : Found entry for word [" + word + "]"); } // Iterate over the locales this word is in. - for (final Locale dictLocale : dictLocales) { + for (final Locale dictLocale : dictLocales.keySet()) { final int matchLevel = LocaleUtils.getMatchLevel(dictLocale.toString(), inputLocale.toString()); if (DebugFlags.DEBUG_ENABLED) { @@ -529,7 +531,7 @@ public class PersonalDictionaryLookup implements Closeable { return; } Log.i(mTag, "loadPersonalDictionary() : Start Loading"); - HashMap> dictWords = new HashMap<>(); + HashMap> dictWords = new HashMap<>(); HashMap> shortcutsPerLocale = new HashMap<>(); // Load the dictionary. Items are returned in the default sort order (by frequency). Cursor cursor = mResolver.query(UserDictionary.Words.CONTENT_URI, @@ -581,21 +583,21 @@ public class PersonalDictionaryLookup implements Closeable { final String dictWord = rawDictWord.toLowerCase(dictLocale); if (DebugFlags.DEBUG_ENABLED) { Log.d(mTag, "loadPersonalDictionary() : Adding word [" + dictWord - + "] for locale " + dictLocale); + + "] for locale " + dictLocale + "with value" + rawDictWord); } // Check if there is an existing entry for this word. 
- ArrayList dictLocales = dictWords.get(dictWord); - if (null == dictLocales) { + HashMap dictLocales = dictWords.get(dictWord); + if (CollectionUtils.isNullOrEmpty(dictLocales)) { // If there is no entry for this word, create one. if (DebugFlags.DEBUG_ENABLED) { Log.d(mTag, "loadPersonalDictionary() : Word [" + dictWord + "] not seen for other locales, creating new entry"); } - dictLocales = new ArrayList<>(); + dictLocales = new HashMap<>(); dictWords.put(dictWord, dictLocales); } // Append the locale to the list of locales this word is in. - dictLocales.add(dictLocale); + dictLocales.put(dictLocale, rawDictWord); // If there is no column for a shortcut, we're done. final int shortcutIndex = cursor.getColumnIndex(UserDictionary.Words.SHORTCUT); diff --git a/tests/src/com/android/inputmethod/latin/PersonalDictionaryLookupTest.java b/tests/src/com/android/inputmethod/latin/PersonalDictionaryLookupTest.java index 983957fd4..c06adedfd 100644 --- a/tests/src/com/android/inputmethod/latin/PersonalDictionaryLookupTest.java +++ b/tests/src/com/android/inputmethod/latin/PersonalDictionaryLookupTest.java @@ -289,7 +289,8 @@ public class PersonalDictionaryLookupTest extends AndroidTestCase { addWord("fOo", Locale.FRENCH, 17, null); // Create the PersonalDictionaryLookup and wait until it's loaded. - PersonalDictionaryLookup lookup = new PersonalDictionaryLookup(mContext, ExecutorUtils.SPELLING); + PersonalDictionaryLookup lookup = new PersonalDictionaryLookup(mContext, + ExecutorUtils.SPELLING); lookup.open(); // Both en_CA and en_US match. @@ -304,6 +305,43 @@ public class PersonalDictionaryLookupTest extends AndroidTestCase { lookup.close(); } + + public void testCaseMatchingForWordsAndShortcuts() { + Log.d(TAG, "testCaseMatchingForWordsAndShortcuts"); + addWord("Foo", Locale.US, 17, "f"); + addWord("bokabu", Locale.US, 17, "Bu"); + + // Create the PersonalDictionaryLookup and wait until it's loaded. 
+ PersonalDictionaryLookup lookup = new PersonalDictionaryLookup(mContext, + ExecutorUtils.SPELLING); + lookup.open(); + + // Valid in US regardless of capitalization, but not in + // other locales. + assertTrue(lookup.isValidWord("Foo", Locale.US)); + assertTrue(lookup.isValidWord("foo", Locale.US)); + assertFalse(lookup.isValidWord("Foo", Locale.UK)); + assertFalse(lookup.isValidWord("foo", Locale.UK)); + + // Valid in all forms in US. + assertTrue(lookup.isValidWord("bokabu", Locale.US)); + assertTrue(lookup.isValidWord("BOKABU", Locale.US)); + assertTrue(lookup.isValidWord("BokaBU", Locale.US)); + + // Correct capitalization; sensitive to shortcut casing & locale. + assertEquals("Foo", lookup.expandShortcut("f", Locale.US)); + assertNull(lookup.expandShortcut("f", Locale.UK)); + + // Correct capitalization; sensitive to shortcut casing & locale. + assertEquals("bokabu", lookup.expandShortcut("Bu", Locale.US)); + assertNull(lookup.expandShortcut("Bu", Locale.UK)); + assertNull(lookup.expandShortcut("bu", Locale.US)); + + // Verify that raw strings are retained for #getWordsForLocale. + verifyWordExists(lookup.getWordsForLocale(Locale.US), "Foo"); + verifyWordDoesNotExist(lookup.getWordsForLocale(Locale.US), "foo"); + } + public void testManageListeners() { Log.d(TAG, "testManageListeners");