Store raw strings for personal dictionary

The raw strings are sent to the personal LM for decoding.
Previously, only lowercased strings were stored, because they were
used for isValid checks (spell-checking calls do not consider
casing). But to show these words in suggestions, we need the raw
strings.

Note: PersonalDictionaryLookup#getWordsForLocale is used to feed
the personal LM in PersonalLanguageModelHelper.

Bug:20152986
Change-Id: I9d796fa57bf2073036bf11d86b143ff205a6199c
main
Jatin Matani 2015-04-13 15:26:48 -07:00
parent 40f0f61bb3
commit 1bfd7be208
2 changed files with 67 additions and 27 deletions

View File

@ -40,9 +40,9 @@ import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
@ -196,11 +196,10 @@ public class PersonalDictionaryLookup implements Closeable {
private AtomicBoolean mIsClosed = new AtomicBoolean(false);
/**
* We store a map from a dictionary word to the set of locales it belongs
* in. We then iterate over the set of locales to find a match using
* LocaleUtils.
* We store a map from a lowercased dictionary word to a map of locale -> raw string (the word
* as it was added). We then iterate over the locales to find a match using LocaleUtils.
*/
private volatile HashMap<String, ArrayList<Locale>> mDictWords;
private volatile HashMap<String, HashMap<Locale, String>> mDictWords;
/**
* We store a map from a shortcut to a word for each locale.
@ -317,7 +316,7 @@ public class PersonalDictionaryLookup implements Closeable {
* @return set of words that apply to the given locale.
*/
public Set<String> getWordsForLocale(@Nonnull final Locale inputLocale) {
final HashMap<String, ArrayList<Locale>> dictWords = mDictWords;
final HashMap<String, HashMap<Locale, String>> dictWords = mDictWords;
if (CollectionUtils.isNullOrEmpty(dictWords)) {
return Collections.emptySet();
}
@ -325,12 +324,15 @@ public class PersonalDictionaryLookup implements Closeable {
final Set<String> words = new HashSet<>();
final String inputLocaleString = inputLocale.toString();
for (String word : dictWords.keySet()) {
for (Locale wordLocale : dictWords.get(word)) {
final String wordLocaleString = wordLocale.toString();
final int match = LocaleUtils.getMatchLevel(wordLocaleString, inputLocaleString);
if (LocaleUtils.isMatch(match)) {
words.add(word);
}
HashMap<Locale, String> localeStringMap = dictWords.get(word);
if (!CollectionUtils.isNullOrEmpty(localeStringMap)) {
for (Locale wordLocale : localeStringMap.keySet()) {
final String wordLocaleString = wordLocale.toString();
final int match = LocaleUtils.getMatchLevel(wordLocaleString, inputLocaleString);
if (LocaleUtils.isMatch(match)) {
words.add(localeStringMap.get(wordLocale));
}
}
}
}
return words;
@ -399,29 +401,29 @@ public class PersonalDictionaryLookup implements Closeable {
return false;
}
// Atomically obtain the current copy of mDictWords;
final HashMap<String, ArrayList<Locale>> dictWords = mDictWords;
if (DebugFlags.DEBUG_ENABLED) {
Log.d(mTag, "isValidWord() : Word [" + word + "] in Locale [" + inputLocale + "]");
}
// Atomically obtain the current copy of mDictWords;
final HashMap<String, HashMap<Locale, String>> dictWords = mDictWords;
// Lowercase the word using the given locale. Note, that dictionary
// words are lowercased using their locale, and theoretically the
// lowercasing between two matching locales may differ. For simplicity
// we ignore that possibility.
final String lowercased = word.toLowerCase(inputLocale);
final ArrayList<Locale> dictLocales = dictWords.get(lowercased);
if (null == dictLocales) {
final HashMap<Locale, String> dictLocales = dictWords.get(lowercased);
if (CollectionUtils.isNullOrEmpty(dictLocales)) {
if (DebugFlags.DEBUG_ENABLED) {
Log.d(mTag, "isValidWord() : No entry for lowercased word [" + lowercased + "]");
Log.d(mTag, "isValidWord() : No entry for word [" + word + "]");
}
return false;
} else {
if (DebugFlags.DEBUG_ENABLED) {
Log.d(mTag, "isValidWord() : Found entry for lowercased word [" + lowercased + "]");
Log.d(mTag, "isValidWord() : Found entry for word [" + word + "]");
}
// Iterate over the locales this word is in.
for (final Locale dictLocale : dictLocales) {
for (final Locale dictLocale : dictLocales.keySet()) {
final int matchLevel = LocaleUtils.getMatchLevel(dictLocale.toString(),
inputLocale.toString());
if (DebugFlags.DEBUG_ENABLED) {
@ -529,7 +531,7 @@ public class PersonalDictionaryLookup implements Closeable {
return;
}
Log.i(mTag, "loadPersonalDictionary() : Start Loading");
HashMap<String, ArrayList<Locale>> dictWords = new HashMap<>();
HashMap<String, HashMap<Locale, String>> dictWords = new HashMap<>();
HashMap<Locale, HashMap<String, String>> shortcutsPerLocale = new HashMap<>();
// Load the dictionary. Items are returned in the default sort order (by frequency).
Cursor cursor = mResolver.query(UserDictionary.Words.CONTENT_URI,
@ -581,21 +583,21 @@ public class PersonalDictionaryLookup implements Closeable {
final String dictWord = rawDictWord.toLowerCase(dictLocale);
if (DebugFlags.DEBUG_ENABLED) {
Log.d(mTag, "loadPersonalDictionary() : Adding word [" + dictWord
+ "] for locale " + dictLocale);
+ "] for locale " + dictLocale + "with value" + rawDictWord);
}
// Check if there is an existing entry for this word.
ArrayList<Locale> dictLocales = dictWords.get(dictWord);
if (null == dictLocales) {
HashMap<Locale, String> dictLocales = dictWords.get(dictWord);
if (CollectionUtils.isNullOrEmpty(dictLocales)) {
// If there is no entry for this word, create one.
if (DebugFlags.DEBUG_ENABLED) {
Log.d(mTag, "loadPersonalDictionary() : Word [" + dictWord +
"] not seen for other locales, creating new entry");
}
dictLocales = new ArrayList<>();
dictLocales = new HashMap<>();
dictWords.put(dictWord, dictLocales);
}
// Append the locale to the list of locales this word is in.
dictLocales.add(dictLocale);
dictLocales.put(dictLocale, rawDictWord);
// If there is no column for a shortcut, we're done.
final int shortcutIndex = cursor.getColumnIndex(UserDictionary.Words.SHORTCUT);

View File

@ -289,7 +289,8 @@ public class PersonalDictionaryLookupTest extends AndroidTestCase {
addWord("fOo", Locale.FRENCH, 17, null);
// Create the PersonalDictionaryLookup and wait until it's loaded.
PersonalDictionaryLookup lookup = new PersonalDictionaryLookup(mContext, ExecutorUtils.SPELLING);
PersonalDictionaryLookup lookup = new PersonalDictionaryLookup(mContext,
ExecutorUtils.SPELLING);
lookup.open();
// Both en_CA and en_US match.
@ -304,6 +305,43 @@ public class PersonalDictionaryLookupTest extends AndroidTestCase {
lookup.close();
}
/**
 * Verifies case handling for personal dictionary words and shortcuts: word validity checks
 * ignore the casing of the queried word, shortcut expansion is sensitive to the casing of the
 * shortcut, and {@code getWordsForLocale} returns words with the casing they were added with.
 */
public void testCaseMatchingForWordsAndShortcuts() {
    Log.d(TAG, "testCaseMatchingForWordsAndShortcuts");
    addWord("Foo", Locale.US, 17, "f");
    addWord("bokabu", Locale.US, 17, "Bu");

    // Create the PersonalDictionaryLookup and wait until it's loaded.
    final PersonalDictionaryLookup lookup = new PersonalDictionaryLookup(mContext,
            ExecutorUtils.SPELLING);
    lookup.open();
    try {
        // Valid, in spite of capitalization, in US but not in other locales.
        assertTrue(lookup.isValidWord("Foo", Locale.US));
        assertTrue(lookup.isValidWord("foo", Locale.US));
        assertFalse(lookup.isValidWord("Foo", Locale.UK));
        assertFalse(lookup.isValidWord("foo", Locale.UK));

        // Valid in all forms in US.
        assertTrue(lookup.isValidWord("bokabu", Locale.US));
        assertTrue(lookup.isValidWord("BOKABU", Locale.US));
        assertTrue(lookup.isValidWord("BokaBU", Locale.US));

        // Correct capitalization; sensitive to shortcut casing & locale.
        assertEquals("Foo", lookup.expandShortcut("f", Locale.US));
        assertNull(lookup.expandShortcut("f", Locale.UK));

        // Correct capitalization; sensitive to shortcut casing & locale.
        assertEquals("bokabu", lookup.expandShortcut("Bu", Locale.US));
        assertNull(lookup.expandShortcut("Bu", Locale.UK));
        assertNull(lookup.expandShortcut("bu", Locale.US));

        // Verify that raw strings are retained for #getWordsForLocale.
        verifyWordExists(lookup.getWordsForLocale(Locale.US), "Foo");
        verifyWordDoesNotExist(lookup.getWordsForLocale(Locale.US), "foo");
    } finally {
        // The lookup is Closeable; close it even when an assertion above fails so that it
        // does not outlive this test.
        lookup.close();
    }
}
public void testManageListeners() {
Log.d(TAG, "testManageListeners");