Add proximity for Russian.

This also tries to make the code as easy to extend as possible
for future developments.

Bug: 5701241
Change-Id: I1ed48e6a5cc7aab94c5d6e309930cc004247d7e7
This commit is contained in:
Jean Chalard 2011-12-08 16:52:08 +09:00
parent cce3f791af
commit 1830cd1dc8
3 changed files with 182 additions and 86 deletions

View file

@ -65,12 +65,12 @@ public class ProximityInfo {
return new ProximityInfo(1, 1, 1, 1, 1, 1, Collections.<Key>emptyList(), null); return new ProximityInfo(1, 1, 1, 1, 1, 1, Collections.<Key>emptyList(), null);
} }
public static ProximityInfo createSpellCheckerProximityInfo() { public static ProximityInfo createSpellCheckerProximityInfo(final int[] proximity) {
final ProximityInfo spellCheckerProximityInfo = createDummyProximityInfo(); final ProximityInfo spellCheckerProximityInfo = createDummyProximityInfo();
spellCheckerProximityInfo.mNativeProximityInfo = spellCheckerProximityInfo.mNativeProximityInfo =
spellCheckerProximityInfo.setProximityInfoNative( spellCheckerProximityInfo.setProximityInfoNative(
SpellCheckerProximityInfo.ROW_SIZE, SpellCheckerProximityInfo.ROW_SIZE,
480, 300, 10, 3, SpellCheckerProximityInfo.PROXIMITY, 480, 300, 11, 3, proximity,
0, null, null, null, null, null, null, null, null); 0, null, null, null, null, null, null, null, null);
return spellCheckerProximityInfo; return spellCheckerProximityInfo;
} }

View file

@ -99,6 +99,25 @@ public class AndroidSpellCheckerService extends SpellCheckerService
private final HashSet<WeakReference<DictionaryCollection>> mDictionaryCollectionsList = private final HashSet<WeakReference<DictionaryCollection>> mDictionaryCollectionsList =
new HashSet<WeakReference<DictionaryCollection>>(); new HashSet<WeakReference<DictionaryCollection>>();
/** Script identifier for languages written with the Latin alphabet. */
public static final int SCRIPT_LATIN = 0;
/** Script identifier for languages written with the Cyrillic alphabet. */
public static final int SCRIPT_CYRILLIC = 1;
/**
 * Supported languages, keyed by language code, mapped to the script they are written in.
 * Words written in a script other than the selected one are not checked: the dictionary
 * does not contain them, so every such word would be underlined and no suggestion could
 * ever be offered, which makes checking them pointless.
 */
private static final TreeMap<String, Integer> mLanguageToScript;
static {
    final String[] latinLanguages = { "en", "fr", "de", "nl", "cs", "es", "it" };
    final TreeMap<String, Integer> languageToScript = new TreeMap<String, Integer>();
    for (final String language : latinLanguages) {
        languageToScript.put(language, SCRIPT_LATIN);
    }
    languageToScript.put("ru", SCRIPT_CYRILLIC);
    mLanguageToScript = languageToScript;
}
@Override public void onCreate() { @Override public void onCreate() {
super.onCreate(); super.onCreate();
mSuggestionThreshold = mSuggestionThreshold =
@ -110,6 +129,15 @@ public class AndroidSpellCheckerService extends SpellCheckerService
onSharedPreferenceChanged(prefs, PREF_USE_CONTACTS_KEY); onSharedPreferenceChanged(prefs, PREF_USE_CONTACTS_KEY);
} }
/**
 * Looks up the script registered in {@code mLanguageToScript} for the language of the
 * given locale.
 *
 * @param locale the locale whose language code is used as the lookup key
 * @return the script constant stored for that language
 * @throws RuntimeException if no script is registered for the language
 */
private static int getScriptFromLocale(final Locale locale) {
    final String language = locale.getLanguage();
    final Integer script = mLanguageToScript.get(language);
    if (null != script) {
        return script;
    }
    throw new RuntimeException("We have been called with an unsupported language: \""
            + language + "\". Framework bug?");
}
@Override @Override
public void onSharedPreferenceChanged(final SharedPreferences prefs, final String key) { public void onSharedPreferenceChanged(final SharedPreferences prefs, final String key) {
if (!PREF_USE_CONTACTS_KEY.equals(key)) return; if (!PREF_USE_CONTACTS_KEY.equals(key)) return;
@ -363,7 +391,9 @@ public class AndroidSpellCheckerService extends SpellCheckerService
} }
public DictAndProximity createDictAndProximity(final Locale locale) { public DictAndProximity createDictAndProximity(final Locale locale) {
final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo(); final int script = getScriptFromLocale(locale);
final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo(
SpellCheckerProximityInfo.getProximityForScript(script));
final Resources resources = getResources(); final Resources resources = getResources();
final int fallbackResourceId = Utils.getMainDictionaryResourceId(resources); final int fallbackResourceId = Utils.getMainDictionaryResourceId(resources);
final DictionaryCollection dictionaryCollection = final DictionaryCollection dictionaryCollection =
@ -415,25 +445,6 @@ public class AndroidSpellCheckerService extends SpellCheckerService
} }
private static class AndroidSpellCheckerSession extends Session { private static class AndroidSpellCheckerSession extends Session {
// Script identifiers: one value per writing system.
private static final int SCRIPT_LATIN = 0;
private static final int SCRIPT_CYRILLIC = 1;
// Supported language codes and the script each one is written in. Words in a script
// other than the selected one are not checked: they are not in the dictionary, so they
// would all be underlined without any suggestion, and checking them makes no sense.
private static final TreeMap<String, Integer> mLanguageToScript;
static {
    final TreeMap<String, Integer> scriptByLanguage = new TreeMap<String, Integer>();
    final String[] latinLanguageCodes = { "en", "fr", "de", "nl", "cs", "es", "it" };
    for (int i = 0; i < latinLanguageCodes.length; ++i) {
        scriptByLanguage.put(latinLanguageCodes[i], SCRIPT_LATIN);
    }
    scriptByLanguage.put("ru", SCRIPT_CYRILLIC);
    mLanguageToScript = scriptByLanguage;
}
// Immutable, but need the locale which is not available in the constructor yet // Immutable, but need the locale which is not available in the constructor yet
private DictionaryPool mDictionaryPool; private DictionaryPool mDictionaryPool;
// Likewise // Likewise
@ -452,12 +463,7 @@ public class AndroidSpellCheckerService extends SpellCheckerService
final String localeString = getLocale(); final String localeString = getLocale();
mDictionaryPool = mService.getDictionaryPool(localeString); mDictionaryPool = mService.getDictionaryPool(localeString);
mLocale = LocaleUtils.constructLocaleFromString(localeString); mLocale = LocaleUtils.constructLocaleFromString(localeString);
final Integer script = mLanguageToScript.get(mLocale.getLanguage()); mScript = getScriptFromLocale(mLocale);
if (null == script) {
throw new RuntimeException("We have been called with an unsupported language: \""
+ mLocale.getLanguage() + "\". Framework bug?");
}
mScript = script;
} }
/* /*
@ -565,12 +571,17 @@ public class AndroidSpellCheckerService extends SpellCheckerService
final int length = text.length(); final int length = text.length();
for (int i = 0; i < length; ++i) { for (int i = 0; i < length; ++i) {
final int character = text.codePointAt(i); final int character = text.codePointAt(i);
final int proximityIndex = SpellCheckerProximityInfo.getIndexOf(character); final int proximityIndex =
SpellCheckerProximityInfo.getIndexOfCodeForScript(character, mScript);
final int[] proximities; final int[] proximities;
if (-1 == proximityIndex) { if (-1 == proximityIndex) {
proximities = new int[] { character }; proximities = new int[] { character };
} else { } else {
proximities = Arrays.copyOfRange(SpellCheckerProximityInfo.PROXIMITY, // TODO: an initial examination seems to reveal this is actually used
// read-only. It should be possible to compute the arrays statically once
// and skip doing a copy each time here.
proximities = Arrays.copyOfRange(
SpellCheckerProximityInfo.getProximityForScript(mScript),
proximityIndex, proximityIndex,
proximityIndex + SpellCheckerProximityInfo.ROW_SIZE); proximityIndex + SpellCheckerProximityInfo.ROW_SIZE);
} }

View file

@ -29,11 +29,27 @@ public class SpellCheckerProximityInfo {
// as the size of the passed array afterwards so they can't be different. // as the size of the passed array afterwards so they can't be different.
final public static int ROW_SIZE = ProximityInfo.MAX_PROXIMITY_CHARS_SIZE; final public static int ROW_SIZE = ProximityInfo.MAX_PROXIMITY_CHARS_SIZE;
// Helper methods
/**
 * Fills {@code indices} with one entry per row of a flat proximity table.
 *
 * The table is walked in steps of {@code ROW_SIZE}. For each row whose first cell is
 * not {@code NUL}, that first cell is stored as the key and the row's starting offset
 * in {@code proximity} as the value.
 *
 * @param proximity the flat proximity table to scan
 * @param indices the map that receives the (first cell, row offset) entries
 */
final protected static void buildProximityIndices(final int[] proximity,
        final TreeMap<Integer, Integer> indices) {
    int rowStart = 0;
    while (rowStart < proximity.length) {
        final int rowKey = proximity[rowStart];
        if (NUL != rowKey) {
            indices.put(rowKey, rowStart);
        }
        rowStart += ROW_SIZE;
    }
}
/**
 * Returns the value stored in {@code indices} for the given character code.
 *
 * @param characterCode the code to look up
 * @param indices the map to search
 * @return the mapped value, or -1 when the map holds no value for the code
 */
final protected static int computeIndex(final int characterCode,
        final TreeMap<Integer, Integer> indices) {
    final Integer index = indices.get(characterCode);
    // The unboxing only happens on the non-null branch.
    return null != index ? index : -1;
}
static class Latin {
// This is a map from the code point to the index in the PROXIMITY array. // This is a map from the code point to the index in the PROXIMITY array.
// At the time the native code to read the binary dictionary needs the proximity info be passed // At the time the native code to read the binary dictionary needs the proximity info be
// as a flat array spaced by MAX_PROXIMITY_CHARS_SIZE columns, one for each input character. // passed as a flat array spaced by MAX_PROXIMITY_CHARS_SIZE columns, one for each input
// Since we need to build such an array, we want to be able to search in our big proximity data // character.
// quickly by character, and a map is probably the best way to do this. // Since we need to build such an array, we want to be able to search in our big proximity
// data quickly by character, and a map is probably the best way to do this.
final private static TreeMap<Integer, Integer> INDICES = new TreeMap<Integer, Integer>(); final private static TreeMap<Integer, Integer> INDICES = new TreeMap<Integer, Integer>();
// The proximity here is the union of // The proximity here is the union of
@ -46,7 +62,7 @@ public class SpellCheckerProximityInfo {
// to spell check has been entered with one of the keyboards above. Also, specifically // to spell check has been entered with one of the keyboards above. Also, specifically
// to English, many spelling errors consist of the last vowel of the word being wrong // to English, many spelling errors consist of the last vowel of the word being wrong
// because in English vowels tend to merge with each other in pronunciation. // because in English vowels tend to merge with each other in pronunciation.
final public static int[] PROXIMITY = { final private static int[] PROXIMITY = {
'q', 'w', 's', 'a', 'z', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, 'q', 'w', 's', 'a', 'z', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'w', 'q', 'a', 's', 'd', 'e', 'x', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, 'w', 'q', 'a', 's', 'd', 'e', 'x', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'e', 'w', 's', 'd', 'f', 'r', 'a', 'i', 'o', 'u', NUL, NUL, NUL, NUL, NUL, NUL, 'e', 'w', 's', 'd', 'f', 'r', 'a', 'i', 'o', 'u', NUL, NUL, NUL, NUL, NUL, NUL,
@ -57,6 +73,7 @@ public class SpellCheckerProximityInfo {
'i', 'u', 'j', 'k', 'l', 'o', 'a', 'e', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, 'i', 'u', 'j', 'k', 'l', 'o', 'a', 'e', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'o', 'i', 'k', 'l', 'p', 'a', 'e', 'u', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, 'o', 'i', 'k', 'l', 'p', 'a', 'e', 'u', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'p', 'o', 'l', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, 'p', 'o', 'l', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'a', 'z', 'x', 's', 'w', 'q', 'e', 'i', 'o', 'u', NUL, NUL, NUL, NUL, NUL, NUL, 'a', 'z', 'x', 's', 'w', 'q', 'e', 'i', 'o', 'u', NUL, NUL, NUL, NUL, NUL, NUL,
's', 'q', 'a', 'z', 'x', 'c', 'd', 'e', 'w', NUL, NUL, NUL, NUL, NUL, NUL, NUL, 's', 'q', 'a', 'z', 'x', 'c', 'd', 'e', 'w', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
@ -68,6 +85,7 @@ public class SpellCheckerProximityInfo {
'k', 'u', 'j', 'm', 'l', 'o', 'i', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, 'k', 'u', 'j', 'm', 'l', 'o', 'i', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'l', 'i', 'k', 'p', 'o', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, 'l', 'i', 'k', 'p', 'o', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'z', 'a', 's', 'd', 'x', 't', 'g', 'h', 'j', 'u', 'q', 'e', NUL, NUL, NUL, NUL, 'z', 'a', 's', 'd', 'x', 't', 'g', 'h', 'j', 'u', 'q', 'e', NUL, NUL, NUL, NUL,
'x', 'z', 'a', 's', 'd', 'c', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, 'x', 'z', 'a', 's', 'd', 'c', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
@ -81,13 +99,80 @@ public class SpellCheckerProximityInfo {
NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
}; };
static { static {
for (int i = 0; i < PROXIMITY.length; i += ROW_SIZE) { buildProximityIndices(PROXIMITY, INDICES);
if (NUL != PROXIMITY[i]) INDICES.put(PROXIMITY[i], i); }
private static int getIndexOf(int characterCode) {
return computeIndex(characterCode, INDICES);
} }
} }
public static int getIndexOf(int characterCode) {
final Integer result = INDICES.get(characterCode); static class Cyrillic {
if (null == result) return -1; final private static TreeMap<Integer, Integer> INDICES = new TreeMap<Integer, Integer>();
return result; final private static int[] PROXIMITY = {
// TODO: This table is solely based on the keyboard layout. Consult with Russian
// speakers on commonly misspelled words/letters.
'й', 'ц', 'ф', 'ы', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'ц', 'й', 'ф', 'ы', 'в', 'у', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'у', 'ц', 'ы', 'в', 'а', 'к', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'к', 'у', 'в', 'а', 'п', 'е', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'е', 'к', 'а', 'п', 'р', 'н', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'н', 'е', 'п', 'р', 'о', 'г', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'г', 'н', 'р', 'о', 'л', 'ш', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'ш', 'г', 'о', 'л', 'д', 'щ', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'щ', 'ш', 'л', 'д', 'ж', 'з', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'з', 'щ', 'д', 'ж', 'э', 'х', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'х', 'з', 'ж', 'э', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'ф', 'й', 'ц', 'ы', 'я', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'ы', 'й', 'ц', 'у', 'ф', 'в', 'я', 'ч', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'в', 'ц', 'у', 'к', 'ы', 'а', 'я', 'ч', 'с', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'а', 'у', 'к', 'е', 'в', 'п', 'ч', 'с', 'м', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'п', 'к', 'е', 'н', 'а', 'р', 'с', 'м', 'и', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'р', 'е', 'н', 'г', 'п', 'о', 'м', 'и', 'т', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'о', 'н', 'г', 'ш', 'р', 'л', 'и', 'т', 'ь', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'л', 'г', 'ш', 'щ', 'о', 'д', 'т', 'ь', 'б', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'д', 'ш', 'щ', 'з', 'л', 'ж', 'ь', 'б', 'ю', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'ж', 'щ', 'з', 'х', 'д', 'э', 'б', 'ю', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'э', 'з', 'х', 'ю', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'я', 'ф', 'ы', 'в', 'ч', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'ч', 'ы', 'в', 'а', 'я', 'с', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'с', 'в', 'а', 'п', 'ч', 'м', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'м', 'а', 'п', 'р', 'с', 'и', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'и', 'п', 'р', 'о', 'м', 'т', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'т', 'р', 'о', 'л', 'и', 'ь', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'ь', 'о', 'л', 'д', 'т', 'б', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'б', 'л', 'д', 'ж', 'ь', 'ю', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'ю', 'д', 'ж', 'э', 'б', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
};
static {
buildProximityIndices(PROXIMITY, INDICES);
}
private static int getIndexOf(int characterCode) {
return computeIndex(characterCode, INDICES);
}
}
/**
 * Selects the proximity table that belongs to the given script.
 *
 * The array returned is the table itself, not a copy.
 *
 * @param script one of the {@code AndroidSpellCheckerService.SCRIPT_*} constants
 * @return {@code Latin.PROXIMITY} or {@code Cyrillic.PROXIMITY}
 * @throws RuntimeException if the script is neither Latin nor Cyrillic
 */
public static int[] getProximityForScript(final int script) {
    if (AndroidSpellCheckerService.SCRIPT_LATIN == script) {
        return Latin.PROXIMITY;
    }
    if (AndroidSpellCheckerService.SCRIPT_CYRILLIC == script) {
        return Cyrillic.PROXIMITY;
    }
    throw new RuntimeException("Wrong script supplied: " + script);
}
/**
 * Looks up a character code in the index of the given script.
 *
 * Dispatches to {@code Latin.getIndexOf} or {@code Cyrillic.getIndexOf}; both go
 * through {@code computeIndex}, which yields -1 when the code has no entry.
 *
 * @param characterCode the code to look up
 * @param script one of the {@code AndroidSpellCheckerService.SCRIPT_*} constants
 * @return the value the script's index holds for the code, or -1 if it has none
 * @throws RuntimeException if the script is neither Latin nor Cyrillic
 */
public static int getIndexOfCodeForScript(final int characterCode, final int script) {
switch (script) {
case AndroidSpellCheckerService.SCRIPT_LATIN:
return Latin.getIndexOf(characterCode);
case AndroidSpellCheckerService.SCRIPT_CYRILLIC:
return Cyrillic.getIndexOf(characterCode);
default:
throw new RuntimeException("Wrong script supplied: " + script);
}
} }
} }