am 3da7df8d: Merge "Add proximity for Russian."

* commit '3da7df8db7595de513ea3ab6a4e2f532f7de0166':
  Add proximity for Russian.
main
Jean Chalard 2011-12-08 02:24:24 -08:00 committed by Android Git Automerger
commit 178470e7f3
3 changed files with 182 additions and 86 deletions

View File

@ -65,12 +65,12 @@ public class ProximityInfo {
return new ProximityInfo(1, 1, 1, 1, 1, 1, Collections.<Key>emptyList(), null);
}
public static ProximityInfo createSpellCheckerProximityInfo() {
public static ProximityInfo createSpellCheckerProximityInfo(final int[] proximity) {
final ProximityInfo spellCheckerProximityInfo = createDummyProximityInfo();
spellCheckerProximityInfo.mNativeProximityInfo =
spellCheckerProximityInfo.setProximityInfoNative(
SpellCheckerProximityInfo.ROW_SIZE,
480, 300, 10, 3, SpellCheckerProximityInfo.PROXIMITY,
480, 300, 11, 3, proximity,
0, null, null, null, null, null, null, null, null);
return spellCheckerProximityInfo;
}

View File

@ -99,6 +99,25 @@ public class AndroidSpellCheckerService extends SpellCheckerService
private final HashSet<WeakReference<DictionaryCollection>> mDictionaryCollectionsList =
new HashSet<WeakReference<DictionaryCollection>>();
/** Script identifier for languages written with the Latin alphabet. */
public static final int SCRIPT_LATIN = 0;
/** Script identifier for languages written with the Cyrillic alphabet. */
public static final int SCRIPT_CYRILLIC = 1;
/**
 * Supported language codes mapped to the script they are written in.
 *
 * Words written in a script other than the one selected are not checked: the
 * dictionary has none of them, so every such word would be underlined without
 * any suggestion ever being available.
 */
private static final TreeMap<String, Integer> mLanguageToScript;
static {
    final TreeMap<String, Integer> table = new TreeMap<String, Integer>();
    // Every language below is written with the Latin alphabet.
    final String[] latinLanguages = { "en", "fr", "de", "nl", "cs", "es", "it" };
    for (final String language : latinLanguages) {
        table.put(language, SCRIPT_LATIN);
    }
    table.put("ru", SCRIPT_CYRILLIC);
    mLanguageToScript = table;
}
@Override public void onCreate() {
super.onCreate();
mSuggestionThreshold =
@ -110,6 +129,15 @@ public class AndroidSpellCheckerService extends SpellCheckerService
onSharedPreferenceChanged(prefs, PREF_USE_CONTACTS_KEY);
}
/**
 * Looks up the script identifier registered for the language of a locale.
 *
 * @param locale the locale whose language code is looked up
 * @return the script identifier stored in mLanguageToScript for that language
 * @throws RuntimeException if the language has no entry in mLanguageToScript
 */
private static int getScriptFromLocale(final Locale locale) {
    final String language = locale.getLanguage();
    final Integer scriptId = mLanguageToScript.get(language);
    if (scriptId != null) {
        return scriptId;
    }
    // No entry: the caller handed us a language this service does not declare.
    throw new RuntimeException("We have been called with an unsupported language: \""
            + language + "\". Framework bug?");
}
@Override
public void onSharedPreferenceChanged(final SharedPreferences prefs, final String key) {
if (!PREF_USE_CONTACTS_KEY.equals(key)) return;
@ -363,7 +391,9 @@ public class AndroidSpellCheckerService extends SpellCheckerService
}
public DictAndProximity createDictAndProximity(final Locale locale) {
final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo();
final int script = getScriptFromLocale(locale);
final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo(
SpellCheckerProximityInfo.getProximityForScript(script));
final Resources resources = getResources();
final int fallbackResourceId = Utils.getMainDictionaryResourceId(resources);
final DictionaryCollection dictionaryCollection =
@ -415,25 +445,6 @@ public class AndroidSpellCheckerService extends SpellCheckerService
}
private static class AndroidSpellCheckerSession extends Session {
private static final int SCRIPT_LATIN = 0;
private static final int SCRIPT_CYRILLIC = 1;
private static final TreeMap<String, Integer> mLanguageToScript;
static {
// List of the supported languages and their associated script. We won't check
// words written in another script than the selected script, because we know we
// don't have those in our dictionary so we will underline everything and we
// will never have any suggestions, so it makes no sense checking them.
mLanguageToScript = new TreeMap<String, Integer>();
mLanguageToScript.put("en", SCRIPT_LATIN);
mLanguageToScript.put("fr", SCRIPT_LATIN);
mLanguageToScript.put("de", SCRIPT_LATIN);
mLanguageToScript.put("nl", SCRIPT_LATIN);
mLanguageToScript.put("cs", SCRIPT_LATIN);
mLanguageToScript.put("es", SCRIPT_LATIN);
mLanguageToScript.put("it", SCRIPT_LATIN);
mLanguageToScript.put("ru", SCRIPT_CYRILLIC);
}
// Immutable, but need the locale which is not available in the constructor yet
private DictionaryPool mDictionaryPool;
// Likewise
@ -452,12 +463,7 @@ public class AndroidSpellCheckerService extends SpellCheckerService
final String localeString = getLocale();
mDictionaryPool = mService.getDictionaryPool(localeString);
mLocale = LocaleUtils.constructLocaleFromString(localeString);
final Integer script = mLanguageToScript.get(mLocale.getLanguage());
if (null == script) {
throw new RuntimeException("We have been called with an unsupported language: \""
+ mLocale.getLanguage() + "\". Framework bug?");
}
mScript = script;
mScript = getScriptFromLocale(mLocale);
}
/*
@ -565,12 +571,17 @@ public class AndroidSpellCheckerService extends SpellCheckerService
final int length = text.length();
for (int i = 0; i < length; ++i) {
final int character = text.codePointAt(i);
final int proximityIndex = SpellCheckerProximityInfo.getIndexOf(character);
final int proximityIndex =
SpellCheckerProximityInfo.getIndexOfCodeForScript(character, mScript);
final int[] proximities;
if (-1 == proximityIndex) {
proximities = new int[] { character };
} else {
proximities = Arrays.copyOfRange(SpellCheckerProximityInfo.PROXIMITY,
// TODO: an initial examination seems to reveal this is actually used
// read-only. It should be possible to compute the arrays statically once
// and skip doing a copy each time here.
proximities = Arrays.copyOfRange(
SpellCheckerProximityInfo.getProximityForScript(mScript),
proximityIndex,
proximityIndex + SpellCheckerProximityInfo.ROW_SIZE);
}

View File

@ -29,11 +29,27 @@ public class SpellCheckerProximityInfo {
// as the size of the passed array afterwards so they can't be different.
final public static int ROW_SIZE = ProximityInfo.MAX_PROXIMITY_CHARS_SIZE;
// Helper methods
/**
 * Fills a lookup map from the first code of each row to that row's start offset.
 *
 * The array is walked in steps of ROW_SIZE; a row whose first entry is NUL is
 * left out of the map.
 *
 * @param proximity the flat proximity table to index
 * @param indices the map receiving (first code of row, offset of row) pairs
 */
final protected static void buildProximityIndices(final int[] proximity,
        final TreeMap<Integer, Integer> indices) {
    final int tableLength = proximity.length;
    int rowStart = 0;
    while (rowStart < tableLength) {
        final int rowKey = proximity[rowStart];
        if (rowKey != NUL) {
            indices.put(rowKey, rowStart);
        }
        rowStart += ROW_SIZE;
    }
}
/**
 * Returns the offset stored for a character code, or -1 when there is none.
 *
 * @param characterCode the code to look up
 * @param indices the map from character code to offset
 * @return the mapped offset, or -1 if the map has no entry for the code
 */
final protected static int computeIndex(final int characterCode,
        final TreeMap<Integer, Integer> indices) {
    final Integer offset = indices.get(characterCode);
    return (offset != null) ? offset : -1;
}
static class Latin {
// This is a map from the code point to the index in the PROXIMITY array.
// At the time the native code to read the binary dictionary needs the proximity info be passed
// as a flat array spaced by MAX_PROXIMITY_CHARS_SIZE columns, one for each input character.
// Since we need to build such an array, we want to be able to search in our big proximity data
// quickly by character, and a map is probably the best way to do this.
// At the time the native code to read the binary dictionary needs the proximity info be
// passed as a flat array spaced by MAX_PROXIMITY_CHARS_SIZE columns, one for each input
// character.
// Since we need to build such an array, we want to be able to search in our big proximity
// data quickly by character, and a map is probably the best way to do this.
final private static TreeMap<Integer, Integer> INDICES = new TreeMap<Integer, Integer>();
// The proximity here is the union of
@ -46,7 +62,7 @@ public class SpellCheckerProximityInfo {
// to spell check has been entered with one of the keyboards above. Also, specifically
// to English, many spelling errors consist of the last vowel of the word being wrong
// because in English vowels tend to merge with each other in pronunciation.
final public static int[] PROXIMITY = {
final private static int[] PROXIMITY = {
'q', 'w', 's', 'a', 'z', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'w', 'q', 'a', 's', 'd', 'e', 'x', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'e', 'w', 's', 'd', 'f', 'r', 'a', 'i', 'o', 'u', NUL, NUL, NUL, NUL, NUL, NUL,
@ -57,6 +73,7 @@ public class SpellCheckerProximityInfo {
'i', 'u', 'j', 'k', 'l', 'o', 'a', 'e', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'o', 'i', 'k', 'l', 'p', 'a', 'e', 'u', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'p', 'o', 'l', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'a', 'z', 'x', 's', 'w', 'q', 'e', 'i', 'o', 'u', NUL, NUL, NUL, NUL, NUL, NUL,
's', 'q', 'a', 'z', 'x', 'c', 'd', 'e', 'w', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
@ -68,6 +85,7 @@ public class SpellCheckerProximityInfo {
'k', 'u', 'j', 'm', 'l', 'o', 'i', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'l', 'i', 'k', 'p', 'o', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'z', 'a', 's', 'd', 'x', 't', 'g', 'h', 'j', 'u', 'q', 'e', NUL, NUL, NUL, NUL,
'x', 'z', 'a', 's', 'd', 'c', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
@ -81,13 +99,80 @@ public class SpellCheckerProximityInfo {
NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
};
static {
for (int i = 0; i < PROXIMITY.length; i += ROW_SIZE) {
if (NUL != PROXIMITY[i]) INDICES.put(PROXIMITY[i], i);
buildProximityIndices(PROXIMITY, INDICES);
}
private static int getIndexOf(int characterCode) {
return computeIndex(characterCode, INDICES);
}
}
public static int getIndexOf(int characterCode) {
final Integer result = INDICES.get(characterCode);
if (null == result) return -1;
return result;
static class Cyrillic {
final private static TreeMap<Integer, Integer> INDICES = new TreeMap<Integer, Integer>();
final private static int[] PROXIMITY = {
// TODO: This table is solely based on the keyboard layout. Consult with Russian
// speakers on commonly misspelled words/letters.
'й', 'ц', 'ф', 'ы', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'ц', 'й', 'ф', 'ы', 'в', 'у', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'у', 'ц', 'ы', 'в', 'а', 'к', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'к', 'у', 'в', 'а', 'п', 'е', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'е', 'к', 'а', 'п', 'р', 'н', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'н', 'е', 'п', 'р', 'о', 'г', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'г', 'н', 'р', 'о', 'л', 'ш', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'ш', 'г', 'о', 'л', 'д', 'щ', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'щ', 'ш', 'л', 'д', 'ж', 'з', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'з', 'щ', 'д', 'ж', 'э', 'х', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'х', 'з', 'ж', 'э', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'ф', 'й', 'ц', 'ы', 'я', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'ы', 'й', 'ц', 'у', 'ф', 'в', 'я', 'ч', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'в', 'ц', 'у', 'к', 'ы', 'а', 'я', 'ч', 'с', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'а', 'у', 'к', 'е', 'в', 'п', 'ч', 'с', 'м', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'п', 'к', 'е', 'н', 'а', 'р', 'с', 'м', 'и', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'р', 'е', 'н', 'г', 'п', 'о', 'м', 'и', 'т', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'о', 'н', 'г', 'ш', 'р', 'л', 'и', 'т', 'ь', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'л', 'г', 'ш', 'щ', 'о', 'д', 'т', 'ь', 'б', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'д', 'ш', 'щ', 'з', 'л', 'ж', 'ь', 'б', 'ю', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'ж', 'щ', 'з', 'х', 'д', 'э', 'б', 'ю', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'э', 'з', 'х', 'ю', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'я', 'ф', 'ы', 'в', 'ч', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'ч', 'ы', 'в', 'а', 'я', 'с', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'с', 'в', 'а', 'п', 'ч', 'м', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'м', 'а', 'п', 'р', 'с', 'и', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'и', 'п', 'р', 'о', 'м', 'т', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'т', 'р', 'о', 'л', 'и', 'ь', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'ь', 'о', 'л', 'д', 'т', 'б', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'б', 'л', 'д', 'ж', 'ь', 'ю', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
'ю', 'д', 'ж', 'э', 'б', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
};
static {
buildProximityIndices(PROXIMITY, INDICES);
}
private static int getIndexOf(int characterCode) {
return computeIndex(characterCode, INDICES);
}
}
/**
 * Selects the flat proximity table that belongs to a script.
 *
 * The array returned is the table held by the script's class, not a copy.
 *
 * @param script one of the AndroidSpellCheckerService.SCRIPT_* identifiers
 * @return the proximity table of that script
 * @throws RuntimeException if the identifier matches no known script
 */
public static int[] getProximityForScript(final int script) {
    if (AndroidSpellCheckerService.SCRIPT_LATIN == script) {
        return Latin.PROXIMITY;
    }
    if (AndroidSpellCheckerService.SCRIPT_CYRILLIC == script) {
        return Cyrillic.PROXIMITY;
    }
    throw new RuntimeException("Wrong script supplied: " + script);
}
/**
 * Looks a character code up in the proximity index of the given script.
 *
 * @param characterCode the code to look up
 * @param script one of the AndroidSpellCheckerService.SCRIPT_* identifiers
 * @return whatever the script's getIndexOf returns for the code
 * @throws RuntimeException if the identifier matches no known script
 */
public static int getIndexOfCodeForScript(final int characterCode, final int script) {
    if (AndroidSpellCheckerService.SCRIPT_LATIN == script) {
        return Latin.getIndexOf(characterCode);
    }
    if (AndroidSpellCheckerService.SCRIPT_CYRILLIC == script) {
        return Cyrillic.getIndexOf(characterCode);
    }
    throw new RuntimeException("Wrong script supplied: " + script);
}
}