diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionaryFileDumper.java b/java/src/com/android/inputmethod/latin/BinaryDictionaryFileDumper.java index 89944407e..e95172d1f 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionaryFileDumper.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionaryFileDumper.java @@ -67,25 +67,34 @@ public class BinaryDictionaryFileDumper { * Queries a content provider for the list of word lists for a specific locale * available to copy into Latin IME. */ - private static List getWordListIds(final Locale locale, final Context context) { + private static List getWordListWordListInfos(final Locale locale, + final Context context) { final ContentResolver resolver = context.getContentResolver(); final Uri dictionaryPackUri = getProviderUri(locale.toString()); final Cursor c = resolver.query(dictionaryPackUri, DICTIONARY_PROJECTION, null, null, null); - if (null == c) return Collections.emptyList(); + if (null == c) return Collections.emptyList(); if (c.getCount() <= 0 || !c.moveToFirst()) { c.close(); - return Collections.emptyList(); + return Collections.emptyList(); } - final List list = new ArrayList(); - do { - final String id = c.getString(0); - if (TextUtils.isEmpty(id)) continue; - list.add(id); - } while (c.moveToNext()); - c.close(); - return list; + try { + final List list = new ArrayList(); + do { + final String wordListId = c.getString(0); + final String wordListLocale = c.getString(1); + if (TextUtils.isEmpty(wordListId)) continue; + list.add(new WordListInfo(wordListId, wordListLocale)); + } while (c.moveToNext()); + c.close(); + return list; + } catch (Exception e) { + // Just in case we hit a problem in communication with the dictionary pack. + // We don't want to die. + Log.e(TAG, "Exception communicating with the dictionary pack : " + e); + return Collections.emptyList(); + } } @@ -108,7 +117,7 @@ public class BinaryDictionaryFileDumper { * to the cache file name designated by its id and locale, overwriting it if already present * and creating it (and its containing directory) if necessary. */ - private static AssetFileAddress cacheWordList(final String id, final Locale locale, + private static AssetFileAddress cacheWordList(final String id, final String locale, final ContentResolver resolver, final Context context) { final int COMPRESSED_CRYPTED_COMPRESSED = 0; @@ -213,10 +222,10 @@ public class BinaryDictionaryFileDumper { public static List cacheWordListsFromContentProvider(final Locale locale, final Context context) { final ContentResolver resolver = context.getContentResolver(); - final List idList = getWordListIds(locale, context); + final List idList = getWordListWordListInfos(locale, context); final List fileAddressList = new ArrayList(); - for (String id : idList) { - final AssetFileAddress afd = cacheWordList(id, locale, resolver, context); + for (WordListInfo id : idList) { + final AssetFileAddress afd = cacheWordList(id.mId, id.mLocale, resolver, context); if (null != afd) { fileAddressList.add(afd); } diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java b/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java index 38344300c..360c944d2 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java @@ -108,12 +108,19 @@ class BinaryDictionaryGetter { return sb.toString(); } + /** + * Helper method to get the top level cache directory. + */ + private static String getWordListCacheDirectory(final Context context) { + return context.getFilesDir() + File.separator + "dicts"; + } + /** * Find out the cache directory associated with a specific locale. */ - private static String getCacheDirectoryForLocale(Locale locale, Context context) { - final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale.toString()); - final String absoluteDirectoryName = context.getFilesDir() + File.separator + private static String getCacheDirectoryForLocale(final String locale, final Context context) { + final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale); + final String absoluteDirectoryName = getWordListCacheDirectory(context) + File.separator + relativeDirectoryName; final File directory = new File(absoluteDirectoryName); if (!directory.exists()) { @@ -135,11 +142,11 @@ class BinaryDictionaryGetter { * named like the locale, except it will also escape characters that look dangerous * to some file systems. * @param id the id of the dictionary for which to get a file name - * @param locale the locale for which to get the file name + * @param locale the locale for which to get the file name as a string * @param context the context to use for getting the directory * @return the name of the file to be created */ - public static String getCacheFileName(String id, Locale locale, Context context) { + public static String getCacheFileName(String id, String locale, Context context) { final String fileName = replaceFileNameDangerousCharacters(id); return getCacheDirectoryForLocale(locale, context) + File.separator + fileName; } @@ -199,25 +206,53 @@ class BinaryDictionaryGetter { } /** - * Returns the list of cached files for a specific locale. - * - * @param locale the locale to find the dictionary files for. - * @param context the context on which to open the files upon. - * @return an array of binary dictionary files, which may be empty but may not be null. + * Helper method to the list of cache directories, one for each distinct locale. */ - private static File[] getCachedWordLists(final Locale locale, - final Context context) { - final String directoryName = getCacheDirectoryForLocale(locale, context); - final File[] cacheFiles = new File(directoryName).listFiles(); - if (null == cacheFiles) return EMPTY_FILE_ARRAY; - return cacheFiles; + private static File[] getCachedDirectoryList(final Context context) { + return new File(getWordListCacheDirectory(context)).listFiles(); } /** - * Returns the id of the main dict for a specified locale. + * Returns the list of cached files for a specific locale. + * + * @param locale the locale to find the dictionary files for, as a string. + * @param context the context on which to open the files upon. + * @return an array of binary dictionary files, which may be empty but may not be null. + */ + private static File[] getCachedWordLists(final String locale, + final Context context) { + final File[] directoryList = getCachedDirectoryList(context); + if (null == directoryList) return EMPTY_FILE_ARRAY; + final ArrayList cacheFiles = new ArrayList(); + for (File directory : directoryList) { + if (!directory.isDirectory()) continue; + final String dirLocale = getWordListIdFromFileName(directory.getName()); + if (LocaleUtils.isMatch(LocaleUtils.getMatchLevel(dirLocale, locale))) { + final File[] wordLists = directory.listFiles(); + if (null != wordLists) { + for (File wordList : wordLists) { + cacheFiles.add(wordList); + } + } + } + } + if (cacheFiles.isEmpty()) return EMPTY_FILE_ARRAY; + return cacheFiles.toArray(EMPTY_FILE_ARRAY); + } + + /** + * Returns the id associated with the main word list for a specified locale. + * + * Word lists stored in Android Keyboard's resources are referred to as the "main" + * word lists. Since they can be updated like any other list, we need to assign a + * unique ID to them. This ID is just the name of the language (locale-wise) they + * are for, and this method returns this ID. */ private static String getMainDictId(final Locale locale) { - return locale.toString(); + // This works because we don't include by default different dictionaries for + // different countries. This actually needs to return the id that we would + // like to use for word lists included in resources, and the following is okay. + return locale.getLanguage().toString(); } /** @@ -239,7 +274,7 @@ class BinaryDictionaryGetter { // storage, but we don't really care about what was copied NOW: what we want is the // list of everything we ever cached, so we ignore the return value. BinaryDictionaryFileDumper.cacheWordListsFromContentProvider(locale, context); - final File[] cachedWordLists = getCachedWordLists(locale, context); + final File[] cachedWordLists = getCachedWordLists(locale.toString(), context); final String mainDictId = getMainDictId(locale); diff --git a/java/src/com/android/inputmethod/latin/LocaleUtils.java b/java/src/com/android/inputmethod/latin/LocaleUtils.java new file mode 100644 index 000000000..054f1f9b8 --- /dev/null +++ b/java/src/com/android/inputmethod/latin/LocaleUtils.java @@ -0,0 +1,157 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.android.inputmethod.latin; + +import android.text.TextUtils; + +/** + * A class to help with handling Locales in string form. + * + * This file has the same meaning and features (and shares all of its code) with + * the one in the dictionary pack. They need to be kept synchronized; for any + * update/bugfix to this file, consider also updating/fixing the version in the + * dictionary pack. + */ +public class LocaleUtils { + + private final static String TAG = LocaleUtils.class.getSimpleName(); + + // Locale match level constants. + // A higher level of match is guaranteed to have a higher numerical value. + // Some room is left within constants to add match cases that may arise necessary + // in the future, for example differentiating between the case where the countries + // are both present and different, and the case where one of the locales does not + // specify the countries. This difference is not needed now. + + // Nothing matches. + public static final int LOCALE_NO_MATCH = 0; + // The languages matches, but the country are different. Or, the reference locale requires a + // country and the tested locale does not have one. + public static final int LOCALE_LANGUAGE_MATCH_COUNTRY_DIFFER = 3; + // The languages and country match, but the variants are different. Or, the reference locale + // requires a variant and the tested locale does not have one. + public static final int LOCALE_LANGUAGE_AND_COUNTRY_MATCH_VARIANT_DIFFER = 6; + // The required locale is null or empty so it will accept anything, and the tested locale + // is non-null and non-empty. + public static final int LOCALE_ANY_MATCH = 10; + // The language matches, and the tested locale specifies a country but the reference locale + // does not require one. + public static final int LOCALE_LANGUAGE_MATCH = 15; + // The language and the country match, and the tested locale specifies a variant but the + // reference locale does not require one. + public static final int LOCALE_LANGUAGE_AND_COUNTRY_MATCH = 20; + // The compared locales are fully identical. This is the best match level. + public static final int LOCALE_FULL_MATCH = 30; + + // The level at which a match is "normally" considered a locale match with standard algorithms. + // Don't use this directly, use #isMatch to test. + private static final int LOCALE_MATCH = LOCALE_ANY_MATCH; + + // Make this match the maximum match level. If this evolves to have more than 2 digits + // when written in base 10, also adjust the getMatchLevelSortedString method. + private static final int MATCH_LEVEL_MAX = 30; + + /** + * Return how well a tested locale matches a reference locale. + * + * This will check the tested locale against the reference locale and return a measure of how + * a well it matches the reference. The general idea is that the tested locale has to match + * every specified part of the required locale. A full match occur when they are equal, a + * partial match when the tested locale agrees with the reference locale but is more specific, + * and a difference when the tested locale does not comply with all requirements from the + * reference locale. + * In more detail, if the reference locale specifies at least a language and the testedLocale + * does not specify one, or specifies a different one, LOCALE_NO_MATCH is returned. If the + * reference locale is empty or null, it will match anything - in the form of LOCALE_FULL_MATCH + * if the tested locale is empty or null, and LOCALE_ANY_MATCH otherwise. If the reference and + * tested locale agree on the language, but not on the country, + * LOCALE_LANGUAGE_MATCH_COUNTRY_DIFFER is returned if the reference locale specifies a country, + * and LOCALE_LANGUAGE_MATCH otherwise. + * If they agree on both the language and the country, but not on the variant, + * LOCALE_LANGUAGE_AND_COUNTRY_MATCH_VARIANT_DIFFER is returned if the reference locale + * specifies a variant, and LOCALE_LANGUAGE_AND_COUNTRY_MATCH otherwise. If everything matches, + * LOCALE_FULL_MATCH is returned. + * Examples: + * en <=> en_US => LOCALE_LANGUAGE_MATCH + * en_US <=> en => LOCALE_LANGUAGE_MATCH_COUNTRY_DIFFER + * en_US_POSIX <=> en_US_Android => LOCALE_LANGUAGE_AND_COUNTRY_MATCH_VARIANT_DIFFER + * en_US <=> en_US_Android => LOCALE_LANGUAGE_AND_COUNTRY_MATCH + * sp_US <=> en_US => LOCALE_NO_MATCH + * de <=> de => LOCALE_FULL_MATCH + * en_US <=> en_US => LOCALE_FULL_MATCH + * "" <=> en_US => LOCALE_ANY_MATCH + * + * @param referenceLocale the reference locale to test against. + * @param testedLocale the locale to test. + * @return a constant that measures how well the tested locale matches the reference locale. + */ + public static int getMatchLevel(String referenceLocale, String testedLocale) { + if (TextUtils.isEmpty(referenceLocale)) { + return TextUtils.isEmpty(testedLocale) ? LOCALE_FULL_MATCH : LOCALE_ANY_MATCH; + } + if (null == testedLocale) return LOCALE_NO_MATCH; + String[] referenceParams = referenceLocale.split("_", 3); + String[] testedParams = testedLocale.split("_", 3); + // By spec of String#split, [0] cannot be null and length cannot be 0. + if (!referenceParams[0].equals(testedParams[0])) return LOCALE_NO_MATCH; + switch (referenceParams.length) { + case 1: + return 1 == testedParams.length ? LOCALE_FULL_MATCH : LOCALE_LANGUAGE_MATCH; + case 2: + if (1 == testedParams.length) return LOCALE_LANGUAGE_MATCH_COUNTRY_DIFFER; + if (!referenceParams[1].equals(testedParams[1])) + return LOCALE_LANGUAGE_MATCH_COUNTRY_DIFFER; + if (3 == testedParams.length) return LOCALE_LANGUAGE_AND_COUNTRY_MATCH; + return LOCALE_FULL_MATCH; + case 3: + if (1 == testedParams.length) return LOCALE_LANGUAGE_MATCH_COUNTRY_DIFFER; + if (!referenceParams[1].equals(testedParams[1])) + return LOCALE_LANGUAGE_MATCH_COUNTRY_DIFFER; + if (2 == testedParams.length) return LOCALE_LANGUAGE_AND_COUNTRY_MATCH_VARIANT_DIFFER; + if (!referenceParams[2].equals(testedParams[2])) + return LOCALE_LANGUAGE_AND_COUNTRY_MATCH_VARIANT_DIFFER; + return LOCALE_FULL_MATCH; + } + // It should be impossible to come here + return LOCALE_NO_MATCH; + } + + /** + * Return a string that represents this match level, with better matches first. + * + * The strings are sorted in lexicographic order: a better match will always be less than + * a worse match when compared together. + */ + public static String getMatchLevelSortedString(int matchLevel) { + // This works because the match levels are 0~99 (actually 0~30) + // Ideally this should use a number of digits equals to the 1og10 of the greater matchLevel + return String.format("%02d", MATCH_LEVEL_MAX - matchLevel); + } + + /** + * Find out whether a match level should be considered a match. + * + * This method takes a match level as returned by the #getMatchLevel method, and returns whether + * it should be considered a match in the usual sense with standard Locale functions. + * + * @param level the match level, as returned by getMatchLevel. + * @return whether this is a match or not. + */ + public static boolean isMatch(int level) { + return LOCALE_MATCH <= level; + } +} diff --git a/java/src/com/android/inputmethod/latin/WordListInfo.java b/java/src/com/android/inputmethod/latin/WordListInfo.java new file mode 100644 index 000000000..54f04d78f --- /dev/null +++ b/java/src/com/android/inputmethod/latin/WordListInfo.java @@ -0,0 +1,29 @@ +/** + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.android.inputmethod.latin; + +/** + * Information container for a word list. + */ +public class WordListInfo { + public final String mId; + public final String mLocale; + public WordListInfo(final String id, final String locale) { + mId = id; + mLocale = locale; + } +}