Allow sharing dictionaries between similar locales.

Bug: 5058488
Change-Id: Ib12013f58afad957a8205b439f87480cc12ea06f
This commit is contained in:
Jean Chalard 2011-08-25 18:04:21 +09:00
parent 633bc88a0f
commit de4e8dedcc
4 changed files with 264 additions and 34 deletions

View file

@ -67,25 +67,34 @@ public class BinaryDictionaryFileDumper {
* Queries a content provider for the list of word lists for a specific locale
* available to copy into Latin IME.
*/
private static List<String> getWordListIds(final Locale locale, final Context context) {
private static List<WordListInfo> getWordListWordListInfos(final Locale locale,
final Context context) {
final ContentResolver resolver = context.getContentResolver();
final Uri dictionaryPackUri = getProviderUri(locale.toString());
final Cursor c = resolver.query(dictionaryPackUri, DICTIONARY_PROJECTION, null, null, null);
if (null == c) return Collections.<String>emptyList();
if (null == c) return Collections.<WordListInfo>emptyList();
if (c.getCount() <= 0 || !c.moveToFirst()) {
c.close();
return Collections.<String>emptyList();
return Collections.<WordListInfo>emptyList();
}
final List<String> list = new ArrayList<String>();
do {
final String id = c.getString(0);
if (TextUtils.isEmpty(id)) continue;
list.add(id);
} while (c.moveToNext());
c.close();
return list;
try {
    // Collect one WordListInfo per row: column 0 is read as the word list id and
    // column 1 as its locale.
    final List<WordListInfo> list = new ArrayList<WordListInfo>();
    do {
        final String wordListId = c.getString(0);
        final String wordListLocale = c.getString(1);
        // Skip rows that carry no id.
        if (TextUtils.isEmpty(wordListId)) continue;
        list.add(new WordListInfo(wordListId, wordListLocale));
    } while (c.moveToNext());
    return list;
} catch (Exception e) {
    // Just in case we hit a problem in communication with the dictionary pack.
    // We don't want to die.
    Log.e(TAG, "Exception communicating with the dictionary pack : " + e);
    return Collections.<WordListInfo>emptyList();
} finally {
    // Release the cursor on every exit path. Closing it only at the end of the try
    // body leaked it whenever reading a row threw.
    c.close();
}
}
@ -108,7 +117,7 @@ public class BinaryDictionaryFileDumper {
* to the cache file name designated by its id and locale, overwriting it if already present
* and creating it (and its containing directory) if necessary.
*/
private static AssetFileAddress cacheWordList(final String id, final Locale locale,
private static AssetFileAddress cacheWordList(final String id, final String locale,
final ContentResolver resolver, final Context context) {
final int COMPRESSED_CRYPTED_COMPRESSED = 0;
@ -213,10 +222,10 @@ public class BinaryDictionaryFileDumper {
public static List<AssetFileAddress> cacheWordListsFromContentProvider(final Locale locale,
final Context context) {
final ContentResolver resolver = context.getContentResolver();
final List<String> idList = getWordListIds(locale, context);
final List<WordListInfo> idList = getWordListWordListInfos(locale, context);
final List<AssetFileAddress> fileAddressList = new ArrayList<AssetFileAddress>();
for (String id : idList) {
final AssetFileAddress afd = cacheWordList(id, locale, resolver, context);
for (WordListInfo id : idList) {
final AssetFileAddress afd = cacheWordList(id.mId, id.mLocale, resolver, context);
if (null != afd) {
fileAddressList.add(afd);
}

View file

@ -108,12 +108,19 @@ class BinaryDictionaryGetter {
return sb.toString();
}
/**
* Helper method to get the top level cache directory.
*/
private static String getWordListCacheDirectory(final Context context) {
    // The cache root is the "dicts" entry directly under the context's files directory.
    final File filesDirectory = context.getFilesDir();
    return filesDirectory + File.separator + "dicts";
}
/**
* Find out the cache directory associated with a specific locale.
*/
private static String getCacheDirectoryForLocale(Locale locale, Context context) {
final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale.toString());
final String absoluteDirectoryName = context.getFilesDir() + File.separator
private static String getCacheDirectoryForLocale(final String locale, final Context context) {
final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale);
final String absoluteDirectoryName = getWordListCacheDirectory(context) + File.separator
+ relativeDirectoryName;
final File directory = new File(absoluteDirectoryName);
if (!directory.exists()) {
@ -135,11 +142,11 @@ class BinaryDictionaryGetter {
* named like the locale, except it will also escape characters that look dangerous
* to some file systems.
* @param id the id of the dictionary for which to get a file name
* @param locale the locale for which to get the file name
* @param locale the locale for which to get the file name as a string
* @param context the context to use for getting the directory
* @return the name of the file to be created
*/
public static String getCacheFileName(String id, Locale locale, Context context) {
public static String getCacheFileName(String id, String locale, Context context) {
final String fileName = replaceFileNameDangerousCharacters(id);
return getCacheDirectoryForLocale(locale, context) + File.separator + fileName;
}
@ -199,25 +206,53 @@ class BinaryDictionaryGetter {
}
/**
* Returns the list of cached files for a specific locale.
*
* @param locale the locale to find the dictionary files for.
* @param context the context on which to open the files upon.
* @return an array of binary dictionary files, which may be empty but may not be null.
* Helper method to get the list of cache directories, one for each distinct locale.
*/
private static File[] getCachedWordLists(final Locale locale,
final Context context) {
final String directoryName = getCacheDirectoryForLocale(locale, context);
final File[] cacheFiles = new File(directoryName).listFiles();
if (null == cacheFiles) return EMPTY_FILE_ARRAY;
return cacheFiles;
private static File[] getCachedDirectoryList(final Context context) {
    final File cacheRoot = new File(getWordListCacheDirectory(context));
    // File#listFiles returns null when the path is not a directory or cannot be read,
    // and that null is passed straight through to the caller.
    return cacheRoot.listFiles();
}
/**
* Returns the id of the main dict for a specified locale.
* Returns the list of cached files for a specific locale.
*
* @param locale the locale to find the dictionary files for, as a string.
* @param context the context on which to open the files upon.
* @return an array of binary dictionary files, which may be empty but may not be null.
*/
private static File[] getCachedWordLists(final String locale,
        final Context context) {
    final File[] localeDirectories = getCachedDirectoryList(context);
    if (null == localeDirectories) {
        return EMPTY_FILE_ARRAY;
    }
    final ArrayList<File> matchingWordLists = new ArrayList<File>();
    for (int i = 0; i < localeDirectories.length; ++i) {
        final File candidate = localeDirectories[i];
        if (!candidate.isDirectory()) {
            continue;
        }
        // The directory name is decoded back into a locale string, which then acts as the
        // reference locale the requested locale is tested against.
        final String dirLocale = getWordListIdFromFileName(candidate.getName());
        final int matchLevel = LocaleUtils.getMatchLevel(dirLocale, locale);
        if (!LocaleUtils.isMatch(matchLevel)) {
            continue;
        }
        final File[] contents = candidate.listFiles();
        if (null == contents) {
            continue;
        }
        for (int j = 0; j < contents.length; ++j) {
            matchingWordLists.add(contents[j]);
        }
    }
    return matchingWordLists.isEmpty()
            ? EMPTY_FILE_ARRAY
            : matchingWordLists.toArray(EMPTY_FILE_ARRAY);
}
/**
* Returns the id associated with the main word list for a specified locale.
*
* Word lists stored in Android Keyboard's resources are referred to as the "main"
* word lists. Since they can be updated like any other list, we need to assign a
* unique ID to them. This ID is just the name of the language (locale-wise) they
* are for, and this method returns this ID.
*/
private static String getMainDictId(final Locale locale) {
return locale.toString();
// This works because we don't include by default different dictionaries for
// different countries. This actually needs to return the id that we would
// like to use for word lists included in resources, and the following is okay.
return locale.getLanguage().toString();
}
/**
@ -239,7 +274,7 @@ class BinaryDictionaryGetter {
// storage, but we don't really care about what was copied NOW: what we want is the
// list of everything we ever cached, so we ignore the return value.
BinaryDictionaryFileDumper.cacheWordListsFromContentProvider(locale, context);
final File[] cachedWordLists = getCachedWordLists(locale, context);
final File[] cachedWordLists = getCachedWordLists(locale.toString(), context);
final String mainDictId = getMainDictId(locale);

View file

@ -0,0 +1,157 @@
/*
* Copyright (C) 2011 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.android.inputmethod.latin;
import android.text.TextUtils;

import java.util.Locale;
/**
* A class to help with handling Locales in string form.
*
* This file has the same meaning and features (and shares all of its code) with
* the one in the dictionary pack. They need to be kept synchronized; for any
* update/bugfix to this file, consider also updating/fixing the version in the
* dictionary pack.
*/
public class LocaleUtils {
    // Locale match level constants.
    // A higher level of match is guaranteed to have a higher numerical value.
    // Some room is left between constants to add match cases that may become necessary
    // in the future, for example differentiating between the case where the countries
    // are both present and different, and the case where one of the locales does not
    // specify the country. This difference is not needed now.

    // Nothing matches.
    public static final int LOCALE_NO_MATCH = 0;
    // The languages match, but the countries are different. Or, the reference locale requires a
    // country and the tested locale does not have one.
    public static final int LOCALE_LANGUAGE_MATCH_COUNTRY_DIFFER = 3;
    // The languages and countries match, but the variants are different. Or, the reference
    // locale requires a variant and the tested locale does not have one.
    public static final int LOCALE_LANGUAGE_AND_COUNTRY_MATCH_VARIANT_DIFFER = 6;
    // The required locale is null or empty so it will accept anything, and the tested locale
    // is non-null and non-empty.
    public static final int LOCALE_ANY_MATCH = 10;
    // The language matches, and the tested locale specifies a country but the reference locale
    // does not require one.
    public static final int LOCALE_LANGUAGE_MATCH = 15;
    // The language and the country match, and the tested locale specifies a variant but the
    // reference locale does not require one.
    public static final int LOCALE_LANGUAGE_AND_COUNTRY_MATCH = 20;
    // The compared locales are fully identical. This is the best match level.
    public static final int LOCALE_FULL_MATCH = 30;

    // The level at which a match is "normally" considered a locale match with standard algorithms.
    // Don't use this directly, use #isMatch to test.
    private static final int LOCALE_MATCH = LOCALE_ANY_MATCH;

    // The maximum match level, derived from the best level so the two cannot drift apart.
    // If this evolves to have more than 2 digits when written in base 10, also adjust the
    // getMatchLevelSortedString method.
    private static final int MATCH_LEVEL_MAX = LOCALE_FULL_MATCH;

    /**
     * Return how well a tested locale matches a reference locale.
     *
     * This will check the tested locale against the reference locale and return a measure of how
     * well it matches the reference. The general idea is that the tested locale has to match
     * every specified part of the required locale. A full match occurs when they are equal, a
     * partial match when the tested locale agrees with the reference locale but is more specific,
     * and a difference when the tested locale does not comply with all requirements from the
     * reference locale.
     * In more detail, if the reference locale specifies at least a language and the testedLocale
     * does not specify one, or specifies a different one, LOCALE_NO_MATCH is returned. If the
     * reference locale is empty or null, it will match anything - in the form of LOCALE_FULL_MATCH
     * if the tested locale is empty or null, and LOCALE_ANY_MATCH otherwise. If the reference and
     * tested locale agree on the language, but not on the country,
     * LOCALE_LANGUAGE_MATCH_COUNTRY_DIFFER is returned if the reference locale specifies a country,
     * and LOCALE_LANGUAGE_MATCH otherwise.
     * If they agree on both the language and the country, but not on the variant,
     * LOCALE_LANGUAGE_AND_COUNTRY_MATCH_VARIANT_DIFFER is returned if the reference locale
     * specifies a variant, and LOCALE_LANGUAGE_AND_COUNTRY_MATCH otherwise. If everything matches,
     * LOCALE_FULL_MATCH is returned.
     * Examples (reference <=> tested):
     * en <=> en_US => LOCALE_LANGUAGE_MATCH
     * en_US <=> en => LOCALE_LANGUAGE_MATCH_COUNTRY_DIFFER
     * en_US_POSIX <=> en_US_Android => LOCALE_LANGUAGE_AND_COUNTRY_MATCH_VARIANT_DIFFER
     * en_US <=> en_US_Android => LOCALE_LANGUAGE_AND_COUNTRY_MATCH
     * sp_US <=> en_US => LOCALE_NO_MATCH
     * de <=> de => LOCALE_FULL_MATCH
     * en_US <=> en_US => LOCALE_FULL_MATCH
     * "" <=> en_US => LOCALE_ANY_MATCH
     *
     * @param referenceLocale the reference locale to test against.
     * @param testedLocale the locale to test.
     * @return a constant that measures how well the tested locale matches the reference locale.
     */
    public static int getMatchLevel(String referenceLocale, String testedLocale) {
        if (isEmpty(referenceLocale)) {
            return isEmpty(testedLocale) ? LOCALE_FULL_MATCH : LOCALE_ANY_MATCH;
        }
        if (null == testedLocale) {
            return LOCALE_NO_MATCH;
        }
        // Split into at most language, country and variant; the variant keeps any
        // further underscores it contains.
        final String[] referenceParams = referenceLocale.split("_", 3);
        final String[] testedParams = testedLocale.split("_", 3);
        // By spec of String#split, [0] cannot be null and length cannot be 0.
        if (!referenceParams[0].equals(testedParams[0])) {
            return LOCALE_NO_MATCH;
        }
        switch (referenceParams.length) {
        case 1:
            return 1 == testedParams.length ? LOCALE_FULL_MATCH : LOCALE_LANGUAGE_MATCH;
        case 2:
            if (1 == testedParams.length) {
                return LOCALE_LANGUAGE_MATCH_COUNTRY_DIFFER;
            }
            if (!referenceParams[1].equals(testedParams[1])) {
                return LOCALE_LANGUAGE_MATCH_COUNTRY_DIFFER;
            }
            if (3 == testedParams.length) {
                return LOCALE_LANGUAGE_AND_COUNTRY_MATCH;
            }
            return LOCALE_FULL_MATCH;
        case 3:
            if (1 == testedParams.length) {
                return LOCALE_LANGUAGE_MATCH_COUNTRY_DIFFER;
            }
            if (!referenceParams[1].equals(testedParams[1])) {
                return LOCALE_LANGUAGE_MATCH_COUNTRY_DIFFER;
            }
            if (2 == testedParams.length) {
                return LOCALE_LANGUAGE_AND_COUNTRY_MATCH_VARIANT_DIFFER;
            }
            if (!referenceParams[2].equals(testedParams[2])) {
                return LOCALE_LANGUAGE_AND_COUNTRY_MATCH_VARIANT_DIFFER;
            }
            return LOCALE_FULL_MATCH;
        }
        // It should be impossible to come here
        return LOCALE_NO_MATCH;
    }

    /**
     * Return a string that represents this match level, with better matches first.
     *
     * The strings are sorted in lexicographic order: a better match will always be less than
     * a worse match when compared together.
     *
     * @param matchLevel the match level, as returned by getMatchLevel.
     * @return a two-digit, zero-padded string that sorts better matches first.
     */
    public static String getMatchLevelSortedString(int matchLevel) {
        // This works because the match levels are 0~99 (actually 0~30)
        // Ideally this should use a number of digits equal to the log10 of the greatest
        // matchLevel.
        // Locale.ROOT keeps the digits ASCII whatever the default locale is; formatting with
        // the default locale may emit localized digits and break the ordering contract.
        return String.format(Locale.ROOT, "%02d", MATCH_LEVEL_MAX - matchLevel);
    }

    /**
     * Find out whether a match level should be considered a match.
     *
     * This method takes a match level as returned by the #getMatchLevel method, and returns whether
     * it should be considered a match in the usual sense with standard Locale functions.
     *
     * @param level the match level, as returned by getMatchLevel.
     * @return whether this is a match or not.
     */
    public static boolean isMatch(int level) {
        return LOCALE_MATCH <= level;
    }

    /**
     * Tell whether a string is null or has zero length.
     *
     * Local replacement for the framework's TextUtils#isEmpty so that this class only needs
     * the Java standard library.
     */
    private static boolean isEmpty(final String string) {
        return null == string || string.length() == 0;
    }
}

View file

@ -0,0 +1,29 @@
/**
* Copyright (C) 2011 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.android.inputmethod.latin;
/**
 * Simple value holder describing a word list: its id and its locale, both as strings.
 */
public class WordListInfo {
    /** The id of the word list. */
    public final String mId;
    /** The locale of the word list, in string form. */
    public final String mLocale;

    /**
     * Creates a holder for the given id and locale; both values are stored as passed.
     *
     * @param id the id of the word list.
     * @param locale the locale of the word list, as a string.
     */
    public WordListInfo(final String id, final String locale) {
        this.mId = id;
        this.mLocale = locale;
    }
}