Merge remote-tracking branch 'goog/master' into merge
Conflicts: java/src/com/android/inputmethod/compat/InputMethodServiceCompatWrapper.java Change-Id: I596f9584478637284d6922aae672dc276563ffc1
This commit is contained in:
commit
bee72ecf21
11 changed files with 150 additions and 130 deletions
|
@ -48,7 +48,7 @@
|
||||||
<string name="bigram_suggestion" msgid="1323347224043514969">"Προτάσεις bigram"</string>
|
<string name="bigram_suggestion" msgid="1323347224043514969">"Προτάσεις bigram"</string>
|
||||||
<string name="bigram_suggestion_summary" msgid="4383845146070101531">"Χρήση προηγούμενης λέξης για τη βελτίωση πρότασης"</string>
|
<string name="bigram_suggestion_summary" msgid="4383845146070101531">"Χρήση προηγούμενης λέξης για τη βελτίωση πρότασης"</string>
|
||||||
<string name="added_word" msgid="8993883354622484372">"<xliff:g id="WORD">%s</xliff:g> : Αποθηκεύτηκε"</string>
|
<string name="added_word" msgid="8993883354622484372">"<xliff:g id="WORD">%s</xliff:g> : Αποθηκεύτηκε"</string>
|
||||||
<string name="label_go_key" msgid="1635148082137219148">"Μετάβαση"</string>
|
<string name="label_go_key" msgid="1635148082137219148">"Μετ."</string>
|
||||||
<string name="label_next_key" msgid="362972844525672568">"Επόμενο"</string>
|
<string name="label_next_key" msgid="362972844525672568">"Επόμενο"</string>
|
||||||
<string name="label_done_key" msgid="2441578748772529288">"Τέλος"</string>
|
<string name="label_done_key" msgid="2441578748772529288">"Τέλος"</string>
|
||||||
<string name="label_send_key" msgid="2815056534433717444">"Αποστολή"</string>
|
<string name="label_send_key" msgid="2815056534433717444">"Αποστολή"</string>
|
||||||
|
|
|
@ -48,7 +48,7 @@
|
||||||
<string name="bigram_suggestion" msgid="1323347224043514969">"Digram-suggesties"</string>
|
<string name="bigram_suggestion" msgid="1323347224043514969">"Digram-suggesties"</string>
|
||||||
<string name="bigram_suggestion_summary" msgid="4383845146070101531">"Vorig woord gebruiken om suggestie te verbeteren"</string>
|
<string name="bigram_suggestion_summary" msgid="4383845146070101531">"Vorig woord gebruiken om suggestie te verbeteren"</string>
|
||||||
<string name="added_word" msgid="8993883354622484372">"<xliff:g id="WORD">%s</xliff:g>: opgeslagen"</string>
|
<string name="added_word" msgid="8993883354622484372">"<xliff:g id="WORD">%s</xliff:g>: opgeslagen"</string>
|
||||||
<string name="label_go_key" msgid="1635148082137219148">"Beginnen"</string>
|
<string name="label_go_key" msgid="1635148082137219148">"Start"</string>
|
||||||
<string name="label_next_key" msgid="362972844525672568">"Volgende"</string>
|
<string name="label_next_key" msgid="362972844525672568">"Volgende"</string>
|
||||||
<string name="label_done_key" msgid="2441578748772529288">"Gereed"</string>
|
<string name="label_done_key" msgid="2441578748772529288">"Gereed"</string>
|
||||||
<string name="label_send_key" msgid="2815056534433717444">"Verzenden"</string>
|
<string name="label_send_key" msgid="2815056534433717444">"Verzenden"</string>
|
||||||
|
|
|
@ -48,7 +48,7 @@
|
||||||
<string name="bigram_suggestion" msgid="1323347224043514969">"Sugestii pentru cuvinte de două litere"</string>
|
<string name="bigram_suggestion" msgid="1323347224043514969">"Sugestii pentru cuvinte de două litere"</string>
|
||||||
<string name="bigram_suggestion_summary" msgid="4383845146070101531">"Utilizaţi cuvântul anterior pentru a îmbunătăţi sugestia"</string>
|
<string name="bigram_suggestion_summary" msgid="4383845146070101531">"Utilizaţi cuvântul anterior pentru a îmbunătăţi sugestia"</string>
|
||||||
<string name="added_word" msgid="8993883354622484372">"<xliff:g id="WORD">%s</xliff:g>: salvat"</string>
|
<string name="added_word" msgid="8993883354622484372">"<xliff:g id="WORD">%s</xliff:g>: salvat"</string>
|
||||||
<string name="label_go_key" msgid="1635148082137219148">"Accesaţi"</string>
|
<string name="label_go_key" msgid="1635148082137219148">"OK"</string>
|
||||||
<string name="label_next_key" msgid="362972844525672568">"Înainte"</string>
|
<string name="label_next_key" msgid="362972844525672568">"Înainte"</string>
|
||||||
<string name="label_done_key" msgid="2441578748772529288">"Terminat"</string>
|
<string name="label_done_key" msgid="2441578748772529288">"Terminat"</string>
|
||||||
<string name="label_send_key" msgid="2815056534433717444">"Trimiteţi"</string>
|
<string name="label_send_key" msgid="2815056534433717444">"Trimiteţi"</string>
|
||||||
|
|
|
@ -48,7 +48,7 @@
|
||||||
<string name="bigram_suggestion" msgid="1323347224043514969">"Mga Suhestiyon na Bigram"</string>
|
<string name="bigram_suggestion" msgid="1323347224043514969">"Mga Suhestiyon na Bigram"</string>
|
||||||
<string name="bigram_suggestion_summary" msgid="4383845146070101531">"Gamitin ang nakaraang salita upang pahusayin ang suhestiyon"</string>
|
<string name="bigram_suggestion_summary" msgid="4383845146070101531">"Gamitin ang nakaraang salita upang pahusayin ang suhestiyon"</string>
|
||||||
<string name="added_word" msgid="8993883354622484372">"<xliff:g id="WORD">%s</xliff:g> : Na-save"</string>
|
<string name="added_word" msgid="8993883354622484372">"<xliff:g id="WORD">%s</xliff:g> : Na-save"</string>
|
||||||
<string name="label_go_key" msgid="1635148082137219148">"Pumunta"</string>
|
<string name="label_go_key" msgid="1635148082137219148">"Punta"</string>
|
||||||
<string name="label_next_key" msgid="362972844525672568">"Susunod"</string>
|
<string name="label_next_key" msgid="362972844525672568">"Susunod"</string>
|
||||||
<string name="label_done_key" msgid="2441578748772529288">"Tapos na"</string>
|
<string name="label_done_key" msgid="2441578748772529288">"Tapos na"</string>
|
||||||
<string name="label_send_key" msgid="2815056534433717444">"Ipadala"</string>
|
<string name="label_send_key" msgid="2815056534433717444">"Ipadala"</string>
|
||||||
|
|
|
@ -67,7 +67,6 @@ public class InputMethodManagerCompatWrapper {
|
||||||
|
|
||||||
// For the compatibility, IMM will create dummy subtypes if subtypes are not found.
|
// For the compatibility, IMM will create dummy subtypes if subtypes are not found.
|
||||||
// This is required to be false if the current behavior is broken. For now, it's ok to be true.
|
// This is required to be false if the current behavior is broken. For now, it's ok to be true.
|
||||||
private static final boolean ALLOW_DUMMY_SUBTYPE = true;
|
|
||||||
private static final boolean HAS_VOICE_FUNCTION = true;
|
private static final boolean HAS_VOICE_FUNCTION = true;
|
||||||
private static final String VOICE_MODE = "voice";
|
private static final String VOICE_MODE = "voice";
|
||||||
private static final String KEYBOARD_MODE = "keyboard";
|
private static final String KEYBOARD_MODE = "keyboard";
|
||||||
|
@ -119,11 +118,13 @@ public class InputMethodManagerCompatWrapper {
|
||||||
Object retval = CompatUtils.invoke(mImm, null, METHOD_getEnabledInputMethodSubtypeList,
|
Object retval = CompatUtils.invoke(mImm, null, METHOD_getEnabledInputMethodSubtypeList,
|
||||||
(imi != null ? imi.getInputMethodInfo() : null), allowsImplicitlySelectedSubtypes);
|
(imi != null ? imi.getInputMethodInfo() : null), allowsImplicitlySelectedSubtypes);
|
||||||
if (retval == null || !(retval instanceof List) || ((List<?>)retval).isEmpty()) {
|
if (retval == null || !(retval instanceof List) || ((List<?>)retval).isEmpty()) {
|
||||||
if (!ALLOW_DUMMY_SUBTYPE) {
|
if (InputMethodServiceCompatWrapper.
|
||||||
|
CAN_HANDLE_ON_CURRENT_INPUT_METHOD_SUBTYPE_CHANGED) {
|
||||||
// Returns an empty list
|
// Returns an empty list
|
||||||
return Collections.emptyList();
|
return Collections.emptyList();
|
||||||
}
|
}
|
||||||
// Creates dummy subtypes
|
// Creates dummy subtypes
|
||||||
|
@SuppressWarnings("unused")
|
||||||
List<InputMethodSubtypeCompatWrapper> subtypeList =
|
List<InputMethodSubtypeCompatWrapper> subtypeList =
|
||||||
new ArrayList<InputMethodSubtypeCompatWrapper>();
|
new ArrayList<InputMethodSubtypeCompatWrapper>();
|
||||||
InputMethodSubtypeCompatWrapper keyboardSubtype = getLastResortSubtype(KEYBOARD_MODE);
|
InputMethodSubtypeCompatWrapper keyboardSubtype = getLastResortSubtype(KEYBOARD_MODE);
|
||||||
|
@ -159,11 +160,13 @@ public class InputMethodManagerCompatWrapper {
|
||||||
getShortcutInputMethodsAndSubtypes() {
|
getShortcutInputMethodsAndSubtypes() {
|
||||||
Object retval = CompatUtils.invoke(mImm, null, METHOD_getShortcutInputMethodsAndSubtypes);
|
Object retval = CompatUtils.invoke(mImm, null, METHOD_getShortcutInputMethodsAndSubtypes);
|
||||||
if (retval == null || !(retval instanceof Map) || ((Map<?, ?>)retval).isEmpty()) {
|
if (retval == null || !(retval instanceof Map) || ((Map<?, ?>)retval).isEmpty()) {
|
||||||
if (!ALLOW_DUMMY_SUBTYPE) {
|
if (InputMethodServiceCompatWrapper.
|
||||||
|
CAN_HANDLE_ON_CURRENT_INPUT_METHOD_SUBTYPE_CHANGED) {
|
||||||
// Returns an empty map
|
// Returns an empty map
|
||||||
return Collections.emptyMap();
|
return Collections.emptyMap();
|
||||||
}
|
}
|
||||||
// Creates dummy subtypes
|
// Creates dummy subtypes
|
||||||
|
@SuppressWarnings("unused")
|
||||||
InputMethodInfoCompatWrapper imi = getLatinImeInputMethodInfo();
|
InputMethodInfoCompatWrapper imi = getLatinImeInputMethodInfo();
|
||||||
InputMethodSubtypeCompatWrapper voiceSubtype = getLastResortSubtype(VOICE_MODE);
|
InputMethodSubtypeCompatWrapper voiceSubtype = getLastResortSubtype(VOICE_MODE);
|
||||||
if (imi != null && voiceSubtype != null) {
|
if (imi != null && voiceSubtype != null) {
|
||||||
|
@ -196,8 +199,10 @@ public class InputMethodManagerCompatWrapper {
|
||||||
|
|
||||||
public void setInputMethodAndSubtype(
|
public void setInputMethodAndSubtype(
|
||||||
IBinder token, String id, InputMethodSubtypeCompatWrapper subtype) {
|
IBinder token, String id, InputMethodSubtypeCompatWrapper subtype) {
|
||||||
CompatUtils.invoke(mImm, null, METHOD_setInputMethodAndSubtype,
|
if (subtype != null && subtype.hasOriginalObject()) {
|
||||||
token, id, subtype.getOriginalObject());
|
CompatUtils.invoke(mImm, null, METHOD_setInputMethodAndSubtype,
|
||||||
|
token, id, subtype.getOriginalObject());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean switchToLastInputMethod(IBinder token) {
|
public boolean switchToLastInputMethod(IBinder token) {
|
||||||
|
|
|
@ -28,7 +28,7 @@ public class InputMethodServiceCompatWrapper extends InputMethodService {
|
||||||
// For the API level 11 or later, LatinIME should override onCurrentInputMethodSubtypeChanged().
|
// For the API level 11 or later, LatinIME should override onCurrentInputMethodSubtypeChanged().
|
||||||
// For the API level 10 or previous, we handle the "subtype changed" events by ourselves
|
// For the API level 10 or previous, we handle the "subtype changed" events by ourselves
|
||||||
// without having support from framework -- onCurrentInputMethodSubtypeChanged().
|
// without having support from framework -- onCurrentInputMethodSubtypeChanged().
|
||||||
private static final boolean CAN_HANDLE_ON_CURRENT_INPUT_METHOD_SUBTYPE_CHANGED = false;
|
public static final boolean CAN_HANDLE_ON_CURRENT_INPUT_METHOD_SUBTYPE_CHANGED = false;
|
||||||
|
|
||||||
private InputMethodManagerCompatWrapper mImm;
|
private InputMethodManagerCompatWrapper mImm;
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,7 @@ package com.android.inputmethod.latin;
|
||||||
|
|
||||||
import android.content.ContentResolver;
|
import android.content.ContentResolver;
|
||||||
import android.content.Context;
|
import android.content.Context;
|
||||||
|
import android.content.res.AssetFileDescriptor;
|
||||||
import android.net.Uri;
|
import android.net.Uri;
|
||||||
import android.text.TextUtils;
|
import android.text.TextUtils;
|
||||||
|
|
||||||
|
@ -96,9 +97,9 @@ public class BinaryDictionaryFileDumper {
|
||||||
// file.
|
// file.
|
||||||
final ContentResolver resolver = context.getContentResolver();
|
final ContentResolver resolver = context.getContentResolver();
|
||||||
final Uri dictionaryPackUri = getProviderUri(locale);
|
final Uri dictionaryPackUri = getProviderUri(locale);
|
||||||
final InputStream stream = resolver.openInputStream(dictionaryPackUri);
|
final AssetFileDescriptor afd = resolver.openAssetFileDescriptor(dictionaryPackUri, "r");
|
||||||
if (null == stream) return null;
|
if (null == afd) return null;
|
||||||
return copyFileTo(stream, getCacheFileNameForLocale(locale, context));
|
return copyFileTo(afd.createInputStream(), getCacheFileNameForLocale(locale, context));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -128,6 +129,8 @@ public class BinaryDictionaryFileDumper {
|
||||||
/**
|
/**
|
||||||
* Copies the data in an input stream to a target file, creating the file if necessary and
|
* Copies the data in an input stream to a target file, creating the file if necessary and
|
||||||
* overwriting it if it already exists.
|
* overwriting it if it already exists.
|
||||||
|
* @param input the stream to be copied.
|
||||||
|
* @param outputFileName the name of a file to copy the data to. It is created if necessary.
|
||||||
*/
|
*/
|
||||||
private static String copyFileTo(final InputStream input, final String outputFileName)
|
private static String copyFileTo(final InputStream input, final String outputFileName)
|
||||||
throws FileNotFoundException, IOException {
|
throws FileNotFoundException, IOException {
|
||||||
|
|
|
@ -382,12 +382,16 @@ public class SubtypeSwitcher {
|
||||||
return false;
|
return false;
|
||||||
if (mShortcutSubtype == null)
|
if (mShortcutSubtype == null)
|
||||||
return true;
|
return true;
|
||||||
|
// For compatibility, if the shortcut subtype is dummy, we assume the shortcut IME
|
||||||
|
// (built-in voice dummy subtype) is available.
|
||||||
|
if (!mShortcutSubtype.hasOriginalObject()) return true;
|
||||||
final boolean allowsImplicitlySelectedSubtypes = true;
|
final boolean allowsImplicitlySelectedSubtypes = true;
|
||||||
for (final InputMethodSubtypeCompatWrapper enabledSubtype :
|
for (final InputMethodSubtypeCompatWrapper enabledSubtype :
|
||||||
mImm.getEnabledInputMethodSubtypeList(
|
mImm.getEnabledInputMethodSubtypeList(
|
||||||
mShortcutInputMethodInfo, allowsImplicitlySelectedSubtypes)) {
|
mShortcutInputMethodInfo, allowsImplicitlySelectedSubtypes)) {
|
||||||
if (enabledSubtype.equals(mShortcutSubtype))
|
if (enabledSubtype.equals(mShortcutSubtype)) {
|
||||||
return true;
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -140,7 +140,7 @@ static void prof_out(void) {
|
||||||
// The following "rate"s are used as a multiplier before dividing by 100, so they are in percent.
|
// The following "rate"s are used as a multiplier before dividing by 100, so they are in percent.
|
||||||
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 80
|
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 80
|
||||||
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X 12
|
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X 12
|
||||||
#define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 80
|
#define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 75
|
||||||
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
|
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
|
||||||
#define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
|
#define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
|
||||||
#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60
|
#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60
|
||||||
|
|
|
@ -448,8 +448,14 @@ bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength,
|
||||||
word[i] = mWord[i - firstWordLength - 1];
|
word[i] = mWord[i - firstWordLength - 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
int pairFreq = ((firstFreq + secondFreq) / 2);
|
// Promote pairFreq with multiplying by 2, because the word length is the same as the typed
|
||||||
|
// length.
|
||||||
|
int pairFreq = firstFreq + secondFreq;
|
||||||
for (int i = 0; i < inputLength; ++i) pairFreq *= TYPED_LETTER_MULTIPLIER;
|
for (int i = 0; i < inputLength; ++i) pairFreq *= TYPED_LETTER_MULTIPLIER;
|
||||||
|
if (DEBUG_DICT) {
|
||||||
|
LOGI("Missing space: %d, %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength,
|
||||||
|
TYPED_LETTER_MULTIPLIER);
|
||||||
|
}
|
||||||
multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &pairFreq);
|
multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &pairFreq);
|
||||||
addWord(word, newWordLength, pairFreq);
|
addWord(word, newWordLength, pairFreq);
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -41,18 +41,17 @@ import javax.xml.parsers.SAXParserFactory;
|
||||||
* in the data. There is no need to increase the version when only the words in the data changes.
|
* in the data. There is no need to increase the version when only the words in the data changes.
|
||||||
*/
|
*/
|
||||||
public class MakeBinaryDictionary {
|
public class MakeBinaryDictionary {
|
||||||
|
|
||||||
private static final int VERSION_NUM = 200;
|
private static final int VERSION_NUM = 200;
|
||||||
|
|
||||||
public static final int ALPHA_SIZE = 256;
|
private static final String TAG_WORD = "w";
|
||||||
|
private static final String ATTR_FREQ = "f";
|
||||||
public static final String TAG_WORD = "w";
|
|
||||||
public static final String ATTR_FREQ = "f";
|
|
||||||
|
|
||||||
private static final int FLAG_ADDRESS_MASK = 0x400000;
|
private static final int FLAG_ADDRESS_MASK = 0x400000;
|
||||||
private static final int FLAG_TERMINAL_MASK = 0x800000;
|
private static final int FLAG_TERMINAL_MASK = 0x800000;
|
||||||
private static final int ADDRESS_MASK = 0x3FFFFF;
|
private static final int ADDRESS_MASK = 0x3FFFFF;
|
||||||
|
|
||||||
|
private static final int INITIAL_STRING_BUILDER_CAPACITY = 48;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Unit for this variable is in bytes
|
* Unit for this variable is in bytes
|
||||||
* If destination file name is main.dict and file limit causes dictionary to be separated into
|
* If destination file name is main.dict and file limit causes dictionary to be separated into
|
||||||
|
@ -61,15 +60,15 @@ public class MakeBinaryDictionary {
|
||||||
private static int sOutputFileSize;
|
private static int sOutputFileSize;
|
||||||
private static boolean sSplitOutput;
|
private static boolean sSplitOutput;
|
||||||
|
|
||||||
public static final CharNode EMPTY_NODE = new CharNode();
|
private static final CharNode EMPTY_NODE = new CharNode();
|
||||||
|
|
||||||
List<CharNode> roots;
|
private List<CharNode> mRoots;
|
||||||
Map<String, Integer> mDictionary;
|
private Map<String, Integer> mDictionary;
|
||||||
int mWordCount;
|
private int mWordCount;
|
||||||
|
|
||||||
BigramDictionary bigramDict;
|
private BigramDictionary mBigramDict;
|
||||||
|
|
||||||
static class CharNode {
|
private static class CharNode {
|
||||||
char data;
|
char data;
|
||||||
int freq;
|
int freq;
|
||||||
boolean terminal;
|
boolean terminal;
|
||||||
|
@ -81,7 +80,7 @@ public class MakeBinaryDictionary {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void usage() {
|
private static void usage() {
|
||||||
System.err.println("Usage: makedict -s <src_dict.xml> [-b <src_bigram.xml>] "
|
System.err.println("Usage: makedict -s <src_dict.xml> [-b <src_bigram.xml>] "
|
||||||
+ "-d <dest.dict> [--size filesize]");
|
+ "-d <dest.dict> [--size filesize]");
|
||||||
System.exit(-1);
|
System.exit(-1);
|
||||||
|
@ -118,36 +117,37 @@ public class MakeBinaryDictionary {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public MakeBinaryDictionary(String srcFilename, String bigramSrcFilename, String destFilename){
|
private MakeBinaryDictionary(String srcFilename, String bigramSrcFilename,
|
||||||
|
String destFilename) {
|
||||||
System.out.println("Generating dictionary version " + VERSION_NUM);
|
System.out.println("Generating dictionary version " + VERSION_NUM);
|
||||||
bigramDict = new BigramDictionary(bigramSrcFilename, (bigramSrcFilename != null));
|
mBigramDict = new BigramDictionary(bigramSrcFilename, (bigramSrcFilename != null));
|
||||||
populateDictionary(srcFilename);
|
populateDictionary(srcFilename);
|
||||||
writeToDict(destFilename);
|
writeToDict(destFilename);
|
||||||
|
|
||||||
// Enable the code below to verify that the generated tree is traversable
|
// Enable the code below to verify that the generated tree is traversable
|
||||||
// and bigram data is stored correctly.
|
// and bigram data is stored correctly.
|
||||||
if (false) {
|
if (false) {
|
||||||
bigramDict.reverseLookupAll(mDictionary, dict);
|
mBigramDict.reverseLookupAll(mDictionary, mDict);
|
||||||
traverseDict(2, new char[32], 0);
|
traverseDict(2, new char[32], 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void populateDictionary(String filename) {
|
private void populateDictionary(String filename) {
|
||||||
roots = new ArrayList<CharNode>();
|
mRoots = new ArrayList<CharNode>();
|
||||||
mDictionary = new HashMap<String, Integer>();
|
mDictionary = new HashMap<String, Integer>();
|
||||||
try {
|
try {
|
||||||
SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
|
SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
|
||||||
parser.parse(new File(filename), new DefaultHandler() {
|
parser.parse(new File(filename), new DefaultHandler() {
|
||||||
boolean inWord;
|
boolean inWord;
|
||||||
int freq;
|
int freq;
|
||||||
StringBuilder wordBuilder = new StringBuilder(48);
|
StringBuilder wordBuilder = new StringBuilder(INITIAL_STRING_BUILDER_CAPACITY);
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void startElement(String uri, String localName,
|
public void startElement(String uri, String localName,
|
||||||
String qName, Attributes attributes) {
|
String qName, Attributes attributes) {
|
||||||
if (qName.equals("w")) {
|
if (qName.equals(TAG_WORD)) {
|
||||||
inWord = true;
|
inWord = true;
|
||||||
freq = Integer.parseInt(attributes.getValue(0));
|
freq = Integer.parseInt(attributes.getValue(ATTR_FREQ));
|
||||||
wordBuilder.setLength(0);
|
wordBuilder.setLength(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -162,7 +162,7 @@ public class MakeBinaryDictionary {
|
||||||
@Override
|
@Override
|
||||||
public void endElement(String uri, String localName,
|
public void endElement(String uri, String localName,
|
||||||
String qName) {
|
String qName) {
|
||||||
if (qName.equals("w")) {
|
if (qName.equals(TAG_WORD)) {
|
||||||
if (wordBuilder.length() >= 1) {
|
if (wordBuilder.length() >= 1) {
|
||||||
addWordTop(wordBuilder.toString(), freq);
|
addWordTop(wordBuilder.toString(), freq);
|
||||||
mWordCount++;
|
mWordCount++;
|
||||||
|
@ -178,7 +178,7 @@ public class MakeBinaryDictionary {
|
||||||
System.out.println("Nodes = " + CharNode.sNodes);
|
System.out.println("Nodes = " + CharNode.sNodes);
|
||||||
}
|
}
|
||||||
|
|
||||||
private int indexOf(List<CharNode> children, char c) {
|
private static int indexOf(List<CharNode> children, char c) {
|
||||||
if (children == null) {
|
if (children == null) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
@ -190,27 +190,30 @@ public class MakeBinaryDictionary {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void addWordTop(String word, int occur) {
|
private void addWordTop(String word, int freq) {
|
||||||
if (occur > 255) occur = 255;
|
if (freq < 0) {
|
||||||
|
freq = 0;
|
||||||
|
} else if (freq > 255) {
|
||||||
|
freq = 255;
|
||||||
|
}
|
||||||
char firstChar = word.charAt(0);
|
char firstChar = word.charAt(0);
|
||||||
int index = indexOf(roots, firstChar);
|
int index = indexOf(mRoots, firstChar);
|
||||||
if (index == -1) {
|
if (index == -1) {
|
||||||
CharNode newNode = new CharNode();
|
CharNode newNode = new CharNode();
|
||||||
newNode.data = firstChar;
|
newNode.data = firstChar;
|
||||||
newNode.freq = occur;
|
index = mRoots.size();
|
||||||
index = roots.size();
|
mRoots.add(newNode);
|
||||||
roots.add(newNode);
|
|
||||||
} else {
|
|
||||||
roots.get(index).freq += occur;
|
|
||||||
}
|
}
|
||||||
|
final CharNode node = mRoots.get(index);
|
||||||
if (word.length() > 1) {
|
if (word.length() > 1) {
|
||||||
addWordRec(roots.get(index), word, 1, occur);
|
addWordRec(node, word, 1, freq);
|
||||||
} else {
|
} else {
|
||||||
roots.get(index).terminal = true;
|
node.terminal = true;
|
||||||
|
node.freq = freq;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void addWordRec(CharNode parent, String word, int charAt, int occur) {
|
private void addWordRec(CharNode parent, String word, int charAt, int freq) {
|
||||||
CharNode child = null;
|
CharNode child = null;
|
||||||
char data = word.charAt(charAt);
|
char data = word.charAt(charAt);
|
||||||
if (parent.children == null) {
|
if (parent.children == null) {
|
||||||
|
@ -229,89 +232,89 @@ public class MakeBinaryDictionary {
|
||||||
parent.children.add(child);
|
parent.children.add(child);
|
||||||
}
|
}
|
||||||
child.data = data;
|
child.data = data;
|
||||||
if (child.freq == 0) child.freq = occur;
|
|
||||||
if (word.length() > charAt + 1) {
|
if (word.length() > charAt + 1) {
|
||||||
addWordRec(child, word, charAt + 1, occur);
|
addWordRec(child, word, charAt + 1, freq);
|
||||||
} else {
|
} else {
|
||||||
child.terminal = true;
|
child.terminal = true;
|
||||||
child.freq = occur;
|
child.freq = freq;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
byte[] dict;
|
private byte[] mDict;
|
||||||
int dictSize;
|
private int mDictSize;
|
||||||
static final int CHAR_WIDTH = 8;
|
private static final int CHAR_WIDTH = 8;
|
||||||
static final int FLAGS_WIDTH = 1; // Terminal flag (word end)
|
private static final int FLAGS_WIDTH = 1; // Terminal flag (word end)
|
||||||
static final int ADDR_WIDTH = 23; // Offset to children
|
private static final int ADDR_WIDTH = 23; // Offset to children
|
||||||
static final int FREQ_WIDTH_BYTES = 1;
|
private static final int FREQ_WIDTH_BYTES = 1;
|
||||||
static final int COUNT_WIDTH_BYTES = 1;
|
private static final int COUNT_WIDTH_BYTES = 1;
|
||||||
|
|
||||||
private void addCount(int count) {
|
private void addCount(int count) {
|
||||||
dict[dictSize++] = (byte) (0xFF & count);
|
mDict[mDictSize++] = (byte) (0xFF & count);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void addNode(CharNode node, String word1) {
|
private void addNode(CharNode node, String word1) {
|
||||||
if (node.terminal) { // store address of each word1
|
if (node.terminal) { // store address of each word1 for bigram dic generation
|
||||||
mDictionary.put(word1, dictSize);
|
mDictionary.put(word1, mDictSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
int charData = 0xFFFF & node.data;
|
int charData = 0xFFFF & node.data;
|
||||||
if (charData > 254) {
|
if (charData > 254) {
|
||||||
dict[dictSize++] = (byte) 255;
|
mDict[mDictSize++] = (byte) 255;
|
||||||
dict[dictSize++] = (byte) ((node.data >> 8) & 0xFF);
|
mDict[mDictSize++] = (byte) ((node.data >> 8) & 0xFF);
|
||||||
dict[dictSize++] = (byte) (node.data & 0xFF);
|
mDict[mDictSize++] = (byte) (node.data & 0xFF);
|
||||||
} else {
|
} else {
|
||||||
dict[dictSize++] = (byte) (0xFF & node.data);
|
mDict[mDictSize++] = (byte) (0xFF & node.data);
|
||||||
}
|
}
|
||||||
if (node.children != null) {
|
if (node.children != null) {
|
||||||
dictSize += 3; // Space for children address
|
mDictSize += 3; // Space for children address
|
||||||
} else {
|
} else {
|
||||||
dictSize += 1; // Space for just the terminal/address flags
|
mDictSize += 1; // Space for just the terminal/address flags
|
||||||
}
|
}
|
||||||
if ((0xFFFFFF & node.freq) > 255) {
|
if ((0xFFFFFF & node.freq) > 255) {
|
||||||
node.freq = 255;
|
node.freq = 255;
|
||||||
}
|
}
|
||||||
if (node.terminal) {
|
if (node.terminal) {
|
||||||
byte freq = (byte) (0xFF & node.freq);
|
byte freq = (byte) (0xFF & node.freq);
|
||||||
dict[dictSize++] = freq;
|
mDict[mDictSize++] = freq;
|
||||||
// bigram
|
// bigram
|
||||||
if (bigramDict.mBi.containsKey(word1)) {
|
if (mBigramDict.mBi.containsKey(word1)) {
|
||||||
int count = bigramDict.mBi.get(word1).count;
|
int count = mBigramDict.mBi.get(word1).count;
|
||||||
bigramDict.mBigramToFill.add(word1);
|
mBigramDict.mBigramToFill.add(word1);
|
||||||
bigramDict.mBigramToFillAddress.add(dictSize);
|
mBigramDict.mBigramToFillAddress.add(mDictSize);
|
||||||
dictSize += (4 * count);
|
mDictSize += (4 * count);
|
||||||
} else {
|
} else {
|
||||||
dict[dictSize++] = (byte) (0x00);
|
mDict[mDictSize++] = (byte) (0x00);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int nullChildrenCount = 0;
|
private int mNullChildrenCount = 0;
|
||||||
int notTerminalCount = 0;
|
private int mNotTerminalCount = 0;
|
||||||
|
|
||||||
private void updateNodeAddress(int nodeAddress, CharNode node,
|
private void updateNodeAddress(int nodeAddress, CharNode node,
|
||||||
int childrenAddress) {
|
int childrenAddress) {
|
||||||
if ((dict[nodeAddress] & 0xFF) == 0xFF) { // 3 byte character
|
if ((mDict[nodeAddress] & 0xFF) == 0xFF) { // 3 byte character
|
||||||
nodeAddress += 2;
|
nodeAddress += 2;
|
||||||
}
|
}
|
||||||
childrenAddress = ADDRESS_MASK & childrenAddress;
|
childrenAddress = ADDRESS_MASK & childrenAddress;
|
||||||
if (childrenAddress == 0) {
|
if (childrenAddress == 0) {
|
||||||
nullChildrenCount++;
|
mNullChildrenCount++;
|
||||||
} else {
|
} else {
|
||||||
childrenAddress |= FLAG_ADDRESS_MASK;
|
childrenAddress |= FLAG_ADDRESS_MASK;
|
||||||
}
|
}
|
||||||
if (node.terminal) {
|
if (node.terminal) {
|
||||||
childrenAddress |= FLAG_TERMINAL_MASK;
|
childrenAddress |= FLAG_TERMINAL_MASK;
|
||||||
} else {
|
} else {
|
||||||
notTerminalCount++;
|
mNotTerminalCount++;
|
||||||
}
|
}
|
||||||
dict[nodeAddress + 1] = (byte) (childrenAddress >> 16);
|
mDict[nodeAddress + 1] = (byte) (childrenAddress >> 16);
|
||||||
if ((childrenAddress & FLAG_ADDRESS_MASK) != 0) {
|
if ((childrenAddress & FLAG_ADDRESS_MASK) != 0) {
|
||||||
dict[nodeAddress + 2] = (byte) ((childrenAddress & 0xFF00) >> 8);
|
mDict[nodeAddress + 2] = (byte) ((childrenAddress & 0xFF00) >> 8);
|
||||||
dict[nodeAddress + 3] = (byte) ((childrenAddress & 0xFF));
|
mDict[nodeAddress + 3] = (byte) ((childrenAddress & 0xFF));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void writeWordsRec(List<CharNode> children, StringBuilder word) {
|
private void writeWordsRec(List<CharNode> children, StringBuilder word) {
|
||||||
if (children == null || children.size() == 0) {
|
if (children == null || children.size() == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -319,60 +322,59 @@ public class MakeBinaryDictionary {
|
||||||
addCount(childCount);
|
addCount(childCount);
|
||||||
int[] childrenAddresses = new int[childCount];
|
int[] childrenAddresses = new int[childCount];
|
||||||
for (int j = 0; j < childCount; j++) {
|
for (int j = 0; j < childCount; j++) {
|
||||||
CharNode node = children.get(j);
|
CharNode child = children.get(j);
|
||||||
childrenAddresses[j] = dictSize;
|
childrenAddresses[j] = mDictSize;
|
||||||
word.append(children.get(j).data);
|
word.append(child.data);
|
||||||
addNode(node, word.toString());
|
addNode(child, word.toString());
|
||||||
word.deleteCharAt(word.length()-1);
|
word.setLength(word.length() - 1);
|
||||||
}
|
}
|
||||||
for (int j = 0; j < childCount; j++) {
|
for (int j = 0; j < childCount; j++) {
|
||||||
CharNode node = children.get(j);
|
CharNode child = children.get(j);
|
||||||
int nodeAddress = childrenAddresses[j];
|
int nodeAddress = childrenAddresses[j];
|
||||||
int cacheDictSize = dictSize;
|
int cacheDictSize = mDictSize;
|
||||||
word.append(children.get(j).data);
|
word.append(child.data);
|
||||||
writeWordsRec(node.children, word);
|
writeWordsRec(child.children, word);
|
||||||
word.deleteCharAt(word.length()-1);
|
word.setLength(word.length() - 1);
|
||||||
updateNodeAddress(nodeAddress, node, node.children != null
|
updateNodeAddress(nodeAddress, child, child.children != null ? cacheDictSize : 0);
|
||||||
? cacheDictSize : 0);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void writeToDict(String dictFilename) {
|
private void writeToDict(String dictFilename) {
|
||||||
// 4MB max, 22-bit offsets
|
// 4MB max, 22-bit offsets
|
||||||
dict = new byte[4 * 1024 * 1024]; // 4MB upper limit. Actual is probably
|
mDict = new byte[4 * 1024 * 1024]; // 4MB upper limit. Actual is probably
|
||||||
// < 1MB in most cases, as there is a limit in the
|
// < 1MB in most cases, as there is a limit in the
|
||||||
// resource size in apks.
|
// resource size in apks.
|
||||||
dictSize = 0;
|
mDictSize = 0;
|
||||||
|
|
||||||
dict[dictSize++] = (byte) (0xFF & VERSION_NUM); // version info
|
mDict[mDictSize++] = (byte) (0xFF & VERSION_NUM); // version info
|
||||||
dict[dictSize++] = (byte) (0xFF & (bigramDict.mHasBigram ? 1 : 0));
|
mDict[mDictSize++] = (byte) (0xFF & (mBigramDict.mHasBigram ? 1 : 0));
|
||||||
|
|
||||||
StringBuilder word = new StringBuilder(48);
|
final StringBuilder word = new StringBuilder(INITIAL_STRING_BUILDER_CAPACITY);
|
||||||
writeWordsRec(roots, word);
|
writeWordsRec(mRoots, word);
|
||||||
dict = bigramDict.writeBigrams(dict, mDictionary);
|
mDict = mBigramDict.writeBigrams(mDict, mDictionary);
|
||||||
System.out.println("Dict Size = " + dictSize);
|
System.out.println("Dict Size = " + mDictSize);
|
||||||
if (!sSplitOutput) {
|
if (!sSplitOutput) {
|
||||||
sOutputFileSize = dictSize;
|
sOutputFileSize = mDictSize;
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
int currentLoc = 0;
|
int currentLoc = 0;
|
||||||
int i = 0;
|
int i = 0;
|
||||||
int extension = dictFilename.indexOf(".dict");
|
int extension = dictFilename.indexOf(".dict");
|
||||||
String filename = dictFilename.substring(0, extension);
|
String filename = dictFilename.substring(0, extension);
|
||||||
while (dictSize > 0) {
|
while (mDictSize > 0) {
|
||||||
FileOutputStream fos;
|
FileOutputStream fos;
|
||||||
if (sSplitOutput) {
|
if (sSplitOutput) {
|
||||||
fos = new FileOutputStream(filename + i + ".dict");
|
fos = new FileOutputStream(filename + i + ".dict");
|
||||||
} else {
|
} else {
|
||||||
fos = new FileOutputStream(filename + ".dict");
|
fos = new FileOutputStream(filename + ".dict");
|
||||||
}
|
}
|
||||||
if (dictSize > sOutputFileSize) {
|
if (mDictSize > sOutputFileSize) {
|
||||||
fos.write(dict, currentLoc, sOutputFileSize);
|
fos.write(mDict, currentLoc, sOutputFileSize);
|
||||||
dictSize -= sOutputFileSize;
|
mDictSize -= sOutputFileSize;
|
||||||
currentLoc += sOutputFileSize;
|
currentLoc += sOutputFileSize;
|
||||||
} else {
|
} else {
|
||||||
fos.write(dict, currentLoc, dictSize);
|
fos.write(mDict, currentLoc, mDictSize);
|
||||||
dictSize = 0;
|
mDictSize = 0;
|
||||||
}
|
}
|
||||||
fos.close();
|
fos.close();
|
||||||
i++;
|
i++;
|
||||||
|
@ -382,36 +384,36 @@ public class MakeBinaryDictionary {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void traverseDict(int pos, char[] word, int depth) {
|
private void traverseDict(int pos, char[] word, int depth) {
|
||||||
int count = dict[pos++] & 0xFF;
|
int count = mDict[pos++] & 0xFF;
|
||||||
for (int i = 0; i < count; i++) {
|
for (int i = 0; i < count; i++) {
|
||||||
char c = (char) (dict[pos++] & 0xFF);
|
char c = (char) (mDict[pos++] & 0xFF);
|
||||||
if (c == 0xFF) { // two byte character
|
if (c == 0xFF) { // two byte character
|
||||||
c = (char) (((dict[pos] & 0xFF) << 8) | (dict[pos+1] & 0xFF));
|
c = (char) (((mDict[pos] & 0xFF) << 8) | (mDict[pos+1] & 0xFF));
|
||||||
pos += 2;
|
pos += 2;
|
||||||
}
|
}
|
||||||
word[depth] = c;
|
word[depth] = c;
|
||||||
boolean terminal = getFirstBitOfByte(pos, dict);
|
boolean terminal = getFirstBitOfByte(pos, mDict);
|
||||||
int address = 0;
|
int address = 0;
|
||||||
if ((dict[pos] & (FLAG_ADDRESS_MASK >> 16)) > 0) { // address check
|
if ((mDict[pos] & (FLAG_ADDRESS_MASK >> 16)) > 0) { // address check
|
||||||
address = get22BitAddress(pos, dict);
|
address = get22BitAddress(pos, mDict);
|
||||||
pos += 3;
|
pos += 3;
|
||||||
} else {
|
} else {
|
||||||
pos += 1;
|
pos += 1;
|
||||||
}
|
}
|
||||||
if (terminal) {
|
if (terminal) {
|
||||||
showWord(word, depth + 1, dict[pos] & 0xFF);
|
showWord(word, depth + 1, mDict[pos] & 0xFF);
|
||||||
pos++;
|
pos++;
|
||||||
|
|
||||||
int bigramExist = (dict[pos] & bigramDict.FLAG_BIGRAM_READ);
|
int bigramExist = (mDict[pos] & mBigramDict.FLAG_BIGRAM_READ);
|
||||||
if (bigramExist > 0) {
|
if (bigramExist > 0) {
|
||||||
int nextBigramExist = 1;
|
int nextBigramExist = 1;
|
||||||
while (nextBigramExist > 0) {
|
while (nextBigramExist > 0) {
|
||||||
int bigramAddress = get22BitAddress(pos, dict);
|
int bigramAddress = get22BitAddress(pos, mDict);
|
||||||
pos += 3;
|
pos += 3;
|
||||||
int frequency = (bigramDict.FLAG_BIGRAM_FREQ & dict[pos]);
|
int frequency = (mBigramDict.FLAG_BIGRAM_FREQ & mDict[pos]);
|
||||||
bigramDict.searchForTerminalNode(bigramAddress, frequency, dict);
|
mBigramDict.searchForTerminalNode(bigramAddress, frequency, mDict);
|
||||||
nextBigramExist = (dict[pos++] & bigramDict.FLAG_BIGRAM_CONTINUED);
|
nextBigramExist = (mDict[pos++] & mBigramDict.FLAG_BIGRAM_CONTINUED);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
pos++;
|
pos++;
|
||||||
|
@ -423,21 +425,21 @@ public class MakeBinaryDictionary {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void showWord(char[] word, int size, int freq) {
|
private static void showWord(char[] word, int size, int freq) {
|
||||||
System.out.print(new String(word, 0, size) + " " + freq + "\n");
|
System.out.print(new String(word, 0, size) + " " + freq + "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
static int get22BitAddress(int pos, byte[] dict) {
|
/* package */ static int get22BitAddress(int pos, byte[] dict) {
|
||||||
return ((dict[pos + 0] & 0x3F) << 16)
|
return ((dict[pos + 0] & 0x3F) << 16)
|
||||||
| ((dict[pos + 1] & 0xFF) << 8)
|
| ((dict[pos + 1] & 0xFF) << 8)
|
||||||
| ((dict[pos + 2] & 0xFF));
|
| ((dict[pos + 2] & 0xFF));
|
||||||
}
|
}
|
||||||
|
|
||||||
static boolean getFirstBitOfByte(int pos, byte[] dict) {
|
/* package */ static boolean getFirstBitOfByte(int pos, byte[] dict) {
|
||||||
return (dict[pos] & 0x80) > 0;
|
return (dict[pos] & 0x80) > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static boolean getSecondBitOfByte(int pos, byte[] dict) {
|
/* package */ static boolean getSecondBitOfByte(int pos, byte[] dict) {
|
||||||
return (dict[pos] & 0x40) > 0;
|
return (dict[pos] & 0x40) > 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue