diff --git a/java/src/com/android/inputmethod/latin/makedict/ProbabilityInfo.java b/java/src/com/android/inputmethod/latin/makedict/ProbabilityInfo.java index d6ce88d72..dafbc04b9 100644 --- a/java/src/com/android/inputmethod/latin/makedict/ProbabilityInfo.java +++ b/java/src/com/android/inputmethod/latin/makedict/ProbabilityInfo.java @@ -17,7 +17,7 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.latin.BinaryDictionary; -import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; +import com.android.inputmethod.latin.utils.CombinedFormatUtils; import java.util.Arrays; @@ -57,8 +57,7 @@ public final class ProbabilityInfo { @Override public String toString() { - return "f=" + mProbability + (hasHistoricalInfo() ? - ",historicalInfo=" + mTimestamp + ":" + mLevel + ":" + mCount : ""); + return CombinedFormatUtils.formatProbabilityInfo(this); } @Override diff --git a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java index e764ae3d6..b93a0a525 100644 --- a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java +++ b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java @@ -20,6 +20,7 @@ import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.BinaryDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.utils.CollectionUtils; +import com.android.inputmethod.latin.utils.CombinedFormatUtils; import com.android.inputmethod.latin.utils.StringUtils; import java.util.ArrayList; @@ -52,8 +53,8 @@ public final class WordProperty implements Comparable { mBigrams = bigrams; mIsNotAWord = isNotAWord; mIsBlacklistEntry = isBlacklistEntry; - mHasBigrams = !bigrams.isEmpty(); - mHasShortcuts = !shortcutTargets.isEmpty(); + mHasBigrams = bigrams != null && !bigrams.isEmpty(); + mHasShortcuts = shortcutTargets != null && !shortcutTargets.isEmpty(); } private static ProbabilityInfo createProbabilityInfoFromArray(final int[] probabilityInfo) { @@ -158,32 +159,6 @@ public final class WordProperty implements Comparable { @Override public String toString() { - // TODO: Move this logic to CombinedInputOutput. - final StringBuffer builder = new StringBuffer(); - builder.append(" word=" + mWord); - builder.append(","); - builder.append(mProbabilityInfo.toString()); - if (mIsNotAWord) { - builder.append(","); - builder.append("not_a_word=true"); - } - if (mIsBlacklistEntry) { - builder.append(","); - builder.append("blacklisted=true"); - } - builder.append("\n"); - for (int i = 0; i < mBigrams.size(); i++) { - builder.append(" bigram=" + mBigrams.get(i).mWord); - builder.append(","); - builder.append(mBigrams.get(i).mProbabilityInfo.toString()); - builder.append("\n"); - } - for (int i = 0; i < mShortcutTargets.size(); i++) { - builder.append(" shortcut=" + mShortcutTargets.get(i).mWord); - builder.append(","); - builder.append(mShortcutTargets.get(i).mProbabilityInfo.toString()); - builder.append("\n"); - } - return builder.toString(); + return CombinedFormatUtils.formatWordProperty(this); } } diff --git a/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java b/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java new file mode 100644 index 000000000..1348d5e77 --- /dev/null +++ b/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.android.inputmethod.latin.utils; + +import com.android.inputmethod.latin.makedict.DictionaryHeader; +import com.android.inputmethod.latin.makedict.ProbabilityInfo; +import com.android.inputmethod.latin.makedict.WordProperty; +import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; + +import java.util.HashMap; + +public class CombinedFormatUtils { + public static final String DICTIONARY_TAG = "dictionary"; + public static final String BIGRAM_TAG = "bigram"; + public static final String SHORTCUT_TAG = "shortcut"; + public static final String PROBABILITY_TAG = "f"; + public static final String HISTORICAL_INFO_TAG = "historicalInfo"; + public static final String HISTORICAL_INFO_SEPARATOR = ":"; + public static final String WORD_TAG = "word"; + public static final String NOT_A_WORD_TAG = "not_a_word"; + public static final String BLACKLISTED_TAG = "blacklisted"; + + public static String formatAttributeMap(final HashMap attributeMap) { + final StringBuilder builder = new StringBuilder(); + builder.append(DICTIONARY_TAG + "="); + if (attributeMap.containsKey(DictionaryHeader.DICTIONARY_DESCRIPTION_KEY)) { + builder.append(attributeMap.get(DictionaryHeader.DICTIONARY_DESCRIPTION_KEY)); + } + for (final String key : attributeMap.keySet()) { + if (key == DictionaryHeader.DICTIONARY_DESCRIPTION_KEY) { + continue; + } + final String value = attributeMap.get(key); + builder.append("," + key + "=" + value); + } + builder.append("\n"); + return builder.toString(); + } + + public static String formatWordProperty(final WordProperty wordProperty) { + final StringBuilder builder = new StringBuilder(); + builder.append(" " + WORD_TAG + "=" + wordProperty.mWord); + builder.append(","); + builder.append(formatProbabilityInfo(wordProperty.mProbabilityInfo)); + if (wordProperty.mIsNotAWord) { + builder.append("," + NOT_A_WORD_TAG + "=true"); + } + if (wordProperty.mIsBlacklistEntry) { + builder.append("," + BLACKLISTED_TAG + "=true"); + } + builder.append("\n"); + if (wordProperty.mShortcutTargets != null) { + for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) { + builder.append(" " + SHORTCUT_TAG + "=" + shortcutTarget.mWord); + builder.append(","); + builder.append(formatProbabilityInfo(shortcutTarget.mProbabilityInfo)); + builder.append("\n"); + } + } + if (wordProperty.mBigrams != null) { + for (final WeightedString bigram : wordProperty.mBigrams) { + builder.append(" " + BIGRAM_TAG + "=" + bigram.mWord); + builder.append(","); + builder.append(formatProbabilityInfo(bigram.mProbabilityInfo)); + builder.append("\n"); + } + } + return builder.toString(); + } + + public static String formatProbabilityInfo(final ProbabilityInfo probabilityInfo) { + final StringBuilder builder = new StringBuilder(); + builder.append(PROBABILITY_TAG + "=" + probabilityInfo.mProbability); + if (probabilityInfo.hasHistoricalInfo()) { + builder.append(","); + builder.append(HISTORICAL_INFO_TAG + "="); + builder.append(probabilityInfo.mTimestamp); + builder.append(HISTORICAL_INFO_SEPARATOR); + builder.append(probabilityInfo.mLevel); + builder.append(HISTORICAL_INFO_SEPARATOR); + builder.append(probabilityInfo.mCount); + } + return builder.toString(); + } +} diff --git a/tools/dicttool/Android.mk b/tools/dicttool/Android.mk index 5d1a30ebd..b83ce57e3 100644 --- a/tools/dicttool/Android.mk +++ b/tools/dicttool/Android.mk @@ -43,6 +43,7 @@ USED_TARGETTED_UTILS := \ $(LATINIME_CORE_SOURCE_DIRECTORY)/settings/NativeSuggestOptions.java \ $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ByteArrayDictBuffer.java \ $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CollectionUtils.java \ + $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CombinedFormatUtils.java \ $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CoordinateUtils.java \ $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/FileUtils.java \ $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/JniUtils.java \ diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java index eae9d9fc1..8d2f5fbbf 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java @@ -22,6 +22,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.WordProperty; +import com.android.inputmethod.latin.utils.CombinedFormatUtils; import java.io.BufferedReader; import java.io.File; @@ -41,16 +42,10 @@ import java.util.TreeSet; * All functions in this class are static. */ public class CombinedInputOutput { - - private static final String DICTIONARY_TAG = "dictionary"; - private static final String BIGRAM_TAG = "bigram"; - private static final String SHORTCUT_TAG = "shortcut"; - private static final String PROBABILITY_TAG = "f"; - private static final String WORD_TAG = "word"; - private static final String NOT_A_WORD_TAG = "not_a_word"; private static final String WHITELIST_TAG = "whitelist"; private static final String OPTIONS_TAG = "options"; private static final String COMMENT_LINE_STARTER = "#"; + private static final int HISTORICAL_INFO_ELEMENT_COUNT = 3; /** * Basic test to find out whether the file is in the combined format or not. @@ -68,7 +63,8 @@ public class CombinedInputOutput { while (firstLine.startsWith(COMMENT_LINE_STARTER)) { firstLine = reader.readLine(); } - return firstLine.matches("^" + DICTIONARY_TAG + "=[^:]+(:[^=]+=[^:]+)*"); + return firstLine.matches( + "^" + CombinedFormatUtils.DICTIONARY_TAG + "=[^:]+(:[^=]+=[^:]+)*"); } catch (FileNotFoundException e) { return false; } catch (IOException e) { @@ -123,7 +119,7 @@ public class CombinedInputOutput { while (null != (line = reader.readLine())) { if (line.startsWith(COMMENT_LINE_STARTER)) continue; final String args[] = line.trim().split(","); - if (args[0].matches(WORD_TAG + "=.*")) { + if (args[0].matches(CombinedFormatUtils.WORD_TAG + "=.*")) { if (null != word) { dict.add(word, freq, shortcuts.isEmpty() ? null : shortcuts, isNotAWord); for (WeightedString s : bigrams) { @@ -136,23 +132,30 @@ public class CombinedInputOutput { for (String param : args) { final String params[] = param.split("=", 2); if (2 != params.length) throw new RuntimeException("Wrong format : " + line); - if (WORD_TAG.equals(params[0])) { + if (CombinedFormatUtils.WORD_TAG.equals(params[0])) { word = params[1]; - } else if (PROBABILITY_TAG.equals(params[0])) { + } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) { freq = Integer.parseInt(params[1]); - } else if (NOT_A_WORD_TAG.equals(params[0])) { + } else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) { + final String[] historicalInfoParams = + params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR); + if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) { + throw new RuntimeException("Wrong format (historical info) : " + line); + } + // TODO: Use parsed historical info. + } else if (CombinedFormatUtils.NOT_A_WORD_TAG.equals(params[0])) { isNotAWord = "true".equals(params[1]); } } - } else if (args[0].matches(SHORTCUT_TAG + "=.*")) { + } else if (args[0].matches(CombinedFormatUtils.SHORTCUT_TAG + "=.*")) { String shortcut = null; int shortcutFreq = 0; for (String param : args) { final String params[] = param.split("=", 2); if (2 != params.length) throw new RuntimeException("Wrong format : " + line); - if (SHORTCUT_TAG.equals(params[0])) { + if (CombinedFormatUtils.SHORTCUT_TAG.equals(params[0])) { shortcut = params[1]; - } else if (PROBABILITY_TAG.equals(params[0])) { + } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) { shortcutFreq = WHITELIST_TAG.equals(params[1]) ? FormatSpec.SHORTCUT_WHITELIST_FREQUENCY : Integer.parseInt(params[1]); @@ -163,16 +166,23 @@ public class CombinedInputOutput { } else { throw new RuntimeException("Wrong format : " + line); } - } else if (args[0].matches(BIGRAM_TAG + "=.*")) { + } else if (args[0].matches(CombinedFormatUtils.BIGRAM_TAG + "=.*")) { String secondWordOfBigram = null; int bigramFreq = 0; for (String param : args) { final String params[] = param.split("=", 2); if (2 != params.length) throw new RuntimeException("Wrong format : " + line); - if (BIGRAM_TAG.equals(params[0])) { + if (CombinedFormatUtils.BIGRAM_TAG.equals(params[0])) { secondWordOfBigram = params[1]; - } else if (PROBABILITY_TAG.equals(params[0])) { + } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) { bigramFreq = Integer.parseInt(params[1]); + } else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) { + final String[] historicalInfoParams = + params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR); + if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) { + throw new RuntimeException("Wrong format (historical info) : " + line); + } + // TODO: Use parsed historical info. } } if (null != secondWordOfBigram) { @@ -198,40 +208,16 @@ public class CombinedInputOutput { * @param destination a destination stream to write to. * @param dict the dictionary to write. */ - public static void writeDictionaryCombined(Writer destination, FusionDictionary dict) - throws IOException { + public static void writeDictionaryCombined( + final Writer destination, final FusionDictionary dict) throws IOException { final TreeSet wordPropertiesInDict = new TreeSet(); - for (WordProperty wordProperty: dict) { + for (final WordProperty wordProperty : dict) { // This for ordering by frequency, then by asciibetic order wordPropertiesInDict.add(wordProperty); } - final HashMap options = dict.mOptions.mAttributes; - destination.write(DICTIONARY_TAG + "="); - if (options.containsKey(DICTIONARY_TAG)) { - destination.write(options.get(DICTIONARY_TAG)); - options.remove(DICTIONARY_TAG); - } - for (final String key : dict.mOptions.mAttributes.keySet()) { - final String value = dict.mOptions.mAttributes.get(key); - destination.write("," + key + "=" + value); - } - destination.write("\n"); - for (WordProperty wordProperty : wordPropertiesInDict) { - destination.write(" " + WORD_TAG + "=" + wordProperty.mWord + "," - + PROBABILITY_TAG + "=" + wordProperty.getProbability() - + (wordProperty.mIsNotAWord ? "," + NOT_A_WORD_TAG + "=true\n" : "\n")); - if (null != wordProperty.mShortcutTargets) { - for (WeightedString target : wordProperty.mShortcutTargets) { - destination.write(" " + SHORTCUT_TAG + "=" + target.mWord + "," - + PROBABILITY_TAG + "=" + target.getProbability() + "\n"); - } - } - if (null != wordProperty.mBigrams) { - for (WeightedString bigram : wordProperty.mBigrams) { - destination.write(" " + BIGRAM_TAG + "=" + bigram.mWord + "," - + PROBABILITY_TAG + "=" + bigram.getProbability() + "\n"); - } - } + destination.write(CombinedFormatUtils.formatAttributeMap(dict.mOptions.mAttributes)); + for (final WordProperty wordProperty : wordPropertiesInDict) { + destination.write(CombinedFormatUtils.formatWordProperty(wordProperty)); } destination.close(); }