Merge "Use CombinedFormatUtils to convert dict elements to strings."

This commit is contained in:
Keisuke Kuroyanagi 2014-02-06 07:29:29 +00:00 committed by Android (Google) Code Review
commit fd018c1588
5 changed files with 140 additions and 80 deletions

View file

@ -17,7 +17,7 @@
package com.android.inputmethod.latin.makedict; package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.latin.BinaryDictionary; import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.utils.CombinedFormatUtils;
import java.util.Arrays; import java.util.Arrays;
@ -57,8 +57,7 @@ public final class ProbabilityInfo {
@Override @Override
public String toString() { public String toString() {
return "f=" + mProbability + (hasHistoricalInfo() ? return CombinedFormatUtils.formatProbabilityInfo(this);
",historicalInfo=" + mTimestamp + ":" + mLevel + ":" + mCount : "");
} }
@Override @Override

View file

@ -20,6 +20,7 @@ import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.BinaryDictionary; import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.CollectionUtils; import com.android.inputmethod.latin.utils.CollectionUtils;
import com.android.inputmethod.latin.utils.CombinedFormatUtils;
import com.android.inputmethod.latin.utils.StringUtils; import com.android.inputmethod.latin.utils.StringUtils;
import java.util.ArrayList; import java.util.ArrayList;
@ -52,8 +53,8 @@ public final class WordProperty implements Comparable<WordProperty> {
mBigrams = bigrams; mBigrams = bigrams;
mIsNotAWord = isNotAWord; mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklistEntry; mIsBlacklistEntry = isBlacklistEntry;
mHasBigrams = !bigrams.isEmpty(); mHasBigrams = bigrams != null && !bigrams.isEmpty();
mHasShortcuts = !shortcutTargets.isEmpty(); mHasShortcuts = shortcutTargets != null && !shortcutTargets.isEmpty();
} }
private static ProbabilityInfo createProbabilityInfoFromArray(final int[] probabilityInfo) { private static ProbabilityInfo createProbabilityInfoFromArray(final int[] probabilityInfo) {
@ -158,32 +159,6 @@ public final class WordProperty implements Comparable<WordProperty> {
@Override @Override
public String toString() { public String toString() {
// TODO: Move this logic to CombinedInputOutput. return CombinedFormatUtils.formatWordProperty(this);
final StringBuffer builder = new StringBuffer();
builder.append(" word=" + mWord);
builder.append(",");
builder.append(mProbabilityInfo.toString());
if (mIsNotAWord) {
builder.append(",");
builder.append("not_a_word=true");
}
if (mIsBlacklistEntry) {
builder.append(",");
builder.append("blacklisted=true");
}
builder.append("\n");
for (int i = 0; i < mBigrams.size(); i++) {
builder.append(" bigram=" + mBigrams.get(i).mWord);
builder.append(",");
builder.append(mBigrams.get(i).mProbabilityInfo.toString());
builder.append("\n");
}
for (int i = 0; i < mShortcutTargets.size(); i++) {
builder.append(" shortcut=" + mShortcutTargets.get(i).mWord);
builder.append(",");
builder.append(mShortcutTargets.get(i).mProbabilityInfo.toString());
builder.append("\n");
}
return builder.toString();
} }
} }

View file

@ -0,0 +1,99 @@
/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.android.inputmethod.latin.utils;
import com.android.inputmethod.latin.makedict.DictionaryHeader;
import com.android.inputmethod.latin.makedict.ProbabilityInfo;
import com.android.inputmethod.latin.makedict.WordProperty;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import java.util.HashMap;
public class CombinedFormatUtils {
public static final String DICTIONARY_TAG = "dictionary";
public static final String BIGRAM_TAG = "bigram";
public static final String SHORTCUT_TAG = "shortcut";
public static final String PROBABILITY_TAG = "f";
public static final String HISTORICAL_INFO_TAG = "historicalInfo";
public static final String HISTORICAL_INFO_SEPARATOR = ":";
public static final String WORD_TAG = "word";
public static final String NOT_A_WORD_TAG = "not_a_word";
public static final String BLACKLISTED_TAG = "blacklisted";
public static String formatAttributeMap(final HashMap<String, String> attributeMap) {
final StringBuilder builder = new StringBuilder();
builder.append(DICTIONARY_TAG + "=");
if (attributeMap.containsKey(DictionaryHeader.DICTIONARY_DESCRIPTION_KEY)) {
builder.append(attributeMap.get(DictionaryHeader.DICTIONARY_DESCRIPTION_KEY));
}
for (final String key : attributeMap.keySet()) {
if (key == DictionaryHeader.DICTIONARY_DESCRIPTION_KEY) {
continue;
}
final String value = attributeMap.get(key);
builder.append("," + key + "=" + value);
}
builder.append("\n");
return builder.toString();
}
public static String formatWordProperty(final WordProperty wordProperty) {
final StringBuilder builder = new StringBuilder();
builder.append(" " + WORD_TAG + "=" + wordProperty.mWord);
builder.append(",");
builder.append(formatProbabilityInfo(wordProperty.mProbabilityInfo));
if (wordProperty.mIsNotAWord) {
builder.append("," + NOT_A_WORD_TAG + "=true");
}
if (wordProperty.mIsBlacklistEntry) {
builder.append("," + BLACKLISTED_TAG + "=true");
}
builder.append("\n");
if (wordProperty.mShortcutTargets != null) {
for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
builder.append(" " + SHORTCUT_TAG + "=" + shortcutTarget.mWord);
builder.append(",");
builder.append(formatProbabilityInfo(shortcutTarget.mProbabilityInfo));
builder.append("\n");
}
}
if (wordProperty.mBigrams != null) {
for (final WeightedString bigram : wordProperty.mBigrams) {
builder.append(" " + BIGRAM_TAG + "=" + bigram.mWord);
builder.append(",");
builder.append(formatProbabilityInfo(bigram.mProbabilityInfo));
builder.append("\n");
}
}
return builder.toString();
}
public static String formatProbabilityInfo(final ProbabilityInfo probabilityInfo) {
final StringBuilder builder = new StringBuilder();
builder.append(PROBABILITY_TAG + "=" + probabilityInfo.mProbability);
if (probabilityInfo.hasHistoricalInfo()) {
builder.append(",");
builder.append(HISTORICAL_INFO_TAG + "=");
builder.append(probabilityInfo.mTimestamp);
builder.append(HISTORICAL_INFO_SEPARATOR);
builder.append(probabilityInfo.mLevel);
builder.append(HISTORICAL_INFO_SEPARATOR);
builder.append(probabilityInfo.mCount);
}
return builder.toString();
}
}

View file

@ -43,6 +43,7 @@ USED_TARGETTED_UTILS := \
$(LATINIME_CORE_SOURCE_DIRECTORY)/settings/NativeSuggestOptions.java \ $(LATINIME_CORE_SOURCE_DIRECTORY)/settings/NativeSuggestOptions.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ByteArrayDictBuffer.java \ $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ByteArrayDictBuffer.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CollectionUtils.java \ $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CollectionUtils.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CombinedFormatUtils.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CoordinateUtils.java \ $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CoordinateUtils.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/FileUtils.java \ $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/FileUtils.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/JniUtils.java \ $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/JniUtils.java \

View file

@ -22,6 +22,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.makedict.WordProperty; import com.android.inputmethod.latin.makedict.WordProperty;
import com.android.inputmethod.latin.utils.CombinedFormatUtils;
import java.io.BufferedReader; import java.io.BufferedReader;
import java.io.File; import java.io.File;
@ -41,16 +42,10 @@ import java.util.TreeSet;
* All functions in this class are static. * All functions in this class are static.
*/ */
public class CombinedInputOutput { public class CombinedInputOutput {
private static final String DICTIONARY_TAG = "dictionary";
private static final String BIGRAM_TAG = "bigram";
private static final String SHORTCUT_TAG = "shortcut";
private static final String PROBABILITY_TAG = "f";
private static final String WORD_TAG = "word";
private static final String NOT_A_WORD_TAG = "not_a_word";
private static final String WHITELIST_TAG = "whitelist"; private static final String WHITELIST_TAG = "whitelist";
private static final String OPTIONS_TAG = "options"; private static final String OPTIONS_TAG = "options";
private static final String COMMENT_LINE_STARTER = "#"; private static final String COMMENT_LINE_STARTER = "#";
private static final int HISTORICAL_INFO_ELEMENT_COUNT = 3;
/** /**
* Basic test to find out whether the file is in the combined format or not. * Basic test to find out whether the file is in the combined format or not.
@ -68,7 +63,8 @@ public class CombinedInputOutput {
while (firstLine.startsWith(COMMENT_LINE_STARTER)) { while (firstLine.startsWith(COMMENT_LINE_STARTER)) {
firstLine = reader.readLine(); firstLine = reader.readLine();
} }
return firstLine.matches("^" + DICTIONARY_TAG + "=[^:]+(:[^=]+=[^:]+)*"); return firstLine.matches(
"^" + CombinedFormatUtils.DICTIONARY_TAG + "=[^:]+(:[^=]+=[^:]+)*");
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
return false; return false;
} catch (IOException e) { } catch (IOException e) {
@ -123,7 +119,7 @@ public class CombinedInputOutput {
while (null != (line = reader.readLine())) { while (null != (line = reader.readLine())) {
if (line.startsWith(COMMENT_LINE_STARTER)) continue; if (line.startsWith(COMMENT_LINE_STARTER)) continue;
final String args[] = line.trim().split(","); final String args[] = line.trim().split(",");
if (args[0].matches(WORD_TAG + "=.*")) { if (args[0].matches(CombinedFormatUtils.WORD_TAG + "=.*")) {
if (null != word) { if (null != word) {
dict.add(word, freq, shortcuts.isEmpty() ? null : shortcuts, isNotAWord); dict.add(word, freq, shortcuts.isEmpty() ? null : shortcuts, isNotAWord);
for (WeightedString s : bigrams) { for (WeightedString s : bigrams) {
@ -136,23 +132,30 @@ public class CombinedInputOutput {
for (String param : args) { for (String param : args) {
final String params[] = param.split("=", 2); final String params[] = param.split("=", 2);
if (2 != params.length) throw new RuntimeException("Wrong format : " + line); if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
if (WORD_TAG.equals(params[0])) { if (CombinedFormatUtils.WORD_TAG.equals(params[0])) {
word = params[1]; word = params[1];
} else if (PROBABILITY_TAG.equals(params[0])) { } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
freq = Integer.parseInt(params[1]); freq = Integer.parseInt(params[1]);
} else if (NOT_A_WORD_TAG.equals(params[0])) { } else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) {
final String[] historicalInfoParams =
params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR);
if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) {
throw new RuntimeException("Wrong format (historical info) : " + line);
}
// TODO: Use parsed historical info.
} else if (CombinedFormatUtils.NOT_A_WORD_TAG.equals(params[0])) {
isNotAWord = "true".equals(params[1]); isNotAWord = "true".equals(params[1]);
} }
} }
} else if (args[0].matches(SHORTCUT_TAG + "=.*")) { } else if (args[0].matches(CombinedFormatUtils.SHORTCUT_TAG + "=.*")) {
String shortcut = null; String shortcut = null;
int shortcutFreq = 0; int shortcutFreq = 0;
for (String param : args) { for (String param : args) {
final String params[] = param.split("=", 2); final String params[] = param.split("=", 2);
if (2 != params.length) throw new RuntimeException("Wrong format : " + line); if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
if (SHORTCUT_TAG.equals(params[0])) { if (CombinedFormatUtils.SHORTCUT_TAG.equals(params[0])) {
shortcut = params[1]; shortcut = params[1];
} else if (PROBABILITY_TAG.equals(params[0])) { } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
shortcutFreq = WHITELIST_TAG.equals(params[1]) shortcutFreq = WHITELIST_TAG.equals(params[1])
? FormatSpec.SHORTCUT_WHITELIST_FREQUENCY ? FormatSpec.SHORTCUT_WHITELIST_FREQUENCY
: Integer.parseInt(params[1]); : Integer.parseInt(params[1]);
@ -163,16 +166,23 @@ public class CombinedInputOutput {
} else { } else {
throw new RuntimeException("Wrong format : " + line); throw new RuntimeException("Wrong format : " + line);
} }
} else if (args[0].matches(BIGRAM_TAG + "=.*")) { } else if (args[0].matches(CombinedFormatUtils.BIGRAM_TAG + "=.*")) {
String secondWordOfBigram = null; String secondWordOfBigram = null;
int bigramFreq = 0; int bigramFreq = 0;
for (String param : args) { for (String param : args) {
final String params[] = param.split("=", 2); final String params[] = param.split("=", 2);
if (2 != params.length) throw new RuntimeException("Wrong format : " + line); if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
if (BIGRAM_TAG.equals(params[0])) { if (CombinedFormatUtils.BIGRAM_TAG.equals(params[0])) {
secondWordOfBigram = params[1]; secondWordOfBigram = params[1];
} else if (PROBABILITY_TAG.equals(params[0])) { } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
bigramFreq = Integer.parseInt(params[1]); bigramFreq = Integer.parseInt(params[1]);
} else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) {
final String[] historicalInfoParams =
params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR);
if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) {
throw new RuntimeException("Wrong format (historical info) : " + line);
}
// TODO: Use parsed historical info.
} }
} }
if (null != secondWordOfBigram) { if (null != secondWordOfBigram) {
@ -198,40 +208,16 @@ public class CombinedInputOutput {
* @param destination a destination stream to write to. * @param destination a destination stream to write to.
* @param dict the dictionary to write. * @param dict the dictionary to write.
*/ */
public static void writeDictionaryCombined(Writer destination, FusionDictionary dict) public static void writeDictionaryCombined(
throws IOException { final Writer destination, final FusionDictionary dict) throws IOException {
final TreeSet<WordProperty> wordPropertiesInDict = new TreeSet<WordProperty>(); final TreeSet<WordProperty> wordPropertiesInDict = new TreeSet<WordProperty>();
for (WordProperty wordProperty: dict) { for (final WordProperty wordProperty : dict) {
// This for ordering by frequency, then by asciibetic order // This for ordering by frequency, then by asciibetic order
wordPropertiesInDict.add(wordProperty); wordPropertiesInDict.add(wordProperty);
} }
final HashMap<String, String> options = dict.mOptions.mAttributes; destination.write(CombinedFormatUtils.formatAttributeMap(dict.mOptions.mAttributes));
destination.write(DICTIONARY_TAG + "="); for (final WordProperty wordProperty : wordPropertiesInDict) {
if (options.containsKey(DICTIONARY_TAG)) { destination.write(CombinedFormatUtils.formatWordProperty(wordProperty));
destination.write(options.get(DICTIONARY_TAG));
options.remove(DICTIONARY_TAG);
}
for (final String key : dict.mOptions.mAttributes.keySet()) {
final String value = dict.mOptions.mAttributes.get(key);
destination.write("," + key + "=" + value);
}
destination.write("\n");
for (WordProperty wordProperty : wordPropertiesInDict) {
destination.write(" " + WORD_TAG + "=" + wordProperty.mWord + ","
+ PROBABILITY_TAG + "=" + wordProperty.getProbability()
+ (wordProperty.mIsNotAWord ? "," + NOT_A_WORD_TAG + "=true\n" : "\n"));
if (null != wordProperty.mShortcutTargets) {
for (WeightedString target : wordProperty.mShortcutTargets) {
destination.write(" " + SHORTCUT_TAG + "=" + target.mWord + ","
+ PROBABILITY_TAG + "=" + target.getProbability() + "\n");
}
}
if (null != wordProperty.mBigrams) {
for (WeightedString bigram : wordProperty.mBigrams) {
destination.write(" " + BIGRAM_TAG + "=" + bigram.mWord + ","
+ PROBABILITY_TAG + "=" + bigram.getProbability() + "\n");
}
}
} }
destination.close(); destination.close();
} }