am fd018c15
: Merge "Use CombinedFormatUtils to convert dict elements to strings."
* commit 'fd018c1588c24b1fd32746d9bf0a089713c407e0': Use CombinedFormatUtils to convert dict elements to strings.
This commit is contained in:
commit
e02805d90a
5 changed files with 140 additions and 80 deletions
|
@ -17,7 +17,7 @@
|
|||
package com.android.inputmethod.latin.makedict;
|
||||
|
||||
import com.android.inputmethod.latin.BinaryDictionary;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||
import com.android.inputmethod.latin.utils.CombinedFormatUtils;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
|
@ -57,8 +57,7 @@ public final class ProbabilityInfo {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "f=" + mProbability + (hasHistoricalInfo() ?
|
||||
",historicalInfo=" + mTimestamp + ":" + mLevel + ":" + mCount : "");
|
||||
return CombinedFormatUtils.formatProbabilityInfo(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -20,6 +20,7 @@ import com.android.inputmethod.annotations.UsedForTesting;
|
|||
import com.android.inputmethod.latin.BinaryDictionary;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||
import com.android.inputmethod.latin.utils.CollectionUtils;
|
||||
import com.android.inputmethod.latin.utils.CombinedFormatUtils;
|
||||
import com.android.inputmethod.latin.utils.StringUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
@ -52,8 +53,8 @@ public final class WordProperty implements Comparable<WordProperty> {
|
|||
mBigrams = bigrams;
|
||||
mIsNotAWord = isNotAWord;
|
||||
mIsBlacklistEntry = isBlacklistEntry;
|
||||
mHasBigrams = !bigrams.isEmpty();
|
||||
mHasShortcuts = !shortcutTargets.isEmpty();
|
||||
mHasBigrams = bigrams != null && !bigrams.isEmpty();
|
||||
mHasShortcuts = shortcutTargets != null && !shortcutTargets.isEmpty();
|
||||
}
|
||||
|
||||
private static ProbabilityInfo createProbabilityInfoFromArray(final int[] probabilityInfo) {
|
||||
|
@ -158,32 +159,6 @@ public final class WordProperty implements Comparable<WordProperty> {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
// TODO: Move this logic to CombinedInputOutput.
|
||||
final StringBuffer builder = new StringBuffer();
|
||||
builder.append(" word=" + mWord);
|
||||
builder.append(",");
|
||||
builder.append(mProbabilityInfo.toString());
|
||||
if (mIsNotAWord) {
|
||||
builder.append(",");
|
||||
builder.append("not_a_word=true");
|
||||
}
|
||||
if (mIsBlacklistEntry) {
|
||||
builder.append(",");
|
||||
builder.append("blacklisted=true");
|
||||
}
|
||||
builder.append("\n");
|
||||
for (int i = 0; i < mBigrams.size(); i++) {
|
||||
builder.append(" bigram=" + mBigrams.get(i).mWord);
|
||||
builder.append(",");
|
||||
builder.append(mBigrams.get(i).mProbabilityInfo.toString());
|
||||
builder.append("\n");
|
||||
}
|
||||
for (int i = 0; i < mShortcutTargets.size(); i++) {
|
||||
builder.append(" shortcut=" + mShortcutTargets.get(i).mWord);
|
||||
builder.append(",");
|
||||
builder.append(mShortcutTargets.get(i).mProbabilityInfo.toString());
|
||||
builder.append("\n");
|
||||
}
|
||||
return builder.toString();
|
||||
return CombinedFormatUtils.formatWordProperty(this);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,99 @@
|
|||
/*
|
||||
* Copyright (C) 2014 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
* use this file except in compliance with the License. You may obtain a copy of
|
||||
* the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
package com.android.inputmethod.latin.utils;
|
||||
|
||||
import com.android.inputmethod.latin.makedict.DictionaryHeader;
|
||||
import com.android.inputmethod.latin.makedict.ProbabilityInfo;
|
||||
import com.android.inputmethod.latin.makedict.WordProperty;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
public class CombinedFormatUtils {
|
||||
public static final String DICTIONARY_TAG = "dictionary";
|
||||
public static final String BIGRAM_TAG = "bigram";
|
||||
public static final String SHORTCUT_TAG = "shortcut";
|
||||
public static final String PROBABILITY_TAG = "f";
|
||||
public static final String HISTORICAL_INFO_TAG = "historicalInfo";
|
||||
public static final String HISTORICAL_INFO_SEPARATOR = ":";
|
||||
public static final String WORD_TAG = "word";
|
||||
public static final String NOT_A_WORD_TAG = "not_a_word";
|
||||
public static final String BLACKLISTED_TAG = "blacklisted";
|
||||
|
||||
public static String formatAttributeMap(final HashMap<String, String> attributeMap) {
|
||||
final StringBuilder builder = new StringBuilder();
|
||||
builder.append(DICTIONARY_TAG + "=");
|
||||
if (attributeMap.containsKey(DictionaryHeader.DICTIONARY_DESCRIPTION_KEY)) {
|
||||
builder.append(attributeMap.get(DictionaryHeader.DICTIONARY_DESCRIPTION_KEY));
|
||||
}
|
||||
for (final String key : attributeMap.keySet()) {
|
||||
if (key == DictionaryHeader.DICTIONARY_DESCRIPTION_KEY) {
|
||||
continue;
|
||||
}
|
||||
final String value = attributeMap.get(key);
|
||||
builder.append("," + key + "=" + value);
|
||||
}
|
||||
builder.append("\n");
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
public static String formatWordProperty(final WordProperty wordProperty) {
|
||||
final StringBuilder builder = new StringBuilder();
|
||||
builder.append(" " + WORD_TAG + "=" + wordProperty.mWord);
|
||||
builder.append(",");
|
||||
builder.append(formatProbabilityInfo(wordProperty.mProbabilityInfo));
|
||||
if (wordProperty.mIsNotAWord) {
|
||||
builder.append("," + NOT_A_WORD_TAG + "=true");
|
||||
}
|
||||
if (wordProperty.mIsBlacklistEntry) {
|
||||
builder.append("," + BLACKLISTED_TAG + "=true");
|
||||
}
|
||||
builder.append("\n");
|
||||
if (wordProperty.mShortcutTargets != null) {
|
||||
for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
|
||||
builder.append(" " + SHORTCUT_TAG + "=" + shortcutTarget.mWord);
|
||||
builder.append(",");
|
||||
builder.append(formatProbabilityInfo(shortcutTarget.mProbabilityInfo));
|
||||
builder.append("\n");
|
||||
}
|
||||
}
|
||||
if (wordProperty.mBigrams != null) {
|
||||
for (final WeightedString bigram : wordProperty.mBigrams) {
|
||||
builder.append(" " + BIGRAM_TAG + "=" + bigram.mWord);
|
||||
builder.append(",");
|
||||
builder.append(formatProbabilityInfo(bigram.mProbabilityInfo));
|
||||
builder.append("\n");
|
||||
}
|
||||
}
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
public static String formatProbabilityInfo(final ProbabilityInfo probabilityInfo) {
|
||||
final StringBuilder builder = new StringBuilder();
|
||||
builder.append(PROBABILITY_TAG + "=" + probabilityInfo.mProbability);
|
||||
if (probabilityInfo.hasHistoricalInfo()) {
|
||||
builder.append(",");
|
||||
builder.append(HISTORICAL_INFO_TAG + "=");
|
||||
builder.append(probabilityInfo.mTimestamp);
|
||||
builder.append(HISTORICAL_INFO_SEPARATOR);
|
||||
builder.append(probabilityInfo.mLevel);
|
||||
builder.append(HISTORICAL_INFO_SEPARATOR);
|
||||
builder.append(probabilityInfo.mCount);
|
||||
}
|
||||
return builder.toString();
|
||||
}
|
||||
}
|
|
@ -43,6 +43,7 @@ USED_TARGETTED_UTILS := \
|
|||
$(LATINIME_CORE_SOURCE_DIRECTORY)/settings/NativeSuggestOptions.java \
|
||||
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ByteArrayDictBuffer.java \
|
||||
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CollectionUtils.java \
|
||||
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CombinedFormatUtils.java \
|
||||
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CoordinateUtils.java \
|
||||
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/FileUtils.java \
|
||||
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/JniUtils.java \
|
||||
|
|
|
@ -22,6 +22,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions
|
|||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||
import com.android.inputmethod.latin.makedict.WordProperty;
|
||||
import com.android.inputmethod.latin.utils.CombinedFormatUtils;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
|
@ -41,16 +42,10 @@ import java.util.TreeSet;
|
|||
* All functions in this class are static.
|
||||
*/
|
||||
public class CombinedInputOutput {
|
||||
|
||||
private static final String DICTIONARY_TAG = "dictionary";
|
||||
private static final String BIGRAM_TAG = "bigram";
|
||||
private static final String SHORTCUT_TAG = "shortcut";
|
||||
private static final String PROBABILITY_TAG = "f";
|
||||
private static final String WORD_TAG = "word";
|
||||
private static final String NOT_A_WORD_TAG = "not_a_word";
|
||||
private static final String WHITELIST_TAG = "whitelist";
|
||||
private static final String OPTIONS_TAG = "options";
|
||||
private static final String COMMENT_LINE_STARTER = "#";
|
||||
private static final int HISTORICAL_INFO_ELEMENT_COUNT = 3;
|
||||
|
||||
/**
|
||||
* Basic test to find out whether the file is in the combined format or not.
|
||||
|
@ -68,7 +63,8 @@ public class CombinedInputOutput {
|
|||
while (firstLine.startsWith(COMMENT_LINE_STARTER)) {
|
||||
firstLine = reader.readLine();
|
||||
}
|
||||
return firstLine.matches("^" + DICTIONARY_TAG + "=[^:]+(:[^=]+=[^:]+)*");
|
||||
return firstLine.matches(
|
||||
"^" + CombinedFormatUtils.DICTIONARY_TAG + "=[^:]+(:[^=]+=[^:]+)*");
|
||||
} catch (FileNotFoundException e) {
|
||||
return false;
|
||||
} catch (IOException e) {
|
||||
|
@ -123,7 +119,7 @@ public class CombinedInputOutput {
|
|||
while (null != (line = reader.readLine())) {
|
||||
if (line.startsWith(COMMENT_LINE_STARTER)) continue;
|
||||
final String args[] = line.trim().split(",");
|
||||
if (args[0].matches(WORD_TAG + "=.*")) {
|
||||
if (args[0].matches(CombinedFormatUtils.WORD_TAG + "=.*")) {
|
||||
if (null != word) {
|
||||
dict.add(word, freq, shortcuts.isEmpty() ? null : shortcuts, isNotAWord);
|
||||
for (WeightedString s : bigrams) {
|
||||
|
@ -136,23 +132,30 @@ public class CombinedInputOutput {
|
|||
for (String param : args) {
|
||||
final String params[] = param.split("=", 2);
|
||||
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
|
||||
if (WORD_TAG.equals(params[0])) {
|
||||
if (CombinedFormatUtils.WORD_TAG.equals(params[0])) {
|
||||
word = params[1];
|
||||
} else if (PROBABILITY_TAG.equals(params[0])) {
|
||||
} else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
|
||||
freq = Integer.parseInt(params[1]);
|
||||
} else if (NOT_A_WORD_TAG.equals(params[0])) {
|
||||
} else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) {
|
||||
final String[] historicalInfoParams =
|
||||
params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR);
|
||||
if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) {
|
||||
throw new RuntimeException("Wrong format (historical info) : " + line);
|
||||
}
|
||||
// TODO: Use parsed historical info.
|
||||
} else if (CombinedFormatUtils.NOT_A_WORD_TAG.equals(params[0])) {
|
||||
isNotAWord = "true".equals(params[1]);
|
||||
}
|
||||
}
|
||||
} else if (args[0].matches(SHORTCUT_TAG + "=.*")) {
|
||||
} else if (args[0].matches(CombinedFormatUtils.SHORTCUT_TAG + "=.*")) {
|
||||
String shortcut = null;
|
||||
int shortcutFreq = 0;
|
||||
for (String param : args) {
|
||||
final String params[] = param.split("=", 2);
|
||||
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
|
||||
if (SHORTCUT_TAG.equals(params[0])) {
|
||||
if (CombinedFormatUtils.SHORTCUT_TAG.equals(params[0])) {
|
||||
shortcut = params[1];
|
||||
} else if (PROBABILITY_TAG.equals(params[0])) {
|
||||
} else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
|
||||
shortcutFreq = WHITELIST_TAG.equals(params[1])
|
||||
? FormatSpec.SHORTCUT_WHITELIST_FREQUENCY
|
||||
: Integer.parseInt(params[1]);
|
||||
|
@ -163,16 +166,23 @@ public class CombinedInputOutput {
|
|||
} else {
|
||||
throw new RuntimeException("Wrong format : " + line);
|
||||
}
|
||||
} else if (args[0].matches(BIGRAM_TAG + "=.*")) {
|
||||
} else if (args[0].matches(CombinedFormatUtils.BIGRAM_TAG + "=.*")) {
|
||||
String secondWordOfBigram = null;
|
||||
int bigramFreq = 0;
|
||||
for (String param : args) {
|
||||
final String params[] = param.split("=", 2);
|
||||
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
|
||||
if (BIGRAM_TAG.equals(params[0])) {
|
||||
if (CombinedFormatUtils.BIGRAM_TAG.equals(params[0])) {
|
||||
secondWordOfBigram = params[1];
|
||||
} else if (PROBABILITY_TAG.equals(params[0])) {
|
||||
} else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
|
||||
bigramFreq = Integer.parseInt(params[1]);
|
||||
} else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) {
|
||||
final String[] historicalInfoParams =
|
||||
params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR);
|
||||
if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) {
|
||||
throw new RuntimeException("Wrong format (historical info) : " + line);
|
||||
}
|
||||
// TODO: Use parsed historical info.
|
||||
}
|
||||
}
|
||||
if (null != secondWordOfBigram) {
|
||||
|
@ -198,40 +208,16 @@ public class CombinedInputOutput {
|
|||
* @param destination a destination stream to write to.
|
||||
* @param dict the dictionary to write.
|
||||
*/
|
||||
public static void writeDictionaryCombined(Writer destination, FusionDictionary dict)
|
||||
throws IOException {
|
||||
public static void writeDictionaryCombined(
|
||||
final Writer destination, final FusionDictionary dict) throws IOException {
|
||||
final TreeSet<WordProperty> wordPropertiesInDict = new TreeSet<WordProperty>();
|
||||
for (WordProperty wordProperty: dict) {
|
||||
for (final WordProperty wordProperty : dict) {
|
||||
// This is for ordering by frequency, then by asciibetic order
|
||||
wordPropertiesInDict.add(wordProperty);
|
||||
}
|
||||
final HashMap<String, String> options = dict.mOptions.mAttributes;
|
||||
destination.write(DICTIONARY_TAG + "=");
|
||||
if (options.containsKey(DICTIONARY_TAG)) {
|
||||
destination.write(options.get(DICTIONARY_TAG));
|
||||
options.remove(DICTIONARY_TAG);
|
||||
}
|
||||
for (final String key : dict.mOptions.mAttributes.keySet()) {
|
||||
final String value = dict.mOptions.mAttributes.get(key);
|
||||
destination.write("," + key + "=" + value);
|
||||
}
|
||||
destination.write("\n");
|
||||
for (WordProperty wordProperty : wordPropertiesInDict) {
|
||||
destination.write(" " + WORD_TAG + "=" + wordProperty.mWord + ","
|
||||
+ PROBABILITY_TAG + "=" + wordProperty.getProbability()
|
||||
+ (wordProperty.mIsNotAWord ? "," + NOT_A_WORD_TAG + "=true\n" : "\n"));
|
||||
if (null != wordProperty.mShortcutTargets) {
|
||||
for (WeightedString target : wordProperty.mShortcutTargets) {
|
||||
destination.write(" " + SHORTCUT_TAG + "=" + target.mWord + ","
|
||||
+ PROBABILITY_TAG + "=" + target.getProbability() + "\n");
|
||||
}
|
||||
}
|
||||
if (null != wordProperty.mBigrams) {
|
||||
for (WeightedString bigram : wordProperty.mBigrams) {
|
||||
destination.write(" " + BIGRAM_TAG + "=" + bigram.mWord + ","
|
||||
+ PROBABILITY_TAG + "=" + bigram.getProbability() + "\n");
|
||||
}
|
||||
}
|
||||
destination.write(CombinedFormatUtils.formatAttributeMap(dict.mOptions.mAttributes));
|
||||
for (final WordProperty wordProperty : wordPropertiesInDict) {
|
||||
destination.write(CombinedFormatUtils.formatWordProperty(wordProperty));
|
||||
}
|
||||
destination.close();
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue