Merge "Using "blacklist" flag as "possibly offensive""
This commit is contained in:
commit
aa20342d7e
14 changed files with 72 additions and 53 deletions
|
@ -171,6 +171,7 @@ public final class FormatSpec {
|
||||||
// ExpandableDictionary.matchesExpectedBinaryDictFormatVersionForThisType().
|
// ExpandableDictionary.matchesExpectedBinaryDictFormatVersionForThisType().
|
||||||
public static final int VERSION2 = 2;
|
public static final int VERSION2 = 2;
|
||||||
public static final int VERSION201 = 201;
|
public static final int VERSION201 = 201;
|
||||||
|
public static final int VERSION202 = 202;
|
||||||
public static final int MINIMUM_SUPPORTED_VERSION_OF_CODE_POINT_TABLE = VERSION201;
|
public static final int MINIMUM_SUPPORTED_VERSION_OF_CODE_POINT_TABLE = VERSION201;
|
||||||
// Dictionary version used for testing.
|
// Dictionary version used for testing.
|
||||||
public static final int VERSION4_ONLY_FOR_TESTING = 399;
|
public static final int VERSION4_ONLY_FOR_TESTING = 399;
|
||||||
|
|
|
@ -36,7 +36,8 @@ public class CombinedFormatUtils {
|
||||||
public static final String WORD_TAG = "word";
|
public static final String WORD_TAG = "word";
|
||||||
public static final String BEGINNING_OF_SENTENCE_TAG = "beginning_of_sentence";
|
public static final String BEGINNING_OF_SENTENCE_TAG = "beginning_of_sentence";
|
||||||
public static final String NOT_A_WORD_TAG = "not_a_word";
|
public static final String NOT_A_WORD_TAG = "not_a_word";
|
||||||
public static final String BLACKLISTED_TAG = "blacklisted";
|
public static final String POSSIBLY_OFFENSIVE_TAG = "possibly_offensive";
|
||||||
|
public static final String TRUE_VALUE = "true";
|
||||||
|
|
||||||
public static String formatAttributeMap(final HashMap<String, String> attributeMap) {
|
public static String formatAttributeMap(final HashMap<String, String> attributeMap) {
|
||||||
final StringBuilder builder = new StringBuilder();
|
final StringBuilder builder = new StringBuilder();
|
||||||
|
@ -61,13 +62,13 @@ public class CombinedFormatUtils {
|
||||||
builder.append(",");
|
builder.append(",");
|
||||||
builder.append(formatProbabilityInfo(wordProperty.mProbabilityInfo));
|
builder.append(formatProbabilityInfo(wordProperty.mProbabilityInfo));
|
||||||
if (wordProperty.mIsBeginningOfSentence) {
|
if (wordProperty.mIsBeginningOfSentence) {
|
||||||
builder.append("," + BEGINNING_OF_SENTENCE_TAG + "=true");
|
builder.append("," + BEGINNING_OF_SENTENCE_TAG + "=" + TRUE_VALUE);
|
||||||
}
|
}
|
||||||
if (wordProperty.mIsNotAWord) {
|
if (wordProperty.mIsNotAWord) {
|
||||||
builder.append("," + NOT_A_WORD_TAG + "=true");
|
builder.append("," + NOT_A_WORD_TAG + "=" + TRUE_VALUE);
|
||||||
}
|
}
|
||||||
if (wordProperty.mIsPossiblyOffensive) {
|
if (wordProperty.mIsPossiblyOffensive) {
|
||||||
builder.append("," + BLACKLISTED_TAG + "=true");
|
builder.append("," + POSSIBLY_OFFENSIVE_TAG + "=" + TRUE_VALUE);
|
||||||
}
|
}
|
||||||
builder.append("\n");
|
builder.append("\n");
|
||||||
if (wordProperty.mHasShortcuts) {
|
if (wordProperty.mHasShortcuts) {
|
||||||
|
@ -111,4 +112,8 @@ public class CombinedFormatUtils {
|
||||||
}
|
}
|
||||||
return builder.toString();
|
return builder.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static boolean isLiteralTrue(final String value) {
|
||||||
|
return TRUE_VALUE.equalsIgnoreCase(value);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -134,9 +134,11 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
||||||
// same so we use them for both here.
|
// same so we use them for both here.
|
||||||
switch (mDictFormatVersion) {
|
switch (mDictFormatVersion) {
|
||||||
case FormatUtils::VERSION_2:
|
case FormatUtils::VERSION_2:
|
||||||
return FormatUtils::VERSION_2;
|
|
||||||
case FormatUtils::VERSION_201:
|
case FormatUtils::VERSION_201:
|
||||||
return FormatUtils::VERSION_201;
|
AKLOGE("Dictionary versions 2 and 201 are incompatible with this version");
|
||||||
|
return FormatUtils::UNKNOWN_VERSION;
|
||||||
|
case FormatUtils::VERSION_202:
|
||||||
|
return FormatUtils::VERSION_202;
|
||||||
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
|
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
|
||||||
return FormatUtils::VERSION_4_ONLY_FOR_TESTING;
|
return FormatUtils::VERSION_4_ONLY_FOR_TESTING;
|
||||||
case FormatUtils::VERSION_4:
|
case FormatUtils::VERSION_4:
|
||||||
|
|
|
@ -111,7 +111,8 @@ typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap;
|
||||||
switch (version) {
|
switch (version) {
|
||||||
case FormatUtils::VERSION_2:
|
case FormatUtils::VERSION_2:
|
||||||
case FormatUtils::VERSION_201:
|
case FormatUtils::VERSION_201:
|
||||||
// Version 2 or 201 dictionary writing is not supported.
|
case FormatUtils::VERSION_202:
|
||||||
|
// None of the static dictionaries (v2x) support writing
|
||||||
return false;
|
return false;
|
||||||
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
|
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
|
||||||
case FormatUtils::VERSION_4:
|
case FormatUtils::VERSION_4:
|
||||||
|
|
|
@ -140,7 +140,7 @@ const WordAttributes Ver4PatriciaTriePolicy::getWordAttributesInContext(
|
||||||
|
|
||||||
const WordAttributes Ver4PatriciaTriePolicy::getWordAttributes(const int probability,
|
const WordAttributes Ver4PatriciaTriePolicy::getWordAttributes(const int probability,
|
||||||
const PtNodeParams &ptNodeParams) const {
|
const PtNodeParams &ptNodeParams) const {
|
||||||
return WordAttributes(probability, ptNodeParams.isBlacklisted(), ptNodeParams.isNotAWord(),
|
return WordAttributes(probability, false /* isBlacklisted */, ptNodeParams.isNotAWord(),
|
||||||
ptNodeParams.getProbability() == 0);
|
ptNodeParams.getProbability() == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -164,7 +164,7 @@ int Ver4PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordI
|
||||||
}
|
}
|
||||||
const int ptNodePos = getTerminalPtNodePosFromWordId(wordId);
|
const int ptNodePos = getTerminalPtNodePosFromWordId(wordId);
|
||||||
const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
|
const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
|
||||||
if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
|
if (ptNodeParams.isDeleted() || ptNodeParams.isNotAWord()) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
if (prevWordIds.empty()) {
|
if (prevWordIds.empty()) {
|
||||||
|
|
|
@ -115,7 +115,8 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
|
||||||
switch (formatVersion) {
|
switch (formatVersion) {
|
||||||
case FormatUtils::VERSION_2:
|
case FormatUtils::VERSION_2:
|
||||||
case FormatUtils::VERSION_201:
|
case FormatUtils::VERSION_201:
|
||||||
AKLOGE("Given path is a directory but the format is version 2 or 201. path: %s", path);
|
case FormatUtils::VERSION_202:
|
||||||
|
AKLOGE("Given path is a directory but the format is version 2xx. path: %s", path);
|
||||||
break;
|
break;
|
||||||
case FormatUtils::VERSION_4: {
|
case FormatUtils::VERSION_4: {
|
||||||
return newPolicyForV4Dict<backward::v402::Ver4DictConstants,
|
return newPolicyForV4Dict<backward::v402::Ver4DictConstants,
|
||||||
|
@ -177,6 +178,9 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
|
||||||
switch (FormatUtils::detectFormatVersion(mmappedBuffer->getReadOnlyByteArrayView())) {
|
switch (FormatUtils::detectFormatVersion(mmappedBuffer->getReadOnlyByteArrayView())) {
|
||||||
case FormatUtils::VERSION_2:
|
case FormatUtils::VERSION_2:
|
||||||
case FormatUtils::VERSION_201:
|
case FormatUtils::VERSION_201:
|
||||||
|
AKLOGE("Dictionary versions 2 and 201 are incompatible with this version");
|
||||||
|
break;
|
||||||
|
case FormatUtils::VERSION_202:
|
||||||
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
|
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
|
||||||
new PatriciaTriePolicy(std::move(mmappedBuffer)));
|
new PatriciaTriePolicy(std::move(mmappedBuffer)));
|
||||||
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
|
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
|
||||||
|
|
|
@ -144,17 +144,6 @@ class PtNodeParams {
|
||||||
return PatriciaTrieReadingUtils::isTerminal(mFlags);
|
return PatriciaTrieReadingUtils::isTerminal(mFlags);
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE bool isBlacklisted() const {
|
|
||||||
// Note: this method will be removed in the next change.
|
|
||||||
// It is used in getProbabilityOfWord and getWordAttributes for both v402 and v403.
|
|
||||||
// * getProbabilityOfWord will be changed to no longer return NOT_A_PROBABILITY
|
|
||||||
// when isBlacklisted (i.e. to only check if isNotAWord or isDeleted)
|
|
||||||
// * getWordAttributes will be changed to always return blacklisted=false and
|
|
||||||
// isPossiblyOffensive according to the function below (instead of the current
|
|
||||||
// behaviour of checking if the probability is zero)
|
|
||||||
return PatriciaTrieReadingUtils::isPossiblyOffensive(mFlags);
|
|
||||||
}
|
|
||||||
|
|
||||||
AK_FORCE_INLINE bool isPossiblyOffensive() const {
|
AK_FORCE_INLINE bool isPossiblyOffensive() const {
|
||||||
return PatriciaTrieReadingUtils::isPossiblyOffensive(mFlags);
|
return PatriciaTrieReadingUtils::isPossiblyOffensive(mFlags);
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,7 +14,6 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
|
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
@ -317,8 +316,8 @@ const WordAttributes PatriciaTriePolicy::getWordAttributesInContext(
|
||||||
|
|
||||||
const WordAttributes PatriciaTriePolicy::getWordAttributes(const int probability,
|
const WordAttributes PatriciaTriePolicy::getWordAttributes(const int probability,
|
||||||
const PtNodeParams &ptNodeParams) const {
|
const PtNodeParams &ptNodeParams) const {
|
||||||
return WordAttributes(probability, ptNodeParams.isBlacklisted(), ptNodeParams.isNotAWord(),
|
return WordAttributes(probability, false /* isBlacklisted */, ptNodeParams.isNotAWord(),
|
||||||
ptNodeParams.getProbability() == 0);
|
ptNodeParams.isPossiblyOffensive());
|
||||||
}
|
}
|
||||||
|
|
||||||
int PatriciaTriePolicy::getProbability(const int unigramProbability,
|
int PatriciaTriePolicy::getProbability(const int unigramProbability,
|
||||||
|
@ -345,10 +344,9 @@ int PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordIds,
|
||||||
const int ptNodePos = getTerminalPtNodePosFromWordId(wordId);
|
const int ptNodePos = getTerminalPtNodePosFromWordId(wordId);
|
||||||
const PtNodeParams ptNodeParams =
|
const PtNodeParams ptNodeParams =
|
||||||
mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
|
mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
|
||||||
if (ptNodeParams.isNotAWord() || ptNodeParams.isBlacklisted()) {
|
if (ptNodeParams.isNotAWord()) {
|
||||||
// If this is not a word, or if it's a blacklisted entry, it should behave as
|
// If this is not a word, it should behave as having no probability outside of the
|
||||||
// having no probability outside of the suggestion process (where it should be used
|
// suggestion process (where it should be used for shortcuts).
|
||||||
// for shortcuts).
|
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
if (!prevWordIds.empty()) {
|
if (!prevWordIds.empty()) {
|
||||||
|
|
|
@ -28,9 +28,11 @@ const size_t FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
|
||||||
/* static */ FormatUtils::FORMAT_VERSION FormatUtils::getFormatVersion(const int formatVersion) {
|
/* static */ FormatUtils::FORMAT_VERSION FormatUtils::getFormatVersion(const int formatVersion) {
|
||||||
switch (formatVersion) {
|
switch (formatVersion) {
|
||||||
case VERSION_2:
|
case VERSION_2:
|
||||||
return VERSION_2;
|
|
||||||
case VERSION_201:
|
case VERSION_201:
|
||||||
return VERSION_201;
|
AKLOGE("Dictionary versions 2 and 201 are incompatible with this version");
|
||||||
|
return UNKNOWN_VERSION;
|
||||||
|
case VERSION_202:
|
||||||
|
return VERSION_202;
|
||||||
case VERSION_4_ONLY_FOR_TESTING:
|
case VERSION_4_ONLY_FOR_TESTING:
|
||||||
return VERSION_4_ONLY_FOR_TESTING;
|
return VERSION_4_ONLY_FOR_TESTING;
|
||||||
case VERSION_4:
|
case VERSION_4:
|
||||||
|
|
|
@ -31,8 +31,12 @@ class FormatUtils {
|
||||||
public:
|
public:
|
||||||
enum FORMAT_VERSION {
|
enum FORMAT_VERSION {
|
||||||
// These MUST have the same values as the relevant constants in FormatSpec.java.
|
// These MUST have the same values as the relevant constants in FormatSpec.java.
|
||||||
|
// TODO: Remove VERSION_2 and VERSION_201 when we:
|
||||||
|
// * Confirm that old versions of LatinIME download old-format dictionaries
|
||||||
|
// * We no longer need the corresponding constants on the Java side for dicttool
|
||||||
VERSION_2 = 2,
|
VERSION_2 = 2,
|
||||||
VERSION_201 = 201,
|
VERSION_201 = 201,
|
||||||
|
VERSION_202 = 202,
|
||||||
VERSION_4_ONLY_FOR_TESTING = 399,
|
VERSION_4_ONLY_FOR_TESTING = 399,
|
||||||
VERSION_4 = 402,
|
VERSION_4 = 402,
|
||||||
VERSION_4_DEV = 403,
|
VERSION_4_DEV = 403,
|
||||||
|
|
|
@ -178,7 +178,8 @@ public class Ver2DictDecoder extends AbstractDictDecoder {
|
||||||
throw new IOException("Cannot read the dictionary header.");
|
throw new IOException("Cannot read the dictionary header.");
|
||||||
}
|
}
|
||||||
if (header.mFormatOptions.mVersion != FormatSpec.VERSION2 &&
|
if (header.mFormatOptions.mVersion != FormatSpec.VERSION2 &&
|
||||||
header.mFormatOptions.mVersion != FormatSpec.VERSION201) {
|
header.mFormatOptions.mVersion != FormatSpec.VERSION201 &&
|
||||||
|
header.mFormatOptions.mVersion != FormatSpec.VERSION202) {
|
||||||
throw new UnsupportedFormatException("File header has a wrong version : "
|
throw new UnsupportedFormatException("File header has a wrong version : "
|
||||||
+ header.mFormatOptions.mVersion);
|
+ header.mFormatOptions.mVersion);
|
||||||
}
|
}
|
||||||
|
|
|
@ -124,7 +124,8 @@ public class Ver2DictEncoder implements DictEncoder {
|
||||||
@Override
|
@Override
|
||||||
public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions)
|
public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions)
|
||||||
throws IOException, UnsupportedFormatException {
|
throws IOException, UnsupportedFormatException {
|
||||||
if (formatOptions.mVersion > FormatSpec.VERSION201) {
|
// We no longer support anything but the latest version of v2.
|
||||||
|
if (formatOptions.mVersion != FormatSpec.VERSION202) {
|
||||||
throw new UnsupportedFormatException(
|
throw new UnsupportedFormatException(
|
||||||
"The given format options has wrong version number : "
|
"The given format options has wrong version number : "
|
||||||
+ formatOptions.mVersion);
|
+ formatOptions.mVersion);
|
||||||
|
|
|
@ -98,6 +98,7 @@ public class CombinedInputOutput {
|
||||||
String word = null;
|
String word = null;
|
||||||
ProbabilityInfo probabilityInfo = new ProbabilityInfo(0);
|
ProbabilityInfo probabilityInfo = new ProbabilityInfo(0);
|
||||||
boolean isNotAWord = false;
|
boolean isNotAWord = false;
|
||||||
|
boolean isPossiblyOffensive = false;
|
||||||
ArrayList<WeightedString> bigrams = new ArrayList<>();
|
ArrayList<WeightedString> bigrams = new ArrayList<>();
|
||||||
ArrayList<WeightedString> shortcuts = new ArrayList<>();
|
ArrayList<WeightedString> shortcuts = new ArrayList<>();
|
||||||
while (null != (line = reader.readLine())) {
|
while (null != (line = reader.readLine())) {
|
||||||
|
@ -106,7 +107,7 @@ public class CombinedInputOutput {
|
||||||
if (args[0].matches(CombinedFormatUtils.WORD_TAG + "=.*")) {
|
if (args[0].matches(CombinedFormatUtils.WORD_TAG + "=.*")) {
|
||||||
if (null != word) {
|
if (null != word) {
|
||||||
dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts,
|
dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts,
|
||||||
isNotAWord, false /* isPossiblyOffensive */);
|
isNotAWord, isPossiblyOffensive);
|
||||||
for (WeightedString s : bigrams) {
|
for (WeightedString s : bigrams) {
|
||||||
dict.setBigram(word, s.mWord, s.mProbabilityInfo);
|
dict.setBigram(word, s.mWord, s.mProbabilityInfo);
|
||||||
}
|
}
|
||||||
|
@ -114,27 +115,37 @@ public class CombinedInputOutput {
|
||||||
if (!shortcuts.isEmpty()) shortcuts = new ArrayList<>();
|
if (!shortcuts.isEmpty()) shortcuts = new ArrayList<>();
|
||||||
if (!bigrams.isEmpty()) bigrams = new ArrayList<>();
|
if (!bigrams.isEmpty()) bigrams = new ArrayList<>();
|
||||||
isNotAWord = false;
|
isNotAWord = false;
|
||||||
|
isPossiblyOffensive = false;
|
||||||
for (String param : args) {
|
for (String param : args) {
|
||||||
final String params[] = param.split("=", 2);
|
final String params[] = param.split("=", 2);
|
||||||
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
|
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
|
||||||
if (CombinedFormatUtils.WORD_TAG.equals(params[0])) {
|
switch (params[0]) {
|
||||||
word = params[1];
|
case CombinedFormatUtils.WORD_TAG:
|
||||||
} else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
|
word = params[1];
|
||||||
probabilityInfo = new ProbabilityInfo(Integer.parseInt(params[1]),
|
break;
|
||||||
probabilityInfo.mTimestamp, probabilityInfo.mLevel,
|
case CombinedFormatUtils.PROBABILITY_TAG:
|
||||||
probabilityInfo.mCount);
|
probabilityInfo = new ProbabilityInfo(Integer.parseInt(params[1]),
|
||||||
} else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) {
|
probabilityInfo.mTimestamp, probabilityInfo.mLevel,
|
||||||
final String[] historicalInfoParams =
|
probabilityInfo.mCount);
|
||||||
params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR);
|
break;
|
||||||
if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) {
|
case CombinedFormatUtils.HISTORICAL_INFO_TAG:
|
||||||
throw new RuntimeException("Wrong format (historical info) : " + line);
|
final String[] historicalInfoParams = params[1].split(
|
||||||
}
|
CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR);
|
||||||
probabilityInfo = new ProbabilityInfo(probabilityInfo.mProbability,
|
if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) {
|
||||||
Integer.parseInt(historicalInfoParams[0]),
|
throw new RuntimeException("Wrong format (historical info) : "
|
||||||
Integer.parseInt(historicalInfoParams[1]),
|
+ line);
|
||||||
Integer.parseInt(historicalInfoParams[2]));
|
}
|
||||||
} else if (CombinedFormatUtils.NOT_A_WORD_TAG.equals(params[0])) {
|
probabilityInfo = new ProbabilityInfo(probabilityInfo.mProbability,
|
||||||
isNotAWord = "true".equals(params[1]);
|
Integer.parseInt(historicalInfoParams[0]),
|
||||||
|
Integer.parseInt(historicalInfoParams[1]),
|
||||||
|
Integer.parseInt(historicalInfoParams[2]));
|
||||||
|
break;
|
||||||
|
case CombinedFormatUtils.NOT_A_WORD_TAG:
|
||||||
|
isNotAWord = CombinedFormatUtils.isLiteralTrue(params[1]);
|
||||||
|
break;
|
||||||
|
case CombinedFormatUtils.POSSIBLY_OFFENSIVE_TAG:
|
||||||
|
isPossiblyOffensive = CombinedFormatUtils.isLiteralTrue(params[1]);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (args[0].matches(CombinedFormatUtils.SHORTCUT_TAG + "=.*")) {
|
} else if (args[0].matches(CombinedFormatUtils.SHORTCUT_TAG + "=.*")) {
|
||||||
|
@ -190,7 +201,7 @@ public class CombinedInputOutput {
|
||||||
}
|
}
|
||||||
if (null != word) {
|
if (null != word) {
|
||||||
dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts, isNotAWord,
|
dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts, isNotAWord,
|
||||||
false /* isPossiblyOffensive */);
|
isPossiblyOffensive);
|
||||||
for (WeightedString s : bigrams) {
|
for (WeightedString s : bigrams) {
|
||||||
dict.setBigram(word, s.mWord, s.mProbabilityInfo);
|
dict.setBigram(word, s.mWord, s.mProbabilityInfo);
|
||||||
}
|
}
|
||||||
|
|
|
@ -120,7 +120,7 @@ public class DictionaryMaker {
|
||||||
String inputCombined = null;
|
String inputCombined = null;
|
||||||
String outputBinary = null;
|
String outputBinary = null;
|
||||||
String outputCombined = null;
|
String outputCombined = null;
|
||||||
int outputBinaryFormatVersion = FormatSpec.VERSION201; // the default version is 201.
|
int outputBinaryFormatVersion = FormatSpec.VERSION202; // the default version is 202.
|
||||||
// Don't use code point table by default.
|
// Don't use code point table by default.
|
||||||
int codePointTableMode = Ver2DictEncoder.CODE_POINT_TABLE_OFF;
|
int codePointTableMode = Ver2DictEncoder.CODE_POINT_TABLE_OFF;
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue