Merge "Renaming "blacklist" flag to "possibly offensive""

This commit is contained in:
Adrian Velicu 2014-10-21 07:39:18 +00:00 committed by Android (Google) Code Review
commit c51b9b5b3f
40 changed files with 246 additions and 185 deletions

View file

@ -69,7 +69,7 @@ public final class BinaryDictionary extends Dictionary {
// Format to get unigram flags from native side via getWordPropertyNative().
private static final int FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT = 5;
private static final int FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX = 0;
private static final int FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX = 1;
private static final int FORMAT_WORD_PROPERTY_IS_POSSIBLY_OFFENSIVE_INDEX = 1;
private static final int FORMAT_WORD_PROPERTY_HAS_NGRAMS_INDEX = 2;
private static final int FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX = 3;
private static final int FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX = 4;
@ -195,7 +195,7 @@ public final class BinaryDictionary extends Dictionary {
float[] inOutWeightOfLangModelVsSpatialModel);
private static native boolean addUnigramEntryNative(long dict, int[] word, int probability,
int[] shortcutTarget, int shortcutProbability, boolean isBeginningOfSentence,
boolean isNotAWord, boolean isBlacklisted, int timestamp);
boolean isNotAWord, boolean isPossiblyOffensive, int timestamp);
private static native boolean removeUnigramEntryNative(long dict, int[] word);
private static native boolean addNgramEntryNative(long dict,
int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray,
@ -402,7 +402,7 @@ public final class BinaryDictionary extends Dictionary {
outNgramProbabilityInfo, outShortcutTargets, outShortcutProbabilities);
return new WordProperty(codePoints,
outFlags[FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX],
outFlags[FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX],
outFlags[FORMAT_WORD_PROPERTY_IS_POSSIBLY_OFFENSIVE_INDEX],
outFlags[FORMAT_WORD_PROPERTY_HAS_NGRAMS_INDEX],
outFlags[FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX],
outFlags[FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX], outProbabilityInfo,
@ -439,7 +439,7 @@ public final class BinaryDictionary extends Dictionary {
public boolean addUnigramEntry(final String word, final int probability,
final String shortcutTarget, final int shortcutProbability,
final boolean isBeginningOfSentence, final boolean isNotAWord,
final boolean isBlacklisted, final int timestamp) {
final boolean isPossiblyOffensive, final int timestamp) {
if (word == null || (word.isEmpty() && !isBeginningOfSentence)) {
return false;
}
@ -447,7 +447,8 @@ public final class BinaryDictionary extends Dictionary {
final int[] shortcutTargetCodePoints = (shortcutTarget != null) ?
StringUtils.toCodePointArray(shortcutTarget) : null;
if (!addUnigramEntryNative(mNativeDict, codePoints, probability, shortcutTargetCodePoints,
shortcutProbability, isBeginningOfSentence, isNotAWord, isBlacklisted, timestamp)) {
shortcutProbability, isBeginningOfSentence, isNotAWord, isPossiblyOffensive,
timestamp)) {
return false;
}
mHasUpdated = true;

View file

@ -137,7 +137,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
}
runGCIfRequiredLocked(true /* mindsBlockByGC */);
addUnigramLocked(word, FREQUENCY_FOR_CONTACTS, null /* shortcut */,
0 /* shortcutFreq */, false /* isNotAWord */, false /* isBlacklisted */,
0 /* shortcutFreq */, false /* isNotAWord */, false /* isPossiblyOffensive */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
}
}
@ -238,7 +238,8 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
runGCIfRequiredLocked(true /* mindsBlockByGC */);
addUnigramLocked(word, FREQUENCY_FOR_CONTACTS,
null /* shortcut */, 0 /* shortcutFreq */, false /* isNotAWord */,
false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
false /* isPossiblyOffensive */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
if (!ngramContext.isValid() && mUseFirstLastBigrams) {
runGCIfRequiredLocked(true /* mindsBlockByGC */);
addNgramEntryLocked(ngramContext, word, FREQUENCY_FOR_CONTACTS_BIGRAM,

View file

@ -809,7 +809,7 @@ public class DictionaryFacilitator {
contextualDict.addUnigramEntryWithCheckingDistracter(
subPhraseStr, probability, null /* shortcutTarget */,
Dictionary.NOT_A_PROBABILITY /* shortcutFreq */,
false /* isNotAWord */, false /* isBlacklisted */,
false /* isNotAWord */, false /* isPossiblyOffensive */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP,
DistracterFilter.EMPTY_DISTRACTER_FILTER);
contextualDict.addNgramEntry(ngramContext, subPhraseStr,
@ -819,7 +819,7 @@ public class DictionaryFacilitator {
contextualDict.addUnigramEntryWithCheckingDistracter(
phrase[i], probability, null /* shortcutTarget */,
Dictionary.NOT_A_PROBABILITY /* shortcutFreq */,
false /* isNotAWord */, false /* isBlacklisted */,
false /* isNotAWord */, false /* isPossiblyOffensive */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP,
DistracterFilter.EMPTY_DISTRACTER_FILTER);
contextualDict.addNgramEntry(ngramContext, phrase[i],

View file

@ -316,22 +316,22 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
*/
public void addUnigramEntryWithCheckingDistracter(final String word, final int frequency,
final String shortcutTarget, final int shortcutFreq, final boolean isNotAWord,
final boolean isBlacklisted, final int timestamp,
final boolean isPossiblyOffensive, final int timestamp,
@Nonnull final DistracterFilter distracterFilter) {
updateDictionaryWithWriteLockIfWordIsNotADistracter(new Runnable() {
@Override
public void run() {
addUnigramLocked(word, frequency, shortcutTarget, shortcutFreq,
isNotAWord, isBlacklisted, timestamp);
isNotAWord, isPossiblyOffensive, timestamp);
}
}, word, distracterFilter);
}
protected void addUnigramLocked(final String word, final int frequency,
final String shortcutTarget, final int shortcutFreq, final boolean isNotAWord,
final boolean isBlacklisted, final int timestamp) {
final boolean isPossiblyOffensive, final int timestamp) {
if (!mBinaryDictionary.addUnigramEntry(word, frequency, shortcutTarget, shortcutFreq,
false /* isBeginningOfSentence */, isNotAWord, isBlacklisted, timestamp)) {
false /* isBeginningOfSentence */, isNotAWord, isPossiblyOffensive, timestamp)) {
Log.e(TAG, "Cannot add unigram entry. word: " + word);
}
}

View file

@ -257,12 +257,14 @@ public class UserBinaryDictionary extends ExpandableBinaryDictionary {
runGCIfRequiredLocked(true /* mindsBlockByGC */);
addUnigramLocked(word, adjustedFrequency, null /* shortcutTarget */,
0 /* shortcutFreq */, false /* isNotAWord */,
false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
false /* isPossiblyOffensive */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
if (null != shortcut && shortcut.length() <= MAX_WORD_LENGTH) {
runGCIfRequiredLocked(true /* mindsBlockByGC */);
addUnigramLocked(shortcut, adjustedFrequency, word,
USER_DICT_SHORTCUT_FREQUENCY, true /* isNotAWord */,
false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
false /* isPossiblyOffensive */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
}
}
cursor.moveToNext();

View file

@ -93,7 +93,7 @@ public final class FormatSpec {
* s | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS
* | has bigrams ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_BIGRAMS
* | is not a word ? 1 bit, 1 = yes, 0 = no : FLAG_IS_NOT_A_WORD
* | is blacklisted ? 1 bit, 1 = yes, 0 = no : FLAG_IS_BLACKLISTED
* | is possibly offensive ? 1 bit, 1 = yes, 0 = no : FLAG_IS_POSSIBLY_OFFENSIVE
*
* c | IF FLAG_HAS_MULTIPLE_CHARS
* h | char, char, char, char n * (1 or 3 bytes) : use PtNodeInfo for i/o helpers
@ -197,7 +197,7 @@ public final class FormatSpec {
static final int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
static final int FLAG_HAS_BIGRAMS = 0x04;
static final int FLAG_IS_NOT_A_WORD = 0x02;
static final int FLAG_IS_BLACKLISTED = 0x01;
static final int FLAG_IS_POSSIBLY_OFFENSIVE = 0x01;
static final int FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT = 0x80;
static final int FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE = 0x40;

View file

@ -41,7 +41,7 @@ public final class WordProperty implements Comparable<WordProperty> {
// TODO: Support mIsBeginningOfSentence.
public final boolean mIsBeginningOfSentence;
public final boolean mIsNotAWord;
public final boolean mIsBlacklistEntry;
public final boolean mIsPossiblyOffensive;
public final boolean mHasShortcuts;
public final boolean mHasNgrams;
@ -52,7 +52,7 @@ public final class WordProperty implements Comparable<WordProperty> {
public WordProperty(final String word, final ProbabilityInfo probabilityInfo,
final ArrayList<WeightedString> shortcutTargets,
@Nullable final ArrayList<WeightedString> bigrams,
final boolean isNotAWord, final boolean isBlacklistEntry) {
final boolean isNotAWord, final boolean isPossiblyOffensive) {
mWord = word;
mProbabilityInfo = probabilityInfo;
mShortcutTargets = shortcutTargets;
@ -69,7 +69,7 @@ public final class WordProperty implements Comparable<WordProperty> {
}
mIsBeginningOfSentence = false;
mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklistEntry;
mIsPossiblyOffensive = isPossiblyOffensive;
mHasNgrams = bigrams != null && !bigrams.isEmpty();
mHasShortcuts = shortcutTargets != null && !shortcutTargets.isEmpty();
}
@ -85,7 +85,7 @@ public final class WordProperty implements Comparable<WordProperty> {
// Construct word property using information from native code.
// This represents invalid word when the probability is BinaryDictionary.NOT_A_PROBABILITY.
public WordProperty(final int[] codePoints, final boolean isNotAWord,
final boolean isBlacklisted, final boolean hasBigram, final boolean hasShortcuts,
final boolean isPossiblyOffensive, final boolean hasBigram, final boolean hasShortcuts,
final boolean isBeginningOfSentence, final int[] probabilityInfo,
final ArrayList<int[][]> ngramPrevWordsArray,
final ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray,
@ -98,7 +98,7 @@ public final class WordProperty implements Comparable<WordProperty> {
final ArrayList<NgramProperty> ngrams = new ArrayList<>();
mIsBeginningOfSentence = isBeginningOfSentence;
mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklisted;
mIsPossiblyOffensive = isPossiblyOffensive;
mHasShortcuts = hasShortcuts;
mHasNgrams = hasBigram;
@ -150,7 +150,7 @@ public final class WordProperty implements Comparable<WordProperty> {
word.mShortcutTargets,
word.mNgrams,
word.mIsNotAWord,
word.mIsBlacklistEntry
word.mIsPossiblyOffensive
});
}
@ -180,7 +180,7 @@ public final class WordProperty implements Comparable<WordProperty> {
WordProperty w = (WordProperty)o;
return mProbabilityInfo.equals(w.mProbabilityInfo) && mWord.equals(w.mWord)
&& mShortcutTargets.equals(w.mShortcutTargets) && equals(mNgrams, w.mNgrams)
&& mIsNotAWord == w.mIsNotAWord && mIsBlacklistEntry == w.mIsBlacklistEntry
&& mIsNotAWord == w.mIsNotAWord && mIsPossiblyOffensive == w.mIsPossiblyOffensive
&& mHasNgrams == w.mHasNgrams && mHasShortcuts && w.mHasNgrams;
}

View file

@ -63,7 +63,7 @@ public class CombinedFormatUtils {
if (wordProperty.mIsNotAWord) {
builder.append("," + NOT_A_WORD_TAG + "=true");
}
if (wordProperty.mIsBlacklistEntry) {
if (wordProperty.mIsPossiblyOffensive) {
builder.append("," + BLACKLISTED_TAG + "=true");
}
builder.append("\n");

View file

@ -54,7 +54,7 @@ public final class LanguageModelParam {
public final int mBigramProbability;
public final int mShortcutProbability;
public final boolean mIsNotAWord;
public final boolean mIsBlacklisted;
public final boolean mIsPossiblyOffensive;
// Time stamp in seconds.
public final int mTimestamp;
@ -78,7 +78,7 @@ public final class LanguageModelParam {
mBigramProbability = bigramProbability;
mShortcutProbability = Dictionary.NOT_A_PROBABILITY;
mIsNotAWord = false;
mIsBlacklisted = false;
mIsPossiblyOffensive = false;
mTimestamp = timestamp;
}

View file

@ -358,7 +358,7 @@ static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz, jlong dict,
jintArray word, jint probability, jintArray shortcutTarget, jint shortcutProbability,
jboolean isBeginningOfSentence, jboolean isNotAWord, jboolean isBlacklisted,
jboolean isBeginningOfSentence, jboolean isNotAWord, jboolean isPossiblyOffensive,
jint timestamp) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
@ -377,8 +377,8 @@ static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz,
}
// Use 1 for count to indicate the word has inputted.
const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
isBlacklisted, probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */),
std::move(shortcuts));
isPossiblyOffensive, probability, HistoricalInfo(timestamp, 0 /* level */,
1 /* count */), std::move(shortcuts));
return dictionary->addUnigramEntry(CodePointArrayView(codePoints, codePointCount),
&unigramProperty);
}
@ -480,8 +480,8 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
env->GetFieldID(languageModelParamClass, "mShortcutProbability", "I");
jfieldID isNotAWordFieldId =
env->GetFieldID(languageModelParamClass, "mIsNotAWord", "Z");
jfieldID isBlacklistedFieldId =
env->GetFieldID(languageModelParamClass, "mIsBlacklisted", "Z");
jfieldID isPossiblyOffensiveFieldId =
env->GetFieldID(languageModelParamClass, "mIsPossiblyOffensive", "Z");
env->DeleteLocalRef(languageModelParamClass);
for (int i = startIndex; i < languageModelParamCount; ++i) {
@ -504,7 +504,8 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
jint unigramProbability = env->GetIntField(languageModelParam, unigramProbabilityFieldId);
jint timestamp = env->GetIntField(languageModelParam, timestampFieldId);
jboolean isNotAWord = env->GetBooleanField(languageModelParam, isNotAWordFieldId);
jboolean isBlacklisted = env->GetBooleanField(languageModelParam, isBlacklistedFieldId);
jboolean isPossiblyOffensive = env->GetBooleanField(languageModelParam,
isPossiblyOffensiveFieldId);
jintArray shortcutTarget = static_cast<jintArray>(
env->GetObjectField(languageModelParam, shortcutTargetFieldId));
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
@ -519,7 +520,7 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
}
// Use 1 for count to indicate the word has inputted.
const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
isBlacklisted, unigramProbability,
isPossiblyOffensive, unigramProbability,
HistoricalInfo(timestamp, 0 /* level */, 1 /* count */), std::move(shortcuts));
dictionary->addUnigramEntry(CodePointArrayView(word1CodePoints, word1Length),
&unigramProperty);

View file

@ -49,21 +49,44 @@ class UnigramProperty {
};
UnigramProperty()
: mRepresentsBeginningOfSentence(false), mIsNotAWord(false), mIsBlacklisted(false),
mProbability(NOT_A_PROBABILITY), mHistoricalInfo(), mShortcuts() {}
: mRepresentsBeginningOfSentence(false), mIsNotAWord(false),
mIsBlacklisted(false), mIsPossiblyOffensive(false), mProbability(NOT_A_PROBABILITY),
mHistoricalInfo(), mShortcuts() {}
// In contexts which do not support the Blacklisted flag (v2, v4<403)
UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
const bool isBlacklisted, const int probability, const HistoricalInfo historicalInfo,
const std::vector<ShortcutProperty> &&shortcuts)
const bool isPossiblyOffensive, const int probability,
const HistoricalInfo historicalInfo, const std::vector<ShortcutProperty> &&shortcuts)
: mRepresentsBeginningOfSentence(representsBeginningOfSentence),
mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
mIsNotAWord(isNotAWord), mIsBlacklisted(false),
mIsPossiblyOffensive(isPossiblyOffensive), mProbability(probability),
mHistoricalInfo(historicalInfo), mShortcuts(std::move(shortcuts)) {}
// Without shortcuts.
// Without shortcuts, in contexts which do not support the Blacklisted flag (v2, v4<403)
UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
const bool isBlacklisted, const int probability, const HistoricalInfo historicalInfo)
const bool isPossiblyOffensive, const int probability,
const HistoricalInfo historicalInfo)
: mRepresentsBeginningOfSentence(representsBeginningOfSentence),
mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
mIsNotAWord(isNotAWord), mIsBlacklisted(false),
mIsPossiblyOffensive(isPossiblyOffensive), mProbability(probability),
mHistoricalInfo(historicalInfo), mShortcuts() {}
// In contexts which DO support the Blacklisted flag (v403)
UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
const bool isBlacklisted, const bool isPossiblyOffensive, const int probability,
const HistoricalInfo historicalInfo, const std::vector<ShortcutProperty> &&shortcuts)
: mRepresentsBeginningOfSentence(representsBeginningOfSentence),
mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted),
mIsPossiblyOffensive(isPossiblyOffensive), mProbability(probability),
mHistoricalInfo(historicalInfo), mShortcuts(std::move(shortcuts)) {}
// Without shortcuts, in contexts which DO support the Blacklisted flag (v403)
UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
const bool isBlacklisted, const bool isPossiblyOffensive, const int probability,
const HistoricalInfo historicalInfo)
: mRepresentsBeginningOfSentence(representsBeginningOfSentence),
mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted),
mIsPossiblyOffensive(isPossiblyOffensive), mProbability(probability),
mHistoricalInfo(historicalInfo), mShortcuts() {}
bool representsBeginningOfSentence() const {
@ -74,13 +97,12 @@ class UnigramProperty {
return mIsNotAWord;
}
bool isBlacklisted() const {
return mIsBlacklisted;
bool isPossiblyOffensive() const {
return mIsPossiblyOffensive;
}
bool isPossiblyOffensive() const {
// TODO: Have dedicated flag.
return mProbability == 0;
bool isBlacklisted() const {
return mIsBlacklisted;
}
bool hasShortcuts() const {
@ -106,6 +128,7 @@ class UnigramProperty {
const bool mRepresentsBeginningOfSentence;
const bool mIsNotAWord;
const bool mIsBlacklisted;
const bool mIsPossiblyOffensive;
const int mProbability;
const HistoricalInfo mHistoricalInfo;
const std::vector<ShortcutProperty> mShortcuts;

View file

@ -28,7 +28,7 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */,
MAX_WORD_LENGTH /* maxLength */, mCodePoints.data(), mCodePoints.size(),
false /* needsNullTermination */);
jboolean flags[] = {mUnigramProperty.isNotAWord(), mUnigramProperty.isBlacklisted(),
jboolean flags[] = {mUnigramProperty.isNotAWord(), mUnigramProperty.isPossiblyOffensive(),
!mNgrams.empty(), mUnigramProperty.hasShortcuts(),
mUnigramProperty.representsBeginningOfSentence()};
env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);

View file

@ -43,6 +43,14 @@ class WordAttributes {
return mIsNotAWord;
}
// Whether or not a word is possibly offensive.
// * Static dictionaries <v202, as well as dynamic dictionaries <v403, will set this based on
// whether or not the probability of the word is zero.
// * Static dictionaries >=v203 will set this based on the IS_POSSIBLY_OFFENSIVE PtNode flag.
// * Dynamic dictionaries >=v403 will set this based on the IS_POSSIBLY_OFFENSIVE language model
// flag (the PtNode flag IS_BLACKLISTED is ignored and kept as zero)
//
// See the ::getWordAttributes function for each of these dictionary policies for more details.
bool isPossiblyOffensive() const {
return mIsPossiblyOffensive;
}

View file

@ -245,7 +245,7 @@ bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds
if (!sourcePtNodeParams.hasBigrams()) {
// Update has bigrams flag.
return updatePtNodeFlags(sourcePtNodeParams.getHeadPos(),
sourcePtNodeParams.isBlacklisted(), sourcePtNodeParams.isNotAWord(),
sourcePtNodeParams.isPossiblyOffensive(), sourcePtNodeParams.isNotAWord(),
sourcePtNodeParams.isTerminal(), sourcePtNodeParams.hasShortcutTargets(),
true /* hasBigrams */,
sourcePtNodeParams.getCodePointCount() > 1 /* hasMultipleChars */);
@ -316,7 +316,7 @@ bool Ver4PatriciaTrieNodeWriter::addShortcutTarget(const PtNodeParams *const ptN
if (!ptNodeParams->hasShortcutTargets()) {
// Update has shortcut targets flag.
return updatePtNodeFlags(ptNodeParams->getHeadPos(),
ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(),
ptNodeParams->isPossiblyOffensive(), ptNodeParams->isNotAWord(),
ptNodeParams->isTerminal(), true /* hasShortcutTargets */,
ptNodeParams->hasBigrams(),
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
@ -330,7 +330,7 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeHasBigramsAndShortcutTargetsFlags(
ptNodeParams->getTerminalId()) != NOT_A_DICT_POS;
const bool hasShortcutTargets = mBuffers->getShortcutDictContent()->getShortcutListHeadPos(
ptNodeParams->getTerminalId()) != NOT_A_DICT_POS;
return updatePtNodeFlags(ptNodeParams->getHeadPos(), ptNodeParams->isBlacklisted(),
return updatePtNodeFlags(ptNodeParams->getHeadPos(), ptNodeParams->isPossiblyOffensive(),
ptNodeParams->isNotAWord(), ptNodeParams->isTerminal(), hasShortcutTargets,
hasBigrams, ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
}
@ -386,8 +386,9 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
return false;
}
return updatePtNodeFlags(nodePos, ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(),
isTerminal, ptNodeParams->hasShortcutTargets(), ptNodeParams->hasBigrams(),
return updatePtNodeFlags(nodePos, ptNodeParams->isPossiblyOffensive(),
ptNodeParams->isNotAWord(), isTerminal, ptNodeParams->hasShortcutTargets(),
ptNodeParams->hasBigrams(),
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
}

View file

@ -608,8 +608,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
}
}
const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
*historicalInfo, std::move(shortcuts));
ptNodeParams.isNotAWord(), ptNodeParams.isPossiblyOffensive(),
ptNodeParams.getProbability(), *historicalInfo, std::move(shortcuts));
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
}

View file

@ -146,7 +146,7 @@ bool DynamicPtUpdatingHelper::setPtNodeProbability(const PtNodeParams *const ori
const int movedPos = mBuffer->getTailPosition();
int writingPos = movedPos;
const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
unigramProperty->isNotAWord(), unigramProperty->isPossiblyOffensive(),
true /* isTerminal */, originalPtNodeParams->getParentPos(),
originalPtNodeParams->getCodePointArrayView(), unigramProperty->getProbability()));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
@ -180,8 +180,9 @@ bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
return false;
}
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), true /* isTerminal */,
parentPtNodePos, ptNodeCodePoints, unigramProperty->getProbability()));
unigramProperty->isNotAWord(), unigramProperty->isPossiblyOffensive(),
true /* isTerminal */, parentPtNodePos, ptNodeCodePoints,
unigramProperty->getProbability()));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
unigramProperty, &writingPos)) {
return false;
@ -214,7 +215,7 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
reallocatingPtNodeParams->getCodePointArrayView().limit(overlappingCodePointCount);
if (addsExtraChild) {
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
false /* isNotAWord */, false /* isBlacklisted */, false /* isTerminal */,
false /* isNotAWord */, false /* isPossiblyOffensive */, false /* isTerminal */,
reallocatingPtNodeParams->getParentPos(), firstPtNodeCodePoints,
NOT_A_PROBABILITY));
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
@ -222,7 +223,7 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
}
} else {
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
unigramProperty->isNotAWord(), unigramProperty->isPossiblyOffensive(),
true /* isTerminal */, reallocatingPtNodeParams->getParentPos(),
firstPtNodeCodePoints, unigramProperty->getProbability()));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
@ -240,7 +241,7 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
// Write the 2nd part of the reallocating node.
const int secondPartOfReallocatedPtNodePos = writingPos;
const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams,
reallocatingPtNodeParams->isNotAWord(), reallocatingPtNodeParams->isBlacklisted(),
reallocatingPtNodeParams->isNotAWord(), reallocatingPtNodeParams->isPossiblyOffensive(),
reallocatingPtNodeParams->isTerminal(), firstPartOfReallocatedPtNodePos,
reallocatingPtNodeParams->getCodePointArrayView().skip(overlappingCodePointCount),
reallocatingPtNodeParams->getProbability()));
@ -249,7 +250,7 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
}
if (addsExtraChild) {
const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
unigramProperty->isNotAWord(), unigramProperty->isPossiblyOffensive(),
true /* isTerminal */, firstPartOfReallocatedPtNodePos,
newPtNodeCodePoints.skip(overlappingCodePointCount),
unigramProperty->getProbability()));
@ -276,20 +277,20 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
const PtNodeParams DynamicPtUpdatingHelper::getUpdatedPtNodeParams(
const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
const bool isBlacklisted, const bool isTerminal, const int parentPos,
const bool isPossiblyOffensive, const bool isTerminal, const int parentPos,
const CodePointArrayView codePoints, const int probability) const {
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */,
isPossiblyOffensive, isNotAWord, isTerminal, false /* hasShortcutTargets */,
false /* hasBigrams */, codePoints.size() > 1u /* hasMultipleChars */,
CHILDREN_POSITION_FIELD_SIZE);
return PtNodeParams(originalPtNodeParams, flags, parentPos, codePoints, probability);
}
const PtNodeParams DynamicPtUpdatingHelper::getPtNodeParamsForNewPtNode(const bool isNotAWord,
const bool isBlacklisted, const bool isTerminal, const int parentPos,
const bool isPossiblyOffensive, const bool isTerminal, const int parentPos,
const CodePointArrayView codePoints, const int probability) const {
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */,
isPossiblyOffensive, isNotAWord, isTerminal, false /* hasShortcutTargets */,
false /* hasBigrams */, codePoints.size() > 1u /* hasMultipleChars */,
CHILDREN_POSITION_FIELD_SIZE);
return PtNodeParams(flags, parentPos, codePoints, probability);

View file

@ -85,12 +85,12 @@ class DynamicPtUpdatingHelper {
const CodePointArrayView newPtNodeCodePoints);
const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
const bool isNotAWord, const bool isBlacklisted, const bool isTerminal,
const bool isNotAWord, const bool isPossiblyOffensive, const bool isTerminal,
const int parentPos, const CodePointArrayView codePoints, const int probability) const;
const PtNodeParams getPtNodeParamsForNewPtNode(const bool isNotAWord, const bool isBlacklisted,
const bool isTerminal, const int parentPos, const CodePointArrayView codePoints,
const int probability) const;
const PtNodeParams getPtNodeParamsForNewPtNode(const bool isNotAWord,
const bool isPossiblyOffensive, const bool isTerminal, const int parentPos,
const CodePointArrayView codePoints, const int probability) const;
};
} // namespace latinime
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H */

View file

@ -41,8 +41,8 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_SHORTCUT_TARGETS = 0x08
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_BIGRAMS = 0x04;
// Flag for non-words (typically, shortcut only entries)
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_NOT_A_WORD = 0x02;
// Flag for blacklist
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01;
// Flag for possibly offensive words
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_POSSIBLY_OFFENSIVE = 0x01;
/* static */ int PtReadingUtils::getPtNodeArraySizeAndAdvancePosition(
const uint8_t *const buffer, int *const pos) {

View file

@ -54,8 +54,8 @@ class PatriciaTrieReadingUtils {
/**
* Node Flags
*/
static AK_FORCE_INLINE bool isBlacklisted(const NodeFlags flags) {
return (flags & FLAG_IS_BLACKLISTED) != 0;
static AK_FORCE_INLINE bool isPossiblyOffensive(const NodeFlags flags) {
return (flags & FLAG_IS_POSSIBLY_OFFENSIVE) != 0;
}
static AK_FORCE_INLINE bool isNotAWord(const NodeFlags flags) {
@ -82,12 +82,12 @@ class PatriciaTrieReadingUtils {
return FLAG_CHILDREN_POSITION_TYPE_NOPOSITION != (MASK_CHILDREN_POSITION_TYPE & flags);
}
static AK_FORCE_INLINE NodeFlags createAndGetFlags(const bool isBlacklisted,
static AK_FORCE_INLINE NodeFlags createAndGetFlags(const bool isPossiblyOffensive,
const bool isNotAWord, const bool isTerminal, const bool hasShortcutTargets,
const bool hasBigrams, const bool hasMultipleChars,
const int childrenPositionFieldSize) {
NodeFlags nodeFlags = 0;
nodeFlags = isBlacklisted ? (nodeFlags | FLAG_IS_BLACKLISTED) : nodeFlags;
nodeFlags = isPossiblyOffensive ? (nodeFlags | FLAG_IS_POSSIBLY_OFFENSIVE) : nodeFlags;
nodeFlags = isNotAWord ? (nodeFlags | FLAG_IS_NOT_A_WORD) : nodeFlags;
nodeFlags = isTerminal ? (nodeFlags | FLAG_IS_TERMINAL) : nodeFlags;
nodeFlags = hasShortcutTargets ? (nodeFlags | FLAG_HAS_SHORTCUT_TARGETS) : nodeFlags;
@ -127,7 +127,7 @@ class PatriciaTrieReadingUtils {
static const NodeFlags FLAG_HAS_SHORTCUT_TARGETS;
static const NodeFlags FLAG_HAS_BIGRAMS;
static const NodeFlags FLAG_IS_NOT_A_WORD;
static const NodeFlags FLAG_IS_BLACKLISTED;
static const NodeFlags FLAG_IS_POSSIBLY_OFFENSIVE;
};
} // namespace latinime
#endif /* LATINIME_PATRICIA_TRIE_NODE_READING_UTILS_H */

View file

@ -145,7 +145,18 @@ class PtNodeParams {
}
AK_FORCE_INLINE bool isBlacklisted() const {
return PatriciaTrieReadingUtils::isBlacklisted(mFlags);
// Note: this method will be removed in the next change.
// It is used in getProbabilityOfWord and getWordAttributes for both v402 and v403.
// * getProbabilityOfWord will be changed to no longer return NOT_A_PROBABILITY
// when isBlacklisted (i.e. to only check if isNotAWord or isDeleted)
// * getWordAttributes will be changed to always return blacklisted=false and
// isPossiblyOffensive according to the function below (instead of the current
// behaviour of checking if the probability is zero)
return PatriciaTrieReadingUtils::isPossiblyOffensive(mFlags);
}
AK_FORCE_INLINE bool isPossiblyOffensive() const {
return PatriciaTrieReadingUtils::isPossiblyOffensive(mFlags);
}
AK_FORCE_INLINE bool isNotAWord() const {

View file

@ -476,8 +476,8 @@ const WordProperty PatriciaTriePolicy::getWordProperty(
}
}
const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
HistoricalInfo(), std::move(shortcuts));
ptNodeParams.isNotAWord(), ptNodeParams.isPossiblyOffensive(),
ptNodeParams.getProbability(), HistoricalInfo(), std::move(shortcuts));
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
}

View file

@ -342,7 +342,7 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos, const bo
// Create node flags and write them.
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
PatriciaTrieReadingUtils::createAndGetFlags(false /* isNotAWord */,
false /* isBlacklisted */, isTerminal, false /* hasShortcutTargets */,
false /* isPossiblyOffensive */, isTerminal, false /* hasShortcutTargets */,
false /* hasBigrams */, hasMultipleChars, CHILDREN_POSITION_FIELD_SIZE);
if (!DynamicPtWritingUtils::writeFlags(mTrieBuffer, nodeFlags, ptNodePos)) {
AKLOGE("Cannot write PtNode flags. flags: %x, pos: %d", nodeFlags, ptNodePos);

View file

@ -299,7 +299,8 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContex
}
const UnigramProperty beginningOfSentenceUnigramProperty(
true /* representsBeginningOfSentence */, true /* isNotAWord */,
false /* isBlacklisted */, MAX_PROBABILITY /* probability */, HistoricalInfo());
false /* isBlacklisted */, false /* isPossiblyOffensive */,
MAX_PROBABILITY /* probability */, HistoricalInfo());
if (!addUnigramEntry(ngramContext->getNthPrevWordCodePoints(1 /* n */),
&beginningOfSentenceUnigramProperty)) {
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
@ -375,8 +376,9 @@ bool Ver4PatriciaTriePolicy::updateEntriesForWordWithNgramContext(
if (wordId == NOT_A_WORD_ID) {
// The word is not in the dictionary.
const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */,
false /* isNotAWord */, false /* isBlacklisted */, NOT_A_PROBABILITY,
HistoricalInfo(historicalInfo.getTimestamp(), 0 /* level */, 0 /* count */));
false /* isNotAWord */, false /* isBlacklisted */, false /* isPossiblyOffensive */,
NOT_A_PROBABILITY, HistoricalInfo(historicalInfo.getTimestamp(), 0 /* level */,
0 /* count */));
if (!addUnigramEntry(wordCodePoints, &unigramProperty)) {
AKLOGE("Cannot add unigarm entry in updateEntriesForWordWithNgramContext().");
return false;
@ -391,7 +393,7 @@ bool Ver4PatriciaTriePolicy::updateEntriesForWordWithNgramContext(
&& ngramContext->isNthPrevWordBeginningOfSentence(1 /* n */)) {
const UnigramProperty beginningOfSentenceUnigramProperty(
true /* representsBeginningOfSentence */,
true /* isNotAWord */, false /* isBlacklisted */, NOT_A_PROBABILITY,
true /* isNotAWord */, false /* isPossiblyOffensive */, NOT_A_PROBABILITY,
HistoricalInfo(historicalInfo.getTimestamp(), 0 /* level */, 0 /* count */));
if (!addUnigramEntry(ngramContext->getNthPrevWordCodePoints(1 /* n */),
&beginningOfSentenceUnigramProperty)) {
@ -529,7 +531,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
}
const UnigramProperty unigramProperty(probabilityEntry.representsBeginningOfSentence(),
probabilityEntry.isNotAWord(), probabilityEntry.isBlacklisted(),
probabilityEntry.getProbability(), *historicalInfo, std::move(shortcuts));
probabilityEntry.isPossiblyOffensive(), probabilityEntry.getProbability(),
*historicalInfo, std::move(shortcuts));
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
}

View file

@ -684,8 +684,8 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
binaryDictionary.addUnigramEntry("", DUMMY_PROBABILITY, "" /* shortcutTarget */,
BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
true /* isBeginningOfSentence */, true /* isNotAWord */, false /* isBlacklisted */,
mCurrentTime);
true /* isBeginningOfSentence */, true /* isNotAWord */,
false /* isPossiblyOffensive */, mCurrentTime);
final NgramContext beginningOfSentenceContext = NgramContext.BEGINNING_OF_SENTENCE;
onInputWordWithBeginningOfSentenceContext(binaryDictionary, "aaa", true /* isValidWord */);
assertFalse(binaryDictionary.isValidNgram(beginningOfSentenceContext, "aaa"));

View file

@ -200,7 +200,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
// Too long short cut.
binaryDictionary.addUnigramEntry("a", probability, invalidLongWord,
10 /* shortcutProbability */, false /* isBeginningOfSentence */,
false /* isNotAWord */, false /* isBlacklisted */,
false /* isNotAWord */, false /* isPossiblyOffensive */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
addUnigramWord(binaryDictionary, "abc", probability);
final int updatedProbability = 200;
@ -221,7 +221,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */,
BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
false /* isBeginningOfSentence */, false /* isNotAWord */,
false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
false /* isPossiblyOffensive */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
}
private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
@ -971,11 +972,11 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final String word = CodePointUtils.generateWord(random, codePointSet);
final int unigramProbability = random.nextInt(0xFF);
final boolean isNotAWord = random.nextBoolean();
final boolean isBlacklisted = random.nextBoolean();
final boolean isPossiblyOffensive = random.nextBoolean();
// TODO: Add tests for historical info.
binaryDictionary.addUnigramEntry(word, unigramProbability,
null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY,
false /* isBeginningOfSentence */, isNotAWord, isBlacklisted,
false /* isBeginningOfSentence */, isNotAWord, isPossiblyOffensive,
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
binaryDictionary.flushWithGC();
@ -987,7 +988,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertEquals(word, wordProperty.mWord);
assertTrue(wordProperty.isValid());
assertEquals(isNotAWord, wordProperty.mIsNotAWord);
assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry);
assertEquals(isPossiblyOffensive, wordProperty.mIsPossiblyOffensive);
assertEquals(false, wordProperty.mHasNgrams);
assertEquals(false, wordProperty.mHasShortcuts);
assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability);
@ -1142,7 +1143,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int shortcutProbability = 10;
binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
shortcutProbability, false /* isBeginningOfSentence */,
false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
false /* isNotAWord */, false /* isPossiblyOffensive */, 0 /* timestamp */);
WordProperty wordProperty = binaryDictionary.getWordProperty("aaa",
false /* isBeginningOfSentence */);
assertEquals(1, wordProperty.mShortcutTargets.size());
@ -1151,7 +1152,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int updatedShortcutProbability = 2;
binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
updatedShortcutProbability, false /* isBeginningOfSentence */,
false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
false /* isNotAWord */, false /* isPossiblyOffensive */, 0 /* timestamp */);
wordProperty = binaryDictionary.getWordProperty("aaa",
false /* isBeginningOfSentence */);
assertEquals(1, wordProperty.mShortcutTargets.size());
@ -1160,7 +1161,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
wordProperty.mShortcutTargets.get(0).getProbability());
binaryDictionary.addUnigramEntry("aaa", unigramProbability, "yyy",
shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
false /* isBlacklisted */, 0 /* timestamp */);
false /* isPossiblyOffensive */, 0 /* timestamp */);
final HashMap<String, Integer> shortcutTargets = new HashMap<>();
shortcutTargets.put("zzz", updatedShortcutProbability);
shortcutTargets.put("yyy", shortcutProbability);
@ -1223,7 +1224,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int unigramProbability = unigramProbabilities.get(word);
binaryDictionary.addUnigramEntry(word, unigramProbability, shortcutTarget,
shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
false /* isBlacklisted */, 0 /* timestamp */);
false /* isPossiblyOffensive */, 0 /* timestamp */);
if (shortcutTargets.containsKey(word)) {
final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word);
shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
@ -1255,6 +1256,15 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
}
public void testPossiblyOffensiveAttributeMaintained() {
final BinaryDictionary binaryDictionary =
getEmptyBinaryDictionary(FormatSpec.VERSION4_DEV);
binaryDictionary.addUnigramEntry("ddd", 100, null, Dictionary.NOT_A_PROBABILITY,
false, true, true, 0);
WordProperty wordProperty = binaryDictionary.getWordProperty("ddd", false);
assertEquals(true, wordProperty.mIsPossiblyOffensive);
}
public void testDictMigration() {
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion);
@ -1271,10 +1281,10 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int shortcutProbability = 10;
binaryDictionary.addUnigramEntry("ccc", unigramProbability, "xxx", shortcutProbability,
false /* isBeginningOfSentence */, false /* isNotAWord */,
false /* isBlacklisted */, 0 /* timestamp */);
false /* isPossiblyOffensive */, 0 /* timestamp */);
binaryDictionary.addUnigramEntry("ddd", unigramProbability, null /* shortcutTarget */,
Dictionary.NOT_A_PROBABILITY, false /* isBeginningOfSentence */,
true /* isNotAWord */, true /* isBlacklisted */, 0 /* timestamp */);
true /* isNotAWord */, true /* isPossiblyOffensive */, 0 /* timestamp */);
binaryDictionary.addNgramEntry(NgramContext.BEGINNING_OF_SENTENCE,
"aaa", bigramProbability, 0 /* timestamp */);
assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
@ -1298,7 +1308,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertEquals("xxx", wordProperty.mShortcutTargets.get(0).mWord);
wordProperty = binaryDictionary.getWordProperty("ddd",
false /* isBeginningOfSentence */);
assertTrue(wordProperty.mIsBlacklistEntry);
assertTrue(wordProperty.mIsPossiblyOffensive);
assertTrue(wordProperty.mIsNotAWord);
}

View file

@ -35,16 +35,20 @@ public class FusionDictionaryTests extends AndroidTestCase {
FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new DictionaryOptions(new HashMap<String,String>()));
dict.add("abc", new ProbabilityInfo(10), null, false /* isNotAWord */);
dict.add("abc", new ProbabilityInfo(10), null, false /* isNotAWord */,
false /* isPossiblyOffensive */);
assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aaa"));
assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "abc"));
dict.add("aa", new ProbabilityInfo(10), null, false /* isNotAWord */);
dict.add("aa", new ProbabilityInfo(10), null, false /* isNotAWord */,
false /* isPossiblyOffensive */);
assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aaa"));
assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aa"));
dict.add("babcd", new ProbabilityInfo(10), null, false /* isNotAWord */);
dict.add("bacde", new ProbabilityInfo(10), null, false /* isNotAWord */);
dict.add("babcd", new ProbabilityInfo(10), null, false /* isNotAWord */,
false /* isPossiblyOffensive */);
dict.add("bacde", new ProbabilityInfo(10), null, false /* isNotAWord */,
false /* isPossiblyOffensive */);
assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "ba"));
assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "babcd"));
assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "bacde"));

View file

@ -149,7 +149,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
}
}
dict.add(word, new ProbabilityInfo(UNIGRAM_FREQ),
(shortcutMap == null) ? null : shortcuts, false /* isNotAWord */);
(shortcutMap == null) ? null : shortcuts, false /* isNotAWord */,
false /* isPossiblyOffensive */);
}
}

View file

@ -572,12 +572,12 @@ public class BinaryDictEncoderUtils {
* @param hasShortcuts whether the PtNode has shortcuts.
* @param hasBigrams whether the PtNode has bigrams.
* @param isNotAWord whether the PtNode is not a word.
* @param isBlackListEntry whether the PtNode is a blacklist entry.
* @param isPossiblyOffensive whether the PtNode is a possibly offensive entry.
* @return the flags
*/
static int makePtNodeFlags(final boolean hasMultipleChars, final boolean isTerminal,
final int childrenAddressSize, final boolean hasShortcuts, final boolean hasBigrams,
final boolean isNotAWord, final boolean isBlackListEntry) {
final boolean isNotAWord, final boolean isPossiblyOffensive) {
byte flags = 0;
if (hasMultipleChars) flags |= FormatSpec.FLAG_HAS_MULTIPLE_CHARS;
if (isTerminal) flags |= FormatSpec.FLAG_IS_TERMINAL;
@ -600,7 +600,7 @@ public class BinaryDictEncoderUtils {
if (hasShortcuts) flags |= FormatSpec.FLAG_HAS_SHORTCUT_TARGETS;
if (hasBigrams) flags |= FormatSpec.FLAG_HAS_BIGRAMS;
if (isNotAWord) flags |= FormatSpec.FLAG_IS_NOT_A_WORD;
if (isBlackListEntry) flags |= FormatSpec.FLAG_IS_BLACKLISTED;
if (isPossiblyOffensive) flags |= FormatSpec.FLAG_IS_POSSIBLY_OFFENSIVE;
return flags;
}
@ -609,7 +609,7 @@ public class BinaryDictEncoderUtils {
getByteSize(childrenOffset),
node.mShortcutTargets != null && !node.mShortcutTargets.isEmpty(),
node.mBigrams != null && !node.mBigrams.isEmpty(),
node.mIsNotAWord, node.mIsBlacklistEntry);
node.mIsNotAWord, node.mIsPossiblyOffensive);
}
/**

View file

@ -89,7 +89,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
int mTerminalId; // NOT_A_TERMINAL == mTerminalId indicates this is not a terminal.
PtNodeArray mChildren;
boolean mIsNotAWord; // Only a shortcut
boolean mIsBlacklistEntry;
boolean mIsPossiblyOffensive;
// mCachedSize and mCachedAddressBefore/AfterUpdate are helpers for binary dictionary
// generation. Before and After always hold the same value except during dictionary
// address compression, where the update process needs to know about both values at the
@ -102,7 +102,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
public PtNode(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
final ArrayList<WeightedString> bigrams, final ProbabilityInfo probabilityInfo,
final boolean isNotAWord, final boolean isBlacklistEntry) {
final boolean isNotAWord, final boolean isPossiblyOffensive) {
mChars = chars;
mProbabilityInfo = probabilityInfo;
mTerminalId = probabilityInfo == null ? NOT_A_TERMINAL : probabilityInfo.mProbability;
@ -110,12 +110,12 @@ public final class FusionDictionary implements Iterable<WordProperty> {
mBigrams = bigrams;
mChildren = null;
mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklistEntry;
mIsPossiblyOffensive = isPossiblyOffensive;
}
public PtNode(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
final ArrayList<WeightedString> bigrams, final ProbabilityInfo probabilityInfo,
final boolean isNotAWord, final boolean isBlacklistEntry,
final boolean isNotAWord, final boolean isPossiblyOffensive,
final PtNodeArray children) {
mChars = chars;
mProbabilityInfo = probabilityInfo;
@ -123,7 +123,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
mBigrams = bigrams;
mChildren = children;
mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklistEntry;
mIsPossiblyOffensive = isPossiblyOffensive;
}
public void addChild(PtNode n) {
@ -153,8 +153,8 @@ public final class FusionDictionary implements Iterable<WordProperty> {
return mIsNotAWord;
}
public boolean getIsBlacklistEntry() {
return mIsBlacklistEntry;
public boolean getIsPossiblyOffensive() {
return mIsPossiblyOffensive;
}
public ArrayList<WeightedString> getShortcutTargets() {
@ -238,7 +238,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
private void update(final ProbabilityInfo probabilityInfo,
final ArrayList<WeightedString> shortcutTargets,
final ArrayList<WeightedString> bigrams,
final boolean isNotAWord, final boolean isBlacklistEntry) {
final boolean isNotAWord, final boolean isPossiblyOffensive) {
mProbabilityInfo = ProbabilityInfo.max(mProbabilityInfo, probabilityInfo);
if (shortcutTargets != null) {
if (mShortcutTargets == null) {
@ -275,7 +275,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
}
}
mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklistEntry;
mIsPossiblyOffensive = isPossiblyOffensive;
}
}
@ -323,24 +323,12 @@ public final class FusionDictionary implements Iterable<WordProperty> {
* @param probabilityInfo probability information of the word.
* @param shortcutTargets a list of shortcut targets for this word, or null.
* @param isNotAWord true if this should not be considered a word (e.g. shortcut only)
* @param isPossiblyOffensive true if this word is possibly offensive
*/
public void add(final String word, final ProbabilityInfo probabilityInfo,
final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord) {
add(getCodePoints(word), probabilityInfo, shortcutTargets, isNotAWord,
false /* isBlacklistEntry */);
}
/**
* Helper method to add a blacklist entry as a string.
*
* @param word the word to add as a blacklist entry.
* @param shortcutTargets a list of shortcut targets for this word, or null.
* @param isNotAWord true if this is not a word for spellcheking purposes (shortcut only or so)
*/
public void addBlacklistEntry(final String word,
final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord) {
add(getCodePoints(word), new ProbabilityInfo(0), shortcutTargets, isNotAWord,
true /* isBlacklistEntry */);
final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord,
final boolean isPossiblyOffensive) {
add(getCodePoints(word), probabilityInfo, shortcutTargets, isNotAWord, isPossiblyOffensive);
}
/**
@ -375,7 +363,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
final PtNode ptNode1 = findWordInTree(mRootNodeArray, word1);
if (ptNode1 == null) {
add(getCodePoints(word1), new ProbabilityInfo(0), null, false /* isNotAWord */,
false /* isBlacklistEntry */);
false /* isPossiblyOffensive */);
// The PtNode for the first word may have moved by the above insertion,
// if word1 and word2 share a common stem that happens not to have been
// a cutting point until now. In this case, we need to refresh ptNode.
@ -397,11 +385,11 @@ public final class FusionDictionary implements Iterable<WordProperty> {
* @param probabilityInfo the probability information of the word.
* @param shortcutTargets an optional list of shortcut targets for this word (null if none).
* @param isNotAWord true if this is not a word for spellcheking purposes (shortcut only or so)
* @param isBlacklistEntry true if this is a blacklisted word, false otherwise
* @param isPossiblyOffensive true if this word is possibly offensive
*/
private void add(final int[] word, final ProbabilityInfo probabilityInfo,
final ArrayList<WeightedString> shortcutTargets,
final boolean isNotAWord, final boolean isBlacklistEntry) {
final boolean isNotAWord, final boolean isPossiblyOffensive) {
assert(probabilityInfo.mProbability <= FormatSpec.MAX_TERMINAL_FREQUENCY);
if (word.length >= Constants.DICTIONARY_MAX_WORD_LENGTH) {
MakedictLog.w("Ignoring a word that is too long: word.length = " + word.length);
@ -431,7 +419,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
final int insertionIndex = findInsertionIndex(currentNodeArray, word[charIndex]);
final PtNode newPtNode = new PtNode(Arrays.copyOfRange(word, charIndex, word.length),
shortcutTargets, null /* bigrams */, probabilityInfo, isNotAWord,
isBlacklistEntry);
isPossiblyOffensive);
currentNodeArray.mData.add(insertionIndex, newPtNode);
if (DBG) checkStack(currentNodeArray);
} else {
@ -442,14 +430,14 @@ public final class FusionDictionary implements Iterable<WordProperty> {
// should end already exists as is. Since the old PtNode was not a terminal,
// make it one by filling in its frequency and other attributes
currentPtNode.update(probabilityInfo, shortcutTargets, null, isNotAWord,
isBlacklistEntry);
isPossiblyOffensive);
} else {
// The new word matches the full old word and extends past it.
// We only have to create a new node and add it to the end of this.
final PtNode newNode = new PtNode(
Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
shortcutTargets, null /* bigrams */, probabilityInfo,
isNotAWord, isBlacklistEntry);
isNotAWord, isPossiblyOffensive);
currentPtNode.mChildren = new PtNodeArray();
currentPtNode.mChildren.mData.add(newNode);
}
@ -459,7 +447,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
// new shortcuts to the existing shortcut list if it already exists.
currentPtNode.update(probabilityInfo, shortcutTargets, null,
currentPtNode.mIsNotAWord && isNotAWord,
currentPtNode.mIsBlacklistEntry || isBlacklistEntry);
currentPtNode.mIsPossiblyOffensive || isPossiblyOffensive);
} else {
// Partial prefix match only. We have to replace the current node with a node
// containing the current prefix and create two new ones for the tails.
@ -468,7 +456,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
Arrays.copyOfRange(currentPtNode.mChars, differentCharIndex,
currentPtNode.mChars.length), currentPtNode.mShortcutTargets,
currentPtNode.mBigrams, currentPtNode.mProbabilityInfo,
currentPtNode.mIsNotAWord, currentPtNode.mIsBlacklistEntry,
currentPtNode.mIsNotAWord, currentPtNode.mIsPossiblyOffensive,
currentPtNode.mChildren);
newChildren.mData.add(newOldWord);
@ -477,17 +465,17 @@ public final class FusionDictionary implements Iterable<WordProperty> {
newParent = new PtNode(
Arrays.copyOfRange(currentPtNode.mChars, 0, differentCharIndex),
shortcutTargets, null /* bigrams */, probabilityInfo,
isNotAWord, isBlacklistEntry, newChildren);
isNotAWord, isPossiblyOffensive, newChildren);
} else {
newParent = new PtNode(
Arrays.copyOfRange(currentPtNode.mChars, 0, differentCharIndex),
null /* shortcutTargets */, null /* bigrams */,
null /* probabilityInfo */, false /* isNotAWord */,
false /* isBlacklistEntry */, newChildren);
false /* isPossiblyOffensive */, newChildren);
final PtNode newWord = new PtNode(Arrays.copyOfRange(word,
charIndex + differentCharIndex, word.length),
shortcutTargets, null /* bigrams */, probabilityInfo,
isNotAWord, isBlacklistEntry);
isNotAWord, isPossiblyOffensive);
final int addIndex = word[charIndex + differentCharIndex]
> currentPtNode.mChars[differentCharIndex] ? 1 : 0;
newChildren.mData.add(addIndex, newWord);
@ -549,7 +537,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
final ArrayList<PtNode> data = nodeArray.mData;
final PtNode reference = new PtNode(new int[] { character },
null /* shortcutTargets */, null /* bigrams */, null /* probabilityInfo */,
false /* isNotAWord */, false /* isBlacklistEntry */);
false /* isNotAWord */, false /* isPossiblyOffensive */);
int result = Collections.binarySearch(data, reference, PTNODE_COMPARATOR);
return result >= 0 ? result : -result - 1;
}
@ -686,7 +674,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
return new WordProperty(mCurrentString.toString(),
currentPtNode.mProbabilityInfo,
currentPtNode.mShortcutTargets, currentPtNode.mBigrams,
currentPtNode.mIsNotAWord, currentPtNode.mIsBlacklistEntry);
currentPtNode.mIsNotAWord, currentPtNode.mIsPossiblyOffensive);
}
} else {
mPositions.removeLast();

View file

@ -283,13 +283,9 @@ public class Ver2DictDecoder extends AbstractDictDecoder {
// Insert unigrams into the fusion dictionary.
for (final WordProperty wordProperty : wordProperties) {
if (wordProperty.mIsBlacklistEntry) {
fusionDict.addBlacklistEntry(wordProperty.mWord, wordProperty.mShortcutTargets,
wordProperty.mIsNotAWord);
} else {
fusionDict.add(wordProperty.mWord, wordProperty.mProbabilityInfo,
wordProperty.mShortcutTargets, wordProperty.mIsNotAWord);
}
fusionDict.add(wordProperty.mWord, wordProperty.mProbabilityInfo,
wordProperty.mShortcutTargets, wordProperty.mIsNotAWord,
wordProperty.mIsPossiblyOffensive);
}
// Insert bigrams into the fusion dictionary.
for (final WordProperty wordProperty : wordProperties) {

View file

@ -85,7 +85,8 @@ public class Ver2DictEncoderTests extends AndroidTestCase {
}
}
dict.add(word, new ProbabilityInfo(UNIGRAM_FREQ),
(shortcutMap == null) ? null : shortcuts, false /* isNotAWord */);
(shortcutMap == null) ? null : shortcuts, false /* isNotAWord */,
false /* isPossiblyOffensive */);
}
}
}

View file

@ -88,13 +88,9 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
// Insert unigrams into the fusion dictionary.
for (final WordProperty wordProperty : wordProperties) {
if (wordProperty.mIsBlacklistEntry) {
fusionDict.addBlacklistEntry(wordProperty.mWord, wordProperty.mShortcutTargets,
wordProperty.mIsNotAWord);
} else {
fusionDict.add(wordProperty.mWord, wordProperty.mProbabilityInfo,
wordProperty.mShortcutTargets, wordProperty.mIsNotAWord);
}
fusionDict.add(wordProperty.mWord, wordProperty.mProbabilityInfo,
wordProperty.mShortcutTargets, wordProperty.mIsNotAWord,
wordProperty.mIsPossiblyOffensive);
}
// Insert bigrams into the fusion dictionary.
// TODO: Support ngrams.

View file

@ -79,7 +79,7 @@ public class Ver4DictEncoder implements DictEncoder {
if (!binaryDict.addUnigramEntry(wordProperty.mWord, wordProperty.getProbability(),
null /* shortcutTarget */, 0 /* shortcutProbability */,
wordProperty.mIsBeginningOfSentence, wordProperty.mIsNotAWord,
wordProperty.mIsBlacklistEntry, 0 /* timestamp */)) {
wordProperty.mIsPossiblyOffensive, 0 /* timestamp */)) {
MakedictLog.e("Cannot add unigram entry for " + wordProperty.mWord);
}
} else {
@ -88,7 +88,7 @@ public class Ver4DictEncoder implements DictEncoder {
wordProperty.getProbability(),
shortcutTarget.mWord, shortcutTarget.getProbability(),
wordProperty.mIsBeginningOfSentence, wordProperty.mIsNotAWord,
wordProperty.mIsBlacklistEntry, 0 /* timestamp */)) {
wordProperty.mIsPossiblyOffensive, 0 /* timestamp */)) {
MakedictLog.e("Cannot add unigram entry for " + wordProperty.mWord
+ ", shortcutTarget: " + shortcutTarget.mWord);
return;

View file

@ -106,7 +106,7 @@ public class CombinedInputOutput {
if (args[0].matches(CombinedFormatUtils.WORD_TAG + "=.*")) {
if (null != word) {
dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts,
isNotAWord);
isNotAWord, false /* isPossiblyOffensive */);
for (WeightedString s : bigrams) {
dict.setBigram(word, s.mWord, s.mProbabilityInfo);
}
@ -189,7 +189,8 @@ public class CombinedInputOutput {
}
}
if (null != word) {
dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts, isNotAWord);
dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts, isNotAWord,
false /* isPossiblyOffensive */);
for (WeightedString s : bigrams) {
dict.setBigram(word, s.mWord, s.mProbabilityInfo);
}

View file

@ -128,10 +128,10 @@ public class Diff extends Dicttool.Command {
+ word0Property.mIsNotAWord + " -> " + word1PtNode.getIsNotAWord());
hasDifferences = true;
}
if (word0Property.mIsBlacklistEntry != word1PtNode.getIsBlacklistEntry()) {
System.out.println("Blacklist: " + word0Property.mWord + " "
+ word0Property.mIsBlacklistEntry + " -> "
+ word1PtNode.getIsBlacklistEntry());
if (word0Property.mIsPossiblyOffensive != word1PtNode.getIsPossiblyOffensive()) {
System.out.println("Possibly-offensive: " + word0Property.mWord + " "
+ word0Property.mIsPossiblyOffensive + " -> "
+ word1PtNode.getIsPossiblyOffensive());
hasDifferences = true;
}
hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,

View file

@ -76,8 +76,8 @@ public class Info extends Dicttool.Command {
if (ptNode.getIsNotAWord()) {
System.out.println(" Is not a word");
}
if (ptNode.getIsBlacklistEntry()) {
System.out.println(" Is a blacklist entry");
if (ptNode.getIsPossiblyOffensive()) {
System.out.println(" Is possibly offensive");
}
final ArrayList<WeightedString> shortcutTargets = ptNode.getShortcutTargets();
if (null == shortcutTargets || shortcutTargets.isEmpty()) {

View file

@ -90,7 +90,8 @@ public class XmlDictInputOutput {
for (final String shortcutOnly : mShortcutsMap.keySet()) {
if (dict.hasWord(shortcutOnly)) continue;
dict.add(shortcutOnly, new ProbabilityInfo(SHORTCUT_ONLY_WORD_PROBABILITY),
mShortcutsMap.get(shortcutOnly), true /* isNotAWord */);
mShortcutsMap.get(shortcutOnly), true /* isNotAWord */,
false /* isPossiblyOffensive */);
}
mDictionary = null;
mShortcutsMap.clear();
@ -138,7 +139,7 @@ public class XmlDictInputOutput {
public void endElement(String uri, String localName, String qName) {
if (WORD == mState) {
mDictionary.add(mWord, new ProbabilityInfo(mFreq), mShortcutsMap.get(mWord),
false /* isNotAWord */);
false /* isNotAWord */, false /* isPossiblyOffensive */);
mState = START;
}
}

View file

@ -54,11 +54,16 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
testOptions.mAttributes.put(DictionaryHeader.DICTIONARY_LOCALE_KEY, LOCALE);
testOptions.mAttributes.put(DictionaryHeader.DICTIONARY_ID_KEY, ID);
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), testOptions);
dict.add("foo", new ProbabilityInfo(TEST_FREQ), null, false /* isNotAWord */);
dict.add("fta", new ProbabilityInfo(1), null, false /* isNotAWord */);
dict.add("ftb", new ProbabilityInfo(1), null, false /* isNotAWord */);
dict.add("bar", new ProbabilityInfo(1), null, false /* isNotAWord */);
dict.add("fool", new ProbabilityInfo(1), null, false /* isNotAWord */);
dict.add("foo", new ProbabilityInfo(TEST_FREQ), null, false /* isNotAWord */,
false /* isPossiblyOffensive */);
dict.add("fta", new ProbabilityInfo(1), null, false /* isNotAWord */,
false /* isPossiblyOffensive */);
dict.add("ftb", new ProbabilityInfo(1), null, false /* isNotAWord */,
false /* isPossiblyOffensive */);
dict.add("bar", new ProbabilityInfo(1), null, false /* isNotAWord */,
false /* isPossiblyOffensive */);
dict.add("fool", new ProbabilityInfo(1), null, false /* isNotAWord */,
false /* isPossiblyOffensive */);
final File dst = File.createTempFile("testGetRawDict", ".tmp");
dst.deleteOnExit();

View file

@ -33,11 +33,16 @@ public class BinaryDictEncoderFlattenTreeTests extends TestCase {
public void testFlattenNodes() {
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new DictionaryOptions(new HashMap<String, String>()));
dict.add("foo", new ProbabilityInfo(1), null, false /* isNotAWord */);
dict.add("fta", new ProbabilityInfo(1), null, false /* isNotAWord */);
dict.add("ftb", new ProbabilityInfo(1), null, false /* isNotAWord */);
dict.add("bar", new ProbabilityInfo(1), null, false /* isNotAWord */);
dict.add("fool", new ProbabilityInfo(1), null, false /* isNotAWord */);
dict.add("foo", new ProbabilityInfo(1), null, false /* isNotAWord */,
false /* isPossiblyOffensive */);
dict.add("fta", new ProbabilityInfo(1), null, false /* isNotAWord */,
false /* isPossiblyOffensive */);
dict.add("ftb", new ProbabilityInfo(1), null, false /* isNotAWord */,
false /* isPossiblyOffensive */);
dict.add("bar", new ProbabilityInfo(1), null, false /* isNotAWord */,
false /* isPossiblyOffensive */);
dict.add("fool", new ProbabilityInfo(1), null, false /* isNotAWord */,
false /* isPossiblyOffensive */);
final ArrayList<PtNodeArray> result =
BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray);
assertEquals(4, result.size());

View file

@ -101,7 +101,8 @@ public class FusionDictionaryTest extends TestCase {
prepare(time);
for (int i = 0; i < sWords.size(); ++i) {
System.out.println("Adding in pos " + i + " : " + dumpWord(sWords.get(i)));
dict.add(sWords.get(i), new ProbabilityInfo(180), null, false);
dict.add(sWords.get(i), new ProbabilityInfo(180), null, false,
false /* isPossiblyOffensive */);
dumpDict(dict);
checkDictionary(dict, sWords, i);
}