Renaming "blacklist" flag to "possibly offensive"

No behaviour changes.
Unified the overloaded FusionDictionary::add method to always take an
isPossiblyOffensive argument.

Bug: 11031090
Change-Id: I5741a023ca1ce842d2cf10d4f6c926b0efabaa78
main
Adrian Velicu 2014-10-14 12:13:11 +09:00
parent 9ba36f29cc
commit 05172bf1a5
40 changed files with 246 additions and 185 deletions

View File

@ -69,7 +69,7 @@ public final class BinaryDictionary extends Dictionary {
// Format to get unigram flags from native side via getWordPropertyNative(). // Format to get unigram flags from native side via getWordPropertyNative().
private static final int FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT = 5; private static final int FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT = 5;
private static final int FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX = 0; private static final int FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX = 0;
private static final int FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX = 1; private static final int FORMAT_WORD_PROPERTY_IS_POSSIBLY_OFFENSIVE_INDEX = 1;
private static final int FORMAT_WORD_PROPERTY_HAS_NGRAMS_INDEX = 2; private static final int FORMAT_WORD_PROPERTY_HAS_NGRAMS_INDEX = 2;
private static final int FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX = 3; private static final int FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX = 3;
private static final int FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX = 4; private static final int FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX = 4;
@ -195,7 +195,7 @@ public final class BinaryDictionary extends Dictionary {
float[] inOutWeightOfLangModelVsSpatialModel); float[] inOutWeightOfLangModelVsSpatialModel);
private static native boolean addUnigramEntryNative(long dict, int[] word, int probability, private static native boolean addUnigramEntryNative(long dict, int[] word, int probability,
int[] shortcutTarget, int shortcutProbability, boolean isBeginningOfSentence, int[] shortcutTarget, int shortcutProbability, boolean isBeginningOfSentence,
boolean isNotAWord, boolean isBlacklisted, int timestamp); boolean isNotAWord, boolean isPossiblyOffensive, int timestamp);
private static native boolean removeUnigramEntryNative(long dict, int[] word); private static native boolean removeUnigramEntryNative(long dict, int[] word);
private static native boolean addNgramEntryNative(long dict, private static native boolean addNgramEntryNative(long dict,
int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray,
@ -402,7 +402,7 @@ public final class BinaryDictionary extends Dictionary {
outNgramProbabilityInfo, outShortcutTargets, outShortcutProbabilities); outNgramProbabilityInfo, outShortcutTargets, outShortcutProbabilities);
return new WordProperty(codePoints, return new WordProperty(codePoints,
outFlags[FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX], outFlags[FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX],
outFlags[FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX], outFlags[FORMAT_WORD_PROPERTY_IS_POSSIBLY_OFFENSIVE_INDEX],
outFlags[FORMAT_WORD_PROPERTY_HAS_NGRAMS_INDEX], outFlags[FORMAT_WORD_PROPERTY_HAS_NGRAMS_INDEX],
outFlags[FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX], outFlags[FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX],
outFlags[FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX], outProbabilityInfo, outFlags[FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX], outProbabilityInfo,
@ -439,7 +439,7 @@ public final class BinaryDictionary extends Dictionary {
public boolean addUnigramEntry(final String word, final int probability, public boolean addUnigramEntry(final String word, final int probability,
final String shortcutTarget, final int shortcutProbability, final String shortcutTarget, final int shortcutProbability,
final boolean isBeginningOfSentence, final boolean isNotAWord, final boolean isBeginningOfSentence, final boolean isNotAWord,
final boolean isBlacklisted, final int timestamp) { final boolean isPossiblyOffensive, final int timestamp) {
if (word == null || (word.isEmpty() && !isBeginningOfSentence)) { if (word == null || (word.isEmpty() && !isBeginningOfSentence)) {
return false; return false;
} }
@ -447,7 +447,8 @@ public final class BinaryDictionary extends Dictionary {
final int[] shortcutTargetCodePoints = (shortcutTarget != null) ? final int[] shortcutTargetCodePoints = (shortcutTarget != null) ?
StringUtils.toCodePointArray(shortcutTarget) : null; StringUtils.toCodePointArray(shortcutTarget) : null;
if (!addUnigramEntryNative(mNativeDict, codePoints, probability, shortcutTargetCodePoints, if (!addUnigramEntryNative(mNativeDict, codePoints, probability, shortcutTargetCodePoints,
shortcutProbability, isBeginningOfSentence, isNotAWord, isBlacklisted, timestamp)) { shortcutProbability, isBeginningOfSentence, isNotAWord, isPossiblyOffensive,
timestamp)) {
return false; return false;
} }
mHasUpdated = true; mHasUpdated = true;

View File

@ -137,7 +137,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
} }
runGCIfRequiredLocked(true /* mindsBlockByGC */); runGCIfRequiredLocked(true /* mindsBlockByGC */);
addUnigramLocked(word, FREQUENCY_FOR_CONTACTS, null /* shortcut */, addUnigramLocked(word, FREQUENCY_FOR_CONTACTS, null /* shortcut */,
0 /* shortcutFreq */, false /* isNotAWord */, false /* isBlacklisted */, 0 /* shortcutFreq */, false /* isNotAWord */, false /* isPossiblyOffensive */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP); BinaryDictionary.NOT_A_VALID_TIMESTAMP);
} }
} }
@ -238,7 +238,8 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
runGCIfRequiredLocked(true /* mindsBlockByGC */); runGCIfRequiredLocked(true /* mindsBlockByGC */);
addUnigramLocked(word, FREQUENCY_FOR_CONTACTS, addUnigramLocked(word, FREQUENCY_FOR_CONTACTS,
null /* shortcut */, 0 /* shortcutFreq */, false /* isNotAWord */, null /* shortcut */, 0 /* shortcutFreq */, false /* isNotAWord */,
false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP); false /* isPossiblyOffensive */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
if (!ngramContext.isValid() && mUseFirstLastBigrams) { if (!ngramContext.isValid() && mUseFirstLastBigrams) {
runGCIfRequiredLocked(true /* mindsBlockByGC */); runGCIfRequiredLocked(true /* mindsBlockByGC */);
addNgramEntryLocked(ngramContext, word, FREQUENCY_FOR_CONTACTS_BIGRAM, addNgramEntryLocked(ngramContext, word, FREQUENCY_FOR_CONTACTS_BIGRAM,

View File

@ -809,7 +809,7 @@ public class DictionaryFacilitator {
contextualDict.addUnigramEntryWithCheckingDistracter( contextualDict.addUnigramEntryWithCheckingDistracter(
subPhraseStr, probability, null /* shortcutTarget */, subPhraseStr, probability, null /* shortcutTarget */,
Dictionary.NOT_A_PROBABILITY /* shortcutFreq */, Dictionary.NOT_A_PROBABILITY /* shortcutFreq */,
false /* isNotAWord */, false /* isBlacklisted */, false /* isNotAWord */, false /* isPossiblyOffensive */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP, BinaryDictionary.NOT_A_VALID_TIMESTAMP,
DistracterFilter.EMPTY_DISTRACTER_FILTER); DistracterFilter.EMPTY_DISTRACTER_FILTER);
contextualDict.addNgramEntry(ngramContext, subPhraseStr, contextualDict.addNgramEntry(ngramContext, subPhraseStr,
@ -819,7 +819,7 @@ public class DictionaryFacilitator {
contextualDict.addUnigramEntryWithCheckingDistracter( contextualDict.addUnigramEntryWithCheckingDistracter(
phrase[i], probability, null /* shortcutTarget */, phrase[i], probability, null /* shortcutTarget */,
Dictionary.NOT_A_PROBABILITY /* shortcutFreq */, Dictionary.NOT_A_PROBABILITY /* shortcutFreq */,
false /* isNotAWord */, false /* isBlacklisted */, false /* isNotAWord */, false /* isPossiblyOffensive */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP, BinaryDictionary.NOT_A_VALID_TIMESTAMP,
DistracterFilter.EMPTY_DISTRACTER_FILTER); DistracterFilter.EMPTY_DISTRACTER_FILTER);
contextualDict.addNgramEntry(ngramContext, phrase[i], contextualDict.addNgramEntry(ngramContext, phrase[i],

View File

@ -316,22 +316,22 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
*/ */
public void addUnigramEntryWithCheckingDistracter(final String word, final int frequency, public void addUnigramEntryWithCheckingDistracter(final String word, final int frequency,
final String shortcutTarget, final int shortcutFreq, final boolean isNotAWord, final String shortcutTarget, final int shortcutFreq, final boolean isNotAWord,
final boolean isBlacklisted, final int timestamp, final boolean isPossiblyOffensive, final int timestamp,
@Nonnull final DistracterFilter distracterFilter) { @Nonnull final DistracterFilter distracterFilter) {
updateDictionaryWithWriteLockIfWordIsNotADistracter(new Runnable() { updateDictionaryWithWriteLockIfWordIsNotADistracter(new Runnable() {
@Override @Override
public void run() { public void run() {
addUnigramLocked(word, frequency, shortcutTarget, shortcutFreq, addUnigramLocked(word, frequency, shortcutTarget, shortcutFreq,
isNotAWord, isBlacklisted, timestamp); isNotAWord, isPossiblyOffensive, timestamp);
} }
}, word, distracterFilter); }, word, distracterFilter);
} }
protected void addUnigramLocked(final String word, final int frequency, protected void addUnigramLocked(final String word, final int frequency,
final String shortcutTarget, final int shortcutFreq, final boolean isNotAWord, final String shortcutTarget, final int shortcutFreq, final boolean isNotAWord,
final boolean isBlacklisted, final int timestamp) { final boolean isPossiblyOffensive, final int timestamp) {
if (!mBinaryDictionary.addUnigramEntry(word, frequency, shortcutTarget, shortcutFreq, if (!mBinaryDictionary.addUnigramEntry(word, frequency, shortcutTarget, shortcutFreq,
false /* isBeginningOfSentence */, isNotAWord, isBlacklisted, timestamp)) { false /* isBeginningOfSentence */, isNotAWord, isPossiblyOffensive, timestamp)) {
Log.e(TAG, "Cannot add unigram entry. word: " + word); Log.e(TAG, "Cannot add unigram entry. word: " + word);
} }
} }

View File

@ -257,12 +257,14 @@ public class UserBinaryDictionary extends ExpandableBinaryDictionary {
runGCIfRequiredLocked(true /* mindsBlockByGC */); runGCIfRequiredLocked(true /* mindsBlockByGC */);
addUnigramLocked(word, adjustedFrequency, null /* shortcutTarget */, addUnigramLocked(word, adjustedFrequency, null /* shortcutTarget */,
0 /* shortcutFreq */, false /* isNotAWord */, 0 /* shortcutFreq */, false /* isNotAWord */,
false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP); false /* isPossiblyOffensive */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
if (null != shortcut && shortcut.length() <= MAX_WORD_LENGTH) { if (null != shortcut && shortcut.length() <= MAX_WORD_LENGTH) {
runGCIfRequiredLocked(true /* mindsBlockByGC */); runGCIfRequiredLocked(true /* mindsBlockByGC */);
addUnigramLocked(shortcut, adjustedFrequency, word, addUnigramLocked(shortcut, adjustedFrequency, word,
USER_DICT_SHORTCUT_FREQUENCY, true /* isNotAWord */, USER_DICT_SHORTCUT_FREQUENCY, true /* isNotAWord */,
false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP); false /* isPossiblyOffensive */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
} }
} }
cursor.moveToNext(); cursor.moveToNext();

View File

@ -93,7 +93,7 @@ public final class FormatSpec {
* s | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS * s | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS
* | has bigrams ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_BIGRAMS * | has bigrams ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_BIGRAMS
* | is not a word ? 1 bit, 1 = yes, 0 = no : FLAG_IS_NOT_A_WORD * | is not a word ? 1 bit, 1 = yes, 0 = no : FLAG_IS_NOT_A_WORD
* | is blacklisted ? 1 bit, 1 = yes, 0 = no : FLAG_IS_BLACKLISTED * | is possibly offensive ? 1 bit, 1 = yes, 0 = no : FLAG_IS_POSSIBLY_OFFENSIVE
* *
* c | IF FLAG_HAS_MULTIPLE_CHARS * c | IF FLAG_HAS_MULTIPLE_CHARS
* h | char, char, char, char n * (1 or 3 bytes) : use PtNodeInfo for i/o helpers * h | char, char, char, char n * (1 or 3 bytes) : use PtNodeInfo for i/o helpers
@ -197,7 +197,7 @@ public final class FormatSpec {
static final int FLAG_HAS_SHORTCUT_TARGETS = 0x08; static final int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
static final int FLAG_HAS_BIGRAMS = 0x04; static final int FLAG_HAS_BIGRAMS = 0x04;
static final int FLAG_IS_NOT_A_WORD = 0x02; static final int FLAG_IS_NOT_A_WORD = 0x02;
static final int FLAG_IS_BLACKLISTED = 0x01; static final int FLAG_IS_POSSIBLY_OFFENSIVE = 0x01;
static final int FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT = 0x80; static final int FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT = 0x80;
static final int FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE = 0x40; static final int FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE = 0x40;

View File

@ -41,7 +41,7 @@ public final class WordProperty implements Comparable<WordProperty> {
// TODO: Support mIsBeginningOfSentence. // TODO: Support mIsBeginningOfSentence.
public final boolean mIsBeginningOfSentence; public final boolean mIsBeginningOfSentence;
public final boolean mIsNotAWord; public final boolean mIsNotAWord;
public final boolean mIsBlacklistEntry; public final boolean mIsPossiblyOffensive;
public final boolean mHasShortcuts; public final boolean mHasShortcuts;
public final boolean mHasNgrams; public final boolean mHasNgrams;
@ -52,7 +52,7 @@ public final class WordProperty implements Comparable<WordProperty> {
public WordProperty(final String word, final ProbabilityInfo probabilityInfo, public WordProperty(final String word, final ProbabilityInfo probabilityInfo,
final ArrayList<WeightedString> shortcutTargets, final ArrayList<WeightedString> shortcutTargets,
@Nullable final ArrayList<WeightedString> bigrams, @Nullable final ArrayList<WeightedString> bigrams,
final boolean isNotAWord, final boolean isBlacklistEntry) { final boolean isNotAWord, final boolean isPossiblyOffensive) {
mWord = word; mWord = word;
mProbabilityInfo = probabilityInfo; mProbabilityInfo = probabilityInfo;
mShortcutTargets = shortcutTargets; mShortcutTargets = shortcutTargets;
@ -69,7 +69,7 @@ public final class WordProperty implements Comparable<WordProperty> {
} }
mIsBeginningOfSentence = false; mIsBeginningOfSentence = false;
mIsNotAWord = isNotAWord; mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklistEntry; mIsPossiblyOffensive = isPossiblyOffensive;
mHasNgrams = bigrams != null && !bigrams.isEmpty(); mHasNgrams = bigrams != null && !bigrams.isEmpty();
mHasShortcuts = shortcutTargets != null && !shortcutTargets.isEmpty(); mHasShortcuts = shortcutTargets != null && !shortcutTargets.isEmpty();
} }
@ -85,7 +85,7 @@ public final class WordProperty implements Comparable<WordProperty> {
// Construct word property using information from native code. // Construct word property using information from native code.
// This represents invalid word when the probability is BinaryDictionary.NOT_A_PROBABILITY. // This represents invalid word when the probability is BinaryDictionary.NOT_A_PROBABILITY.
public WordProperty(final int[] codePoints, final boolean isNotAWord, public WordProperty(final int[] codePoints, final boolean isNotAWord,
final boolean isBlacklisted, final boolean hasBigram, final boolean hasShortcuts, final boolean isPossiblyOffensive, final boolean hasBigram, final boolean hasShortcuts,
final boolean isBeginningOfSentence, final int[] probabilityInfo, final boolean isBeginningOfSentence, final int[] probabilityInfo,
final ArrayList<int[][]> ngramPrevWordsArray, final ArrayList<int[][]> ngramPrevWordsArray,
final ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray, final ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray,
@ -98,7 +98,7 @@ public final class WordProperty implements Comparable<WordProperty> {
final ArrayList<NgramProperty> ngrams = new ArrayList<>(); final ArrayList<NgramProperty> ngrams = new ArrayList<>();
mIsBeginningOfSentence = isBeginningOfSentence; mIsBeginningOfSentence = isBeginningOfSentence;
mIsNotAWord = isNotAWord; mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklisted; mIsPossiblyOffensive = isPossiblyOffensive;
mHasShortcuts = hasShortcuts; mHasShortcuts = hasShortcuts;
mHasNgrams = hasBigram; mHasNgrams = hasBigram;
@ -150,7 +150,7 @@ public final class WordProperty implements Comparable<WordProperty> {
word.mShortcutTargets, word.mShortcutTargets,
word.mNgrams, word.mNgrams,
word.mIsNotAWord, word.mIsNotAWord,
word.mIsBlacklistEntry word.mIsPossiblyOffensive
}); });
} }
@ -180,7 +180,7 @@ public final class WordProperty implements Comparable<WordProperty> {
WordProperty w = (WordProperty)o; WordProperty w = (WordProperty)o;
return mProbabilityInfo.equals(w.mProbabilityInfo) && mWord.equals(w.mWord) return mProbabilityInfo.equals(w.mProbabilityInfo) && mWord.equals(w.mWord)
&& mShortcutTargets.equals(w.mShortcutTargets) && equals(mNgrams, w.mNgrams) && mShortcutTargets.equals(w.mShortcutTargets) && equals(mNgrams, w.mNgrams)
&& mIsNotAWord == w.mIsNotAWord && mIsBlacklistEntry == w.mIsBlacklistEntry && mIsNotAWord == w.mIsNotAWord && mIsPossiblyOffensive == w.mIsPossiblyOffensive
&& mHasNgrams == w.mHasNgrams && mHasShortcuts && w.mHasNgrams; && mHasNgrams == w.mHasNgrams && mHasShortcuts && w.mHasNgrams;
} }

View File

@ -63,7 +63,7 @@ public class CombinedFormatUtils {
if (wordProperty.mIsNotAWord) { if (wordProperty.mIsNotAWord) {
builder.append("," + NOT_A_WORD_TAG + "=true"); builder.append("," + NOT_A_WORD_TAG + "=true");
} }
if (wordProperty.mIsBlacklistEntry) { if (wordProperty.mIsPossiblyOffensive) {
builder.append("," + BLACKLISTED_TAG + "=true"); builder.append("," + BLACKLISTED_TAG + "=true");
} }
builder.append("\n"); builder.append("\n");

View File

@ -54,7 +54,7 @@ public final class LanguageModelParam {
public final int mBigramProbability; public final int mBigramProbability;
public final int mShortcutProbability; public final int mShortcutProbability;
public final boolean mIsNotAWord; public final boolean mIsNotAWord;
public final boolean mIsBlacklisted; public final boolean mIsPossiblyOffensive;
// Time stamp in seconds. // Time stamp in seconds.
public final int mTimestamp; public final int mTimestamp;
@ -78,7 +78,7 @@ public final class LanguageModelParam {
mBigramProbability = bigramProbability; mBigramProbability = bigramProbability;
mShortcutProbability = Dictionary.NOT_A_PROBABILITY; mShortcutProbability = Dictionary.NOT_A_PROBABILITY;
mIsNotAWord = false; mIsNotAWord = false;
mIsBlacklisted = false; mIsPossiblyOffensive = false;
mTimestamp = timestamp; mTimestamp = timestamp;
} }

View File

@ -358,7 +358,7 @@ static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz, jlong dict, static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz, jlong dict,
jintArray word, jint probability, jintArray shortcutTarget, jint shortcutProbability, jintArray word, jint probability, jintArray shortcutTarget, jint shortcutProbability,
jboolean isBeginningOfSentence, jboolean isNotAWord, jboolean isBlacklisted, jboolean isBeginningOfSentence, jboolean isNotAWord, jboolean isPossiblyOffensive,
jint timestamp) { jint timestamp) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) { if (!dictionary) {
@ -377,8 +377,8 @@ static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz,
} }
// Use 1 for count to indicate the word has been input. // Use 1 for count to indicate the word has been input.
const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord, const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
isBlacklisted, probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */), isPossiblyOffensive, probability, HistoricalInfo(timestamp, 0 /* level */,
std::move(shortcuts)); 1 /* count */), std::move(shortcuts));
return dictionary->addUnigramEntry(CodePointArrayView(codePoints, codePointCount), return dictionary->addUnigramEntry(CodePointArrayView(codePoints, codePointCount),
&unigramProperty); &unigramProperty);
} }
@ -480,8 +480,8 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
env->GetFieldID(languageModelParamClass, "mShortcutProbability", "I"); env->GetFieldID(languageModelParamClass, "mShortcutProbability", "I");
jfieldID isNotAWordFieldId = jfieldID isNotAWordFieldId =
env->GetFieldID(languageModelParamClass, "mIsNotAWord", "Z"); env->GetFieldID(languageModelParamClass, "mIsNotAWord", "Z");
jfieldID isBlacklistedFieldId = jfieldID isPossiblyOffensiveFieldId =
env->GetFieldID(languageModelParamClass, "mIsBlacklisted", "Z"); env->GetFieldID(languageModelParamClass, "mIsPossiblyOffensive", "Z");
env->DeleteLocalRef(languageModelParamClass); env->DeleteLocalRef(languageModelParamClass);
for (int i = startIndex; i < languageModelParamCount; ++i) { for (int i = startIndex; i < languageModelParamCount; ++i) {
@ -504,7 +504,8 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
jint unigramProbability = env->GetIntField(languageModelParam, unigramProbabilityFieldId); jint unigramProbability = env->GetIntField(languageModelParam, unigramProbabilityFieldId);
jint timestamp = env->GetIntField(languageModelParam, timestampFieldId); jint timestamp = env->GetIntField(languageModelParam, timestampFieldId);
jboolean isNotAWord = env->GetBooleanField(languageModelParam, isNotAWordFieldId); jboolean isNotAWord = env->GetBooleanField(languageModelParam, isNotAWordFieldId);
jboolean isBlacklisted = env->GetBooleanField(languageModelParam, isBlacklistedFieldId); jboolean isPossiblyOffensive = env->GetBooleanField(languageModelParam,
isPossiblyOffensiveFieldId);
jintArray shortcutTarget = static_cast<jintArray>( jintArray shortcutTarget = static_cast<jintArray>(
env->GetObjectField(languageModelParam, shortcutTargetFieldId)); env->GetObjectField(languageModelParam, shortcutTargetFieldId));
std::vector<UnigramProperty::ShortcutProperty> shortcuts; std::vector<UnigramProperty::ShortcutProperty> shortcuts;
@ -519,7 +520,7 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
} }
// Use 1 for count to indicate the word has been input. // Use 1 for count to indicate the word has been input.
const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord, const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
isBlacklisted, unigramProbability, isPossiblyOffensive, unigramProbability,
HistoricalInfo(timestamp, 0 /* level */, 1 /* count */), std::move(shortcuts)); HistoricalInfo(timestamp, 0 /* level */, 1 /* count */), std::move(shortcuts));
dictionary->addUnigramEntry(CodePointArrayView(word1CodePoints, word1Length), dictionary->addUnigramEntry(CodePointArrayView(word1CodePoints, word1Length),
&unigramProperty); &unigramProperty);

View File

@ -49,21 +49,44 @@ class UnigramProperty {
}; };
UnigramProperty() UnigramProperty()
: mRepresentsBeginningOfSentence(false), mIsNotAWord(false), mIsBlacklisted(false), : mRepresentsBeginningOfSentence(false), mIsNotAWord(false),
mProbability(NOT_A_PROBABILITY), mHistoricalInfo(), mShortcuts() {} mIsBlacklisted(false), mIsPossiblyOffensive(false), mProbability(NOT_A_PROBABILITY),
mHistoricalInfo(), mShortcuts() {}
// In contexts which do not support the Blacklisted flag (v2, v4<403)
UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord, UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
const bool isBlacklisted, const int probability, const HistoricalInfo historicalInfo, const bool isPossiblyOffensive, const int probability,
const std::vector<ShortcutProperty> &&shortcuts) const HistoricalInfo historicalInfo, const std::vector<ShortcutProperty> &&shortcuts)
: mRepresentsBeginningOfSentence(representsBeginningOfSentence), : mRepresentsBeginningOfSentence(representsBeginningOfSentence),
mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability), mIsNotAWord(isNotAWord), mIsBlacklisted(false),
mIsPossiblyOffensive(isPossiblyOffensive), mProbability(probability),
mHistoricalInfo(historicalInfo), mShortcuts(std::move(shortcuts)) {} mHistoricalInfo(historicalInfo), mShortcuts(std::move(shortcuts)) {}
// Without shortcuts. // Without shortcuts, in contexts which do not support the Blacklisted flag (v2, v4<403)
UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord, UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
const bool isBlacklisted, const int probability, const HistoricalInfo historicalInfo) const bool isPossiblyOffensive, const int probability,
const HistoricalInfo historicalInfo)
: mRepresentsBeginningOfSentence(representsBeginningOfSentence), : mRepresentsBeginningOfSentence(representsBeginningOfSentence),
mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability), mIsNotAWord(isNotAWord), mIsBlacklisted(false),
mIsPossiblyOffensive(isPossiblyOffensive), mProbability(probability),
mHistoricalInfo(historicalInfo), mShortcuts() {}
// In contexts which DO support the Blacklisted flag (v403)
UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
const bool isBlacklisted, const bool isPossiblyOffensive, const int probability,
const HistoricalInfo historicalInfo, const std::vector<ShortcutProperty> &&shortcuts)
: mRepresentsBeginningOfSentence(representsBeginningOfSentence),
mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted),
mIsPossiblyOffensive(isPossiblyOffensive), mProbability(probability),
mHistoricalInfo(historicalInfo), mShortcuts(std::move(shortcuts)) {}
// Without shortcuts, in contexts which DO support the Blacklisted flag (v403)
UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
const bool isBlacklisted, const bool isPossiblyOffensive, const int probability,
const HistoricalInfo historicalInfo)
: mRepresentsBeginningOfSentence(representsBeginningOfSentence),
mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted),
mIsPossiblyOffensive(isPossiblyOffensive), mProbability(probability),
mHistoricalInfo(historicalInfo), mShortcuts() {} mHistoricalInfo(historicalInfo), mShortcuts() {}
bool representsBeginningOfSentence() const { bool representsBeginningOfSentence() const {
@ -74,13 +97,12 @@ class UnigramProperty {
return mIsNotAWord; return mIsNotAWord;
} }
bool isBlacklisted() const { bool isPossiblyOffensive() const {
return mIsBlacklisted; return mIsPossiblyOffensive;
} }
bool isPossiblyOffensive() const { bool isBlacklisted() const {
// TODO: Have dedicated flag. return mIsBlacklisted;
return mProbability == 0;
} }
bool hasShortcuts() const { bool hasShortcuts() const {
@ -106,6 +128,7 @@ class UnigramProperty {
const bool mRepresentsBeginningOfSentence; const bool mRepresentsBeginningOfSentence;
const bool mIsNotAWord; const bool mIsNotAWord;
const bool mIsBlacklisted; const bool mIsBlacklisted;
const bool mIsPossiblyOffensive;
const int mProbability; const int mProbability;
const HistoricalInfo mHistoricalInfo; const HistoricalInfo mHistoricalInfo;
const std::vector<ShortcutProperty> mShortcuts; const std::vector<ShortcutProperty> mShortcuts;

View File

@ -28,7 +28,7 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */, JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */,
MAX_WORD_LENGTH /* maxLength */, mCodePoints.data(), mCodePoints.size(), MAX_WORD_LENGTH /* maxLength */, mCodePoints.data(), mCodePoints.size(),
false /* needsNullTermination */); false /* needsNullTermination */);
jboolean flags[] = {mUnigramProperty.isNotAWord(), mUnigramProperty.isBlacklisted(), jboolean flags[] = {mUnigramProperty.isNotAWord(), mUnigramProperty.isPossiblyOffensive(),
!mNgrams.empty(), mUnigramProperty.hasShortcuts(), !mNgrams.empty(), mUnigramProperty.hasShortcuts(),
mUnigramProperty.representsBeginningOfSentence()}; mUnigramProperty.representsBeginningOfSentence()};
env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags); env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);

View File

@ -43,6 +43,14 @@ class WordAttributes {
return mIsNotAWord; return mIsNotAWord;
} }
// Whether or not a word is possibly offensive.
// * Static dictionaries <v202, as well as dynamic dictionaries <v403, will set this based on
// whether or not the probability of the word is zero.
// * Static dictionaries >=v203 will set this based on the IS_POSSIBLY_OFFENSIVE PtNode flag.
// * Dynamic dictionaries >=v403 will set this based on the IS_POSSIBLY_OFFENSIVE language model
// flag (the PtNode flag IS_BLACKLISTED is ignored and kept as zero)
//
// See the ::getWordAttributes function for each of these dictionary policies for more details.
bool isPossiblyOffensive() const { bool isPossiblyOffensive() const {
return mIsPossiblyOffensive; return mIsPossiblyOffensive;
} }

View File

@ -245,7 +245,7 @@ bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds
if (!sourcePtNodeParams.hasBigrams()) { if (!sourcePtNodeParams.hasBigrams()) {
// Update has bigrams flag. // Update has bigrams flag.
return updatePtNodeFlags(sourcePtNodeParams.getHeadPos(), return updatePtNodeFlags(sourcePtNodeParams.getHeadPos(),
sourcePtNodeParams.isBlacklisted(), sourcePtNodeParams.isNotAWord(), sourcePtNodeParams.isPossiblyOffensive(), sourcePtNodeParams.isNotAWord(),
sourcePtNodeParams.isTerminal(), sourcePtNodeParams.hasShortcutTargets(), sourcePtNodeParams.isTerminal(), sourcePtNodeParams.hasShortcutTargets(),
true /* hasBigrams */, true /* hasBigrams */,
sourcePtNodeParams.getCodePointCount() > 1 /* hasMultipleChars */); sourcePtNodeParams.getCodePointCount() > 1 /* hasMultipleChars */);
@ -316,7 +316,7 @@ bool Ver4PatriciaTrieNodeWriter::addShortcutTarget(const PtNodeParams *const ptN
if (!ptNodeParams->hasShortcutTargets()) { if (!ptNodeParams->hasShortcutTargets()) {
// Update has shortcut targets flag. // Update has shortcut targets flag.
return updatePtNodeFlags(ptNodeParams->getHeadPos(), return updatePtNodeFlags(ptNodeParams->getHeadPos(),
ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(), ptNodeParams->isPossiblyOffensive(), ptNodeParams->isNotAWord(),
ptNodeParams->isTerminal(), true /* hasShortcutTargets */, ptNodeParams->isTerminal(), true /* hasShortcutTargets */,
ptNodeParams->hasBigrams(), ptNodeParams->hasBigrams(),
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */); ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
@ -330,7 +330,7 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeHasBigramsAndShortcutTargetsFlags(
ptNodeParams->getTerminalId()) != NOT_A_DICT_POS; ptNodeParams->getTerminalId()) != NOT_A_DICT_POS;
const bool hasShortcutTargets = mBuffers->getShortcutDictContent()->getShortcutListHeadPos( const bool hasShortcutTargets = mBuffers->getShortcutDictContent()->getShortcutListHeadPos(
ptNodeParams->getTerminalId()) != NOT_A_DICT_POS; ptNodeParams->getTerminalId()) != NOT_A_DICT_POS;
return updatePtNodeFlags(ptNodeParams->getHeadPos(), ptNodeParams->isBlacklisted(), return updatePtNodeFlags(ptNodeParams->getHeadPos(), ptNodeParams->isPossiblyOffensive(),
ptNodeParams->isNotAWord(), ptNodeParams->isTerminal(), hasShortcutTargets, ptNodeParams->isNotAWord(), ptNodeParams->isTerminal(), hasShortcutTargets,
hasBigrams, ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */); hasBigrams, ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
} }
@ -386,8 +386,9 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
ptNodeParams->getChildrenPos(), ptNodeWritingPos)) { ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
return false; return false;
} }
return updatePtNodeFlags(nodePos, ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(), return updatePtNodeFlags(nodePos, ptNodeParams->isPossiblyOffensive(),
isTerminal, ptNodeParams->hasShortcutTargets(), ptNodeParams->hasBigrams(), ptNodeParams->isNotAWord(), isTerminal, ptNodeParams->hasShortcutTargets(),
ptNodeParams->hasBigrams(),
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */); ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
} }

View File

@ -608,8 +608,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
} }
} }
const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(), const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), ptNodeParams.isNotAWord(), ptNodeParams.isPossiblyOffensive(),
*historicalInfo, std::move(shortcuts)); ptNodeParams.getProbability(), *historicalInfo, std::move(shortcuts));
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams); return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
} }

View File

@ -146,7 +146,7 @@ bool DynamicPtUpdatingHelper::setPtNodeProbability(const PtNodeParams *const ori
const int movedPos = mBuffer->getTailPosition(); const int movedPos = mBuffer->getTailPosition();
int writingPos = movedPos; int writingPos = movedPos;
const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams, const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), unigramProperty->isNotAWord(), unigramProperty->isPossiblyOffensive(),
true /* isTerminal */, originalPtNodeParams->getParentPos(), true /* isTerminal */, originalPtNodeParams->getParentPos(),
originalPtNodeParams->getCodePointArrayView(), unigramProperty->getProbability())); originalPtNodeParams->getCodePointArrayView(), unigramProperty->getProbability()));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
@ -180,8 +180,9 @@ bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
return false; return false;
} }
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), true /* isTerminal */, unigramProperty->isNotAWord(), unigramProperty->isPossiblyOffensive(),
parentPtNodePos, ptNodeCodePoints, unigramProperty->getProbability())); true /* isTerminal */, parentPtNodePos, ptNodeCodePoints,
unigramProperty->getProbability()));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
unigramProperty, &writingPos)) { unigramProperty, &writingPos)) {
return false; return false;
@ -214,7 +215,7 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
reallocatingPtNodeParams->getCodePointArrayView().limit(overlappingCodePointCount); reallocatingPtNodeParams->getCodePointArrayView().limit(overlappingCodePointCount);
if (addsExtraChild) { if (addsExtraChild) {
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
false /* isNotAWord */, false /* isBlacklisted */, false /* isTerminal */, false /* isNotAWord */, false /* isPossiblyOffensive */, false /* isTerminal */,
reallocatingPtNodeParams->getParentPos(), firstPtNodeCodePoints, reallocatingPtNodeParams->getParentPos(), firstPtNodeCodePoints,
NOT_A_PROBABILITY)); NOT_A_PROBABILITY));
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) { if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
@ -222,7 +223,7 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
} }
} else { } else {
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), unigramProperty->isNotAWord(), unigramProperty->isPossiblyOffensive(),
true /* isTerminal */, reallocatingPtNodeParams->getParentPos(), true /* isTerminal */, reallocatingPtNodeParams->getParentPos(),
firstPtNodeCodePoints, unigramProperty->getProbability())); firstPtNodeCodePoints, unigramProperty->getProbability()));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
@ -240,7 +241,7 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
// Write the 2nd part of the reallocating node. // Write the 2nd part of the reallocating node.
const int secondPartOfReallocatedPtNodePos = writingPos; const int secondPartOfReallocatedPtNodePos = writingPos;
const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams, const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams,
reallocatingPtNodeParams->isNotAWord(), reallocatingPtNodeParams->isBlacklisted(), reallocatingPtNodeParams->isNotAWord(), reallocatingPtNodeParams->isPossiblyOffensive(),
reallocatingPtNodeParams->isTerminal(), firstPartOfReallocatedPtNodePos, reallocatingPtNodeParams->isTerminal(), firstPartOfReallocatedPtNodePos,
reallocatingPtNodeParams->getCodePointArrayView().skip(overlappingCodePointCount), reallocatingPtNodeParams->getCodePointArrayView().skip(overlappingCodePointCount),
reallocatingPtNodeParams->getProbability())); reallocatingPtNodeParams->getProbability()));
@ -249,7 +250,7 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
} }
if (addsExtraChild) { if (addsExtraChild) {
const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode( const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), unigramProperty->isNotAWord(), unigramProperty->isPossiblyOffensive(),
true /* isTerminal */, firstPartOfReallocatedPtNodePos, true /* isTerminal */, firstPartOfReallocatedPtNodePos,
newPtNodeCodePoints.skip(overlappingCodePointCount), newPtNodeCodePoints.skip(overlappingCodePointCount),
unigramProperty->getProbability())); unigramProperty->getProbability()));
@ -276,20 +277,20 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
const PtNodeParams DynamicPtUpdatingHelper::getUpdatedPtNodeParams( const PtNodeParams DynamicPtUpdatingHelper::getUpdatedPtNodeParams(
const PtNodeParams *const originalPtNodeParams, const bool isNotAWord, const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
const bool isBlacklisted, const bool isTerminal, const int parentPos, const bool isPossiblyOffensive, const bool isTerminal, const int parentPos,
const CodePointArrayView codePoints, const int probability) const { const CodePointArrayView codePoints, const int probability) const {
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags( const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */, isPossiblyOffensive, isNotAWord, isTerminal, false /* hasShortcutTargets */,
false /* hasBigrams */, codePoints.size() > 1u /* hasMultipleChars */, false /* hasBigrams */, codePoints.size() > 1u /* hasMultipleChars */,
CHILDREN_POSITION_FIELD_SIZE); CHILDREN_POSITION_FIELD_SIZE);
return PtNodeParams(originalPtNodeParams, flags, parentPos, codePoints, probability); return PtNodeParams(originalPtNodeParams, flags, parentPos, codePoints, probability);
} }
const PtNodeParams DynamicPtUpdatingHelper::getPtNodeParamsForNewPtNode(const bool isNotAWord, const PtNodeParams DynamicPtUpdatingHelper::getPtNodeParamsForNewPtNode(const bool isNotAWord,
const bool isBlacklisted, const bool isTerminal, const int parentPos, const bool isPossiblyOffensive, const bool isTerminal, const int parentPos,
const CodePointArrayView codePoints, const int probability) const { const CodePointArrayView codePoints, const int probability) const {
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags( const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */, isPossiblyOffensive, isNotAWord, isTerminal, false /* hasShortcutTargets */,
false /* hasBigrams */, codePoints.size() > 1u /* hasMultipleChars */, false /* hasBigrams */, codePoints.size() > 1u /* hasMultipleChars */,
CHILDREN_POSITION_FIELD_SIZE); CHILDREN_POSITION_FIELD_SIZE);
return PtNodeParams(flags, parentPos, codePoints, probability); return PtNodeParams(flags, parentPos, codePoints, probability);

View File

@ -85,12 +85,12 @@ class DynamicPtUpdatingHelper {
const CodePointArrayView newPtNodeCodePoints); const CodePointArrayView newPtNodeCodePoints);
const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams, const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
const bool isNotAWord, const bool isBlacklisted, const bool isTerminal, const bool isNotAWord, const bool isPossiblyOffensive, const bool isTerminal,
const int parentPos, const CodePointArrayView codePoints, const int probability) const; const int parentPos, const CodePointArrayView codePoints, const int probability) const;
const PtNodeParams getPtNodeParamsForNewPtNode(const bool isNotAWord, const bool isBlacklisted, const PtNodeParams getPtNodeParamsForNewPtNode(const bool isNotAWord,
const bool isTerminal, const int parentPos, const CodePointArrayView codePoints, const bool isPossiblyOffensive, const bool isTerminal, const int parentPos,
const int probability) const; const CodePointArrayView codePoints, const int probability) const;
}; };
} // namespace latinime } // namespace latinime
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H */ #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H */

View File

@ -41,8 +41,8 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_SHORTCUT_TARGETS = 0x08
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_BIGRAMS = 0x04; const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_BIGRAMS = 0x04;
// Flag for non-words (typically, shortcut only entries) // Flag for non-words (typically, shortcut only entries)
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_NOT_A_WORD = 0x02; const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_NOT_A_WORD = 0x02;
// Flag for blacklist // Flag for possibly offensive words
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01; const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_POSSIBLY_OFFENSIVE = 0x01;
/* static */ int PtReadingUtils::getPtNodeArraySizeAndAdvancePosition( /* static */ int PtReadingUtils::getPtNodeArraySizeAndAdvancePosition(
const uint8_t *const buffer, int *const pos) { const uint8_t *const buffer, int *const pos) {

View File

@ -54,8 +54,8 @@ class PatriciaTrieReadingUtils {
/** /**
* Node Flags * Node Flags
*/ */
static AK_FORCE_INLINE bool isBlacklisted(const NodeFlags flags) { static AK_FORCE_INLINE bool isPossiblyOffensive(const NodeFlags flags) {
return (flags & FLAG_IS_BLACKLISTED) != 0; return (flags & FLAG_IS_POSSIBLY_OFFENSIVE) != 0;
} }
static AK_FORCE_INLINE bool isNotAWord(const NodeFlags flags) { static AK_FORCE_INLINE bool isNotAWord(const NodeFlags flags) {
@ -82,12 +82,12 @@ class PatriciaTrieReadingUtils {
return FLAG_CHILDREN_POSITION_TYPE_NOPOSITION != (MASK_CHILDREN_POSITION_TYPE & flags); return FLAG_CHILDREN_POSITION_TYPE_NOPOSITION != (MASK_CHILDREN_POSITION_TYPE & flags);
} }
static AK_FORCE_INLINE NodeFlags createAndGetFlags(const bool isBlacklisted, static AK_FORCE_INLINE NodeFlags createAndGetFlags(const bool isPossiblyOffensive,
const bool isNotAWord, const bool isTerminal, const bool hasShortcutTargets, const bool isNotAWord, const bool isTerminal, const bool hasShortcutTargets,
const bool hasBigrams, const bool hasMultipleChars, const bool hasBigrams, const bool hasMultipleChars,
const int childrenPositionFieldSize) { const int childrenPositionFieldSize) {
NodeFlags nodeFlags = 0; NodeFlags nodeFlags = 0;
nodeFlags = isBlacklisted ? (nodeFlags | FLAG_IS_BLACKLISTED) : nodeFlags; nodeFlags = isPossiblyOffensive ? (nodeFlags | FLAG_IS_POSSIBLY_OFFENSIVE) : nodeFlags;
nodeFlags = isNotAWord ? (nodeFlags | FLAG_IS_NOT_A_WORD) : nodeFlags; nodeFlags = isNotAWord ? (nodeFlags | FLAG_IS_NOT_A_WORD) : nodeFlags;
nodeFlags = isTerminal ? (nodeFlags | FLAG_IS_TERMINAL) : nodeFlags; nodeFlags = isTerminal ? (nodeFlags | FLAG_IS_TERMINAL) : nodeFlags;
nodeFlags = hasShortcutTargets ? (nodeFlags | FLAG_HAS_SHORTCUT_TARGETS) : nodeFlags; nodeFlags = hasShortcutTargets ? (nodeFlags | FLAG_HAS_SHORTCUT_TARGETS) : nodeFlags;
@ -127,7 +127,7 @@ class PatriciaTrieReadingUtils {
static const NodeFlags FLAG_HAS_SHORTCUT_TARGETS; static const NodeFlags FLAG_HAS_SHORTCUT_TARGETS;
static const NodeFlags FLAG_HAS_BIGRAMS; static const NodeFlags FLAG_HAS_BIGRAMS;
static const NodeFlags FLAG_IS_NOT_A_WORD; static const NodeFlags FLAG_IS_NOT_A_WORD;
static const NodeFlags FLAG_IS_BLACKLISTED; static const NodeFlags FLAG_IS_POSSIBLY_OFFENSIVE;
}; };
} // namespace latinime } // namespace latinime
#endif /* LATINIME_PATRICIA_TRIE_NODE_READING_UTILS_H */ #endif /* LATINIME_PATRICIA_TRIE_NODE_READING_UTILS_H */

View File

@ -145,7 +145,18 @@ class PtNodeParams {
} }
AK_FORCE_INLINE bool isBlacklisted() const { AK_FORCE_INLINE bool isBlacklisted() const {
return PatriciaTrieReadingUtils::isBlacklisted(mFlags); // Note: this method will be removed in the next change.
// It is used in getProbabilityOfWord and getWordAttributes for both v402 and v403.
// * getProbabilityOfWord will be changed to no longer return NOT_A_PROBABILITY
// when isBlacklisted (i.e. to only check if isNotAWord or isDeleted)
// * getWordAttributes will be changed to always return blacklisted=false and
// isPossiblyOffensive according to the function below (instead of the current
// behaviour of checking if the probability is zero)
return PatriciaTrieReadingUtils::isPossiblyOffensive(mFlags);
}
AK_FORCE_INLINE bool isPossiblyOffensive() const {
return PatriciaTrieReadingUtils::isPossiblyOffensive(mFlags);
} }
AK_FORCE_INLINE bool isNotAWord() const { AK_FORCE_INLINE bool isNotAWord() const {

View File

@ -476,8 +476,8 @@ const WordProperty PatriciaTriePolicy::getWordProperty(
} }
} }
const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(), const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), ptNodeParams.isNotAWord(), ptNodeParams.isPossiblyOffensive(),
HistoricalInfo(), std::move(shortcuts)); ptNodeParams.getProbability(), HistoricalInfo(), std::move(shortcuts));
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams); return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
} }

View File

@ -342,7 +342,7 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos, const bo
// Create node flags and write them. // Create node flags and write them.
PatriciaTrieReadingUtils::NodeFlags nodeFlags = PatriciaTrieReadingUtils::NodeFlags nodeFlags =
PatriciaTrieReadingUtils::createAndGetFlags(false /* isNotAWord */, PatriciaTrieReadingUtils::createAndGetFlags(false /* isNotAWord */,
false /* isBlacklisted */, isTerminal, false /* hasShortcutTargets */, false /* isPossiblyOffensive */, isTerminal, false /* hasShortcutTargets */,
false /* hasBigrams */, hasMultipleChars, CHILDREN_POSITION_FIELD_SIZE); false /* hasBigrams */, hasMultipleChars, CHILDREN_POSITION_FIELD_SIZE);
if (!DynamicPtWritingUtils::writeFlags(mTrieBuffer, nodeFlags, ptNodePos)) { if (!DynamicPtWritingUtils::writeFlags(mTrieBuffer, nodeFlags, ptNodePos)) {
AKLOGE("Cannot write PtNode flags. flags: %x, pos: %d", nodeFlags, ptNodePos); AKLOGE("Cannot write PtNode flags. flags: %x, pos: %d", nodeFlags, ptNodePos);

View File

@ -299,7 +299,8 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContex
} }
const UnigramProperty beginningOfSentenceUnigramProperty( const UnigramProperty beginningOfSentenceUnigramProperty(
true /* representsBeginningOfSentence */, true /* isNotAWord */, true /* representsBeginningOfSentence */, true /* isNotAWord */,
false /* isBlacklisted */, MAX_PROBABILITY /* probability */, HistoricalInfo()); false /* isBlacklisted */, false /* isPossiblyOffensive */,
MAX_PROBABILITY /* probability */, HistoricalInfo());
if (!addUnigramEntry(ngramContext->getNthPrevWordCodePoints(1 /* n */), if (!addUnigramEntry(ngramContext->getNthPrevWordCodePoints(1 /* n */),
&beginningOfSentenceUnigramProperty)) { &beginningOfSentenceUnigramProperty)) {
AKLOGE("Cannot add unigram entry for the beginning-of-sentence."); AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
@ -375,8 +376,9 @@ bool Ver4PatriciaTriePolicy::updateEntriesForWordWithNgramContext(
if (wordId == NOT_A_WORD_ID) { if (wordId == NOT_A_WORD_ID) {
// The word is not in the dictionary. // The word is not in the dictionary.
const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */, const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */,
false /* isNotAWord */, false /* isBlacklisted */, NOT_A_PROBABILITY, false /* isNotAWord */, false /* isBlacklisted */, false /* isPossiblyOffensive */,
HistoricalInfo(historicalInfo.getTimestamp(), 0 /* level */, 0 /* count */)); NOT_A_PROBABILITY, HistoricalInfo(historicalInfo.getTimestamp(), 0 /* level */,
0 /* count */));
if (!addUnigramEntry(wordCodePoints, &unigramProperty)) { if (!addUnigramEntry(wordCodePoints, &unigramProperty)) {
AKLOGE("Cannot add unigarm entry in updateEntriesForWordWithNgramContext()."); AKLOGE("Cannot add unigarm entry in updateEntriesForWordWithNgramContext().");
return false; return false;
@ -391,7 +393,7 @@ bool Ver4PatriciaTriePolicy::updateEntriesForWordWithNgramContext(
&& ngramContext->isNthPrevWordBeginningOfSentence(1 /* n */)) { && ngramContext->isNthPrevWordBeginningOfSentence(1 /* n */)) {
const UnigramProperty beginningOfSentenceUnigramProperty( const UnigramProperty beginningOfSentenceUnigramProperty(
true /* representsBeginningOfSentence */, true /* representsBeginningOfSentence */,
true /* isNotAWord */, false /* isBlacklisted */, NOT_A_PROBABILITY, true /* isNotAWord */, false /* isPossiblyOffensive */, NOT_A_PROBABILITY,
HistoricalInfo(historicalInfo.getTimestamp(), 0 /* level */, 0 /* count */)); HistoricalInfo(historicalInfo.getTimestamp(), 0 /* level */, 0 /* count */));
if (!addUnigramEntry(ngramContext->getNthPrevWordCodePoints(1 /* n */), if (!addUnigramEntry(ngramContext->getNthPrevWordCodePoints(1 /* n */),
&beginningOfSentenceUnigramProperty)) { &beginningOfSentenceUnigramProperty)) {
@ -532,7 +534,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
} }
const UnigramProperty unigramProperty(probabilityEntry.representsBeginningOfSentence(), const UnigramProperty unigramProperty(probabilityEntry.representsBeginningOfSentence(),
probabilityEntry.isNotAWord(), probabilityEntry.isBlacklisted(), probabilityEntry.isNotAWord(), probabilityEntry.isBlacklisted(),
probabilityEntry.getProbability(), *historicalInfo, std::move(shortcuts)); probabilityEntry.isPossiblyOffensive(), probabilityEntry.getProbability(),
*historicalInfo, std::move(shortcuts));
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams); return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
} }

View File

@ -684,8 +684,8 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
binaryDictionary.addUnigramEntry("", DUMMY_PROBABILITY, "" /* shortcutTarget */, binaryDictionary.addUnigramEntry("", DUMMY_PROBABILITY, "" /* shortcutTarget */,
BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */, BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
true /* isBeginningOfSentence */, true /* isNotAWord */, false /* isBlacklisted */, true /* isBeginningOfSentence */, true /* isNotAWord */,
mCurrentTime); false /* isPossiblyOffensive */, mCurrentTime);
final NgramContext beginningOfSentenceContext = NgramContext.BEGINNING_OF_SENTENCE; final NgramContext beginningOfSentenceContext = NgramContext.BEGINNING_OF_SENTENCE;
onInputWordWithBeginningOfSentenceContext(binaryDictionary, "aaa", true /* isValidWord */); onInputWordWithBeginningOfSentenceContext(binaryDictionary, "aaa", true /* isValidWord */);
assertFalse(binaryDictionary.isValidNgram(beginningOfSentenceContext, "aaa")); assertFalse(binaryDictionary.isValidNgram(beginningOfSentenceContext, "aaa"));

View File

@ -200,7 +200,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
// Too long short cut. // Too long short cut.
binaryDictionary.addUnigramEntry("a", probability, invalidLongWord, binaryDictionary.addUnigramEntry("a", probability, invalidLongWord,
10 /* shortcutProbability */, false /* isBeginningOfSentence */, 10 /* shortcutProbability */, false /* isBeginningOfSentence */,
false /* isNotAWord */, false /* isBlacklisted */, false /* isNotAWord */, false /* isPossiblyOffensive */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP); BinaryDictionary.NOT_A_VALID_TIMESTAMP);
addUnigramWord(binaryDictionary, "abc", probability); addUnigramWord(binaryDictionary, "abc", probability);
final int updatedProbability = 200; final int updatedProbability = 200;
@ -221,7 +221,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */, binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */,
BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */, BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
false /* isBeginningOfSentence */, false /* isNotAWord */, false /* isBeginningOfSentence */, false /* isNotAWord */,
false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); false /* isPossiblyOffensive */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
} }
private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0, private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
@ -971,11 +972,11 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final String word = CodePointUtils.generateWord(random, codePointSet); final String word = CodePointUtils.generateWord(random, codePointSet);
final int unigramProbability = random.nextInt(0xFF); final int unigramProbability = random.nextInt(0xFF);
final boolean isNotAWord = random.nextBoolean(); final boolean isNotAWord = random.nextBoolean();
final boolean isBlacklisted = random.nextBoolean(); final boolean isPossiblyOffensive = random.nextBoolean();
// TODO: Add tests for historical info. // TODO: Add tests for historical info.
binaryDictionary.addUnigramEntry(word, unigramProbability, binaryDictionary.addUnigramEntry(word, unigramProbability,
null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY, null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY,
false /* isBeginningOfSentence */, isNotAWord, isBlacklisted, false /* isBeginningOfSentence */, isNotAWord, isPossiblyOffensive,
BinaryDictionary.NOT_A_VALID_TIMESTAMP); BinaryDictionary.NOT_A_VALID_TIMESTAMP);
if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
binaryDictionary.flushWithGC(); binaryDictionary.flushWithGC();
@ -987,7 +988,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertEquals(word, wordProperty.mWord); assertEquals(word, wordProperty.mWord);
assertTrue(wordProperty.isValid()); assertTrue(wordProperty.isValid());
assertEquals(isNotAWord, wordProperty.mIsNotAWord); assertEquals(isNotAWord, wordProperty.mIsNotAWord);
assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry); assertEquals(isPossiblyOffensive, wordProperty.mIsPossiblyOffensive);
assertEquals(false, wordProperty.mHasNgrams); assertEquals(false, wordProperty.mHasNgrams);
assertEquals(false, wordProperty.mHasShortcuts); assertEquals(false, wordProperty.mHasShortcuts);
assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability); assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability);
@ -1142,7 +1143,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int shortcutProbability = 10; final int shortcutProbability = 10;
binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz", binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
shortcutProbability, false /* isBeginningOfSentence */, shortcutProbability, false /* isBeginningOfSentence */,
false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); false /* isNotAWord */, false /* isPossiblyOffensive */, 0 /* timestamp */);
WordProperty wordProperty = binaryDictionary.getWordProperty("aaa", WordProperty wordProperty = binaryDictionary.getWordProperty("aaa",
false /* isBeginningOfSentence */); false /* isBeginningOfSentence */);
assertEquals(1, wordProperty.mShortcutTargets.size()); assertEquals(1, wordProperty.mShortcutTargets.size());
@ -1151,7 +1152,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int updatedShortcutProbability = 2; final int updatedShortcutProbability = 2;
binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz", binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
updatedShortcutProbability, false /* isBeginningOfSentence */, updatedShortcutProbability, false /* isBeginningOfSentence */,
false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); false /* isNotAWord */, false /* isPossiblyOffensive */, 0 /* timestamp */);
wordProperty = binaryDictionary.getWordProperty("aaa", wordProperty = binaryDictionary.getWordProperty("aaa",
false /* isBeginningOfSentence */); false /* isBeginningOfSentence */);
assertEquals(1, wordProperty.mShortcutTargets.size()); assertEquals(1, wordProperty.mShortcutTargets.size());
@ -1160,7 +1161,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
wordProperty.mShortcutTargets.get(0).getProbability()); wordProperty.mShortcutTargets.get(0).getProbability());
binaryDictionary.addUnigramEntry("aaa", unigramProbability, "yyy", binaryDictionary.addUnigramEntry("aaa", unigramProbability, "yyy",
shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */, shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
false /* isBlacklisted */, 0 /* timestamp */); false /* isPossiblyOffensive */, 0 /* timestamp */);
final HashMap<String, Integer> shortcutTargets = new HashMap<>(); final HashMap<String, Integer> shortcutTargets = new HashMap<>();
shortcutTargets.put("zzz", updatedShortcutProbability); shortcutTargets.put("zzz", updatedShortcutProbability);
shortcutTargets.put("yyy", shortcutProbability); shortcutTargets.put("yyy", shortcutProbability);
@ -1223,7 +1224,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int unigramProbability = unigramProbabilities.get(word); final int unigramProbability = unigramProbabilities.get(word);
binaryDictionary.addUnigramEntry(word, unigramProbability, shortcutTarget, binaryDictionary.addUnigramEntry(word, unigramProbability, shortcutTarget,
shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */, shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
false /* isBlacklisted */, 0 /* timestamp */); false /* isPossiblyOffensive */, 0 /* timestamp */);
if (shortcutTargets.containsKey(word)) { if (shortcutTargets.containsKey(word)) {
final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word); final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word);
shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability); shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
@ -1255,6 +1256,15 @@ public class BinaryDictionaryTests extends AndroidTestCase {
} }
} }
public void testPossiblyOffensiveAttributeMaintained() {
final BinaryDictionary binaryDictionary =
getEmptyBinaryDictionary(FormatSpec.VERSION4_DEV);
binaryDictionary.addUnigramEntry("ddd", 100, null, Dictionary.NOT_A_PROBABILITY,
false, true, true, 0);
WordProperty wordProperty = binaryDictionary.getWordProperty("ddd", false);
assertEquals(true, wordProperty.mIsPossiblyOffensive);
}
public void testDictMigration() { public void testDictMigration() {
for (final int formatVersion : DICT_FORMAT_VERSIONS) { for (final int formatVersion : DICT_FORMAT_VERSIONS) {
testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion); testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion);
@ -1271,10 +1281,10 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int shortcutProbability = 10; final int shortcutProbability = 10;
binaryDictionary.addUnigramEntry("ccc", unigramProbability, "xxx", shortcutProbability, binaryDictionary.addUnigramEntry("ccc", unigramProbability, "xxx", shortcutProbability,
false /* isBeginningOfSentence */, false /* isNotAWord */, false /* isBeginningOfSentence */, false /* isNotAWord */,
false /* isBlacklisted */, 0 /* timestamp */); false /* isPossiblyOffensive */, 0 /* timestamp */);
binaryDictionary.addUnigramEntry("ddd", unigramProbability, null /* shortcutTarget */, binaryDictionary.addUnigramEntry("ddd", unigramProbability, null /* shortcutTarget */,
Dictionary.NOT_A_PROBABILITY, false /* isBeginningOfSentence */, Dictionary.NOT_A_PROBABILITY, false /* isBeginningOfSentence */,
true /* isNotAWord */, true /* isBlacklisted */, 0 /* timestamp */); true /* isNotAWord */, true /* isPossiblyOffensive */, 0 /* timestamp */);
binaryDictionary.addNgramEntry(NgramContext.BEGINNING_OF_SENTENCE, binaryDictionary.addNgramEntry(NgramContext.BEGINNING_OF_SENTENCE,
"aaa", bigramProbability, 0 /* timestamp */); "aaa", bigramProbability, 0 /* timestamp */);
assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
@ -1298,7 +1308,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertEquals("xxx", wordProperty.mShortcutTargets.get(0).mWord); assertEquals("xxx", wordProperty.mShortcutTargets.get(0).mWord);
wordProperty = binaryDictionary.getWordProperty("ddd", wordProperty = binaryDictionary.getWordProperty("ddd",
false /* isBeginningOfSentence */); false /* isBeginningOfSentence */);
assertTrue(wordProperty.mIsBlacklistEntry); assertTrue(wordProperty.mIsPossiblyOffensive);
assertTrue(wordProperty.mIsNotAWord); assertTrue(wordProperty.mIsNotAWord);
} }

View File

@ -35,16 +35,20 @@ public class FusionDictionaryTests extends AndroidTestCase {
FusionDictionary dict = new FusionDictionary(new PtNodeArray(), FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new DictionaryOptions(new HashMap<String,String>())); new DictionaryOptions(new HashMap<String,String>()));
dict.add("abc", new ProbabilityInfo(10), null, false /* isNotAWord */); dict.add("abc", new ProbabilityInfo(10), null, false /* isNotAWord */,
false /* isPossiblyOffensive */);
assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aaa")); assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aaa"));
assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "abc")); assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "abc"));
dict.add("aa", new ProbabilityInfo(10), null, false /* isNotAWord */); dict.add("aa", new ProbabilityInfo(10), null, false /* isNotAWord */,
false /* isPossiblyOffensive */);
assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aaa")); assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aaa"));
assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aa")); assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aa"));
dict.add("babcd", new ProbabilityInfo(10), null, false /* isNotAWord */); dict.add("babcd", new ProbabilityInfo(10), null, false /* isNotAWord */,
dict.add("bacde", new ProbabilityInfo(10), null, false /* isNotAWord */); false /* isPossiblyOffensive */);
dict.add("bacde", new ProbabilityInfo(10), null, false /* isNotAWord */,
false /* isPossiblyOffensive */);
assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "ba")); assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "ba"));
assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "babcd")); assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "babcd"));
assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "bacde")); assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "bacde"));

View File

@ -149,7 +149,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
} }
} }
dict.add(word, new ProbabilityInfo(UNIGRAM_FREQ), dict.add(word, new ProbabilityInfo(UNIGRAM_FREQ),
(shortcutMap == null) ? null : shortcuts, false /* isNotAWord */); (shortcutMap == null) ? null : shortcuts, false /* isNotAWord */,
false /* isPossiblyOffensive */);
} }
} }

View File

@ -572,12 +572,12 @@ public class BinaryDictEncoderUtils {
* @param hasShortcuts whether the PtNode has shortcuts. * @param hasShortcuts whether the PtNode has shortcuts.
* @param hasBigrams whether the PtNode has bigrams. * @param hasBigrams whether the PtNode has bigrams.
* @param isNotAWord whether the PtNode is not a word. * @param isNotAWord whether the PtNode is not a word.
* @param isBlackListEntry whether the PtNode is a blacklist entry. * @param isPossiblyOffensive whether the PtNode is a possibly offensive entry.
* @return the flags * @return the flags
*/ */
static int makePtNodeFlags(final boolean hasMultipleChars, final boolean isTerminal, static int makePtNodeFlags(final boolean hasMultipleChars, final boolean isTerminal,
final int childrenAddressSize, final boolean hasShortcuts, final boolean hasBigrams, final int childrenAddressSize, final boolean hasShortcuts, final boolean hasBigrams,
final boolean isNotAWord, final boolean isBlackListEntry) { final boolean isNotAWord, final boolean isPossiblyOffensive) {
byte flags = 0; byte flags = 0;
if (hasMultipleChars) flags |= FormatSpec.FLAG_HAS_MULTIPLE_CHARS; if (hasMultipleChars) flags |= FormatSpec.FLAG_HAS_MULTIPLE_CHARS;
if (isTerminal) flags |= FormatSpec.FLAG_IS_TERMINAL; if (isTerminal) flags |= FormatSpec.FLAG_IS_TERMINAL;
@ -600,7 +600,7 @@ public class BinaryDictEncoderUtils {
if (hasShortcuts) flags |= FormatSpec.FLAG_HAS_SHORTCUT_TARGETS; if (hasShortcuts) flags |= FormatSpec.FLAG_HAS_SHORTCUT_TARGETS;
if (hasBigrams) flags |= FormatSpec.FLAG_HAS_BIGRAMS; if (hasBigrams) flags |= FormatSpec.FLAG_HAS_BIGRAMS;
if (isNotAWord) flags |= FormatSpec.FLAG_IS_NOT_A_WORD; if (isNotAWord) flags |= FormatSpec.FLAG_IS_NOT_A_WORD;
if (isBlackListEntry) flags |= FormatSpec.FLAG_IS_BLACKLISTED; if (isPossiblyOffensive) flags |= FormatSpec.FLAG_IS_POSSIBLY_OFFENSIVE;
return flags; return flags;
} }
@ -609,7 +609,7 @@ public class BinaryDictEncoderUtils {
getByteSize(childrenOffset), getByteSize(childrenOffset),
node.mShortcutTargets != null && !node.mShortcutTargets.isEmpty(), node.mShortcutTargets != null && !node.mShortcutTargets.isEmpty(),
node.mBigrams != null && !node.mBigrams.isEmpty(), node.mBigrams != null && !node.mBigrams.isEmpty(),
node.mIsNotAWord, node.mIsBlacklistEntry); node.mIsNotAWord, node.mIsPossiblyOffensive);
} }
/** /**

View File

@ -89,7 +89,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
int mTerminalId; // NOT_A_TERMINAL == mTerminalId indicates this is not a terminal. int mTerminalId; // NOT_A_TERMINAL == mTerminalId indicates this is not a terminal.
PtNodeArray mChildren; PtNodeArray mChildren;
boolean mIsNotAWord; // Only a shortcut boolean mIsNotAWord; // Only a shortcut
boolean mIsBlacklistEntry; boolean mIsPossiblyOffensive;
// mCachedSize and mCachedAddressBefore/AfterUpdate are helpers for binary dictionary // mCachedSize and mCachedAddressBefore/AfterUpdate are helpers for binary dictionary
// generation. Before and After always hold the same value except during dictionary // generation. Before and After always hold the same value except during dictionary
// address compression, where the update process needs to know about both values at the // address compression, where the update process needs to know about both values at the
@ -102,7 +102,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
public PtNode(final int[] chars, final ArrayList<WeightedString> shortcutTargets, public PtNode(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
final ArrayList<WeightedString> bigrams, final ProbabilityInfo probabilityInfo, final ArrayList<WeightedString> bigrams, final ProbabilityInfo probabilityInfo,
final boolean isNotAWord, final boolean isBlacklistEntry) { final boolean isNotAWord, final boolean isPossiblyOffensive) {
mChars = chars; mChars = chars;
mProbabilityInfo = probabilityInfo; mProbabilityInfo = probabilityInfo;
mTerminalId = probabilityInfo == null ? NOT_A_TERMINAL : probabilityInfo.mProbability; mTerminalId = probabilityInfo == null ? NOT_A_TERMINAL : probabilityInfo.mProbability;
@ -110,12 +110,12 @@ public final class FusionDictionary implements Iterable<WordProperty> {
mBigrams = bigrams; mBigrams = bigrams;
mChildren = null; mChildren = null;
mIsNotAWord = isNotAWord; mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklistEntry; mIsPossiblyOffensive = isPossiblyOffensive;
} }
public PtNode(final int[] chars, final ArrayList<WeightedString> shortcutTargets, public PtNode(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
final ArrayList<WeightedString> bigrams, final ProbabilityInfo probabilityInfo, final ArrayList<WeightedString> bigrams, final ProbabilityInfo probabilityInfo,
final boolean isNotAWord, final boolean isBlacklistEntry, final boolean isNotAWord, final boolean isPossiblyOffensive,
final PtNodeArray children) { final PtNodeArray children) {
mChars = chars; mChars = chars;
mProbabilityInfo = probabilityInfo; mProbabilityInfo = probabilityInfo;
@ -123,7 +123,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
mBigrams = bigrams; mBigrams = bigrams;
mChildren = children; mChildren = children;
mIsNotAWord = isNotAWord; mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklistEntry; mIsPossiblyOffensive = isPossiblyOffensive;
} }
public void addChild(PtNode n) { public void addChild(PtNode n) {
@ -153,8 +153,8 @@ public final class FusionDictionary implements Iterable<WordProperty> {
return mIsNotAWord; return mIsNotAWord;
} }
public boolean getIsBlacklistEntry() { public boolean getIsPossiblyOffensive() {
return mIsBlacklistEntry; return mIsPossiblyOffensive;
} }
public ArrayList<WeightedString> getShortcutTargets() { public ArrayList<WeightedString> getShortcutTargets() {
@ -238,7 +238,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
private void update(final ProbabilityInfo probabilityInfo, private void update(final ProbabilityInfo probabilityInfo,
final ArrayList<WeightedString> shortcutTargets, final ArrayList<WeightedString> shortcutTargets,
final ArrayList<WeightedString> bigrams, final ArrayList<WeightedString> bigrams,
final boolean isNotAWord, final boolean isBlacklistEntry) { final boolean isNotAWord, final boolean isPossiblyOffensive) {
mProbabilityInfo = ProbabilityInfo.max(mProbabilityInfo, probabilityInfo); mProbabilityInfo = ProbabilityInfo.max(mProbabilityInfo, probabilityInfo);
if (shortcutTargets != null) { if (shortcutTargets != null) {
if (mShortcutTargets == null) { if (mShortcutTargets == null) {
@ -275,7 +275,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
} }
} }
mIsNotAWord = isNotAWord; mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklistEntry; mIsPossiblyOffensive = isPossiblyOffensive;
} }
} }
@ -323,24 +323,12 @@ public final class FusionDictionary implements Iterable<WordProperty> {
* @param probabilityInfo probability information of the word. * @param probabilityInfo probability information of the word.
* @param shortcutTargets a list of shortcut targets for this word, or null. * @param shortcutTargets a list of shortcut targets for this word, or null.
* @param isNotAWord true if this should not be considered a word (e.g. shortcut only) * @param isNotAWord true if this should not be considered a word (e.g. shortcut only)
* @param isPossiblyOffensive true if this word is possibly offensive
*/ */
public void add(final String word, final ProbabilityInfo probabilityInfo, public void add(final String word, final ProbabilityInfo probabilityInfo,
final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord) { final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord,
add(getCodePoints(word), probabilityInfo, shortcutTargets, isNotAWord, final boolean isPossiblyOffensive) {
false /* isBlacklistEntry */); add(getCodePoints(word), probabilityInfo, shortcutTargets, isNotAWord, isPossiblyOffensive);
}
/**
* Helper method to add a blacklist entry as a string.
*
* @param word the word to add as a blacklist entry.
* @param shortcutTargets a list of shortcut targets for this word, or null.
* @param isNotAWord true if this is not a word for spellcheking purposes (shortcut only or so)
*/
public void addBlacklistEntry(final String word,
final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord) {
add(getCodePoints(word), new ProbabilityInfo(0), shortcutTargets, isNotAWord,
true /* isBlacklistEntry */);
} }
/** /**
@ -375,7 +363,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
final PtNode ptNode1 = findWordInTree(mRootNodeArray, word1); final PtNode ptNode1 = findWordInTree(mRootNodeArray, word1);
if (ptNode1 == null) { if (ptNode1 == null) {
add(getCodePoints(word1), new ProbabilityInfo(0), null, false /* isNotAWord */, add(getCodePoints(word1), new ProbabilityInfo(0), null, false /* isNotAWord */,
false /* isBlacklistEntry */); false /* isPossiblyOffensive */);
// The PtNode for the first word may have moved by the above insertion, // The PtNode for the first word may have moved by the above insertion,
// if word1 and word2 share a common stem that happens not to have been // if word1 and word2 share a common stem that happens not to have been
// a cutting point until now. In this case, we need to refresh ptNode. // a cutting point until now. In this case, we need to refresh ptNode.
@ -397,11 +385,11 @@ public final class FusionDictionary implements Iterable<WordProperty> {
* @param probabilityInfo the probability information of the word. * @param probabilityInfo the probability information of the word.
* @param shortcutTargets an optional list of shortcut targets for this word (null if none). * @param shortcutTargets an optional list of shortcut targets for this word (null if none).
* @param isNotAWord true if this is not a word for spellchecking purposes (shortcut only or so) * @param isNotAWord true if this is not a word for spellchecking purposes (shortcut only or so)
* @param isBlacklistEntry true if this is a blacklisted word, false otherwise * @param isPossiblyOffensive true if this word is possibly offensive
*/ */
private void add(final int[] word, final ProbabilityInfo probabilityInfo, private void add(final int[] word, final ProbabilityInfo probabilityInfo,
final ArrayList<WeightedString> shortcutTargets, final ArrayList<WeightedString> shortcutTargets,
final boolean isNotAWord, final boolean isBlacklistEntry) { final boolean isNotAWord, final boolean isPossiblyOffensive) {
assert(probabilityInfo.mProbability <= FormatSpec.MAX_TERMINAL_FREQUENCY); assert(probabilityInfo.mProbability <= FormatSpec.MAX_TERMINAL_FREQUENCY);
if (word.length >= Constants.DICTIONARY_MAX_WORD_LENGTH) { if (word.length >= Constants.DICTIONARY_MAX_WORD_LENGTH) {
MakedictLog.w("Ignoring a word that is too long: word.length = " + word.length); MakedictLog.w("Ignoring a word that is too long: word.length = " + word.length);
@ -431,7 +419,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
final int insertionIndex = findInsertionIndex(currentNodeArray, word[charIndex]); final int insertionIndex = findInsertionIndex(currentNodeArray, word[charIndex]);
final PtNode newPtNode = new PtNode(Arrays.copyOfRange(word, charIndex, word.length), final PtNode newPtNode = new PtNode(Arrays.copyOfRange(word, charIndex, word.length),
shortcutTargets, null /* bigrams */, probabilityInfo, isNotAWord, shortcutTargets, null /* bigrams */, probabilityInfo, isNotAWord,
isBlacklistEntry); isPossiblyOffensive);
currentNodeArray.mData.add(insertionIndex, newPtNode); currentNodeArray.mData.add(insertionIndex, newPtNode);
if (DBG) checkStack(currentNodeArray); if (DBG) checkStack(currentNodeArray);
} else { } else {
@ -442,14 +430,14 @@ public final class FusionDictionary implements Iterable<WordProperty> {
// should end already exists as is. Since the old PtNode was not a terminal, // should end already exists as is. Since the old PtNode was not a terminal,
// make it one by filling in its frequency and other attributes // make it one by filling in its frequency and other attributes
currentPtNode.update(probabilityInfo, shortcutTargets, null, isNotAWord, currentPtNode.update(probabilityInfo, shortcutTargets, null, isNotAWord,
isBlacklistEntry); isPossiblyOffensive);
} else { } else {
// The new word matches the full old word and extends past it. // The new word matches the full old word and extends past it.
// We only have to create a new node and add it to the end of this. // We only have to create a new node and add it to the end of this.
final PtNode newNode = new PtNode( final PtNode newNode = new PtNode(
Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length), Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
shortcutTargets, null /* bigrams */, probabilityInfo, shortcutTargets, null /* bigrams */, probabilityInfo,
isNotAWord, isBlacklistEntry); isNotAWord, isPossiblyOffensive);
currentPtNode.mChildren = new PtNodeArray(); currentPtNode.mChildren = new PtNodeArray();
currentPtNode.mChildren.mData.add(newNode); currentPtNode.mChildren.mData.add(newNode);
} }
@ -459,7 +447,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
// new shortcuts to the existing shortcut list if it already exists. // new shortcuts to the existing shortcut list if it already exists.
currentPtNode.update(probabilityInfo, shortcutTargets, null, currentPtNode.update(probabilityInfo, shortcutTargets, null,
currentPtNode.mIsNotAWord && isNotAWord, currentPtNode.mIsNotAWord && isNotAWord,
currentPtNode.mIsBlacklistEntry || isBlacklistEntry); currentPtNode.mIsPossiblyOffensive || isPossiblyOffensive);
} else { } else {
// Partial prefix match only. We have to replace the current node with a node // Partial prefix match only. We have to replace the current node with a node
// containing the current prefix and create two new ones for the tails. // containing the current prefix and create two new ones for the tails.
@ -468,7 +456,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
Arrays.copyOfRange(currentPtNode.mChars, differentCharIndex, Arrays.copyOfRange(currentPtNode.mChars, differentCharIndex,
currentPtNode.mChars.length), currentPtNode.mShortcutTargets, currentPtNode.mChars.length), currentPtNode.mShortcutTargets,
currentPtNode.mBigrams, currentPtNode.mProbabilityInfo, currentPtNode.mBigrams, currentPtNode.mProbabilityInfo,
currentPtNode.mIsNotAWord, currentPtNode.mIsBlacklistEntry, currentPtNode.mIsNotAWord, currentPtNode.mIsPossiblyOffensive,
currentPtNode.mChildren); currentPtNode.mChildren);
newChildren.mData.add(newOldWord); newChildren.mData.add(newOldWord);
@ -477,17 +465,17 @@ public final class FusionDictionary implements Iterable<WordProperty> {
newParent = new PtNode( newParent = new PtNode(
Arrays.copyOfRange(currentPtNode.mChars, 0, differentCharIndex), Arrays.copyOfRange(currentPtNode.mChars, 0, differentCharIndex),
shortcutTargets, null /* bigrams */, probabilityInfo, shortcutTargets, null /* bigrams */, probabilityInfo,
isNotAWord, isBlacklistEntry, newChildren); isNotAWord, isPossiblyOffensive, newChildren);
} else { } else {
newParent = new PtNode( newParent = new PtNode(
Arrays.copyOfRange(currentPtNode.mChars, 0, differentCharIndex), Arrays.copyOfRange(currentPtNode.mChars, 0, differentCharIndex),
null /* shortcutTargets */, null /* bigrams */, null /* shortcutTargets */, null /* bigrams */,
null /* probabilityInfo */, false /* isNotAWord */, null /* probabilityInfo */, false /* isNotAWord */,
false /* isBlacklistEntry */, newChildren); false /* isPossiblyOffensive */, newChildren);
final PtNode newWord = new PtNode(Arrays.copyOfRange(word, final PtNode newWord = new PtNode(Arrays.copyOfRange(word,
charIndex + differentCharIndex, word.length), charIndex + differentCharIndex, word.length),
shortcutTargets, null /* bigrams */, probabilityInfo, shortcutTargets, null /* bigrams */, probabilityInfo,
isNotAWord, isBlacklistEntry); isNotAWord, isPossiblyOffensive);
final int addIndex = word[charIndex + differentCharIndex] final int addIndex = word[charIndex + differentCharIndex]
> currentPtNode.mChars[differentCharIndex] ? 1 : 0; > currentPtNode.mChars[differentCharIndex] ? 1 : 0;
newChildren.mData.add(addIndex, newWord); newChildren.mData.add(addIndex, newWord);
@ -549,7 +537,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
final ArrayList<PtNode> data = nodeArray.mData; final ArrayList<PtNode> data = nodeArray.mData;
final PtNode reference = new PtNode(new int[] { character }, final PtNode reference = new PtNode(new int[] { character },
null /* shortcutTargets */, null /* bigrams */, null /* probabilityInfo */, null /* shortcutTargets */, null /* bigrams */, null /* probabilityInfo */,
false /* isNotAWord */, false /* isBlacklistEntry */); false /* isNotAWord */, false /* isPossiblyOffensive */);
int result = Collections.binarySearch(data, reference, PTNODE_COMPARATOR); int result = Collections.binarySearch(data, reference, PTNODE_COMPARATOR);
return result >= 0 ? result : -result - 1; return result >= 0 ? result : -result - 1;
} }
@ -686,7 +674,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
return new WordProperty(mCurrentString.toString(), return new WordProperty(mCurrentString.toString(),
currentPtNode.mProbabilityInfo, currentPtNode.mProbabilityInfo,
currentPtNode.mShortcutTargets, currentPtNode.mBigrams, currentPtNode.mShortcutTargets, currentPtNode.mBigrams,
currentPtNode.mIsNotAWord, currentPtNode.mIsBlacklistEntry); currentPtNode.mIsNotAWord, currentPtNode.mIsPossiblyOffensive);
} }
} else { } else {
mPositions.removeLast(); mPositions.removeLast();

View File

@ -283,13 +283,9 @@ public class Ver2DictDecoder extends AbstractDictDecoder {
// Insert unigrams into the fusion dictionary. // Insert unigrams into the fusion dictionary.
for (final WordProperty wordProperty : wordProperties) { for (final WordProperty wordProperty : wordProperties) {
if (wordProperty.mIsBlacklistEntry) { fusionDict.add(wordProperty.mWord, wordProperty.mProbabilityInfo,
fusionDict.addBlacklistEntry(wordProperty.mWord, wordProperty.mShortcutTargets, wordProperty.mShortcutTargets, wordProperty.mIsNotAWord,
wordProperty.mIsNotAWord); wordProperty.mIsPossiblyOffensive);
} else {
fusionDict.add(wordProperty.mWord, wordProperty.mProbabilityInfo,
wordProperty.mShortcutTargets, wordProperty.mIsNotAWord);
}
} }
// Insert bigrams into the fusion dictionary. // Insert bigrams into the fusion dictionary.
for (final WordProperty wordProperty : wordProperties) { for (final WordProperty wordProperty : wordProperties) {

View File

@ -85,7 +85,8 @@ public class Ver2DictEncoderTests extends AndroidTestCase {
} }
} }
dict.add(word, new ProbabilityInfo(UNIGRAM_FREQ), dict.add(word, new ProbabilityInfo(UNIGRAM_FREQ),
(shortcutMap == null) ? null : shortcuts, false /* isNotAWord */); (shortcutMap == null) ? null : shortcuts, false /* isNotAWord */,
false /* isPossiblyOffensive */);
} }
} }
} }

View File

@ -88,13 +88,9 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
// Insert unigrams into the fusion dictionary. // Insert unigrams into the fusion dictionary.
for (final WordProperty wordProperty : wordProperties) { for (final WordProperty wordProperty : wordProperties) {
if (wordProperty.mIsBlacklistEntry) { fusionDict.add(wordProperty.mWord, wordProperty.mProbabilityInfo,
fusionDict.addBlacklistEntry(wordProperty.mWord, wordProperty.mShortcutTargets, wordProperty.mShortcutTargets, wordProperty.mIsNotAWord,
wordProperty.mIsNotAWord); wordProperty.mIsPossiblyOffensive);
} else {
fusionDict.add(wordProperty.mWord, wordProperty.mProbabilityInfo,
wordProperty.mShortcutTargets, wordProperty.mIsNotAWord);
}
} }
// Insert bigrams into the fusion dictionary. // Insert bigrams into the fusion dictionary.
// TODO: Support ngrams. // TODO: Support ngrams.

View File

@ -79,7 +79,7 @@ public class Ver4DictEncoder implements DictEncoder {
if (!binaryDict.addUnigramEntry(wordProperty.mWord, wordProperty.getProbability(), if (!binaryDict.addUnigramEntry(wordProperty.mWord, wordProperty.getProbability(),
null /* shortcutTarget */, 0 /* shortcutProbability */, null /* shortcutTarget */, 0 /* shortcutProbability */,
wordProperty.mIsBeginningOfSentence, wordProperty.mIsNotAWord, wordProperty.mIsBeginningOfSentence, wordProperty.mIsNotAWord,
wordProperty.mIsBlacklistEntry, 0 /* timestamp */)) { wordProperty.mIsPossiblyOffensive, 0 /* timestamp */)) {
MakedictLog.e("Cannot add unigram entry for " + wordProperty.mWord); MakedictLog.e("Cannot add unigram entry for " + wordProperty.mWord);
} }
} else { } else {
@ -88,7 +88,7 @@ public class Ver4DictEncoder implements DictEncoder {
wordProperty.getProbability(), wordProperty.getProbability(),
shortcutTarget.mWord, shortcutTarget.getProbability(), shortcutTarget.mWord, shortcutTarget.getProbability(),
wordProperty.mIsBeginningOfSentence, wordProperty.mIsNotAWord, wordProperty.mIsBeginningOfSentence, wordProperty.mIsNotAWord,
wordProperty.mIsBlacklistEntry, 0 /* timestamp */)) { wordProperty.mIsPossiblyOffensive, 0 /* timestamp */)) {
MakedictLog.e("Cannot add unigram entry for " + wordProperty.mWord MakedictLog.e("Cannot add unigram entry for " + wordProperty.mWord
+ ", shortcutTarget: " + shortcutTarget.mWord); + ", shortcutTarget: " + shortcutTarget.mWord);
return; return;

View File

@ -106,7 +106,7 @@ public class CombinedInputOutput {
if (args[0].matches(CombinedFormatUtils.WORD_TAG + "=.*")) { if (args[0].matches(CombinedFormatUtils.WORD_TAG + "=.*")) {
if (null != word) { if (null != word) {
dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts, dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts,
isNotAWord); isNotAWord, false /* isPossiblyOffensive */);
for (WeightedString s : bigrams) { for (WeightedString s : bigrams) {
dict.setBigram(word, s.mWord, s.mProbabilityInfo); dict.setBigram(word, s.mWord, s.mProbabilityInfo);
} }
@ -189,7 +189,8 @@ public class CombinedInputOutput {
} }
} }
if (null != word) { if (null != word) {
dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts, isNotAWord); dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts, isNotAWord,
false /* isPossiblyOffensive */);
for (WeightedString s : bigrams) { for (WeightedString s : bigrams) {
dict.setBigram(word, s.mWord, s.mProbabilityInfo); dict.setBigram(word, s.mWord, s.mProbabilityInfo);
} }

View File

@ -128,10 +128,10 @@ public class Diff extends Dicttool.Command {
+ word0Property.mIsNotAWord + " -> " + word1PtNode.getIsNotAWord()); + word0Property.mIsNotAWord + " -> " + word1PtNode.getIsNotAWord());
hasDifferences = true; hasDifferences = true;
} }
if (word0Property.mIsBlacklistEntry != word1PtNode.getIsBlacklistEntry()) { if (word0Property.mIsPossiblyOffensive != word1PtNode.getIsPossiblyOffensive()) {
System.out.println("Blacklist: " + word0Property.mWord + " " System.out.println("Possibly-offensive: " + word0Property.mWord + " "
+ word0Property.mIsBlacklistEntry + " -> " + word0Property.mIsPossiblyOffensive + " -> "
+ word1PtNode.getIsBlacklistEntry()); + word1PtNode.getIsPossiblyOffensive());
hasDifferences = true; hasDifferences = true;
} }
hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord, hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,

View File

@ -76,8 +76,8 @@ public class Info extends Dicttool.Command {
if (ptNode.getIsNotAWord()) { if (ptNode.getIsNotAWord()) {
System.out.println(" Is not a word"); System.out.println(" Is not a word");
} }
if (ptNode.getIsBlacklistEntry()) { if (ptNode.getIsPossiblyOffensive()) {
System.out.println(" Is a blacklist entry"); System.out.println(" Is possibly offensive");
} }
final ArrayList<WeightedString> shortcutTargets = ptNode.getShortcutTargets(); final ArrayList<WeightedString> shortcutTargets = ptNode.getShortcutTargets();
if (null == shortcutTargets || shortcutTargets.isEmpty()) { if (null == shortcutTargets || shortcutTargets.isEmpty()) {

View File

@ -90,7 +90,8 @@ public class XmlDictInputOutput {
for (final String shortcutOnly : mShortcutsMap.keySet()) { for (final String shortcutOnly : mShortcutsMap.keySet()) {
if (dict.hasWord(shortcutOnly)) continue; if (dict.hasWord(shortcutOnly)) continue;
dict.add(shortcutOnly, new ProbabilityInfo(SHORTCUT_ONLY_WORD_PROBABILITY), dict.add(shortcutOnly, new ProbabilityInfo(SHORTCUT_ONLY_WORD_PROBABILITY),
mShortcutsMap.get(shortcutOnly), true /* isNotAWord */); mShortcutsMap.get(shortcutOnly), true /* isNotAWord */,
false /* isPossiblyOffensive */);
} }
mDictionary = null; mDictionary = null;
mShortcutsMap.clear(); mShortcutsMap.clear();
@ -138,7 +139,7 @@ public class XmlDictInputOutput {
public void endElement(String uri, String localName, String qName) { public void endElement(String uri, String localName, String qName) {
if (WORD == mState) { if (WORD == mState) {
mDictionary.add(mWord, new ProbabilityInfo(mFreq), mShortcutsMap.get(mWord), mDictionary.add(mWord, new ProbabilityInfo(mFreq), mShortcutsMap.get(mWord),
false /* isNotAWord */); false /* isNotAWord */, false /* isPossiblyOffensive */);
mState = START; mState = START;
} }
} }

View File

@ -54,11 +54,16 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
testOptions.mAttributes.put(DictionaryHeader.DICTIONARY_LOCALE_KEY, LOCALE); testOptions.mAttributes.put(DictionaryHeader.DICTIONARY_LOCALE_KEY, LOCALE);
testOptions.mAttributes.put(DictionaryHeader.DICTIONARY_ID_KEY, ID); testOptions.mAttributes.put(DictionaryHeader.DICTIONARY_ID_KEY, ID);
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), testOptions); final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), testOptions);
dict.add("foo", new ProbabilityInfo(TEST_FREQ), null, false /* isNotAWord */); dict.add("foo", new ProbabilityInfo(TEST_FREQ), null, false /* isNotAWord */,
dict.add("fta", new ProbabilityInfo(1), null, false /* isNotAWord */); false /* isPossiblyOffensive */);
dict.add("ftb", new ProbabilityInfo(1), null, false /* isNotAWord */); dict.add("fta", new ProbabilityInfo(1), null, false /* isNotAWord */,
dict.add("bar", new ProbabilityInfo(1), null, false /* isNotAWord */); false /* isPossiblyOffensive */);
dict.add("fool", new ProbabilityInfo(1), null, false /* isNotAWord */); dict.add("ftb", new ProbabilityInfo(1), null, false /* isNotAWord */,
false /* isPossiblyOffensive */);
dict.add("bar", new ProbabilityInfo(1), null, false /* isNotAWord */,
false /* isPossiblyOffensive */);
dict.add("fool", new ProbabilityInfo(1), null, false /* isNotAWord */,
false /* isPossiblyOffensive */);
final File dst = File.createTempFile("testGetRawDict", ".tmp"); final File dst = File.createTempFile("testGetRawDict", ".tmp");
dst.deleteOnExit(); dst.deleteOnExit();

View File

@ -33,11 +33,16 @@ public class BinaryDictEncoderFlattenTreeTests extends TestCase {
public void testFlattenNodes() { public void testFlattenNodes() {
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new DictionaryOptions(new HashMap<String, String>())); new DictionaryOptions(new HashMap<String, String>()));
dict.add("foo", new ProbabilityInfo(1), null, false /* isNotAWord */); dict.add("foo", new ProbabilityInfo(1), null, false /* isNotAWord */,
dict.add("fta", new ProbabilityInfo(1), null, false /* isNotAWord */); false /* isPossiblyOffensive */);
dict.add("ftb", new ProbabilityInfo(1), null, false /* isNotAWord */); dict.add("fta", new ProbabilityInfo(1), null, false /* isNotAWord */,
dict.add("bar", new ProbabilityInfo(1), null, false /* isNotAWord */); false /* isPossiblyOffensive */);
dict.add("fool", new ProbabilityInfo(1), null, false /* isNotAWord */); dict.add("ftb", new ProbabilityInfo(1), null, false /* isNotAWord */,
false /* isPossiblyOffensive */);
dict.add("bar", new ProbabilityInfo(1), null, false /* isNotAWord */,
false /* isPossiblyOffensive */);
dict.add("fool", new ProbabilityInfo(1), null, false /* isNotAWord */,
false /* isPossiblyOffensive */);
final ArrayList<PtNodeArray> result = final ArrayList<PtNodeArray> result =
BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray); BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray);
assertEquals(4, result.size()); assertEquals(4, result.size());

View File

@ -101,7 +101,8 @@ public class FusionDictionaryTest extends TestCase {
prepare(time); prepare(time);
for (int i = 0; i < sWords.size(); ++i) { for (int i = 0; i < sWords.size(); ++i) {
System.out.println("Adding in pos " + i + " : " + dumpWord(sWords.get(i))); System.out.println("Adding in pos " + i + " : " + dumpWord(sWords.get(i)));
dict.add(sWords.get(i), new ProbabilityInfo(180), null, false); dict.add(sWords.get(i), new ProbabilityInfo(180), null, false,
false /* isPossiblyOffensive */);
dumpDict(dict); dumpDict(dict);
checkDictionary(dict, sWords, i); checkDictionary(dict, sWords, i);
} }