Reinstate the shortcut-only attribute

Also add the blacklist attribute

Bug: 7005742
Bug: 2704000
Change-Id: Icbe60bdf25bfb098d9e3f20870be30d6aef07c9d
main
Jean Chalard 2012-08-31 15:24:39 +09:00
parent 49d8af8a4e
commit 72b1c93941
11 changed files with 125 additions and 42 deletions

View File

@ -172,12 +172,12 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
// considering performance regression. // considering performance regression.
protected void addWord(final String word, final String shortcutTarget, final int frequency) { protected void addWord(final String word, final String shortcutTarget, final int frequency) {
if (shortcutTarget == null) { if (shortcutTarget == null) {
mFusionDictionary.add(word, frequency, null); mFusionDictionary.add(word, frequency, null, false /* isNotAWord */);
} else { } else {
// TODO: Do this in the subclass, with this class taking an arraylist. // TODO: Do this in the subclass, with this class taking an arraylist.
final ArrayList<WeightedString> shortcutTargets = CollectionUtils.newArrayList(); final ArrayList<WeightedString> shortcutTargets = CollectionUtils.newArrayList();
shortcutTargets.add(new WeightedString(shortcutTarget, frequency)); shortcutTargets.add(new WeightedString(shortcutTarget, frequency));
mFusionDictionary.add(word, frequency, shortcutTargets); mFusionDictionary.add(word, frequency, shortcutTargets, false /* isNotAWord */);
} }
} }

View File

@ -55,6 +55,8 @@ public class BinaryDictInputOutput {
* s | has a terminal ? 1 bit, 1 = yes, 0 = no : FLAG_IS_TERMINAL * s | has a terminal ? 1 bit, 1 = yes, 0 = no : FLAG_IS_TERMINAL
* | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS * | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS
* | has bigrams ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_BIGRAMS * | has bigrams ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_BIGRAMS
* | is not a word ? 1 bit, 1 = yes, 0 = no : FLAG_IS_NOT_A_WORD
* | is blacklisted ? 1 bit, 1 = yes, 0 = no : FLAG_IS_BLACKLISTED
* *
* c | IF FLAG_HAS_MULTIPLE_CHARS * c | IF FLAG_HAS_MULTIPLE_CHARS
* h | char, char, char, char n * (1 or 3 bytes) : use CharGroupInfo for i/o helpers * h | char, char, char, char n * (1 or 3 bytes) : use CharGroupInfo for i/o helpers
@ -153,6 +155,8 @@ public class BinaryDictInputOutput {
private static final int FLAG_IS_TERMINAL = 0x10; private static final int FLAG_IS_TERMINAL = 0x10;
private static final int FLAG_HAS_SHORTCUT_TARGETS = 0x08; private static final int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
private static final int FLAG_HAS_BIGRAMS = 0x04; private static final int FLAG_HAS_BIGRAMS = 0x04;
private static final int FLAG_IS_NOT_A_WORD = 0x02;
private static final int FLAG_IS_BLACKLISTED = 0x01;
private static final int FLAG_ATTRIBUTE_HAS_NEXT = 0x80; private static final int FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
private static final int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40; private static final int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
@ -778,6 +782,12 @@ public class BinaryDictInputOutput {
} }
flags |= FLAG_HAS_BIGRAMS; flags |= FLAG_HAS_BIGRAMS;
} }
if (group.mIsNotAWord) {
flags |= FLAG_IS_NOT_A_WORD;
}
if (group.mIsBlacklistEntry) {
flags |= FLAG_IS_BLACKLISTED;
}
return flags; return flags;
} }
@ -1352,12 +1362,14 @@ public class BinaryDictInputOutput {
buffer.position(currentPosition); buffer.position(currentPosition);
} }
nodeContents.add( nodeContents.add(
new CharGroup(info.mCharacters, shortcutTargets, new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency,
bigrams, info.mFrequency, children)); 0 != (info.mFlags & FLAG_IS_NOT_A_WORD),
0 != (info.mFlags & FLAG_IS_BLACKLISTED), children));
} else { } else {
nodeContents.add( nodeContents.add(
new CharGroup(info.mCharacters, shortcutTargets, new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency,
bigrams, info.mFrequency)); 0 != (info.mFlags & FLAG_IS_NOT_A_WORD),
0 != (info.mFlags & FLAG_IS_BLACKLISTED)));
} }
groupOffset = info.mEndAddress; groupOffset = info.mEndAddress;
} }
@ -1478,7 +1490,11 @@ public class BinaryDictInputOutput {
0 != (optionsFlags & FRENCH_LIGATURE_PROCESSING_FLAG))); 0 != (optionsFlags & FRENCH_LIGATURE_PROCESSING_FLAG)));
if (null != dict) { if (null != dict) {
for (final Word w : dict) { for (final Word w : dict) {
newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets); if (w.mIsBlacklistEntry) {
newDict.addBlacklistEntry(w.mWord, w.mShortcutTargets, w.mIsNotAWord);
} else {
newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mIsNotAWord);
}
} }
for (final Word w : dict) { for (final Word w : dict) {
// By construction a binary dictionary may not have bigrams pointing to // By construction a binary dictionary may not have bigrams pointing to

View File

@ -101,26 +101,34 @@ public class FusionDictionary implements Iterable<Word> {
ArrayList<WeightedString> mBigrams; ArrayList<WeightedString> mBigrams;
int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal. int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal.
Node mChildren; Node mChildren;
boolean mIsNotAWord; // Only a shortcut
boolean mIsBlacklistEntry;
// The two following members to help with binary generation // The two following members to help with binary generation
int mCachedSize; int mCachedSize;
int mCachedAddress; int mCachedAddress;
public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets, public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
final ArrayList<WeightedString> bigrams, final int frequency) { final ArrayList<WeightedString> bigrams, final int frequency,
final boolean isNotAWord, final boolean isBlacklistEntry) {
mChars = chars; mChars = chars;
mFrequency = frequency; mFrequency = frequency;
mShortcutTargets = shortcutTargets; mShortcutTargets = shortcutTargets;
mBigrams = bigrams; mBigrams = bigrams;
mChildren = null; mChildren = null;
mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklistEntry;
} }
public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets, public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
final ArrayList<WeightedString> bigrams, final int frequency, final Node children) { final ArrayList<WeightedString> bigrams, final int frequency,
final boolean isNotAWord, final boolean isBlacklistEntry, final Node children) {
mChars = chars; mChars = chars;
mFrequency = frequency; mFrequency = frequency;
mShortcutTargets = shortcutTargets; mShortcutTargets = shortcutTargets;
mBigrams = bigrams; mBigrams = bigrams;
mChildren = children; mChildren = children;
mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklistEntry;
} }
public void addChild(CharGroup n) { public void addChild(CharGroup n) {
@ -197,8 +205,9 @@ public class FusionDictionary implements Iterable<Word> {
* the existing ones if any. Note: unigram, bigram, and shortcut frequencies are only * the existing ones if any. Note: unigram, bigram, and shortcut frequencies are only
* updated if they are higher than the existing ones. * updated if they are higher than the existing ones.
*/ */
public void update(int frequency, ArrayList<WeightedString> shortcutTargets, public void update(final int frequency, final ArrayList<WeightedString> shortcutTargets,
ArrayList<WeightedString> bigrams) { final ArrayList<WeightedString> bigrams,
final boolean isNotAWord, final boolean isBlacklistEntry) {
if (frequency > mFrequency) { if (frequency > mFrequency) {
mFrequency = frequency; mFrequency = frequency;
} }
@ -234,6 +243,8 @@ public class FusionDictionary implements Iterable<Word> {
} }
} }
} }
mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklistEntry;
} }
} }
@ -296,10 +307,24 @@ public class FusionDictionary implements Iterable<Word> {
* @param word the word to add. * @param word the word to add.
* @param frequency the frequency of the word, in the range [0..255]. * @param frequency the frequency of the word, in the range [0..255].
* @param shortcutTargets a list of shortcut targets for this word, or null. * @param shortcutTargets a list of shortcut targets for this word, or null.
* @param isNotAWord true if this should not be considered a word (e.g. shortcut only)
*/ */
public void add(final String word, final int frequency, public void add(final String word, final int frequency,
final ArrayList<WeightedString> shortcutTargets) { final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord) {
add(getCodePoints(word), frequency, shortcutTargets); add(getCodePoints(word), frequency, shortcutTargets, isNotAWord,
false /* isBlacklistEntry */);
}
/**
* Helper method to add a blacklist entry as a string.
*
* @param word the word to add as a blacklist entry.
* @param shortcutTargets a list of shortcut targets for this word, or null.
* @param isNotAWord true if this is not a word for spellchecking purposes (e.g. shortcut only)
*/
public void addBlacklistEntry(final String word,
final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord) {
add(getCodePoints(word), 0, shortcutTargets, isNotAWord, true /* isBlacklistEntry */);
} }
/** /**
@ -332,7 +357,8 @@ public class FusionDictionary implements Iterable<Word> {
if (charGroup != null) { if (charGroup != null) {
final CharGroup charGroup2 = findWordInTree(mRoot, word2); final CharGroup charGroup2 = findWordInTree(mRoot, word2);
if (charGroup2 == null) { if (charGroup2 == null) {
add(getCodePoints(word2), 0, null); add(getCodePoints(word2), 0, null, false /* isNotAWord */,
false /* isBlacklistEntry */);
} }
charGroup.addBigram(word2, frequency); charGroup.addBigram(word2, frequency);
} else { } else {
@ -349,9 +375,12 @@ public class FusionDictionary implements Iterable<Word> {
* @param word the word, as an int array. * @param word the word, as an int array.
* @param frequency the frequency of the word, in the range [0..255]. * @param frequency the frequency of the word, in the range [0..255].
* @param shortcutTargets an optional list of shortcut targets for this word (null if none). * @param shortcutTargets an optional list of shortcut targets for this word (null if none).
* @param isNotAWord true if this is not a word for spellchecking purposes (e.g. shortcut only)
* @param isBlacklistEntry true if this is a blacklisted word, false otherwise
*/ */
private void add(final int[] word, final int frequency, private void add(final int[] word, final int frequency,
final ArrayList<WeightedString> shortcutTargets) { final ArrayList<WeightedString> shortcutTargets,
final boolean isNotAWord, final boolean isBlacklistEntry) {
assert(frequency >= 0 && frequency <= 255); assert(frequency >= 0 && frequency <= 255);
Node currentNode = mRoot; Node currentNode = mRoot;
int charIndex = 0; int charIndex = 0;
@ -376,7 +405,7 @@ public class FusionDictionary implements Iterable<Word> {
final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]); final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]);
final CharGroup newGroup = new CharGroup( final CharGroup newGroup = new CharGroup(
Arrays.copyOfRange(word, charIndex, word.length), Arrays.copyOfRange(word, charIndex, word.length),
shortcutTargets, null /* bigrams */, frequency); shortcutTargets, null /* bigrams */, frequency, isNotAWord, isBlacklistEntry);
currentNode.mData.add(insertionIndex, newGroup); currentNode.mData.add(insertionIndex, newGroup);
if (DBG) checkStack(currentNode); if (DBG) checkStack(currentNode);
} else { } else {
@ -386,13 +415,15 @@ public class FusionDictionary implements Iterable<Word> {
// The new word is a prefix of an existing word, but the node on which it // The new word is a prefix of an existing word, but the node on which it
// should end already exists as is. Since the old CharNode was not a terminal, // should end already exists as is. Since the old CharNode was not a terminal,
// make it one by filling in its frequency and other attributes // make it one by filling in its frequency and other attributes
currentGroup.update(frequency, shortcutTargets, null); currentGroup.update(frequency, shortcutTargets, null, isNotAWord,
isBlacklistEntry);
} else { } else {
// The new word matches the full old word and extends past it. // The new word matches the full old word and extends past it.
// We only have to create a new node and add it to the end of this. // We only have to create a new node and add it to the end of this.
final CharGroup newNode = new CharGroup( final CharGroup newNode = new CharGroup(
Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length), Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
shortcutTargets, null /* bigrams */, frequency); shortcutTargets, null /* bigrams */, frequency, isNotAWord,
isBlacklistEntry);
currentGroup.mChildren = new Node(); currentGroup.mChildren = new Node();
currentGroup.mChildren.mData.add(newNode); currentGroup.mChildren.mData.add(newNode);
} }
@ -400,7 +431,9 @@ public class FusionDictionary implements Iterable<Word> {
if (0 == differentCharIndex) { if (0 == differentCharIndex) {
// Exact same word. Update the frequency if higher. This will also add the // Exact same word. Update the frequency if higher. This will also add the
// new shortcuts to the existing shortcut list if it already exists. // new shortcuts to the existing shortcut list if it already exists.
currentGroup.update(frequency, shortcutTargets, null); currentGroup.update(frequency, shortcutTargets, null,
currentGroup.mIsNotAWord && isNotAWord,
currentGroup.mIsBlacklistEntry || isBlacklistEntry);
} else { } else {
// Partial prefix match only. We have to replace the current node with a node // Partial prefix match only. We have to replace the current node with a node
// containing the current prefix and create two new ones for the tails. // containing the current prefix and create two new ones for the tails.
@ -408,21 +441,26 @@ public class FusionDictionary implements Iterable<Word> {
final CharGroup newOldWord = new CharGroup( final CharGroup newOldWord = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, differentCharIndex, Arrays.copyOfRange(currentGroup.mChars, differentCharIndex,
currentGroup.mChars.length), currentGroup.mShortcutTargets, currentGroup.mChars.length), currentGroup.mShortcutTargets,
currentGroup.mBigrams, currentGroup.mFrequency, currentGroup.mChildren); currentGroup.mBigrams, currentGroup.mFrequency,
currentGroup.mIsNotAWord, currentGroup.mIsBlacklistEntry,
currentGroup.mChildren);
newChildren.mData.add(newOldWord); newChildren.mData.add(newOldWord);
final CharGroup newParent; final CharGroup newParent;
if (charIndex + differentCharIndex >= word.length) { if (charIndex + differentCharIndex >= word.length) {
newParent = new CharGroup( newParent = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex), Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
shortcutTargets, null /* bigrams */, frequency, newChildren); shortcutTargets, null /* bigrams */, frequency,
isNotAWord, isBlacklistEntry, newChildren);
} else { } else {
newParent = new CharGroup( newParent = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex), Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
null /* shortcutTargets */, null /* bigrams */, -1, newChildren); null /* shortcutTargets */, null /* bigrams */, -1,
false /* isNotAWord */, false /* isBlacklistEntry */, newChildren);
final CharGroup newWord = new CharGroup(Arrays.copyOfRange(word, final CharGroup newWord = new CharGroup(Arrays.copyOfRange(word,
charIndex + differentCharIndex, word.length), charIndex + differentCharIndex, word.length),
shortcutTargets, null /* bigrams */, frequency); shortcutTargets, null /* bigrams */, frequency,
isNotAWord, isBlacklistEntry);
final int addIndex = word[charIndex + differentCharIndex] final int addIndex = word[charIndex + differentCharIndex]
> currentGroup.mChars[differentCharIndex] ? 1 : 0; > currentGroup.mChars[differentCharIndex] ? 1 : 0;
newChildren.mData.add(addIndex, newWord); newChildren.mData.add(addIndex, newWord);
@ -483,7 +521,8 @@ public class FusionDictionary implements Iterable<Word> {
private static int findInsertionIndex(final Node node, int character) { private static int findInsertionIndex(final Node node, int character) {
final ArrayList<CharGroup> data = node.mData; final ArrayList<CharGroup> data = node.mData;
final CharGroup reference = new CharGroup(new int[] { character }, final CharGroup reference = new CharGroup(new int[] { character },
null /* shortcutTargets */, null /* bigrams */, 0); null /* shortcutTargets */, null /* bigrams */, 0, false /* isNotAWord */,
false /* isBlacklistEntry */);
int result = Collections.binarySearch(data, reference, CHARGROUP_COMPARATOR); int result = Collections.binarySearch(data, reference, CHARGROUP_COMPARATOR);
return result >= 0 ? result : -result - 1; return result >= 0 ? result : -result - 1;
} }
@ -748,7 +787,8 @@ public class FusionDictionary implements Iterable<Word> {
} }
if (currentGroup.mFrequency >= 0) if (currentGroup.mFrequency >= 0)
return new Word(mCurrentString.toString(), currentGroup.mFrequency, return new Word(mCurrentString.toString(), currentGroup.mFrequency,
currentGroup.mShortcutTargets, currentGroup.mBigrams); currentGroup.mShortcutTargets, currentGroup.mBigrams,
currentGroup.mIsNotAWord, currentGroup.mIsBlacklistEntry);
} else { } else {
mPositions.removeLast(); mPositions.removeLast();
currentPos = mPositions.getLast(); currentPos = mPositions.getLast();

View File

@ -31,16 +31,21 @@ public class Word implements Comparable<Word> {
public final int mFrequency; public final int mFrequency;
public final ArrayList<WeightedString> mShortcutTargets; public final ArrayList<WeightedString> mShortcutTargets;
public final ArrayList<WeightedString> mBigrams; public final ArrayList<WeightedString> mBigrams;
public final boolean mIsNotAWord;
public final boolean mIsBlacklistEntry;
private int mHashCode = 0; private int mHashCode = 0;
public Word(final String word, final int frequency, public Word(final String word, final int frequency,
final ArrayList<WeightedString> shortcutTargets, final ArrayList<WeightedString> shortcutTargets,
final ArrayList<WeightedString> bigrams) { final ArrayList<WeightedString> bigrams,
final boolean isNotAWord, final boolean isBlacklistEntry) {
mWord = word; mWord = word;
mFrequency = frequency; mFrequency = frequency;
mShortcutTargets = shortcutTargets; mShortcutTargets = shortcutTargets;
mBigrams = bigrams; mBigrams = bigrams;
mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklistEntry;
} }
private static int computeHashCode(Word word) { private static int computeHashCode(Word word) {
@ -48,7 +53,9 @@ public class Word implements Comparable<Word> {
word.mWord, word.mWord,
word.mFrequency, word.mFrequency,
word.mShortcutTargets.hashCode(), word.mShortcutTargets.hashCode(),
word.mBigrams.hashCode() word.mBigrams.hashCode(),
word.mIsNotAWord,
word.mIsBlacklistEntry
}); });
} }
@ -78,7 +85,9 @@ public class Word implements Comparable<Word> {
Word w = (Word)o; Word w = (Word)o;
return mFrequency == w.mFrequency && mWord.equals(w.mWord) return mFrequency == w.mFrequency && mWord.equals(w.mWord)
&& mShortcutTargets.equals(w.mShortcutTargets) && mShortcutTargets.equals(w.mShortcutTargets)
&& mBigrams.equals(w.mBigrams); && mBigrams.equals(w.mBigrams)
&& mIsNotAWord == w.mIsNotAWord
&& mIsBlacklistEntry == w.mIsBlacklistEntry;
} }
@Override @Override

View File

@ -43,6 +43,10 @@ class BinaryFormat {
static const int FLAG_HAS_SHORTCUT_TARGETS = 0x08; static const int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
// Flag for bigram presence // Flag for bigram presence
static const int FLAG_HAS_BIGRAMS = 0x04; static const int FLAG_HAS_BIGRAMS = 0x04;
// Flag for non-words (typically, shortcut only entries)
static const int FLAG_IS_NOT_A_WORD = 0x02;
// Flag for blacklist
static const int FLAG_IS_BLACKLISTED = 0x01;
// Attribute (bigram/shortcut) related flags: // Attribute (bigram/shortcut) related flags:
// Flag for presence of more attributes // Flag for presence of more attributes

View File

@ -72,6 +72,10 @@ class TerminalAttributes {
return ShortcutIterator(mDict, mStartPos + BinaryFormat::SHORTCUT_LIST_SIZE_SIZE, mFlags); return ShortcutIterator(mDict, mStartPos + BinaryFormat::SHORTCUT_LIST_SIZE_SIZE, mFlags);
} }
bool isBlacklistedOrNotAWord() const {
return mFlags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD);
}
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes); DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes);
const uint8_t *const mDict; const uint8_t *const mDict;

View File

@ -391,9 +391,11 @@ inline void UnigramDictionary::onTerminal(const int probability,
const int finalProbability = const int finalProbability =
correction->getFinalProbability(probability, &wordPointer, &wordLength); correction->getFinalProbability(probability, &wordPointer, &wordLength);
if (0 != finalProbability) { if (0 != finalProbability && !terminalAttributes.isBlacklistedOrNotAWord()) {
// If the probability is 0, we don't want to add this word. However we still // If the probability is 0, we don't want to add this word. However we still
// want to add its shortcuts (including a possible whitelist entry) if any. // want to add its shortcuts (including a possible whitelist entry) if any.
// Furthermore, if this is not a word (shortcut only for example) or a blacklisted
// entry then we never want to suggest this.
addWord(wordPointer, wordLength, finalProbability, masterQueue, addWord(wordPointer, wordLength, finalProbability, masterQueue,
Dictionary::KIND_CORRECTION); Dictionary::KIND_CORRECTION);
} }
@ -841,6 +843,12 @@ int UnigramDictionary::getFrequency(const int32_t *const inWord, const int lengt
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
} }
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
if (flags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD)) {
// If this is not a word, or if it's a blacklisted entry, it should behave as
// having no frequency outside of the suggestion process (where it should be used
// for shortcuts).
return NOT_A_PROBABILITY;
}
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags)); const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
if (hasMultipleChars) { if (hasMultipleChars) {
pos = BinaryFormat::skipOtherCharacters(root, pos); pos = BinaryFormat::skipOtherCharacters(root, pos);

View File

@ -80,7 +80,7 @@ public class BinaryDictIOTests extends AndroidTestCase {
final List<String> words) { final List<String> words) {
for (int i = 0; i < number; ++i) { for (int i = 0; i < number; ++i) {
final String word = words.get(i); final String word = words.get(i);
dict.add(word, UNIGRAM_FREQ, null); dict.add(word, UNIGRAM_FREQ, null, false /* isNotAWord */);
} }
} }

View File

@ -31,16 +31,16 @@ public class FusionDictionaryTests extends AndroidTestCase {
FusionDictionary dict = new FusionDictionary(new Node(), FusionDictionary dict = new FusionDictionary(new Node(),
new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false)); new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
dict.add("abc", 10, null); dict.add("abc", 10, null, false /* isNotAWord */);
assertNull(FusionDictionary.findWordInTree(dict.mRoot, "aaa")); assertNull(FusionDictionary.findWordInTree(dict.mRoot, "aaa"));
assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "abc")); assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "abc"));
dict.add("aa", 10, null); dict.add("aa", 10, null, false /* isNotAWord */);
assertNull(FusionDictionary.findWordInTree(dict.mRoot, "aaa")); assertNull(FusionDictionary.findWordInTree(dict.mRoot, "aaa"));
assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "aa")); assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "aa"));
dict.add("babcd", 10, null); dict.add("babcd", 10, null, false /* isNotAWord */);
dict.add("bacde", 10, null); dict.add("bacde", 10, null, false /* isNotAWord */);
assertNull(FusionDictionary.findWordInTree(dict.mRoot, "ba")); assertNull(FusionDictionary.findWordInTree(dict.mRoot, "ba"));
assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "babcd")); assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "babcd"));
assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "bacde")); assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "bacde"));

View File

@ -50,6 +50,7 @@ public class XmlDictInputOutput {
private static final String SHORTCUT_TAG = "shortcut"; private static final String SHORTCUT_TAG = "shortcut";
private static final String FREQUENCY_ATTR = "f"; private static final String FREQUENCY_ATTR = "f";
private static final String WORD_ATTR = "word"; private static final String WORD_ATTR = "word";
private static final String NOT_A_WORD_ATTR = "not_a_word";
private static final int SHORTCUT_ONLY_DEFAULT_FREQ = 1; private static final int SHORTCUT_ONLY_DEFAULT_FREQ = 1;
@ -92,7 +93,7 @@ public class XmlDictInputOutput {
final FusionDictionary dict = mDictionary; final FusionDictionary dict = mDictionary;
for (final String shortcutOnly : mShortcutsMap.keySet()) { for (final String shortcutOnly : mShortcutsMap.keySet()) {
if (dict.hasWord(shortcutOnly)) continue; if (dict.hasWord(shortcutOnly)) continue;
dict.add(shortcutOnly, 0, mShortcutsMap.get(shortcutOnly)); dict.add(shortcutOnly, 0, mShortcutsMap.get(shortcutOnly), true /* isNotAWord */);
} }
mDictionary = null; mDictionary = null;
mShortcutsMap.clear(); mShortcutsMap.clear();
@ -144,7 +145,7 @@ public class XmlDictInputOutput {
@Override @Override
public void endElement(String uri, String localName, String qName) { public void endElement(String uri, String localName, String qName) {
if (WORD == mState) { if (WORD == mState) {
mDictionary.add(mWord, mFreq, mShortcutsMap.get(mWord)); mDictionary.add(mWord, mFreq, mShortcutsMap.get(mWord), false /* isNotAWord */);
mState = START; mState = START;
} }
} }
@ -345,7 +346,8 @@ public class XmlDictInputOutput {
destination.write("<!-- Warning: there is no code to read this format yet. -->\n"); destination.write("<!-- Warning: there is no code to read this format yet. -->\n");
for (Word word : set) { for (Word word : set) {
destination.write(" <" + WORD_TAG + " " + WORD_ATTR + "=\"" + word.mWord + "\" " destination.write(" <" + WORD_TAG + " " + WORD_ATTR + "=\"" + word.mWord + "\" "
+ FREQUENCY_ATTR + "=\"" + word.mFrequency + "\">"); + FREQUENCY_ATTR + "=\"" + word.mFrequency
+ (word.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "") + "\">");
if (null != word.mShortcutTargets) { if (null != word.mShortcutTargets) {
destination.write("\n"); destination.write("\n");
for (WeightedString target : word.mShortcutTargets) { for (WeightedString target : word.mShortcutTargets) {

View File

@ -43,11 +43,11 @@ public class BinaryDictInputOutputTest extends TestCase {
final FusionDictionary dict = new FusionDictionary(new Node(), final FusionDictionary dict = new FusionDictionary(new Node(),
new DictionaryOptions(new HashMap<String, String>(), new DictionaryOptions(new HashMap<String, String>(),
false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */)); false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
dict.add("foo", 1, null); dict.add("foo", 1, null, false /* isNotAWord */);
dict.add("fta", 1, null); dict.add("fta", 1, null, false /* isNotAWord */);
dict.add("ftb", 1, null); dict.add("ftb", 1, null, false /* isNotAWord */);
dict.add("bar", 1, null); dict.add("bar", 1, null, false /* isNotAWord */);
dict.add("fool", 1, null); dict.add("fool", 1, null, false /* isNotAWord */);
final ArrayList<Node> result = BinaryDictInputOutput.flattenTree(dict.mRoot); final ArrayList<Node> result = BinaryDictInputOutput.flattenTree(dict.mRoot);
assertEquals(4, result.size()); assertEquals(4, result.size());
while (!result.isEmpty()) { while (!result.isEmpty()) {