Reinstate the shortcut-only attribute
Also add the blacklist attribute Bug: 7005742 Bug: 2704000 Change-Id: Icbe60bdf25bfb098d9e3f20870be30d6aef07c9d
main
parent
49d8af8a4e
commit
72b1c93941
|
@ -172,12 +172,12 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
|||
// considering performance regression.
|
||||
protected void addWord(final String word, final String shortcutTarget, final int frequency) {
|
||||
if (shortcutTarget == null) {
|
||||
mFusionDictionary.add(word, frequency, null);
|
||||
mFusionDictionary.add(word, frequency, null, false /* isNotAWord */);
|
||||
} else {
|
||||
// TODO: Do this in the subclass, with this class taking an arraylist.
|
||||
final ArrayList<WeightedString> shortcutTargets = CollectionUtils.newArrayList();
|
||||
shortcutTargets.add(new WeightedString(shortcutTarget, frequency));
|
||||
mFusionDictionary.add(word, frequency, shortcutTargets);
|
||||
mFusionDictionary.add(word, frequency, shortcutTargets, false /* isNotAWord */);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -55,6 +55,8 @@ public class BinaryDictInputOutput {
|
|||
* s | has a terminal ? 1 bit, 1 = yes, 0 = no : FLAG_IS_TERMINAL
|
||||
* | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS
|
||||
* | has bigrams ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_BIGRAMS
|
||||
* | is not a word ? 1 bit, 1 = yes, 0 = no : FLAG_IS_NOT_A_WORD
|
||||
* | is blacklisted ? 1 bit, 1 = yes, 0 = no : FLAG_IS_BLACKLISTED
|
||||
*
|
||||
* c | IF FLAG_HAS_MULTIPLE_CHARS
|
||||
* h | char, char, char, char n * (1 or 3 bytes) : use CharGroupInfo for i/o helpers
|
||||
|
@ -153,6 +155,8 @@ public class BinaryDictInputOutput {
|
|||
private static final int FLAG_IS_TERMINAL = 0x10;
|
||||
private static final int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
|
||||
private static final int FLAG_HAS_BIGRAMS = 0x04;
|
||||
private static final int FLAG_IS_NOT_A_WORD = 0x02;
|
||||
private static final int FLAG_IS_BLACKLISTED = 0x01;
|
||||
|
||||
private static final int FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
|
||||
private static final int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
|
||||
|
@ -778,6 +782,12 @@ public class BinaryDictInputOutput {
|
|||
}
|
||||
flags |= FLAG_HAS_BIGRAMS;
|
||||
}
|
||||
if (group.mIsNotAWord) {
|
||||
flags |= FLAG_IS_NOT_A_WORD;
|
||||
}
|
||||
if (group.mIsBlacklistEntry) {
|
||||
flags |= FLAG_IS_BLACKLISTED;
|
||||
}
|
||||
return flags;
|
||||
}
|
||||
|
||||
|
@ -1352,12 +1362,14 @@ public class BinaryDictInputOutput {
|
|||
buffer.position(currentPosition);
|
||||
}
|
||||
nodeContents.add(
|
||||
new CharGroup(info.mCharacters, shortcutTargets,
|
||||
bigrams, info.mFrequency, children));
|
||||
new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency,
|
||||
0 != (info.mFlags & FLAG_IS_NOT_A_WORD),
|
||||
0 != (info.mFlags & FLAG_IS_BLACKLISTED), children));
|
||||
} else {
|
||||
nodeContents.add(
|
||||
new CharGroup(info.mCharacters, shortcutTargets,
|
||||
bigrams, info.mFrequency));
|
||||
new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency,
|
||||
0 != (info.mFlags & FLAG_IS_NOT_A_WORD),
|
||||
0 != (info.mFlags & FLAG_IS_BLACKLISTED)));
|
||||
}
|
||||
groupOffset = info.mEndAddress;
|
||||
}
|
||||
|
@ -1478,7 +1490,11 @@ public class BinaryDictInputOutput {
|
|||
0 != (optionsFlags & FRENCH_LIGATURE_PROCESSING_FLAG)));
|
||||
if (null != dict) {
|
||||
for (final Word w : dict) {
|
||||
newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets);
|
||||
if (w.mIsBlacklistEntry) {
|
||||
newDict.addBlacklistEntry(w.mWord, w.mShortcutTargets, w.mIsNotAWord);
|
||||
} else {
|
||||
newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mIsNotAWord);
|
||||
}
|
||||
}
|
||||
for (final Word w : dict) {
|
||||
// By construction a binary dictionary may not have bigrams pointing to
|
||||
|
|
|
@ -101,26 +101,34 @@ public class FusionDictionary implements Iterable<Word> {
|
|||
ArrayList<WeightedString> mBigrams;
|
||||
int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal.
|
||||
Node mChildren;
|
||||
boolean mIsNotAWord; // Only a shortcut
|
||||
boolean mIsBlacklistEntry;
|
||||
// The two following members to help with binary generation
|
||||
int mCachedSize;
|
||||
int mCachedAddress;
|
||||
|
||||
public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
|
||||
final ArrayList<WeightedString> bigrams, final int frequency) {
|
||||
final ArrayList<WeightedString> bigrams, final int frequency,
|
||||
final boolean isNotAWord, final boolean isBlacklistEntry) {
|
||||
mChars = chars;
|
||||
mFrequency = frequency;
|
||||
mShortcutTargets = shortcutTargets;
|
||||
mBigrams = bigrams;
|
||||
mChildren = null;
|
||||
mIsNotAWord = isNotAWord;
|
||||
mIsBlacklistEntry = isBlacklistEntry;
|
||||
}
|
||||
|
||||
public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
|
||||
final ArrayList<WeightedString> bigrams, final int frequency, final Node children) {
|
||||
final ArrayList<WeightedString> bigrams, final int frequency,
|
||||
final boolean isNotAWord, final boolean isBlacklistEntry, final Node children) {
|
||||
mChars = chars;
|
||||
mFrequency = frequency;
|
||||
mShortcutTargets = shortcutTargets;
|
||||
mBigrams = bigrams;
|
||||
mChildren = children;
|
||||
mIsNotAWord = isNotAWord;
|
||||
mIsBlacklistEntry = isBlacklistEntry;
|
||||
}
|
||||
|
||||
public void addChild(CharGroup n) {
|
||||
|
@ -197,8 +205,9 @@ public class FusionDictionary implements Iterable<Word> {
|
|||
* the existing ones if any. Note: unigram, bigram, and shortcut frequencies are only
|
||||
* updated if they are higher than the existing ones.
|
||||
*/
|
||||
public void update(int frequency, ArrayList<WeightedString> shortcutTargets,
|
||||
ArrayList<WeightedString> bigrams) {
|
||||
public void update(final int frequency, final ArrayList<WeightedString> shortcutTargets,
|
||||
final ArrayList<WeightedString> bigrams,
|
||||
final boolean isNotAWord, final boolean isBlacklistEntry) {
|
||||
if (frequency > mFrequency) {
|
||||
mFrequency = frequency;
|
||||
}
|
||||
|
@ -234,6 +243,8 @@ public class FusionDictionary implements Iterable<Word> {
|
|||
}
|
||||
}
|
||||
}
|
||||
mIsNotAWord = isNotAWord;
|
||||
mIsBlacklistEntry = isBlacklistEntry;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -296,10 +307,24 @@ public class FusionDictionary implements Iterable<Word> {
|
|||
* @param word the word to add.
|
||||
* @param frequency the frequency of the word, in the range [0..255].
|
||||
* @param shortcutTargets a list of shortcut targets for this word, or null.
|
||||
* @param isNotAWord true if this should not be considered a word (e.g. shortcut only)
|
||||
*/
|
||||
public void add(final String word, final int frequency,
|
||||
final ArrayList<WeightedString> shortcutTargets) {
|
||||
add(getCodePoints(word), frequency, shortcutTargets);
|
||||
final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord) {
|
||||
add(getCodePoints(word), frequency, shortcutTargets, isNotAWord,
|
||||
false /* isBlacklistEntry */);
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method to add a blacklist entry as a string.
|
||||
*
|
||||
* @param word the word to add as a blacklist entry.
|
||||
* @param shortcutTargets a list of shortcut targets for this word, or null.
|
||||
* @param isNotAWord true if this is not a word for spellchecking purposes (shortcut only or so)
|
||||
*/
|
||||
public void addBlacklistEntry(final String word,
|
||||
final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord) {
|
||||
add(getCodePoints(word), 0, shortcutTargets, isNotAWord, true /* isBlacklistEntry */);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -332,7 +357,8 @@ public class FusionDictionary implements Iterable<Word> {
|
|||
if (charGroup != null) {
|
||||
final CharGroup charGroup2 = findWordInTree(mRoot, word2);
|
||||
if (charGroup2 == null) {
|
||||
add(getCodePoints(word2), 0, null);
|
||||
add(getCodePoints(word2), 0, null, false /* isNotAWord */,
|
||||
false /* isBlacklistEntry */);
|
||||
}
|
||||
charGroup.addBigram(word2, frequency);
|
||||
} else {
|
||||
|
@ -349,9 +375,12 @@ public class FusionDictionary implements Iterable<Word> {
|
|||
* @param word the word, as an int array.
|
||||
* @param frequency the frequency of the word, in the range [0..255].
|
||||
* @param shortcutTargets an optional list of shortcut targets for this word (null if none).
|
||||
* @param isNotAWord true if this is not a word for spellchecking purposes (shortcut only or so)
|
||||
* @param isBlacklistEntry true if this is a blacklisted word, false otherwise
|
||||
*/
|
||||
private void add(final int[] word, final int frequency,
|
||||
final ArrayList<WeightedString> shortcutTargets) {
|
||||
final ArrayList<WeightedString> shortcutTargets,
|
||||
final boolean isNotAWord, final boolean isBlacklistEntry) {
|
||||
assert(frequency >= 0 && frequency <= 255);
|
||||
Node currentNode = mRoot;
|
||||
int charIndex = 0;
|
||||
|
@ -376,7 +405,7 @@ public class FusionDictionary implements Iterable<Word> {
|
|||
final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]);
|
||||
final CharGroup newGroup = new CharGroup(
|
||||
Arrays.copyOfRange(word, charIndex, word.length),
|
||||
shortcutTargets, null /* bigrams */, frequency);
|
||||
shortcutTargets, null /* bigrams */, frequency, isNotAWord, isBlacklistEntry);
|
||||
currentNode.mData.add(insertionIndex, newGroup);
|
||||
if (DBG) checkStack(currentNode);
|
||||
} else {
|
||||
|
@ -386,13 +415,15 @@ public class FusionDictionary implements Iterable<Word> {
|
|||
// The new word is a prefix of an existing word, but the node on which it
|
||||
// should end already exists as is. Since the old CharNode was not a terminal,
|
||||
// make it one by filling in its frequency and other attributes
|
||||
currentGroup.update(frequency, shortcutTargets, null);
|
||||
currentGroup.update(frequency, shortcutTargets, null, isNotAWord,
|
||||
isBlacklistEntry);
|
||||
} else {
|
||||
// The new word matches the full old word and extends past it.
|
||||
// We only have to create a new node and add it to the end of this.
|
||||
final CharGroup newNode = new CharGroup(
|
||||
Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
|
||||
shortcutTargets, null /* bigrams */, frequency);
|
||||
shortcutTargets, null /* bigrams */, frequency, isNotAWord,
|
||||
isBlacklistEntry);
|
||||
currentGroup.mChildren = new Node();
|
||||
currentGroup.mChildren.mData.add(newNode);
|
||||
}
|
||||
|
@ -400,7 +431,9 @@ public class FusionDictionary implements Iterable<Word> {
|
|||
if (0 == differentCharIndex) {
|
||||
// Exact same word. Update the frequency if higher. This will also add the
|
||||
// new shortcuts to the existing shortcut list if it already exists.
|
||||
currentGroup.update(frequency, shortcutTargets, null);
|
||||
currentGroup.update(frequency, shortcutTargets, null,
|
||||
currentGroup.mIsNotAWord && isNotAWord,
|
||||
currentGroup.mIsBlacklistEntry || isBlacklistEntry);
|
||||
} else {
|
||||
// Partial prefix match only. We have to replace the current node with a node
|
||||
// containing the current prefix and create two new ones for the tails.
|
||||
|
@ -408,21 +441,26 @@ public class FusionDictionary implements Iterable<Word> {
|
|||
final CharGroup newOldWord = new CharGroup(
|
||||
Arrays.copyOfRange(currentGroup.mChars, differentCharIndex,
|
||||
currentGroup.mChars.length), currentGroup.mShortcutTargets,
|
||||
currentGroup.mBigrams, currentGroup.mFrequency, currentGroup.mChildren);
|
||||
currentGroup.mBigrams, currentGroup.mFrequency,
|
||||
currentGroup.mIsNotAWord, currentGroup.mIsBlacklistEntry,
|
||||
currentGroup.mChildren);
|
||||
newChildren.mData.add(newOldWord);
|
||||
|
||||
final CharGroup newParent;
|
||||
if (charIndex + differentCharIndex >= word.length) {
|
||||
newParent = new CharGroup(
|
||||
Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
|
||||
shortcutTargets, null /* bigrams */, frequency, newChildren);
|
||||
shortcutTargets, null /* bigrams */, frequency,
|
||||
isNotAWord, isBlacklistEntry, newChildren);
|
||||
} else {
|
||||
newParent = new CharGroup(
|
||||
Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
|
||||
null /* shortcutTargets */, null /* bigrams */, -1, newChildren);
|
||||
null /* shortcutTargets */, null /* bigrams */, -1,
|
||||
false /* isNotAWord */, false /* isBlacklistEntry */, newChildren);
|
||||
final CharGroup newWord = new CharGroup(Arrays.copyOfRange(word,
|
||||
charIndex + differentCharIndex, word.length),
|
||||
shortcutTargets, null /* bigrams */, frequency);
|
||||
shortcutTargets, null /* bigrams */, frequency,
|
||||
isNotAWord, isBlacklistEntry);
|
||||
final int addIndex = word[charIndex + differentCharIndex]
|
||||
> currentGroup.mChars[differentCharIndex] ? 1 : 0;
|
||||
newChildren.mData.add(addIndex, newWord);
|
||||
|
@ -483,7 +521,8 @@ public class FusionDictionary implements Iterable<Word> {
|
|||
private static int findInsertionIndex(final Node node, int character) {
|
||||
final ArrayList<CharGroup> data = node.mData;
|
||||
final CharGroup reference = new CharGroup(new int[] { character },
|
||||
null /* shortcutTargets */, null /* bigrams */, 0);
|
||||
null /* shortcutTargets */, null /* bigrams */, 0, false /* isNotAWord */,
|
||||
false /* isBlacklistEntry */);
|
||||
int result = Collections.binarySearch(data, reference, CHARGROUP_COMPARATOR);
|
||||
return result >= 0 ? result : -result - 1;
|
||||
}
|
||||
|
@ -748,7 +787,8 @@ public class FusionDictionary implements Iterable<Word> {
|
|||
}
|
||||
if (currentGroup.mFrequency >= 0)
|
||||
return new Word(mCurrentString.toString(), currentGroup.mFrequency,
|
||||
currentGroup.mShortcutTargets, currentGroup.mBigrams);
|
||||
currentGroup.mShortcutTargets, currentGroup.mBigrams,
|
||||
currentGroup.mIsNotAWord, currentGroup.mIsBlacklistEntry);
|
||||
} else {
|
||||
mPositions.removeLast();
|
||||
currentPos = mPositions.getLast();
|
||||
|
|
|
@ -31,16 +31,21 @@ public class Word implements Comparable<Word> {
|
|||
public final int mFrequency;
|
||||
public final ArrayList<WeightedString> mShortcutTargets;
|
||||
public final ArrayList<WeightedString> mBigrams;
|
||||
public final boolean mIsNotAWord;
|
||||
public final boolean mIsBlacklistEntry;
|
||||
|
||||
private int mHashCode = 0;
|
||||
|
||||
public Word(final String word, final int frequency,
|
||||
final ArrayList<WeightedString> shortcutTargets,
|
||||
final ArrayList<WeightedString> bigrams) {
|
||||
final ArrayList<WeightedString> bigrams,
|
||||
final boolean isNotAWord, final boolean isBlacklistEntry) {
|
||||
mWord = word;
|
||||
mFrequency = frequency;
|
||||
mShortcutTargets = shortcutTargets;
|
||||
mBigrams = bigrams;
|
||||
mIsNotAWord = isNotAWord;
|
||||
mIsBlacklistEntry = isBlacklistEntry;
|
||||
}
|
||||
|
||||
private static int computeHashCode(Word word) {
|
||||
|
@ -48,7 +53,9 @@ public class Word implements Comparable<Word> {
|
|||
word.mWord,
|
||||
word.mFrequency,
|
||||
word.mShortcutTargets.hashCode(),
|
||||
word.mBigrams.hashCode()
|
||||
word.mBigrams.hashCode(),
|
||||
word.mIsNotAWord,
|
||||
word.mIsBlacklistEntry
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -78,7 +85,9 @@ public class Word implements Comparable<Word> {
|
|||
Word w = (Word)o;
|
||||
return mFrequency == w.mFrequency && mWord.equals(w.mWord)
|
||||
&& mShortcutTargets.equals(w.mShortcutTargets)
|
||||
&& mBigrams.equals(w.mBigrams);
|
||||
&& mBigrams.equals(w.mBigrams)
|
||||
&& mIsNotAWord == w.mIsNotAWord
|
||||
&& mIsBlacklistEntry == w.mIsBlacklistEntry;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -43,6 +43,10 @@ class BinaryFormat {
|
|||
static const int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
|
||||
// Flag for bigram presence
|
||||
static const int FLAG_HAS_BIGRAMS = 0x04;
|
||||
// Flag for non-words (typically, shortcut only entries)
|
||||
static const int FLAG_IS_NOT_A_WORD = 0x02;
|
||||
// Flag for blacklist
|
||||
static const int FLAG_IS_BLACKLISTED = 0x01;
|
||||
|
||||
// Attribute (bigram/shortcut) related flags:
|
||||
// Flag for presence of more attributes
|
||||
|
|
|
@ -72,6 +72,10 @@ class TerminalAttributes {
|
|||
return ShortcutIterator(mDict, mStartPos + BinaryFormat::SHORTCUT_LIST_SIZE_SIZE, mFlags);
|
||||
}
|
||||
|
||||
bool isBlacklistedOrNotAWord() const {
|
||||
return mFlags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD);
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes);
|
||||
const uint8_t *const mDict;
|
||||
|
|
|
@ -391,9 +391,11 @@ inline void UnigramDictionary::onTerminal(const int probability,
|
|||
const int finalProbability =
|
||||
correction->getFinalProbability(probability, &wordPointer, &wordLength);
|
||||
|
||||
if (0 != finalProbability) {
|
||||
if (0 != finalProbability && !terminalAttributes.isBlacklistedOrNotAWord()) {
|
||||
// If the probability is 0, we don't want to add this word. However we still
|
||||
// want to add its shortcuts (including a possible whitelist entry) if any.
|
||||
// Furthermore, if this is not a word (shortcut only for example) or a blacklisted
|
||||
// entry then we never want to suggest this.
|
||||
addWord(wordPointer, wordLength, finalProbability, masterQueue,
|
||||
Dictionary::KIND_CORRECTION);
|
||||
}
|
||||
|
@ -841,6 +843,12 @@ int UnigramDictionary::getFrequency(const int32_t *const inWord, const int lengt
|
|||
return NOT_A_PROBABILITY;
|
||||
}
|
||||
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
|
||||
if (flags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD)) {
|
||||
// If this is not a word, or if it's a blacklisted entry, it should behave as
|
||||
// having no frequency outside of the suggestion process (where it should be used
|
||||
// for shortcuts).
|
||||
return NOT_A_PROBABILITY;
|
||||
}
|
||||
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
|
||||
if (hasMultipleChars) {
|
||||
pos = BinaryFormat::skipOtherCharacters(root, pos);
|
||||
|
|
|
@ -80,7 +80,7 @@ public class BinaryDictIOTests extends AndroidTestCase {
|
|||
final List<String> words) {
|
||||
for (int i = 0; i < number; ++i) {
|
||||
final String word = words.get(i);
|
||||
dict.add(word, UNIGRAM_FREQ, null);
|
||||
dict.add(word, UNIGRAM_FREQ, null, false /* isNotAWord */);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -31,16 +31,16 @@ public class FusionDictionaryTests extends AndroidTestCase {
|
|||
FusionDictionary dict = new FusionDictionary(new Node(),
|
||||
new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
|
||||
|
||||
dict.add("abc", 10, null);
|
||||
dict.add("abc", 10, null, false /* isNotAWord */);
|
||||
assertNull(FusionDictionary.findWordInTree(dict.mRoot, "aaa"));
|
||||
assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "abc"));
|
||||
|
||||
dict.add("aa", 10, null);
|
||||
dict.add("aa", 10, null, false /* isNotAWord */);
|
||||
assertNull(FusionDictionary.findWordInTree(dict.mRoot, "aaa"));
|
||||
assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "aa"));
|
||||
|
||||
dict.add("babcd", 10, null);
|
||||
dict.add("bacde", 10, null);
|
||||
dict.add("babcd", 10, null, false /* isNotAWord */);
|
||||
dict.add("bacde", 10, null, false /* isNotAWord */);
|
||||
assertNull(FusionDictionary.findWordInTree(dict.mRoot, "ba"));
|
||||
assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "babcd"));
|
||||
assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "bacde"));
|
||||
|
|
|
@ -50,6 +50,7 @@ public class XmlDictInputOutput {
|
|||
private static final String SHORTCUT_TAG = "shortcut";
|
||||
private static final String FREQUENCY_ATTR = "f";
|
||||
private static final String WORD_ATTR = "word";
|
||||
private static final String NOT_A_WORD_ATTR = "not_a_word";
|
||||
|
||||
private static final int SHORTCUT_ONLY_DEFAULT_FREQ = 1;
|
||||
|
||||
|
@ -92,7 +93,7 @@ public class XmlDictInputOutput {
|
|||
final FusionDictionary dict = mDictionary;
|
||||
for (final String shortcutOnly : mShortcutsMap.keySet()) {
|
||||
if (dict.hasWord(shortcutOnly)) continue;
|
||||
dict.add(shortcutOnly, 0, mShortcutsMap.get(shortcutOnly));
|
||||
dict.add(shortcutOnly, 0, mShortcutsMap.get(shortcutOnly), true /* isNotAWord */);
|
||||
}
|
||||
mDictionary = null;
|
||||
mShortcutsMap.clear();
|
||||
|
@ -144,7 +145,7 @@ public class XmlDictInputOutput {
|
|||
@Override
|
||||
public void endElement(String uri, String localName, String qName) {
|
||||
if (WORD == mState) {
|
||||
mDictionary.add(mWord, mFreq, mShortcutsMap.get(mWord));
|
||||
mDictionary.add(mWord, mFreq, mShortcutsMap.get(mWord), false /* isNotAWord */);
|
||||
mState = START;
|
||||
}
|
||||
}
|
||||
|
@ -345,7 +346,8 @@ public class XmlDictInputOutput {
|
|||
destination.write("<!-- Warning: there is no code to read this format yet. -->\n");
|
||||
for (Word word : set) {
|
||||
destination.write(" <" + WORD_TAG + " " + WORD_ATTR + "=\"" + word.mWord + "\" "
|
||||
+ FREQUENCY_ATTR + "=\"" + word.mFrequency + "\">");
|
||||
+ FREQUENCY_ATTR + "=\"" + word.mFrequency
|
||||
+ (word.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "") + "\">");
|
||||
if (null != word.mShortcutTargets) {
|
||||
destination.write("\n");
|
||||
for (WeightedString target : word.mShortcutTargets) {
|
||||
|
|
|
@ -43,11 +43,11 @@ public class BinaryDictInputOutputTest extends TestCase {
|
|||
final FusionDictionary dict = new FusionDictionary(new Node(),
|
||||
new DictionaryOptions(new HashMap<String, String>(),
|
||||
false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
|
||||
dict.add("foo", 1, null);
|
||||
dict.add("fta", 1, null);
|
||||
dict.add("ftb", 1, null);
|
||||
dict.add("bar", 1, null);
|
||||
dict.add("fool", 1, null);
|
||||
dict.add("foo", 1, null, false /* isNotAWord */);
|
||||
dict.add("fta", 1, null, false /* isNotAWord */);
|
||||
dict.add("ftb", 1, null, false /* isNotAWord */);
|
||||
dict.add("bar", 1, null, false /* isNotAWord */);
|
||||
dict.add("fool", 1, null, false /* isNotAWord */);
|
||||
final ArrayList<Node> result = BinaryDictInputOutput.flattenTree(dict.mRoot);
|
||||
assertEquals(4, result.size());
|
||||
while (!result.isEmpty()) {
|
||||
|
|
Loading…
Reference in New Issue