am 19560508
: Move flags belonging to BinaryFormat to the right place.
* commit '195605084ed156b58f0bae002f121d98c1ace867': Move flags belonging to BinaryFormat to the right place.
This commit is contained in:
commit
47e6cf3695
5 changed files with 80 additions and 81 deletions
|
@ -126,7 +126,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
|
|||
|
||||
// codesSize == 0 means we are trying to find bigram predictions.
|
||||
if (codesSize < 1 || checkFirstCharacter(bigramBuffer, inputCodes)) {
|
||||
const int bigramFreqTemp = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
|
||||
const int bigramFreqTemp = BinaryFormat::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
|
||||
// Due to space constraints, the frequency for bigrams is approximate - the lower the
|
||||
// unigram frequency, the worse the precision. The theoretical maximum error in
|
||||
// resulting frequency is 8 - although in practice it's never bigger than 3 or 4
|
||||
|
@ -139,7 +139,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
|
|||
++bigramCount;
|
||||
}
|
||||
}
|
||||
} while (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
|
||||
} while (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
|
||||
return bigramCount;
|
||||
}
|
||||
|
||||
|
@ -154,8 +154,8 @@ int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord,
|
|||
|
||||
if (NOT_VALID_WORD == pos) return 0;
|
||||
const int flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
|
||||
if (0 == (flags & UnigramDictionary::FLAG_HAS_BIGRAMS)) return 0;
|
||||
if (0 == (flags & UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS)) {
|
||||
if (0 == (flags & BinaryFormat::FLAG_HAS_BIGRAMS)) return 0;
|
||||
if (0 == (flags & BinaryFormat::FLAG_HAS_MULTIPLE_CHARS)) {
|
||||
BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
|
||||
} else {
|
||||
pos = BinaryFormat::skipOtherCharacters(root, pos);
|
||||
|
@ -182,12 +182,12 @@ void BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int32_t *p
|
|||
int bigramFlags;
|
||||
do {
|
||||
bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
|
||||
const int frequency = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
|
||||
const int frequency = BinaryFormat::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
|
||||
const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
|
||||
&pos);
|
||||
(*map)[bigramPos] = frequency;
|
||||
setInFilter(filter, bigramPos);
|
||||
} while (0 != (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags));
|
||||
} while (0 != (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags));
|
||||
}
|
||||
|
||||
bool BigramDictionary::checkFirstCharacter(unsigned short *word, int *inputCodes) const {
|
||||
|
@ -223,7 +223,7 @@ bool BigramDictionary::isValidBigram(const int32_t *word1, int length1, const in
|
|||
if (bigramPos == nextWordPos) {
|
||||
return true;
|
||||
}
|
||||
} while (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
|
||||
} while (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -18,13 +18,47 @@
|
|||
#define LATINIME_BINARY_FORMAT_H
|
||||
|
||||
#include <limits>
|
||||
#include <map>
|
||||
#include "bloom_filter.h"
|
||||
#include "char_utils.h"
|
||||
#include "unigram_dictionary.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class BinaryFormat {
|
||||
public:
|
||||
// Mask and flags for children address type selection.
|
||||
static const int MASK_GROUP_ADDRESS_TYPE = 0xC0;
|
||||
static const int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00;
|
||||
static const int FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40;
|
||||
static const int FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80;
|
||||
static const int FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0;
|
||||
|
||||
// Flag for single/multiple char group
|
||||
static const int FLAG_HAS_MULTIPLE_CHARS = 0x20;
|
||||
|
||||
// Flag for terminal groups
|
||||
static const int FLAG_IS_TERMINAL = 0x10;
|
||||
|
||||
// Flag for shortcut targets presence
|
||||
static const int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
|
||||
// Flag for bigram presence
|
||||
static const int FLAG_HAS_BIGRAMS = 0x04;
|
||||
|
||||
// Attribute (bigram/shortcut) related flags:
|
||||
// Flag for presence of more attributes
|
||||
static const int FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
|
||||
// Flag for sign of offset. If this flag is set, the offset value must be negated.
|
||||
static const int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
|
||||
|
||||
// Mask for attribute frequency, stored on 4 bits inside the flags byte.
|
||||
static const int MASK_ATTRIBUTE_FREQUENCY = 0x0F;
|
||||
|
||||
// Mask and flags for attribute address type selection.
|
||||
static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
|
||||
static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
|
||||
static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
|
||||
static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryFormat);
|
||||
const static int32_t MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
|
||||
|
@ -174,13 +208,13 @@ inline int BinaryFormat::skipOtherCharacters(const uint8_t *const dict, const in
|
|||
|
||||
static inline int attributeAddressSize(const uint8_t flags) {
|
||||
static const int ATTRIBUTE_ADDRESS_SHIFT = 4;
|
||||
return (flags & UnigramDictionary::MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT;
|
||||
return (flags & BinaryFormat::MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT;
|
||||
/* Note: this is a value-dependent optimization of what may probably be
|
||||
more readably written this way:
|
||||
switch (flags * UnigramDictionary::MASK_ATTRIBUTE_ADDRESS_TYPE) {
|
||||
case UnigramDictionary::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: return 1;
|
||||
case UnigramDictionary::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: return 2;
|
||||
case UnigramDictionary::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTE: return 3;
|
||||
switch (flags & BinaryFormat::MASK_ATTRIBUTE_ADDRESS_TYPE) {
|
||||
case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: return 1;
|
||||
case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: return 2;
|
||||
case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES: return 3;
|
||||
default: return 0;
|
||||
}
|
||||
*/
|
||||
|
@ -189,7 +223,7 @@ static inline int attributeAddressSize(const uint8_t flags) {
|
|||
static inline int skipExistingBigrams(const uint8_t *const dict, const int pos) {
|
||||
int currentPos = pos;
|
||||
uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(dict, &currentPos);
|
||||
while (flags & UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT) {
|
||||
while (flags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT) {
|
||||
currentPos += attributeAddressSize(flags);
|
||||
flags = BinaryFormat::getFlagsAndForwardPointer(dict, &currentPos);
|
||||
}
|
||||
|
@ -199,7 +233,7 @@ static inline int skipExistingBigrams(const uint8_t *const dict, const int pos)
|
|||
|
||||
static inline int childrenAddressSize(const uint8_t flags) {
|
||||
static const int CHILDREN_ADDRESS_SHIFT = 6;
|
||||
return (UnigramDictionary::MASK_GROUP_ADDRESS_TYPE & flags) >> CHILDREN_ADDRESS_SHIFT;
|
||||
return (BinaryFormat::MASK_GROUP_ADDRESS_TYPE & flags) >> CHILDREN_ADDRESS_SHIFT;
|
||||
/* See the note in attributeAddressSize. The same applies here */
|
||||
}
|
||||
|
||||
|
@ -212,12 +246,12 @@ inline int BinaryFormat::skipChildrenPosition(const uint8_t flags, const int pos
|
|||
}
|
||||
|
||||
inline int BinaryFormat::skipFrequency(const uint8_t flags, const int pos) {
|
||||
return UnigramDictionary::FLAG_IS_TERMINAL & flags ? pos + 1 : pos;
|
||||
return FLAG_IS_TERMINAL & flags ? pos + 1 : pos;
|
||||
}
|
||||
|
||||
inline int BinaryFormat::skipShortcuts(const uint8_t *const dict, const uint8_t flags,
|
||||
const int pos) {
|
||||
if (UnigramDictionary::FLAG_HAS_SHORTCUT_TARGETS & flags) {
|
||||
if (FLAG_HAS_SHORTCUT_TARGETS & flags) {
|
||||
return pos + shortcutByteSize(dict, pos);
|
||||
} else {
|
||||
return pos;
|
||||
|
@ -226,7 +260,7 @@ inline int BinaryFormat::skipShortcuts(const uint8_t *const dict, const uint8_t
|
|||
|
||||
inline int BinaryFormat::skipBigrams(const uint8_t *const dict, const uint8_t flags,
|
||||
const int pos) {
|
||||
if (UnigramDictionary::FLAG_HAS_BIGRAMS & flags) {
|
||||
if (FLAG_HAS_BIGRAMS & flags) {
|
||||
return skipExistingBigrams(dict, pos);
|
||||
} else {
|
||||
return pos;
|
||||
|
@ -253,15 +287,15 @@ inline int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t *const dict,
|
|||
inline int BinaryFormat::readChildrenPosition(const uint8_t *const dict, const uint8_t flags,
|
||||
const int pos) {
|
||||
int offset = 0;
|
||||
switch (UnigramDictionary::MASK_GROUP_ADDRESS_TYPE & flags) {
|
||||
case UnigramDictionary::FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
|
||||
switch (MASK_GROUP_ADDRESS_TYPE & flags) {
|
||||
case FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
|
||||
offset = dict[pos];
|
||||
break;
|
||||
case UnigramDictionary::FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
|
||||
case FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
|
||||
offset = dict[pos] << 8;
|
||||
offset += dict[pos + 1];
|
||||
break;
|
||||
case UnigramDictionary::FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
|
||||
case FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
|
||||
offset = dict[pos] << 16;
|
||||
offset += dict[pos + 1] << 8;
|
||||
offset += dict[pos + 2];
|
||||
|
@ -275,32 +309,31 @@ inline int BinaryFormat::readChildrenPosition(const uint8_t *const dict, const u
|
|||
}
|
||||
|
||||
inline bool BinaryFormat::hasChildrenInFlags(const uint8_t flags) {
|
||||
return (UnigramDictionary::FLAG_GROUP_ADDRESS_TYPE_NOADDRESS
|
||||
!= (UnigramDictionary::MASK_GROUP_ADDRESS_TYPE & flags));
|
||||
return (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags));
|
||||
}
|
||||
|
||||
inline int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t *const dict,
|
||||
const uint8_t flags, int *pos) {
|
||||
int offset = 0;
|
||||
const int origin = *pos;
|
||||
switch (UnigramDictionary::MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
|
||||
case UnigramDictionary::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
|
||||
switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
|
||||
case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
|
||||
offset = dict[origin];
|
||||
*pos = origin + 1;
|
||||
break;
|
||||
case UnigramDictionary::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
|
||||
case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
|
||||
offset = dict[origin] << 8;
|
||||
offset += dict[origin + 1];
|
||||
*pos = origin + 2;
|
||||
break;
|
||||
case UnigramDictionary::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
|
||||
case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
|
||||
offset = dict[origin] << 16;
|
||||
offset += dict[origin + 1] << 8;
|
||||
offset += dict[origin + 2];
|
||||
*pos = origin + 3;
|
||||
break;
|
||||
}
|
||||
if (UnigramDictionary::FLAG_ATTRIBUTE_OFFSET_NEGATIVE & flags) {
|
||||
if (FLAG_ATTRIBUTE_OFFSET_NEGATIVE & flags) {
|
||||
return origin - offset;
|
||||
} else {
|
||||
return origin + offset;
|
||||
|
@ -332,7 +365,7 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
|
|||
// char within a node, so either we found our match in this node, or there is
|
||||
// no match and we can return NOT_VALID_WORD. So we will check all the characters
|
||||
// in this character group indeed does match.
|
||||
if (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags) {
|
||||
if (FLAG_HAS_MULTIPLE_CHARS & flags) {
|
||||
character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
|
||||
while (NOT_A_CHARACTER != character) {
|
||||
++wordPos;
|
||||
|
@ -350,14 +383,13 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
|
|||
// If we don't match the length AND don't have children, then a word in the
|
||||
// dictionary fully matches a prefix of the searched word but not the full word.
|
||||
++wordPos;
|
||||
if (UnigramDictionary::FLAG_IS_TERMINAL & flags) {
|
||||
if (FLAG_IS_TERMINAL & flags) {
|
||||
if (wordPos == length) {
|
||||
return charGroupPos;
|
||||
}
|
||||
pos = BinaryFormat::skipFrequency(UnigramDictionary::FLAG_IS_TERMINAL, pos);
|
||||
pos = BinaryFormat::skipFrequency(FLAG_IS_TERMINAL, pos);
|
||||
}
|
||||
if (UnigramDictionary::FLAG_GROUP_ADDRESS_TYPE_NOADDRESS
|
||||
== (UnigramDictionary::MASK_GROUP_ADDRESS_TYPE & flags)) {
|
||||
if (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS == (MASK_GROUP_ADDRESS_TYPE & flags)) {
|
||||
return NOT_VALID_WORD;
|
||||
}
|
||||
// We have children and we are still shorter than the word we are searching for, so
|
||||
|
@ -367,7 +399,7 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
|
|||
break;
|
||||
} else {
|
||||
// This chargroup does not match, so skip the remaining part and go to the next.
|
||||
if (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags) {
|
||||
if (FLAG_HAS_MULTIPLE_CHARS & flags) {
|
||||
pos = BinaryFormat::skipOtherCharacters(root, pos);
|
||||
}
|
||||
pos = BinaryFormat::skipFrequency(flags, pos);
|
||||
|
@ -420,7 +452,7 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a
|
|||
// We found the address. Copy the rest of the word in the buffer and return
|
||||
// the length.
|
||||
outWord[wordPos] = character;
|
||||
if (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags) {
|
||||
if (FLAG_HAS_MULTIPLE_CHARS & flags) {
|
||||
int32_t nextChar = getCharCodeAndForwardPointer(root, &pos);
|
||||
// We count chars in order to avoid infinite loops if the file is broken or
|
||||
// if there is some other bug
|
||||
|
@ -435,7 +467,7 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a
|
|||
}
|
||||
// We need to skip past this char group, so skip any remaining chars after the
|
||||
// first and possibly the frequency.
|
||||
if (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags) {
|
||||
if (FLAG_HAS_MULTIPLE_CHARS & flags) {
|
||||
pos = skipOtherCharacters(root, pos);
|
||||
}
|
||||
pos = skipFrequency(flags, pos);
|
||||
|
@ -443,8 +475,8 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a
|
|||
// The fact that this group has children is very important. Since we already know
|
||||
// that this group does not match, if it has no children we know it is irrelevant
|
||||
// to what we are searching for.
|
||||
const bool hasChildren = (UnigramDictionary::FLAG_GROUP_ADDRESS_TYPE_NOADDRESS !=
|
||||
(UnigramDictionary::MASK_GROUP_ADDRESS_TYPE & flags));
|
||||
const bool hasChildren = (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS !=
|
||||
(MASK_GROUP_ADDRESS_TYPE & flags));
|
||||
// We will write in `found' whether we have passed the children address we are
|
||||
// searching for. For example if we search for "beer", the children of b are less
|
||||
// than the address we are searching for and the children of c are greater. When we
|
||||
|
@ -484,7 +516,7 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a
|
|||
getCharCodeAndForwardPointer(root, &lastCandidateGroupPos);
|
||||
// We copy all the characters in this group to the buffer
|
||||
outWord[wordPos] = lastChar;
|
||||
if (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & lastFlags) {
|
||||
if (FLAG_HAS_MULTIPLE_CHARS & lastFlags) {
|
||||
int32_t nextChar =
|
||||
getCharCodeAndForwardPointer(root, &lastCandidateGroupPos);
|
||||
int charCount = maxDepth;
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
#ifndef LATINIME_TERMINAL_ATTRIBUTES_H
|
||||
#define LATINIME_TERMINAL_ATTRIBUTES_H
|
||||
|
||||
#include "unigram_dictionary.h"
|
||||
#include "binary_format.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -36,7 +36,7 @@ class TerminalAttributes {
|
|||
public:
|
||||
ShortcutIterator(const uint8_t *dict, const int pos, const uint8_t flags) : mDict(dict),
|
||||
mPos(pos) {
|
||||
mHasNextShortcutTarget = (0 != (flags & UnigramDictionary::FLAG_HAS_SHORTCUT_TARGETS));
|
||||
mHasNextShortcutTarget = (0 != (flags & BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS));
|
||||
}
|
||||
|
||||
inline bool hasNextShortcutTarget() const {
|
||||
|
@ -49,7 +49,7 @@ class TerminalAttributes {
|
|||
inline int getNextShortcutTarget(const int maxDepth, uint16_t *outWord) {
|
||||
const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos);
|
||||
mHasNextShortcutTarget =
|
||||
0 != (shortcutFlags & UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT);
|
||||
0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
|
||||
unsigned int i;
|
||||
for (i = 0; i < MAX_WORD_LENGTH_INTERNAL; ++i) {
|
||||
const int charCode = BinaryFormat::getCharCodeAndForwardPointer(mDict, &mPos);
|
||||
|
|
|
@ -707,7 +707,7 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
|
|||
const uint8_t *const root, const int startPos,
|
||||
const uint16_t *const inWord, const int startInputIndex,
|
||||
int32_t *outNewWord, int *outInputIndex, int *outPos) {
|
||||
const bool hasMultipleChars = (0 != (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags));
|
||||
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
|
||||
int pos = startPos;
|
||||
int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
|
||||
int32_t baseChar = toBaseLowerCase(character);
|
||||
|
@ -780,7 +780,7 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord
|
|||
// into inputIndex if there is a match.
|
||||
const bool isAlike = testCharGroupForContinuedLikeness(flags, root, pos, inWord,
|
||||
inputIndex, newWord, &inputIndex, &pos);
|
||||
if (isAlike && (FLAG_IS_TERMINAL & flags) && (inputIndex == length)) {
|
||||
if (isAlike && (BinaryFormat::FLAG_IS_TERMINAL & flags) && (inputIndex == length)) {
|
||||
const int frequency = BinaryFormat::readFrequencyWithoutMovingPointer(root, pos);
|
||||
onTerminalWordLike(frequency, newWord, inputIndex, outWord, &maxFreq);
|
||||
}
|
||||
|
@ -823,7 +823,7 @@ int UnigramDictionary::getFrequency(const int32_t *const inWord, const int lengt
|
|||
return NOT_A_PROBABILITY;
|
||||
}
|
||||
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
|
||||
const bool hasMultipleChars = (0 != (FLAG_HAS_MULTIPLE_CHARS & flags));
|
||||
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
|
||||
if (hasMultipleChars) {
|
||||
pos = BinaryFormat::skipOtherCharacters(root, pos);
|
||||
} else {
|
||||
|
@ -871,8 +871,8 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
|
|||
// - FLAG_IS_TERMINAL: whether this node is a terminal or not (it may still have children)
|
||||
// - FLAG_HAS_BIGRAMS: whether this node has bigrams or not
|
||||
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(DICT_ROOT, &pos);
|
||||
const bool hasMultipleChars = (0 != (FLAG_HAS_MULTIPLE_CHARS & flags));
|
||||
const bool isTerminalNode = (0 != (FLAG_IS_TERMINAL & flags));
|
||||
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
|
||||
const bool isTerminalNode = (0 != (BinaryFormat::FLAG_IS_TERMINAL & flags));
|
||||
|
||||
bool needsToInvokeOnTerminal = false;
|
||||
|
||||
|
|
|
@ -32,39 +32,6 @@ class UnigramDictionary {
|
|||
typedef struct { int first; int second; int replacement; } digraph_t;
|
||||
|
||||
public:
|
||||
// Mask and flags for children address type selection.
|
||||
static const int MASK_GROUP_ADDRESS_TYPE = 0xC0;
|
||||
static const int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00;
|
||||
static const int FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40;
|
||||
static const int FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80;
|
||||
static const int FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0;
|
||||
|
||||
// Flag for single/multiple char group
|
||||
static const int FLAG_HAS_MULTIPLE_CHARS = 0x20;
|
||||
|
||||
// Flag for terminal groups
|
||||
static const int FLAG_IS_TERMINAL = 0x10;
|
||||
|
||||
// Flag for shortcut targets presence
|
||||
static const int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
|
||||
// Flag for bigram presence
|
||||
static const int FLAG_HAS_BIGRAMS = 0x04;
|
||||
|
||||
// Attribute (bigram/shortcut) related flags:
|
||||
// Flag for presence of more attributes
|
||||
static const int FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
|
||||
// Flag for sign of offset. If this flag is set, the offset value must be negated.
|
||||
static const int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
|
||||
|
||||
// Mask for attribute frequency, stored on 4 bits inside the flags byte.
|
||||
static const int MASK_ATTRIBUTE_FREQUENCY = 0x0F;
|
||||
|
||||
// Mask and flags for attribute address type selection.
|
||||
static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
|
||||
static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
|
||||
static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
|
||||
static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
|
||||
|
||||
// Error tolerances
|
||||
static const int DEFAULT_MAX_ERRORS = 2;
|
||||
static const int MAX_ERRORS_FOR_TWO_WORDS = 1;
|
||||
|
|
Loading…
Reference in a new issue