am 202e416b
: Add bigram writing methods.
* commit '202e416b51ef4cf3553afeb305ca4b14dd6105e5': Add bigram writing methods.
This commit is contained in:
commit
b5a3d3be07
8 changed files with 269 additions and 50 deletions
|
@ -68,6 +68,7 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
suggest/core/session/dic_traverse_session.cpp \
|
suggest/core/session/dic_traverse_session.cpp \
|
||||||
$(addprefix suggest/policyimpl/dictionary/, \
|
$(addprefix suggest/policyimpl/dictionary/, \
|
||||||
bigram/bigram_list_reading_utils.cpp \
|
bigram/bigram_list_reading_utils.cpp \
|
||||||
|
bigram/dynamic_bigram_list_policy.cpp \
|
||||||
header/header_policy.cpp \
|
header/header_policy.cpp \
|
||||||
header/header_reading_utils.cpp \
|
header/header_reading_utils.cpp \
|
||||||
shortcut/shortcut_list_reading_utils.cpp \
|
shortcut/shortcut_list_reading_utils.cpp \
|
||||||
|
|
|
@ -117,9 +117,15 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
|
||||||
mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
|
mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
|
||||||
while (bigramsIt.hasNext()) {
|
while (bigramsIt.hasNext()) {
|
||||||
bigramsIt.next();
|
bigramsIt.next();
|
||||||
const int length = mDictionaryStructurePolicy->
|
if (bigramsIt.getBigramPos() == NOT_A_VALID_WORD_POS) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const int codePointCount = mDictionaryStructurePolicy->
|
||||||
getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(),
|
getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(),
|
||||||
MAX_WORD_LENGTH, bigramBuffer, &unigramProbability);
|
MAX_WORD_LENGTH, bigramBuffer, &unigramProbability);
|
||||||
|
if (codePointCount <= 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
// Due to space constraints, the probability for bigrams is approximate - the lower the
|
// Due to space constraints, the probability for bigrams is approximate - the lower the
|
||||||
// unigram probability, the worse the precision. The theoritical maximum error in
|
// unigram probability, the worse the precision. The theoritical maximum error in
|
||||||
// resulting probability is 8 - although in the practice it's never bigger than 3 or 4
|
// resulting probability is 8 - although in the practice it's never bigger than 3 or 4
|
||||||
|
@ -127,8 +133,8 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
|
||||||
// here, but it can't get too bad.
|
// here, but it can't get too bad.
|
||||||
const int probability = ProbabilityUtils::computeProbabilityForBigram(
|
const int probability = ProbabilityUtils::computeProbabilityForBigram(
|
||||||
unigramProbability, bigramsIt.getProbability());
|
unigramProbability, bigramsIt.getProbability());
|
||||||
addWordBigram(bigramBuffer, length, probability, outBigramProbability, outBigramCodePoints,
|
addWordBigram(bigramBuffer, codePointCount, probability, outBigramProbability,
|
||||||
outputTypes);
|
outBigramCodePoints, outputTypes);
|
||||||
++bigramCount;
|
++bigramCount;
|
||||||
}
|
}
|
||||||
return min(bigramCount, MAX_RESULTS);
|
return min(bigramCount, MAX_RESULTS);
|
||||||
|
|
|
@ -73,6 +73,9 @@ class MultiBigramMap {
|
||||||
bigramsListPos);
|
bigramsListPos);
|
||||||
while (bigramsIt.hasNext()) {
|
while (bigramsIt.hasNext()) {
|
||||||
bigramsIt.next();
|
bigramsIt.next();
|
||||||
|
if (bigramsIt.getBigramPos() == NOT_A_VALID_WORD_POS) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability();
|
mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability();
|
||||||
mBloomFilter.setInFilter(bigramsIt.getBigramPos());
|
mBloomFilter.setInFilter(bigramsIt.getBigramPos());
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,16 +33,16 @@ class BigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
|
|
||||||
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
|
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
|
||||||
int *const pos) const {
|
int *const pos) const {
|
||||||
const BigramListReadingUtils::BigramFlags flags =
|
const BigramListReadWriteUtils::BigramFlags flags =
|
||||||
BigramListReadingUtils::getFlagsAndForwardPointer(mBigramsBuf, pos);
|
BigramListReadWriteUtils::getFlagsAndForwardPointer(mBigramsBuf, pos);
|
||||||
*outBigramPos = BigramListReadingUtils::getBigramAddressAndForwardPointer(
|
*outBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
|
||||||
mBigramsBuf, flags, pos);
|
mBigramsBuf, flags, pos);
|
||||||
*outProbability = BigramListReadingUtils::getProbabilityFromFlags(flags);
|
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
|
||||||
*outHasNext = BigramListReadingUtils::hasNext(flags);
|
*outHasNext = BigramListReadWriteUtils::hasNext(flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
void skipAllBigrams(int *const pos) const {
|
void skipAllBigrams(int *const pos) const {
|
||||||
BigramListReadingUtils::skipExistingBigrams(mBigramsBuf, pos);
|
BigramListReadWriteUtils::skipExistingBigrams(mBigramsBuf, pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
|
@ -20,24 +20,25 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
const BigramListReadingUtils::BigramFlags BigramListReadingUtils::MASK_ATTRIBUTE_ADDRESS_TYPE =
|
const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::MASK_ATTRIBUTE_ADDRESS_TYPE =
|
||||||
0x30;
|
0x30;
|
||||||
const BigramListReadingUtils::BigramFlags
|
const BigramListReadWriteUtils::BigramFlags
|
||||||
BigramListReadingUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
|
BigramListReadWriteUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
|
||||||
const BigramListReadingUtils::BigramFlags
|
const BigramListReadWriteUtils::BigramFlags
|
||||||
BigramListReadingUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
|
BigramListReadWriteUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
|
||||||
const BigramListReadingUtils::BigramFlags
|
const BigramListReadWriteUtils::BigramFlags
|
||||||
BigramListReadingUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
|
BigramListReadWriteUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
|
||||||
const BigramListReadingUtils::BigramFlags
|
const BigramListReadWriteUtils::BigramFlags
|
||||||
BigramListReadingUtils::FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
|
BigramListReadWriteUtils::FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
|
||||||
// Flag for presence of more attributes
|
// Flag for presence of more attributes
|
||||||
const BigramListReadingUtils::BigramFlags BigramListReadingUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
|
const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::FLAG_ATTRIBUTE_HAS_NEXT =
|
||||||
|
0x80;
|
||||||
// Mask for attribute probability, stored on 4 bits inside the flags byte.
|
// Mask for attribute probability, stored on 4 bits inside the flags byte.
|
||||||
const BigramListReadingUtils::BigramFlags
|
const BigramListReadWriteUtils::BigramFlags
|
||||||
BigramListReadingUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
|
BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
|
||||||
const int BigramListReadingUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
|
const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
|
||||||
|
|
||||||
/* static */ int BigramListReadingUtils::getBigramAddressAndForwardPointer(
|
/* static */ int BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
|
||||||
const uint8_t *const bigramsBuf, const BigramFlags flags, int *const pos) {
|
const uint8_t *const bigramsBuf, const BigramFlags flags, int *const pos) {
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
const int origin = *pos;
|
const int origin = *pos;
|
||||||
|
@ -52,6 +53,9 @@ const int BigramListReadingUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
|
||||||
offset = ByteArrayUtils::readUint24AndAdvancePosition(bigramsBuf, pos);
|
offset = ByteArrayUtils::readUint24AndAdvancePosition(bigramsBuf, pos);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
if (offset == 0) {
|
||||||
|
return NOT_A_VALID_WORD_POS;
|
||||||
|
}
|
||||||
if (isOffsetNegative(flags)) {
|
if (isOffsetNegative(flags)) {
|
||||||
return origin - offset;
|
return origin - offset;
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -14,9 +14,10 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef LATINIME_BIGRAM_LIST_READING_UTILS_H
|
#ifndef LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H
|
||||||
#define LATINIME_BIGRAM_LIST_READING_UTILS_H
|
#define LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H
|
||||||
|
|
||||||
|
#include <cstdlib>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
@ -24,7 +25,8 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
class BigramListReadingUtils {
|
// TODO: Change name of this file according to the class name.
|
||||||
|
class BigramListReadWriteUtils {
|
||||||
public:
|
public:
|
||||||
typedef uint8_t BigramFlags;
|
typedef uint8_t BigramFlags;
|
||||||
|
|
||||||
|
@ -55,8 +57,62 @@ public:
|
||||||
static int getBigramAddressAndForwardPointer(const uint8_t *const bigramsBuf,
|
static int getBigramAddressAndForwardPointer(const uint8_t *const bigramsBuf,
|
||||||
const BigramFlags flags, int *const pos);
|
const BigramFlags flags, int *const pos);
|
||||||
|
|
||||||
|
// Returns the size of the bigram position field that is stored in bigram flags.
|
||||||
|
static AK_FORCE_INLINE int attributeAddressSize(const BigramFlags flags) {
|
||||||
|
return (flags & MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT;
|
||||||
|
/* Note: this is a value-dependant optimization of what may probably be
|
||||||
|
more readably written this way:
|
||||||
|
switch (flags * BinaryFormat::MASK_ATTRIBUTE_ADDRESS_TYPE) {
|
||||||
|
case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: return 1;
|
||||||
|
case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: return 2;
|
||||||
|
case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTE: return 3;
|
||||||
|
default: return 0;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a copy of the given flags with the "more entries follow" bit turned on. All other
// bits are passed through unchanged.
static AK_FORCE_INLINE BigramFlags setHasNextFlag(const BigramFlags flags) {
    BigramFlags flagsWithHasNext = flags;
    flagsWithHasNext |= FLAG_ATTRIBUTE_HAS_NEXT;
    return flagsWithHasNext;
}
|
||||||
|
|
||||||
|
// Returns true if the bigram entry is valid and put entry values into out*.
|
||||||
|
static AK_FORCE_INLINE bool createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
|
||||||
|
const int entryPos, const int targetPos, const int probability, const bool hasNext,
|
||||||
|
BigramFlags *const outBigramFlags, uint32_t *const outOffset,
|
||||||
|
int *const outOffsetFieldSize) {
|
||||||
|
if (targetPos == NOT_A_VALID_WORD_POS) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY;
|
||||||
|
if (hasNext) {
|
||||||
|
flags |= FLAG_ATTRIBUTE_HAS_NEXT;
|
||||||
|
}
|
||||||
|
const int targetFieldPos = entryPos + 1;
|
||||||
|
const int offset = targetPos - targetFieldPos;
|
||||||
|
if (offset < 0) {
|
||||||
|
flags |= FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
|
||||||
|
}
|
||||||
|
const uint32_t absOffest = abs(offset);
|
||||||
|
if ((absOffest >> 24) != 0) {
|
||||||
|
// Offset is too large.
|
||||||
|
return false;
|
||||||
|
} else if ((absOffest >> 16) != 0) {
|
||||||
|
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
|
||||||
|
*outOffsetFieldSize = 3;
|
||||||
|
} else if ((absOffest >> 8) != 0) {
|
||||||
|
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
|
||||||
|
*outOffsetFieldSize = 2;
|
||||||
|
} else {
|
||||||
|
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
|
||||||
|
*outOffsetFieldSize = 1;
|
||||||
|
}
|
||||||
|
*outBigramFlags = flags;
|
||||||
|
*outOffset = absOffest;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadingUtils);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils);
|
||||||
|
|
||||||
static const BigramFlags MASK_ATTRIBUTE_ADDRESS_TYPE;
|
static const BigramFlags MASK_ATTRIBUTE_ADDRESS_TYPE;
|
||||||
static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
|
static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
|
||||||
|
@ -70,19 +126,6 @@ private:
|
||||||
static AK_FORCE_INLINE bool isOffsetNegative(const BigramFlags flags) {
|
static AK_FORCE_INLINE bool isOffsetNegative(const BigramFlags flags) {
|
||||||
return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
|
return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static AK_FORCE_INLINE int attributeAddressSize(const BigramFlags flags) {
|
|
||||||
return (flags & MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT;
|
|
||||||
/* Note: this is a value-dependant optimization of what may probably be
|
|
||||||
more readably written this way:
|
|
||||||
switch (flags * BinaryFormat::MASK_ATTRIBUTE_ADDRESS_TYPE) {
|
|
||||||
case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: return 1;
|
|
||||||
case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: return 2;
|
|
||||||
case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTE: return 3;
|
|
||||||
default: return 0;
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_BIGRAM_LIST_READING_UTILS_H
|
#endif // LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H
|
||||||
|
|
|
@ -0,0 +1,153 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos) {
|
||||||
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
|
||||||
|
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
*fromPos -= mBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
|
BigramListReadWriteUtils::BigramFlags flags;
|
||||||
|
do {
|
||||||
|
flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, fromPos);
|
||||||
|
int bigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
|
||||||
|
buffer, flags, fromPos);
|
||||||
|
if (bigramPos == NOT_A_VALID_WORD_POS) {
|
||||||
|
// skip invalid bigram entry.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
bigramPos += mBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
|
BigramListReadWriteUtils::BigramFlags newBigramFlags;
|
||||||
|
uint32_t newBigramOffset;
|
||||||
|
int newBigramOffsetFieldSize;
|
||||||
|
if(!BigramListReadWriteUtils::createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
|
||||||
|
*toPos, bigramPos, BigramListReadWriteUtils::getProbabilityFromFlags(flags),
|
||||||
|
BigramListReadWriteUtils::hasNext(flags), &newBigramFlags, &newBigramOffset,
|
||||||
|
&newBigramOffsetFieldSize)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Write bigram entry. Target buffer is always the additional buffer.
|
||||||
|
if (!mBuffer->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */,toPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!mBuffer->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize,
|
||||||
|
toPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} while(BigramListReadWriteUtils::hasNext(flags));
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
*fromPos += mBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool DynamicBigramListPolicy::addBigramEntry(const int bigramPos, const int probability,
|
||||||
|
int *const pos) {
|
||||||
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
|
||||||
|
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
*pos -= mBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
|
BigramListReadWriteUtils::BigramFlags flags;
|
||||||
|
do {
|
||||||
|
int entryPos = *pos;
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
entryPos += mBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
|
flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos);
|
||||||
|
BigramListReadWriteUtils::getBigramAddressAndForwardPointer(buffer, flags, pos);
|
||||||
|
if (BigramListReadWriteUtils::hasNext(flags)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// The current last entry is found.
|
||||||
|
// First, update the flags of the last entry.
|
||||||
|
const BigramListReadWriteUtils::BigramFlags updatedFlags =
|
||||||
|
BigramListReadWriteUtils::setHasNextFlag(flags);
|
||||||
|
if (!mBuffer->writeUintAndAdvancePosition(updatedFlags, 1 /* size */, &entryPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Then, add a new entry after the last entry.
|
||||||
|
BigramListReadWriteUtils::BigramFlags newBigramFlags;
|
||||||
|
uint32_t newBigramOffset;
|
||||||
|
int newBigramOffsetFieldSize;
|
||||||
|
if(!BigramListReadWriteUtils::createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
|
||||||
|
*pos, bigramPos, BigramListReadWriteUtils::getProbabilityFromFlags(flags),
|
||||||
|
BigramListReadWriteUtils::hasNext(flags), &newBigramFlags, &newBigramOffset,
|
||||||
|
&newBigramOffsetFieldSize)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
int newEntryPos = *pos;
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
newEntryPos += mBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
|
// Write bigram flags.
|
||||||
|
if (!mBuffer->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */,
|
||||||
|
&newEntryPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Write bigram positon offset.
|
||||||
|
if (!mBuffer->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize,
|
||||||
|
&newEntryPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} while(BigramListReadWriteUtils::hasNext(flags));
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
*pos += mBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int targetBigramPos) {
|
||||||
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(bigramListPos);
|
||||||
|
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
|
int pos = bigramListPos;
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
pos -= mBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
|
BigramListReadWriteUtils::BigramFlags flags;
|
||||||
|
do {
|
||||||
|
flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, &pos);
|
||||||
|
int bigramOffsetFieldPos = pos;
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
bigramOffsetFieldPos += mBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
|
int bigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
|
||||||
|
buffer, flags, &pos);
|
||||||
|
if (usesAdditionalBuffer && bigramPos != NOT_A_VALID_WORD_POS) {
|
||||||
|
bigramPos += mBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
|
if (bigramPos != targetBigramPos) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Target entry is found. Write 0 into the bigram pos field to mark the bigram invalid.
|
||||||
|
const int bigramOffsetFieldSize =
|
||||||
|
BigramListReadWriteUtils::attributeAddressSize(flags);
|
||||||
|
if (!mBuffer->writeUintAndAdvancePosition(0 /* data */, bigramOffsetFieldSize,
|
||||||
|
&bigramOffsetFieldPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
} while(BigramListReadWriteUtils::hasNext(flags));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace latinime
|
|
@ -31,7 +31,7 @@ namespace latinime {
|
||||||
*/
|
*/
|
||||||
class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
public:
|
public:
|
||||||
DynamicBigramListPolicy(const BufferWithExtendableBuffer *const buffer)
|
DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer)
|
||||||
: mBuffer(buffer) {}
|
: mBuffer(buffer) {}
|
||||||
|
|
||||||
~DynamicBigramListPolicy() {}
|
~DynamicBigramListPolicy() {}
|
||||||
|
@ -43,15 +43,15 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
*pos -= mBuffer->getOriginalBufferSize();
|
*pos -= mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
const BigramListReadingUtils::BigramFlags flags =
|
const BigramListReadWriteUtils::BigramFlags flags =
|
||||||
BigramListReadingUtils::getFlagsAndForwardPointer(buffer, pos);
|
BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos);
|
||||||
*outBigramPos = BigramListReadingUtils::getBigramAddressAndForwardPointer(
|
*outBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
|
||||||
buffer, flags, pos);
|
buffer, flags, pos);
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer && *outBigramPos != NOT_A_VALID_WORD_POS) {
|
||||||
*outBigramPos += mBuffer->getOriginalBufferSize();
|
*outBigramPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
*outProbability = BigramListReadingUtils::getProbabilityFromFlags(flags);
|
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
|
||||||
*outHasNext = BigramListReadingUtils::hasNext(flags);
|
*outHasNext = BigramListReadWriteUtils::hasNext(flags);
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
*pos += mBuffer->getOriginalBufferSize();
|
*pos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
|
@ -63,16 +63,25 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
*pos -= mBuffer->getOriginalBufferSize();
|
*pos -= mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
BigramListReadingUtils::skipExistingBigrams(buffer, pos);
|
BigramListReadWriteUtils::skipExistingBigrams(buffer, pos);
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
*pos += mBuffer->getOriginalBufferSize();
|
*pos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Copy bigrams from the bigram list that starts at fromPos to toPos and advance these
|
||||||
|
// positions after bigram lists. This method skips invalid bigram entries.
|
||||||
|
bool copyAllBigrams(int *const fromPos, int *const toPos);
|
||||||
|
|
||||||
|
bool addBigramEntry(const int bigramPos, const int probability, int *const pos);
|
||||||
|
|
||||||
|
// Return if targetBigramPos is found or not.
|
||||||
|
bool removeBigram(const int bigramListPos, const int targetBigramPos);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
|
||||||
|
|
||||||
const BufferWithExtendableBuffer *const mBuffer;
|
BufferWithExtendableBuffer *const mBuffer;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
|
#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
|
||||||
|
|
Loading…
Reference in a new issue