Merge "Implement addBigramWords() for DynamicPatriciaTrie."
commit
8bdb2707a8
|
@ -62,6 +62,11 @@ public:
|
||||||
return flags | FLAG_ATTRIBUTE_HAS_NEXT;
|
return flags | FLAG_ATTRIBUTE_HAS_NEXT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static AK_FORCE_INLINE BigramFlags setProbabilityInFlags(const BigramFlags flags,
|
||||||
|
const int probability) {
|
||||||
|
return (flags & (~MASK_ATTRIBUTE_PROBABILITY)) | (probability & MASK_ATTRIBUTE_PROBABILITY);
|
||||||
|
}
|
||||||
|
|
||||||
// Returns true if the bigram entry is valid and puts entry values into out*.
|
// Returns true if the bigram entry is valid and puts entry values into out*.
|
||||||
static AK_FORCE_INLINE bool createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
|
static AK_FORCE_INLINE bool createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
|
||||||
const int entryPos, const int targetPos, const int probability, const bool hasNext,
|
const int entryPos, const int targetPos, const int probability, const bool hasNext,
|
||||||
|
|
|
@ -98,8 +98,8 @@ bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPo
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicBigramListPolicy::addBigramEntry(const int bigramPos, const int probability,
|
bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramPos,
|
||||||
int *const pos) {
|
const int probability, int *const pos) {
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
*pos -= mBuffer->getOriginalBufferSize();
|
*pos -= mBuffer->getOriginalBufferSize();
|
||||||
|
@ -113,7 +113,17 @@ bool DynamicBigramListPolicy::addBigramEntry(const int bigramPos, const int prob
|
||||||
// The buffer address can be changed after calling buffer writing methods.
|
// The buffer address can be changed after calling buffer writing methods.
|
||||||
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos);
|
flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos);
|
||||||
BigramListReadWriteUtils::getBigramAddressAndForwardPointer(buffer, flags, pos);
|
int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
|
||||||
|
buffer, flags, pos);
|
||||||
|
if (usesAdditionalBuffer && originalBigramPos != NOT_A_VALID_WORD_POS) {
|
||||||
|
originalBigramPos += mBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
|
if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramPos) {
|
||||||
|
// Update this bigram entry.
|
||||||
|
const BigramListReadWriteUtils::BigramFlags updatedFlags =
|
||||||
|
BigramListReadWriteUtils::setProbabilityInFlags(flags, probability);
|
||||||
|
return mBuffer->writeUintAndAdvancePosition(updatedFlags, 1 /* size */, &entryPos);
|
||||||
|
}
|
||||||
if (BigramListReadWriteUtils::hasNext(flags)) {
|
if (BigramListReadWriteUtils::hasNext(flags)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -124,33 +134,35 @@ bool DynamicBigramListPolicy::addBigramEntry(const int bigramPos, const int prob
|
||||||
if (!mBuffer->writeUintAndAdvancePosition(updatedFlags, 1 /* size */, &entryPos)) {
|
if (!mBuffer->writeUintAndAdvancePosition(updatedFlags, 1 /* size */, &entryPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Then, add a new entry after the last entry.
|
|
||||||
BigramListReadWriteUtils::BigramFlags newBigramFlags;
|
|
||||||
uint32_t newBigramOffset;
|
|
||||||
int newBigramOffsetFieldSize;
|
|
||||||
if(!BigramListReadWriteUtils::createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
|
|
||||||
*pos, bigramPos, BigramListReadWriteUtils::getProbabilityFromFlags(flags),
|
|
||||||
BigramListReadWriteUtils::hasNext(flags), &newBigramFlags, &newBigramOffset,
|
|
||||||
&newBigramOffsetFieldSize)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
int newEntryPos = *pos;
|
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
newEntryPos += mBuffer->getOriginalBufferSize();
|
*pos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
|
||||||
// Write bigram flags.
|
|
||||||
if (!mBuffer->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */,
|
|
||||||
&newEntryPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Write bigram position offset.
|
|
||||||
if (!mBuffer->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize,
|
|
||||||
&newEntryPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
// Then, add a new entry after the last entry.
|
||||||
|
return writeNewBigramEntry(bigramPos, probability, pos);
|
||||||
} while(BigramListReadWriteUtils::hasNext(flags));
|
} while(BigramListReadWriteUtils::hasNext(flags));
|
||||||
if (usesAdditionalBuffer) {
|
// We return directly from the while loop.
|
||||||
*pos += mBuffer->getOriginalBufferSize();
|
ASSERT(false);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramPos, const int probability,
|
||||||
|
int *const writingPos) {
|
||||||
|
BigramListReadWriteUtils::BigramFlags newBigramFlags;
|
||||||
|
uint32_t newBigramOffset;
|
||||||
|
int newBigramOffsetFieldSize;
|
||||||
|
if(!BigramListReadWriteUtils::createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
|
||||||
|
*writingPos, bigramPos, probability, false /* hasNext */, &newBigramFlags,
|
||||||
|
&newBigramOffset, &newBigramOffsetFieldSize)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Write bigram flags.
|
||||||
|
if (!mBuffer->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */, writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Write bigram position offset.
|
||||||
|
if (!mBuffer->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize,
|
||||||
|
writingPos)) {
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -48,7 +48,10 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
// positions after bigram lists. This method skips invalid bigram entries.
|
// positions after bigram lists. This method skips invalid bigram entries.
|
||||||
bool copyAllBigrams(int *const fromPos, int *const toPos);
|
bool copyAllBigrams(int *const fromPos, int *const toPos);
|
||||||
|
|
||||||
bool addBigramEntry(const int bigramPos, const int probability, int *const pos);
|
bool addNewBigramEntryToBigramList(const int bigramPos, const int probability, int *const pos);
|
||||||
|
|
||||||
|
bool writeNewBigramEntry(const int bigramPos, const int probability,
|
||||||
|
int *const writingPos);
|
||||||
|
|
||||||
// Returns whether targetBigramPos was found.
|
// Returns whether targetBigramPos was found.
|
||||||
bool removeBigram(const int bigramListPos, const int targetBigramPos);
|
bool removeBigram(const int bigramListPos, const int targetBigramPos);
|
||||||
|
|
|
@ -69,10 +69,12 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c
|
||||||
if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) {
|
if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) {
|
||||||
mChildrenPos += mBuffer->getOriginalBufferSize();
|
mChildrenPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
if (mSiblingPos == NOT_A_DICT_POS && DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) {
|
if (mSiblingPos == NOT_A_DICT_POS) {
|
||||||
mBigramLinkedNodePos = mChildrenPos;
|
if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) {
|
||||||
} else {
|
mBigramLinkedNodePos = mChildrenPos;
|
||||||
mBigramLinkedNodePos = NOT_A_DICT_POS;
|
} else {
|
||||||
|
mBigramLinkedNodePos = NOT_A_DICT_POS;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
pos += mBuffer->getOriginalBufferSize();
|
pos += mBuffer->getOriginalBufferSize();
|
||||||
|
|
|
@ -26,6 +26,8 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
|
bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
|
||||||
DynamicPatriciaTrieReadingHelper *const readingHelper,
|
DynamicPatriciaTrieReadingHelper *const readingHelper,
|
||||||
const int *const wordCodePoints, const int codePointCount, const int probability) {
|
const int *const wordCodePoints, const int codePointCount, const int probability) {
|
||||||
|
@ -79,13 +81,44 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
|
bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
|
||||||
const int probability) {
|
const int probability) {
|
||||||
|
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||||
nodeReader.fetchNodeInfoFromBuffer(word0Pos);
|
nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH,
|
||||||
if (nodeReader.isDeleted()) {
|
mMergedNodeCodePoints);
|
||||||
|
// Move node to add bigram entry.
|
||||||
|
const int newNodePos = mBuffer->getTailPosition();
|
||||||
|
if (!markNodeAsMovedAndSetPosition(&nodeReader, newNodePos, newNodePos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// TODO: Implement.
|
int writingPos = newNodePos;
|
||||||
return false;
|
// Write a new PtNode using original PtNode's info to the tail of the dictionary.
|
||||||
|
if (!writePtNodeToBufferByCopyingPtNodeInfo(&nodeReader, nodeReader.getParentPos(),
|
||||||
|
mMergedNodeCodePoints, nodeReader.getCodePointCount(), nodeReader.getProbability(),
|
||||||
|
&writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
nodeReader.fetchNodeInfoFromBuffer(newNodePos);
|
||||||
|
if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) {
|
||||||
|
// Insert a new bigram entry into the existing bigram list.
|
||||||
|
int bigramListPos = nodeReader.getBigramsPos();
|
||||||
|
return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos);
|
||||||
|
} else {
|
||||||
|
// The PtNode doesn't have a bigram list.
|
||||||
|
// First, write a bigram entry at the tail position of the PtNode.
|
||||||
|
if (!mBigramPolicy->writeNewBigramEntry(word1Pos, probability, &writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Then, mark the PtNode as having a bigram list in the flags.
|
||||||
|
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
||||||
|
PatriciaTrieReadingUtils::createAndGetFlags(nodeReader.isBlacklisted(),
|
||||||
|
nodeReader.isNotAWord(), nodeReader.getProbability() != NOT_A_PROBABILITY,
|
||||||
|
nodeReader.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */,
|
||||||
|
nodeReader.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE);
|
||||||
|
writingPos = newNodePos;
|
||||||
|
// Write updated flags into the moved PtNode's flags field.
|
||||||
|
return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
|
||||||
|
&writingPos);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove a bigram relation from word0Pos to word1Pos.
|
// Remove a bigram relation from word0Pos to word1Pos.
|
||||||
|
|
|
@ -49,6 +49,8 @@ class DynamicPatriciaTrieWritingHelper {
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
|
||||||
|
|
||||||
|
static const int CHILDREN_POSITION_FIELD_SIZE;
|
||||||
|
|
||||||
BufferWithExtendableBuffer *const mBuffer;
|
BufferWithExtendableBuffer *const mBuffer;
|
||||||
DynamicBigramListPolicy *const mBigramPolicy;
|
DynamicBigramListPolicy *const mBigramPolicy;
|
||||||
DynamicShortcutListPolicy *const mShortcutPolicy;
|
DynamicShortcutListPolicy *const mShortcutPolicy;
|
||||||
|
|
|
@ -122,4 +122,41 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
assertEquals(probability, binaryDictionary.getFrequency("a"));
|
assertEquals(probability, binaryDictionary.getFrequency("a"));
|
||||||
assertEquals(updatedProbability, binaryDictionary.getFrequency("aaa"));
|
assertEquals(updatedProbability, binaryDictionary.getFrequency("aaa"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testAddBigramWords() {
|
||||||
|
// TODO: Add a test to check the frequency of the bigram score which uses current value
|
||||||
|
// calculated in the native code
|
||||||
|
File dictFile = null;
|
||||||
|
try {
|
||||||
|
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
|
||||||
|
} catch (IOException e) {
|
||||||
|
fail("IOException while writing an initial dictionary : " + e);
|
||||||
|
} catch (UnsupportedFormatException e) {
|
||||||
|
fail("UnsupportedFormatException while writing an initial dictionary : " + e);
|
||||||
|
}
|
||||||
|
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||||
|
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||||
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||||
|
|
||||||
|
final int unigramProbability = 100;
|
||||||
|
final int bigramProbability = 10;
|
||||||
|
binaryDictionary.addUnigramWord("aaa", unigramProbability);
|
||||||
|
binaryDictionary.addUnigramWord("abb", unigramProbability);
|
||||||
|
binaryDictionary.addUnigramWord("bcc", unigramProbability);
|
||||||
|
binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
|
||||||
|
binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability);
|
||||||
|
binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
|
||||||
|
binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);
|
||||||
|
|
||||||
|
assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
|
||||||
|
assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc"));
|
||||||
|
assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa"));
|
||||||
|
assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc"));
|
||||||
|
|
||||||
|
assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa"));
|
||||||
|
assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc"));
|
||||||
|
assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa"));
|
||||||
|
|
||||||
|
dictFile.delete();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue