Merge "Groundwork for implementing GC."

This commit is contained in:
Keisuke Kuroyanagi 2013-09-19 06:51:30 +00:00 committed by Android (Google) Code Review
commit 4c5e66341d
6 changed files with 147 additions and 75 deletions

View file

@ -98,6 +98,13 @@ public:
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
*outOffsetFieldSize = 1;
}
// Currently, all newly written bigram position fields are 3 bytes to simplify dictionary
// writing.
// TODO: Remove following 2 lines and optimize memory space.
flags = (flags & (~MASK_ATTRIBUTE_ADDRESS_TYPE)) | FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
*outOffsetFieldSize = 3;
*outBigramFlags = flags;
*outOffset = absOffest;
return true;

View file

@ -54,8 +54,8 @@ void DynamicBigramListPolicy::skipAllBigrams(int *const pos) const {
}
}
bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos,
int *outBigramsCount) {
bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite,
int *const fromPos, int *const toPos, int *const outBigramsCount) const {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
if (usesAdditionalBuffer) {
*fromPos -= mBuffer->getOriginalBufferSize();
@ -86,10 +86,10 @@ bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPo
continue;
}
// Write bigram entry. Target buffer is always the additional buffer.
if (!mBuffer->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */,toPos)) {
if (!bufferToWrite->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */,toPos)) {
return false;
}
if (!mBuffer->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize,
if (!bufferToWrite->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize,
toPos)) {
return false;
}

View file

@ -44,10 +44,11 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
void skipAllBigrams(int *const pos) const;
// Copy bigrams from the bigram list that starts at fromPos to toPos and advance these
// positions after bigram lists. This method skips invalid bigram entries and write the valid
// bigram entry count to outBigramsCount.
bool copyAllBigrams(int *const fromPos, int *const toPos, int *outBigramsCount);
// Copies bigrams from the bigram list that starts at fromPos in mBuffer to toPos in
// bufferToWrite and advances both positions past the bigram lists. This method skips invalid
// bigram entries and writes the number of valid bigram entries to outBigramsCount.
bool copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite, int *const fromPos,
int *const toPos, int *const outBigramsCount) const;
bool addNewBigramEntryToBigramList(const int bigramPos, const int probability, int *const pos);

View file

@ -97,8 +97,8 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
return false;
}
int writingPos = newNodePos;
// Write a new PtNode using original PtNode's info to the tail of the dictionary.
if (!writePtNodeToBufferByCopyingPtNodeInfo(&nodeReader, nodeReader.getParentPos(),
// Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer.
if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &nodeReader, nodeReader.getParentPos(),
mMergedNodeCodePoints, nodeReader.getCodePointCount(), nodeReader.getProbability(),
&writingPos)) {
return false;
@ -143,38 +143,20 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileNam
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */)) {
return;
}
const int tmpFileNameBufSize = strlen(fileName)
+ strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1;
char tmpFileName[tmpFileNameBufSize];
snprintf(tmpFileName, tmpFileNameBufSize, "%s%s", fileName,
TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
FILE *const file = fopen(tmpFileName, "wb");
if (!file) {
flushAllToFile(fileName, &headerBuffer, mBuffer);
}
void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
const char *const fileName, const HeaderPolicy *const headerPolicy) {
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */)) {
return;
}
// Write header.
if (fwrite(headerBuffer.getBuffer(true /* usesAdditionalBuffer */),
headerBuffer.getTailPosition(), 1, file) < 1) {
fclose(file);
remove(tmpFileName);
BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
if (!runGC(rootPtNodeArrayPos, &newDictBuffer)) {
return;
}
// Write data in original buffer.
if (fwrite(mBuffer->getBuffer(false /* usesAdditionalBuffer */),
mBuffer->getOriginalBufferSize(), 1, file) < 1) {
fclose(file);
remove(tmpFileName);
return;
}
// Write data in additional buffer.
if (fwrite(mBuffer->getBuffer(true /* usesAdditionalBuffer */),
mBuffer->getTailPosition() - mBuffer->getOriginalBufferSize(), 1, file) < 1) {
fclose(file);
remove(tmpFileName);
return;
}
fclose(file);
rename(tmpFileName, fileName);
flushAllToFile(fileName, &headerBuffer, &newDictBuffer);
}
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
@ -232,7 +214,8 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
}
// Write new PtNode at writingPos.
bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const bool isBlacklisted,
bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(
BufferWithExtendableBuffer *const bufferToWrite, const bool isBlacklisted,
const bool isNotAWord, const int parentPos, const int *const codePoints,
const int codePointCount, const int probability, const int childrenPos,
const int originalBigramListPos, const int originalShortcutListPos,
@ -240,38 +223,39 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
const int nodePos = *writingPos;
// Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
// PtNode writing.
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, 0 /* nodeFlags */,
writingPos)) {
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite,
0 /* nodeFlags */, writingPos)) {
return false;
}
// Calculate a parent offset and write the offset.
const int parentOffset = (parentPos != NOT_A_DICT_POS) ? parentPos - nodePos : NOT_A_DICT_POS;
if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(mBuffer,
if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(bufferToWrite,
parentOffset, writingPos)) {
return false;
}
// Write code points
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer, codePoints,
codePointCount, writingPos)) {
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(bufferToWrite,
codePoints, codePointCount, writingPos)) {
return false;
}
// Write probability when the probability is a valid probability, which means this node is
// terminal.
if (probability != NOT_A_PROBABILITY) {
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(bufferToWrite,
probability, writingPos)) {
return false;
}
}
// Write children position
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(bufferToWrite,
childrenPos, writingPos)) {
return false;
}
// Copy shortcut list when the originalShortcutListPos is valid dictionary position.
if (originalShortcutListPos != NOT_A_DICT_POS) {
int fromPos = originalShortcutListPos;
if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(&fromPos, writingPos)) {
if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(bufferToWrite, &fromPos,
writingPos)) {
return false;
}
}
@ -279,7 +263,7 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
int bigramCount = 0;
if (originalBigramListPos != NOT_A_DICT_POS) {
int fromPos = originalBigramListPos;
if (!mBigramPolicy->copyAllBigrams(&fromPos, writingPos, &bigramCount)) {
if (!mBigramPolicy->copyAllBigrams(bufferToWrite, &fromPos, writingPos, &bigramCount)) {
return false;
}
}
@ -291,27 +275,29 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
bigramCount > 0 /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
CHILDREN_POSITION_FIELD_SIZE);
int flagsFieldPos = nodePos;
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite, nodeFlags,
&flagsFieldPos)) {
return false;
}
return true;
}
bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer(const int parentPos,
bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer(
BufferWithExtendableBuffer *const bufferToWrite, const int parentPos,
const int *const codePoints, const int codePointCount, const int probability,
int *const writingPos) {
return writePtNodeWithFullInfoToBuffer(false /* isBlacklisted */, false /* isNotAWord */,
parentPos, codePoints, codePointCount, probability,
return writePtNodeWithFullInfoToBuffer(bufferToWrite, false /* isBlacklisted */,
false /* isNotAWord */, parentPos, codePoints, codePointCount, probability,
NOT_A_DICT_POS /* childrenPos */, NOT_A_DICT_POS /* originalBigramsPos */,
NOT_A_DICT_POS /* originalShortcutPos */, writingPos);
}
bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo(
BufferWithExtendableBuffer *const bufferToWrite,
const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
const int *const codePoints, const int codePointCount, const int probability,
int *const writingPos) {
return writePtNodeWithFullInfoToBuffer(originalNode->isBlacklisted(),
return writePtNodeWithFullInfoToBuffer(bufferToWrite, originalNode->isBlacklisted(),
originalNode->isNotAWord(), parentPos, codePoints, codePointCount, probability,
originalNode->getChildrenPos(), originalNode->getBigramsPos(),
originalNode->getShortcutPos(), writingPos);
@ -345,8 +331,9 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) {
return false;
}
if (!writePtNodeToBufferByCopyingPtNodeInfo(originalPtNode, originalPtNode->getParentPos(),
codePoints, originalPtNode->getCodePointCount(), probability, &movedPos)) {
if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNode,
originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(),
probability, &movedPos)) {
return false;
}
}
@ -374,8 +361,8 @@ bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode(
1 /* arraySize */, &writingPos)) {
return false;
}
if (!writePtNodeToBuffer(parentPtNodePos, nodeCodePoints, nodeCodePointCount, probability,
&writingPos)) {
if (!writePtNodeToBuffer(mBuffer, parentPtNodePos, nodeCodePoints, nodeCodePointCount,
probability, &writingPos)) {
return false;
}
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
@ -404,8 +391,9 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
// Write the 1st part of the reallocating node. The children position will be updated later
// with actual children position.
const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
if (!writePtNodeToBuffer(reallocatingPtNode->getParentPos(), reallocatingPtNodeCodePoints,
overlappingCodePointCount, newProbability, &writingPos)) {
if (!writePtNodeToBuffer(mBuffer, reallocatingPtNode->getParentPos(),
reallocatingPtNodeCodePoints, overlappingCodePointCount, newProbability,
&writingPos)) {
return false;
}
const int actualChildrenPos = writingPos;
@ -417,14 +405,15 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
}
// Write the 2nd part of the reallocating node.
const int secondPartOfReallocatedPtNodePos = writingPos;
if (!writePtNodeToBufferByCopyingPtNodeInfo(reallocatingPtNode, firstPartOfReallocatedPtNodePos,
if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNode,
firstPartOfReallocatedPtNodePos,
reallocatingPtNodeCodePoints + overlappingCodePointCount,
reallocatingPtNode->getCodePointCount() - overlappingCodePointCount,
reallocatingPtNode->getProbability(), &writingPos)) {
return false;
}
if (addsExtraChild) {
if (!writePtNodeToBuffer(firstPartOfReallocatedPtNodePos,
if (!writePtNodeToBuffer(mBuffer, firstPartOfReallocatedPtNodePos,
newNodeCodePoints + overlappingCodePointCount,
newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode,
&writingPos)) {
@ -452,4 +441,64 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
return true;
}
// Writes the dictionary header followed by the dictionary body to the file named fileName.
//
// The content is first written to a temporary file whose name is fileName with
// TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE appended. The temporary file is renamed to fileName
// only after both buffers have been written and the file has been closed successfully. On any
// failure the temporary file is removed, the failure is logged and the method returns without
// renaming anything.
// TODO: Create a struct which contains header, body and etc... and use here as an argument.
void DynamicPatriciaTrieWritingHelper::flushAllToFile(const char *const fileName,
        BufferWithExtendableBuffer *const dictHeader,
        BufferWithExtendableBuffer *const dictBody) const {
    const int tmpFileNameBufSize = strlen(fileName)
            + strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1 /* terminator */;
    // Name of a temporary file used for writing that is a connected string of original name and
    // TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE.
    char tmpFileName[tmpFileNameBufSize];
    snprintf(tmpFileName, tmpFileNameBufSize, "%s%s", fileName,
            TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
    FILE *const file = fopen(tmpFileName, "wb");
    if (!file) {
        AKLOGI("Dictionary file %s cannnot be opened.", tmpFileName);
        ASSERT(false);
        return;
    }
    // Write the dictionary header. writeBufferToFilePointer() closes the file by itself when it
    // fails, so only the temporary file has to be cleaned up here.
    if (!writeBufferToFilePointer(file, dictHeader)) {
        remove(tmpFileName);
        AKLOGI("Dictionary header cannnot be written. size: %d", dictHeader->getTailPosition());
        ASSERT(false);
        return;
    }
    // Write the dictionary body.
    if (!writeBufferToFilePointer(file, dictBody)) {
        remove(tmpFileName);
        AKLOGI("Dictionary body cannnot be written. size: %d", dictBody->getTailPosition());
        ASSERT(false);
        return;
    }
    // fclose() flushes the data still buffered by stdio, so a failure here means the temporary
    // file may be incomplete and must not replace the existing dictionary file.
    if (fclose(file) != 0) {
        remove(tmpFileName);
        AKLOGI("Dictionary file %s cannot be closed.", tmpFileName);
        ASSERT(false);
        return;
    }
    // Replace the dictionary file. Do not leave the temporary file behind when this fails.
    if (rename(tmpFileName, fileName) != 0) {
        remove(tmpFileName);
        AKLOGI("Dictionary file %s cannot be renamed to %s.", tmpFileName, fileName);
        ASSERT(false);
        return;
    }
}
// Writes the content of buffer to file: first the original buffer part, then the additional
// buffer part. A part whose size is not positive is skipped.
//
// Returns true when every attempted write succeeded; file is left open in that case. When a
// write fails, file is closed here and false is returned, so the caller must not use or close
// file afterwards.
bool DynamicPatriciaTrieWritingHelper::writeBufferToFilePointer(FILE *const file,
        const BufferWithExtendableBuffer *const buffer) const {
    // Original buffer part.
    const int originalPartSize = buffer->getOriginalBufferSize();
    if (originalPartSize > 0) {
        if (fwrite(buffer->getBuffer(false /* usesAdditionalBuffer */), originalPartSize,
                1 /* count */, file) < 1) {
            fclose(file);
            return false;
        }
    }
    // Additional buffer part, i.e. everything between the original buffer and the tail.
    const int additionalPartSize =
            buffer->getTailPosition() - buffer->getOriginalBufferSize();
    if (additionalPartSize > 0) {
        if (fwrite(buffer->getBuffer(true /* usesAdditionalBuffer */), additionalPartSize,
                1 /* count */, file) < 1) {
            fclose(file);
            return false;
        }
    }
    return true;
}
// Placeholder for the garbage collection step of writeToDictFileWithGC().
//
// Not implemented yet: neither rootPtNodeArrayPos nor bufferToWrite is touched and the method
// always returns false. writeToDictFileWithGC() returns without flushing anything to the file
// when this method returns false.
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
BufferWithExtendableBuffer *const bufferToWrite) {
// TODO: Implement.
return false;
}
} // namespace latinime

View file

@ -17,6 +17,7 @@
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
#define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
#include <cstdio>
#include <stdint.h>
#include "defines.h"
@ -51,7 +52,8 @@ class DynamicPatriciaTrieWritingHelper {
void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy);
void writeToDictFileWithGC(const char *const fileName, const HeaderPolicy *const headerPolicy);
void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName,
const HeaderPolicy *const headerPolicy);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
@ -66,15 +68,17 @@ class DynamicPatriciaTrieWritingHelper {
bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate,
const int movedPos, const int bigramLinkedNodePos);
bool writePtNodeWithFullInfoToBuffer(const bool isBlacklisted, const bool isNotAWord,
bool writePtNodeWithFullInfoToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
const bool isBlacklisted, const bool isNotAWord,
const int parentPos, const int *const codePoints, const int codePointCount,
const int probability, const int childrenPos, const int originalBigramListPos,
const int originalShortcutListPos, int *const writingPos);
bool writePtNodeToBuffer(const int parentPos, const int *const codePoints,
const int codePointCount, const int probability, int *const writingPos);
bool writePtNodeToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
const int parentPos, const int *const codePoints, const int codePointCount,
const int probability, int *const writingPos);
bool writePtNodeToBufferByCopyingPtNodeInfo(
bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite,
const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
const int *const codePoints, const int codePointCount, const int probability,
int *const writingPos);
@ -97,6 +101,15 @@ class DynamicPatriciaTrieWritingHelper {
const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount,
const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
const int newNodeCodePointCount);
void flushAllToFile(const char *const fileName,
BufferWithExtendableBuffer *const dictHeader,
BufferWithExtendableBuffer *const dictBody) const;
bool writeBufferToFilePointer(FILE *const file,
const BufferWithExtendableBuffer *const buffer) const;
bool runGC(const int rootPtNodeArrayPos, BufferWithExtendableBuffer *const bufferToWrite);
};
} // namespace latinime
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */

View file

@ -31,7 +31,7 @@ namespace latinime {
*/
class DynamicShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
public:
explicit DynamicShortcutListPolicy(BufferWithExtendableBuffer *const buffer)
explicit DynamicShortcutListPolicy(const BufferWithExtendableBuffer *const buffer)
: mBuffer(buffer) {}
~DynamicShortcutListPolicy() {}
@ -82,18 +82,20 @@ class DynamicShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
}
}
// Copy shortcuts from the shortcut list that starts at fromPos to toPos and advance these
// positions after the shortcut lists. This returns whether the copy was succeeded or not.
bool copyAllShortcutsAndReturnIfSucceededOrNot(int *const fromPos, int *const toPos) {
// Copies shortcuts from the shortcut list that starts at fromPos in mBuffer to toPos in
// bufferToWrite and advances both positions past the shortcut lists. This returns whether
// the copy succeeded.
bool copyAllShortcutsAndReturnIfSucceededOrNot(BufferWithExtendableBuffer *const bufferToWrite,
int *const fromPos, int *const toPos) const {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
*fromPos -= mBuffer->getOriginalBufferSize();
}
const int shortcutListSize = ShortcutListReadingUtils
::getShortcutListSizeAndForwardPointer(buffer, fromPos);
::getShortcutListSizeAndForwardPointer(mBuffer->getBuffer(usesAdditionalBuffer),
fromPos);
// Copy shortcut list size.
if (!mBuffer->writeUintAndAdvancePosition(
if (!bufferToWrite->writeUintAndAdvancePosition(
shortcutListSize + ShortcutListReadingUtils::getShortcutListSizeFieldSize(),
ShortcutListReadingUtils::getShortcutListSizeFieldSize(), toPos)) {
return false;
@ -102,7 +104,7 @@ class DynamicShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
for (int i = 0; i < shortcutListSize; ++i) {
const uint8_t data = ByteArrayUtils::readUint8AndAdvancePosition(
mBuffer->getBuffer(usesAdditionalBuffer), fromPos);
if (!mBuffer->writeUintAndAdvancePosition(data, 1 /* size */, toPos)) {
if (!bufferToWrite->writeUintAndAdvancePosition(data, 1 /* size */, toPos)) {
return false;
}
}
@ -115,7 +117,7 @@ class DynamicShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicShortcutListPolicy);
BufferWithExtendableBuffer *const mBuffer;
const BufferWithExtendableBuffer *const mBuffer;
};
} // namespace latinime
#endif // LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H