Merge "Allow large memory space for GC."
commit
3e6b68bc0e
|
@ -268,8 +268,8 @@ bool DynamicPatriciaTriePolicy::needsToRunGC() const {
|
||||||
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
|
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// TODO: Implement.
|
// TODO: Implement more properly.
|
||||||
return false;
|
return mBufferWithExtendableBuffer.isNearSizeLimit();
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -35,6 +35,8 @@ namespace latinime {
|
||||||
const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
|
const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
|
||||||
const char *const DynamicPatriciaTrieWritingHelper::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
|
const char *const DynamicPatriciaTrieWritingHelper::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
|
||||||
".tmp";
|
".tmp";
|
||||||
|
// TODO: Make MAX_DICTIONARY_SIZE 8MB.
|
||||||
|
const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024;
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
|
bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
|
||||||
DynamicPatriciaTrieReadingHelper *const readingHelper,
|
DynamicPatriciaTrieReadingHelper *const readingHelper,
|
||||||
|
@ -154,7 +156,8 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod
|
||||||
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */)) {
|
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
|
BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */,
|
||||||
|
MAX_DICTIONARY_SIZE);
|
||||||
if (!runGC(rootPtNodeArrayPos, &newDictBuffer)) {
|
if (!runGC(rootPtNodeArrayPos, &newDictBuffer)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -86,6 +86,7 @@ class DynamicPatriciaTrieWritingHelper {
|
||||||
|
|
||||||
static const int CHILDREN_POSITION_FIELD_SIZE;
|
static const int CHILDREN_POSITION_FIELD_SIZE;
|
||||||
static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE;
|
static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE;
|
||||||
|
static const size_t MAX_DICTIONARY_SIZE;
|
||||||
|
|
||||||
BufferWithExtendableBuffer *const mBuffer;
|
BufferWithExtendableBuffer *const mBuffer;
|
||||||
DynamicBigramListPolicy *const mBigramPolicy;
|
DynamicBigramListPolicy *const mBigramPolicy;
|
||||||
|
|
|
@ -18,9 +18,10 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
const size_t BufferWithExtendableBuffer::INITIAL_ADDITIONAL_BUFFER_SIZE = 16 * 1024;
|
|
||||||
const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
|
const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
|
||||||
const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 16 * 1024;
|
const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 90;
|
||||||
|
// TODO: Needs to allocate larger memory corresponding to the current vector size.
|
||||||
|
const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024;
|
||||||
|
|
||||||
bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size,
|
bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size,
|
||||||
int *const pos) {
|
int *const pos) {
|
||||||
|
@ -64,6 +65,16 @@ bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *co
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool BufferWithExtendableBuffer::extendBuffer() {
|
||||||
|
const size_t sizeAfterExtending =
|
||||||
|
mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
|
||||||
|
if (sizeAfterExtending > mMaxAdditionalBufferSize) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
mAdditionalBuffer.resize(mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int size) {
|
bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int size) {
|
||||||
if (isInAdditionalBuffer(pos)) {
|
if (isInAdditionalBuffer(pos)) {
|
||||||
const int tailPosition = getTailPosition();
|
const int tailPosition = getTailPosition();
|
||||||
|
|
|
@ -32,9 +32,11 @@ namespace latinime {
|
||||||
// raw pointer but provides several methods that handle boundary checking for writing data.
|
// raw pointer but provides several methods that handle boundary checking for writing data.
|
||||||
class BufferWithExtendableBuffer {
|
class BufferWithExtendableBuffer {
|
||||||
public:
|
public:
|
||||||
BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize)
|
BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize,
|
||||||
|
const int maxAdditionalBufferSize = MAX_ADDITIONAL_BUFFER_SIZE)
|
||||||
: mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize),
|
: mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize),
|
||||||
mAdditionalBuffer(INITIAL_ADDITIONAL_BUFFER_SIZE), mUsedAdditionalBufferSize(0) {}
|
mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0),
|
||||||
|
mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
|
||||||
|
|
||||||
AK_FORCE_INLINE int getTailPosition() const {
|
AK_FORCE_INLINE int getTailPosition() const {
|
||||||
return mOriginalBufferSize + mUsedAdditionalBufferSize;
|
return mOriginalBufferSize + mUsedAdditionalBufferSize;
|
||||||
|
@ -61,6 +63,11 @@ class BufferWithExtendableBuffer {
|
||||||
return mOriginalBufferSize;
|
return mOriginalBufferSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reports whether the additional buffer's current size has reached at least
// NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE percent of mMaxAdditionalBufferSize.
AK_FORCE_INLINE bool isNearSizeLimit() const {
    const size_t nearLimitSize =
            (mMaxAdditionalBufferSize * NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE) / 100;
    return nearLimitSize <= mAdditionalBuffer.size();
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* For writing.
|
* For writing.
|
||||||
*
|
*
|
||||||
|
@ -75,28 +82,22 @@ class BufferWithExtendableBuffer {
|
||||||
private:
|
private:
|
||||||
DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer);
|
DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer);
|
||||||
|
|
||||||
static const size_t INITIAL_ADDITIONAL_BUFFER_SIZE;
|
|
||||||
static const size_t MAX_ADDITIONAL_BUFFER_SIZE;
|
static const size_t MAX_ADDITIONAL_BUFFER_SIZE;
|
||||||
|
static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE;
|
||||||
static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
|
static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
|
||||||
|
|
||||||
uint8_t *const mOriginalBuffer;
|
uint8_t *const mOriginalBuffer;
|
||||||
const int mOriginalBufferSize;
|
const int mOriginalBufferSize;
|
||||||
std::vector<uint8_t> mAdditionalBuffer;
|
std::vector<uint8_t> mAdditionalBuffer;
|
||||||
int mUsedAdditionalBufferSize;
|
int mUsedAdditionalBufferSize;
|
||||||
|
const size_t mMaxAdditionalBufferSize;
|
||||||
|
|
||||||
// Return if the buffer is successfully extended or not.
|
// Return if the buffer is successfully extended or not.
|
||||||
AK_FORCE_INLINE bool extendBuffer() {
|
bool extendBuffer();
|
||||||
if (mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP
|
|
||||||
> MAX_ADDITIONAL_BUFFER_SIZE) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
mAdditionalBuffer.resize(mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Returns if it is possible to write size-bytes from pos. When pos is at the tail position of
|
// Returns if it is possible to write size-bytes from pos. When pos is at the tail position of
|
||||||
// the additional buffer, try extending the buffer.
|
// the additional buffer, try extending the buffer.
|
||||||
AK_FORCE_INLINE bool checkAndPrepareWriting(const int pos, const int size);
|
bool checkAndPrepareWriting(const int pos, const int size);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
#endif /* LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H */
|
#endif /* LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H */
|
||||||
|
|
|
@ -604,4 +604,50 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
|
|
||||||
dictFile.delete();
|
dictFile.delete();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testAddManyUnigramsAndFlushWithGC() {
|
||||||
|
final int flashWithGCIterationCount = 3;
|
||||||
|
final int codePointSetSize = 50;
|
||||||
|
final int seed = 22360679;
|
||||||
|
|
||||||
|
final Random random = new Random(seed);
|
||||||
|
|
||||||
|
File dictFile = null;
|
||||||
|
try {
|
||||||
|
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
|
||||||
|
} catch (IOException e) {
|
||||||
|
fail("IOException while writing an initial dictionary : " + e);
|
||||||
|
} catch (UnsupportedFormatException e) {
|
||||||
|
fail("UnsupportedFormatException while writing an initial dictionary : " + e);
|
||||||
|
}
|
||||||
|
|
||||||
|
final ArrayList<String> words = new ArrayList<String>();
|
||||||
|
final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
|
||||||
|
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
|
||||||
|
|
||||||
|
BinaryDictionary binaryDictionary;
|
||||||
|
for (int i = 0; i < flashWithGCIterationCount; i++) {
|
||||||
|
binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||||
|
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||||
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||||
|
while(!binaryDictionary.needsToRunGC()) {
|
||||||
|
final String word = CodePointUtils.generateWord(random, codePointSet);
|
||||||
|
words.add(word);
|
||||||
|
final int unigramProbability = random.nextInt(0xFF);
|
||||||
|
unigramProbabilities.put(word, unigramProbability);
|
||||||
|
binaryDictionary.addUnigramWord(word, unigramProbability);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int j = 0; j < words.size(); j++) {
|
||||||
|
final String word = words.get(j);
|
||||||
|
final int unigramProbability = unigramProbabilities.get(word);
|
||||||
|
assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word));
|
||||||
|
}
|
||||||
|
|
||||||
|
binaryDictionary.flushWithGC();
|
||||||
|
binaryDictionary.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
dictFile.delete();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue