Merge "Allow large memory space for GC."

main
Keisuke Kuroyanagi 2013-09-24 10:24:28 +00:00 committed by Android (Google) Code Review
commit 3e6b68bc0e
6 changed files with 79 additions and 17 deletions

View File

@ -268,8 +268,8 @@ bool DynamicPatriciaTriePolicy::needsToRunGC() const {
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary."); AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
return false; return false;
} }
// TODO: Implement. // TODO: Implement more properly.
return false; return mBufferWithExtendableBuffer.isNearSizeLimit();
} }
} // namespace latinime } // namespace latinime

View File

@ -35,6 +35,8 @@ namespace latinime {
const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3; const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
const char *const DynamicPatriciaTrieWritingHelper::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = const char *const DynamicPatriciaTrieWritingHelper::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
".tmp"; ".tmp";
// TODO: Make MAX_DICTIONARY_SIZE 8MB.
const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024;
bool DynamicPatriciaTrieWritingHelper::addUnigramWord( bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
DynamicPatriciaTrieReadingHelper *const readingHelper, DynamicPatriciaTrieReadingHelper *const readingHelper,
@ -154,7 +156,8 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */)) { if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */)) {
return; return;
} }
BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */,
MAX_DICTIONARY_SIZE);
if (!runGC(rootPtNodeArrayPos, &newDictBuffer)) { if (!runGC(rootPtNodeArrayPos, &newDictBuffer)) {
return; return;
} }

View File

@ -86,6 +86,7 @@ class DynamicPatriciaTrieWritingHelper {
static const int CHILDREN_POSITION_FIELD_SIZE; static const int CHILDREN_POSITION_FIELD_SIZE;
static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE; static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE;
static const size_t MAX_DICTIONARY_SIZE;
BufferWithExtendableBuffer *const mBuffer; BufferWithExtendableBuffer *const mBuffer;
DynamicBigramListPolicy *const mBigramPolicy; DynamicBigramListPolicy *const mBigramPolicy;

View File

@ -18,9 +18,10 @@
namespace latinime { namespace latinime {
const size_t BufferWithExtendableBuffer::INITIAL_ADDITIONAL_BUFFER_SIZE = 16 * 1024;
const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024; const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 16 * 1024; const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 90;
// TODO: Allocate a larger amount of memory, corresponding to the current vector size.
const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024;
bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size, bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size,
int *const pos) { int *const pos) {
@ -64,6 +65,16 @@ bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *co
return true; return true;
} }
// Grows the additional buffer by EXTEND_ADDITIONAL_BUFFER_SIZE_STEP bytes.
// Returns false and leaves the buffer untouched when the grown size would exceed
// mMaxAdditionalBufferSize; returns true once the buffer has been resized.
bool BufferWithExtendableBuffer::extendBuffer() {
    const size_t extendedSize = mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
    const bool fitsWithinLimit = extendedSize <= mMaxAdditionalBufferSize;
    if (fitsWithinLimit) {
        mAdditionalBuffer.resize(extendedSize);
    }
    return fitsWithinLimit;
}
bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int size) { bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int size) {
if (isInAdditionalBuffer(pos)) { if (isInAdditionalBuffer(pos)) {
const int tailPosition = getTailPosition(); const int tailPosition = getTailPosition();

View File

@ -32,9 +32,11 @@ namespace latinime {
// raw pointer but provides several methods that handle boundary checking for writing data. // raw pointer but provides several methods that handle boundary checking for writing data.
class BufferWithExtendableBuffer { class BufferWithExtendableBuffer {
public: public:
BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize) BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize,
const int maxAdditionalBufferSize = MAX_ADDITIONAL_BUFFER_SIZE)
: mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize), : mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize),
mAdditionalBuffer(INITIAL_ADDITIONAL_BUFFER_SIZE), mUsedAdditionalBufferSize(0) {} mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0),
mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
AK_FORCE_INLINE int getTailPosition() const { AK_FORCE_INLINE int getTailPosition() const {
return mOriginalBufferSize + mUsedAdditionalBufferSize; return mOriginalBufferSize + mUsedAdditionalBufferSize;
@ -61,6 +63,11 @@ class BufferWithExtendableBuffer {
return mOriginalBufferSize; return mOriginalBufferSize;
} }
// Reports whether the additional buffer's current size has reached at least
// NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE percent of mMaxAdditionalBufferSize.
AK_FORCE_INLINE bool isNearSizeLimit() const {
    const size_t nearLimitSize =
            (mMaxAdditionalBufferSize * NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE) / 100;
    return nearLimitSize <= mAdditionalBuffer.size();
}
/** /**
* For writing. * For writing.
* *
@ -75,28 +82,22 @@ class BufferWithExtendableBuffer {
private: private:
DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer); DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer);
static const size_t INITIAL_ADDITIONAL_BUFFER_SIZE;
static const size_t MAX_ADDITIONAL_BUFFER_SIZE; static const size_t MAX_ADDITIONAL_BUFFER_SIZE;
static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE;
static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP; static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
uint8_t *const mOriginalBuffer; uint8_t *const mOriginalBuffer;
const int mOriginalBufferSize; const int mOriginalBufferSize;
std::vector<uint8_t> mAdditionalBuffer; std::vector<uint8_t> mAdditionalBuffer;
int mUsedAdditionalBufferSize; int mUsedAdditionalBufferSize;
const size_t mMaxAdditionalBufferSize;
// Return if the buffer is successfully extended or not. // Return if the buffer is successfully extended or not.
AK_FORCE_INLINE bool extendBuffer() { bool extendBuffer();
if (mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP
> MAX_ADDITIONAL_BUFFER_SIZE) {
return false;
}
mAdditionalBuffer.resize(mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP);
return true;
}
// Returns if it is possible to write size-bytes from pos. When pos is at the tail position of // Returns if it is possible to write size-bytes from pos. When pos is at the tail position of
// the additional buffer, try extending the buffer. // the additional buffer, try extending the buffer.
AK_FORCE_INLINE bool checkAndPrepareWriting(const int pos, const int size); bool checkAndPrepareWriting(const int pos, const int size);
}; };
} }
#endif /* LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H */ #endif /* LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H */

View File

@ -604,4 +604,50 @@ public class BinaryDictionaryTests extends AndroidTestCase {
dictFile.delete(); dictFile.delete();
} }
/**
 * Repeatedly opens the dictionary file, adds randomly generated unigrams until the
 * dictionary reports that GC is needed, checks the stored frequency of every word
 * added so far, and then flushes with GC before closing.
 */
public void testAddManyUnigramsAndFlushWithGC() {
    final int flushWithGCIterationCount = 3;
    final int codePointSetSize = 50;
    final int seed = 22360679;
    final Random random = new Random(seed);

    File dictFile = null;
    try {
        dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
    } catch (IOException e) {
        fail("IOException while writing an initial dictionary : " + e);
    } catch (UnsupportedFormatException e) {
        fail("UnsupportedFormatException while writing an initial dictionary : " + e);
    }

    // Words and expected probabilities accumulate across iterations, so each pass also
    // re-checks the entries written before the previous flushWithGC().
    final ArrayList<String> addedWords = new ArrayList<String>();
    final HashMap<String, Integer> expectedProbabilities = new HashMap<String, Integer>();
    final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);

    for (int iteration = 0; iteration < flushWithGCIterationCount; iteration++) {
        final BinaryDictionary binaryDictionary = new BinaryDictionary(
                dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(),
                true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE,
                true /* isUpdatable */);

        // Keep adding words until the dictionary asks for GC.
        while (!binaryDictionary.needsToRunGC()) {
            final String word = CodePointUtils.generateWord(random, codePointSet);
            addedWords.add(word);
            final int unigramProbability = random.nextInt(0xFF);
            expectedProbabilities.put(word, unigramProbability);
            binaryDictionary.addUnigramWord(word, unigramProbability);
        }

        for (final String word : addedWords) {
            final int unigramProbability = expectedProbabilities.get(word);
            assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word));
        }

        binaryDictionary.flushWithGC();
        binaryDictionary.close();
    }

    dictFile.delete();
}
} }