am 3e6b68bc: Merge "Allow large memory space for GC."

* commit '3e6b68bc0ef63b7e527976dee00d02770f7d3e5c':
  Allow large memory space for GC.
This commit is contained in:
Keisuke Kuroyanagi 2013-09-24 03:26:37 -07:00 committed by Android Git Automerger
commit 572c1ac5d4
6 changed files with 79 additions and 17 deletions

View file

@ -268,8 +268,8 @@ bool DynamicPatriciaTriePolicy::needsToRunGC() const {
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
return false;
}
// TODO: Implement.
return false;
// TODO: Implement more properly.
return mBufferWithExtendableBuffer.isNearSizeLimit();
}
} // namespace latinime

View file

@ -35,6 +35,8 @@ namespace latinime {
// Value of the children position field size constant (presumably a size in bytes -- TODO confirm
// against the dictionary format definition).
const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
// The suffix string ".tmp"; judging by the name it is appended to the dictionary file path while
// the file is being written -- confirm against the code that writes the file.
const char *const DynamicPatriciaTrieWritingHelper::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
".tmp";
// Limit (2 * 1024 * 1024) handed to BufferWithExtendableBuffer as the maximum additional buffer
// size when writeToDictFileWithGC() creates the buffer for the new dictionary.
// TODO: Make MAX_DICTIONARY_SIZE 8MB.
const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024;
bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
DynamicPatriciaTrieReadingHelper *const readingHelper,
@ -154,7 +156,8 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */)) {
return;
}
BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */,
MAX_DICTIONARY_SIZE);
if (!runGC(rootPtNodeArrayPos, &newDictBuffer)) {
return;
}

View file

@ -86,6 +86,7 @@ class DynamicPatriciaTrieWritingHelper {
static const int CHILDREN_POSITION_FIELD_SIZE;
static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE;
static const size_t MAX_DICTIONARY_SIZE;
BufferWithExtendableBuffer *const mBuffer;
DynamicBigramListPolicy *const mBigramPolicy;

View file

@ -18,9 +18,10 @@
namespace latinime {
const size_t BufferWithExtendableBuffer::INITIAL_ADDITIONAL_BUFFER_SIZE = 16 * 1024;
const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 16 * 1024;
const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 90;
// TODO: Needs to allocate larger memory corresponding to the current vector size.
const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024;
bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size,
int *const pos) {
@ -64,6 +65,16 @@ bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *co
return true;
}
// Grows the additional buffer by EXTEND_ADDITIONAL_BUFFER_SIZE_STEP elements.
// Returns false, leaving the buffer untouched, when the grown size would exceed
// mMaxAdditionalBufferSize; returns true once the buffer has been resized.
bool BufferWithExtendableBuffer::extendBuffer() {
    const size_t extendedSize = mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
    if (extendedSize <= mMaxAdditionalBufferSize) {
        mAdditionalBuffer.resize(extendedSize);
        return true;
    }
    return false;
}
bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int size) {
if (isInAdditionalBuffer(pos)) {
const int tailPosition = getTailPosition();

View file

@ -32,9 +32,11 @@ namespace latinime {
// raw pointer but provides several methods that handle boundary checking for writing data.
class BufferWithExtendableBuffer {
public:
BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize)
BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize,
const int maxAdditionalBufferSize = MAX_ADDITIONAL_BUFFER_SIZE)
: mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize),
mAdditionalBuffer(INITIAL_ADDITIONAL_BUFFER_SIZE), mUsedAdditionalBufferSize(0) {}
mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0),
mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
AK_FORCE_INLINE int getTailPosition() const {
return mOriginalBufferSize + mUsedAdditionalBufferSize;
@ -61,6 +63,11 @@ class BufferWithExtendableBuffer {
return mOriginalBufferSize;
}
// Reports whether the additional buffer's current size (mAdditionalBuffer.size(), not the used
// portion) has reached NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE percent of
// mMaxAdditionalBufferSize.
AK_FORCE_INLINE bool isNearSizeLimit() const {
    const size_t nearLimitSize =
            (mMaxAdditionalBufferSize * NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE) / 100;
    return mAdditionalBuffer.size() >= nearLimitSize;
}
/**
* For writing.
*
@ -75,28 +82,22 @@ class BufferWithExtendableBuffer {
private:
DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer);
static const size_t INITIAL_ADDITIONAL_BUFFER_SIZE;
static const size_t MAX_ADDITIONAL_BUFFER_SIZE;
static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE;
static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
uint8_t *const mOriginalBuffer;
const int mOriginalBufferSize;
std::vector<uint8_t> mAdditionalBuffer;
int mUsedAdditionalBufferSize;
const size_t mMaxAdditionalBufferSize;
// Return if the buffer is successfully extended or not.
AK_FORCE_INLINE bool extendBuffer() {
if (mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP
> MAX_ADDITIONAL_BUFFER_SIZE) {
return false;
}
mAdditionalBuffer.resize(mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP);
return true;
}
bool extendBuffer();
// Returns if it is possible to write size-bytes from pos. When pos is at the tail position of
// the additional buffer, try extending the buffer.
AK_FORCE_INLINE bool checkAndPrepareWriting(const int pos, const int size);
bool checkAndPrepareWriting(const int pos, const int size);
};
}
#endif /* LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H */

View file

@ -604,4 +604,50 @@ public class BinaryDictionaryTests extends AndroidTestCase {
dictFile.delete();
}
/**
 * Repeatedly opens an updatable dictionary, adds generated unigrams until the dictionary
 * reports that GC is needed, checks that every word added so far (including words from
 * earlier iterations) is returned with the most recently stored probability, and then
 * flushes with GC.
 *
 * The dictionary is closed and the dictionary file deleted even when an assertion fails, so
 * a failing run does not leave them behind.
 */
public void testAddManyUnigramsAndFlushWithGC() {
    final int flushWithGCIterationCount = 3;
    final int codePointSetSize = 50;
    // Fixed seed so that the generated words are reproducible between runs.
    final int seed = 22360679;
    final Random random = new Random(seed);

    File dictFile = null;
    try {
        dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
    } catch (IOException e) {
        fail("IOException while writing an initial dictionary : " + e);
    } catch (UnsupportedFormatException e) {
        fail("UnsupportedFormatException while writing an initial dictionary : " + e);
    }

    final ArrayList<String> words = new ArrayList<String>();
    final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
    final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);

    try {
        for (int i = 0; i < flushWithGCIterationCount; i++) {
            final BinaryDictionary binaryDictionary = new BinaryDictionary(
                    dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(),
                    true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE,
                    true /* isUpdatable */);
            try {
                // Keep adding words until the dictionary asks for GC.
                while (!binaryDictionary.needsToRunGC()) {
                    final String word = CodePointUtils.generateWord(random, codePointSet);
                    words.add(word);
                    final int unigramProbability = random.nextInt(0xFF);
                    // A repeated word overwrites its earlier entry, so the map always holds
                    // the probability that was written last.
                    unigramProbabilities.put(word, unigramProbability);
                    binaryDictionary.addUnigramWord(word, unigramProbability);
                }

                // words is never cleared, so this also re-checks words added before
                // previous flushes.
                for (final String word : words) {
                    final int unigramProbability = unigramProbabilities.get(word);
                    assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word));
                }
                binaryDictionary.flushWithGC();
            } finally {
                binaryDictionary.close();
            }
        }
    } finally {
        dictFile.delete();
    }
}
}