am 3e6b68bc
: Merge "Allow large memory space for GC."
* commit '3e6b68bc0ef63b7e527976dee00d02770f7d3e5c': Allow large memory space for GC.
This commit is contained in:
commit
572c1ac5d4
6 changed files with 79 additions and 17 deletions
|
@ -268,8 +268,8 @@ bool DynamicPatriciaTriePolicy::needsToRunGC() const {
|
|||
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
|
||||
return false;
|
||||
}
|
||||
// TODO: Implement.
|
||||
return false;
|
||||
// TODO: Implement more properly.
|
||||
return mBufferWithExtendableBuffer.isNearSizeLimit();
|
||||
}
|
||||
|
||||
} // namespace latinime
|
||||
|
|
|
@ -35,6 +35,8 @@ namespace latinime {
|
|||
const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
|
||||
const char *const DynamicPatriciaTrieWritingHelper::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
|
||||
".tmp";
|
||||
// TODO: Make MAX_DICTIONARY_SIZE 8MB.
|
||||
const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024;
|
||||
|
||||
bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
|
||||
DynamicPatriciaTrieReadingHelper *const readingHelper,
|
||||
|
@ -154,7 +156,8 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod
|
|||
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */)) {
|
||||
return;
|
||||
}
|
||||
BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
|
||||
BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */,
|
||||
MAX_DICTIONARY_SIZE);
|
||||
if (!runGC(rootPtNodeArrayPos, &newDictBuffer)) {
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -86,6 +86,7 @@ class DynamicPatriciaTrieWritingHelper {
|
|||
|
||||
static const int CHILDREN_POSITION_FIELD_SIZE;
|
||||
static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE;
|
||||
static const size_t MAX_DICTIONARY_SIZE;
|
||||
|
||||
BufferWithExtendableBuffer *const mBuffer;
|
||||
DynamicBigramListPolicy *const mBigramPolicy;
|
||||
|
|
|
@ -18,9 +18,10 @@
|
|||
|
||||
namespace latinime {
|
||||
|
||||
const size_t BufferWithExtendableBuffer::INITIAL_ADDITIONAL_BUFFER_SIZE = 16 * 1024;
|
||||
const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
|
||||
const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 16 * 1024;
|
||||
const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 90;
|
||||
// TODO: Needs to allocate larger memory corresponding to the current vector size.
|
||||
const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024;
|
||||
|
||||
bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size,
|
||||
int *const pos) {
|
||||
|
@ -64,6 +65,16 @@ bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *co
|
|||
return true;
|
||||
}
|
||||
|
||||
bool BufferWithExtendableBuffer::extendBuffer() {
|
||||
const size_t sizeAfterExtending =
|
||||
mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
|
||||
if (sizeAfterExtending > mMaxAdditionalBufferSize) {
|
||||
return false;
|
||||
}
|
||||
mAdditionalBuffer.resize(mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int size) {
|
||||
if (isInAdditionalBuffer(pos)) {
|
||||
const int tailPosition = getTailPosition();
|
||||
|
|
|
@ -32,9 +32,11 @@ namespace latinime {
|
|||
// raw pointer but provides several methods that handle boundary checking for writing data.
|
||||
class BufferWithExtendableBuffer {
|
||||
public:
|
||||
BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize)
|
||||
BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize,
|
||||
const int maxAdditionalBufferSize = MAX_ADDITIONAL_BUFFER_SIZE)
|
||||
: mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize),
|
||||
mAdditionalBuffer(INITIAL_ADDITIONAL_BUFFER_SIZE), mUsedAdditionalBufferSize(0) {}
|
||||
mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0),
|
||||
mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
|
||||
|
||||
AK_FORCE_INLINE int getTailPosition() const {
|
||||
return mOriginalBufferSize + mUsedAdditionalBufferSize;
|
||||
|
@ -61,6 +63,11 @@ class BufferWithExtendableBuffer {
|
|||
return mOriginalBufferSize;
|
||||
}
|
||||
|
||||
// Reports whether the additional buffer has reached at least
// NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE percent of mMaxAdditionalBufferSize.
// The comparison uses the allocated size of mAdditionalBuffer, not mUsedAdditionalBufferSize.
AK_FORCE_INLINE bool isNearSizeLimit() const {
    const size_t nearLimitSize =
            (mMaxAdditionalBufferSize * NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE) / 100;
    return nearLimitSize <= mAdditionalBuffer.size();
}
|
||||
|
||||
/**
|
||||
* For writing.
|
||||
*
|
||||
|
@ -75,28 +82,22 @@ class BufferWithExtendableBuffer {
|
|||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer);
|
||||
|
||||
static const size_t INITIAL_ADDITIONAL_BUFFER_SIZE;
|
||||
static const size_t MAX_ADDITIONAL_BUFFER_SIZE;
|
||||
static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE;
|
||||
static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
|
||||
|
||||
uint8_t *const mOriginalBuffer;
|
||||
const int mOriginalBufferSize;
|
||||
std::vector<uint8_t> mAdditionalBuffer;
|
||||
int mUsedAdditionalBufferSize;
|
||||
const size_t mMaxAdditionalBufferSize;
|
||||
|
||||
// Return if the buffer is successfully extended or not.
|
||||
AK_FORCE_INLINE bool extendBuffer() {
|
||||
if (mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP
|
||||
> MAX_ADDITIONAL_BUFFER_SIZE) {
|
||||
return false;
|
||||
}
|
||||
mAdditionalBuffer.resize(mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP);
|
||||
return true;
|
||||
}
|
||||
bool extendBuffer();
|
||||
|
||||
// Returns if it is possible to write size-bytes from pos. When pos is at the tail position of
|
||||
// the additional buffer, try extending the buffer.
|
||||
AK_FORCE_INLINE bool checkAndPrepareWriting(const int pos, const int size);
|
||||
bool checkAndPrepareWriting(const int pos, const int size);
|
||||
};
|
||||
}
|
||||
#endif /* LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H */
|
||||
|
|
|
@ -604,4 +604,50 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
|
||||
dictFile.delete();
|
||||
}
|
||||
|
||||
public void testAddManyUnigramsAndFlushWithGC() {
|
||||
final int flashWithGCIterationCount = 3;
|
||||
final int codePointSetSize = 50;
|
||||
final int seed = 22360679;
|
||||
|
||||
final Random random = new Random(seed);
|
||||
|
||||
File dictFile = null;
|
||||
try {
|
||||
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
|
||||
} catch (IOException e) {
|
||||
fail("IOException while writing an initial dictionary : " + e);
|
||||
} catch (UnsupportedFormatException e) {
|
||||
fail("UnsupportedFormatException while writing an initial dictionary : " + e);
|
||||
}
|
||||
|
||||
final ArrayList<String> words = new ArrayList<String>();
|
||||
final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
|
||||
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
|
||||
|
||||
BinaryDictionary binaryDictionary;
|
||||
for (int i = 0; i < flashWithGCIterationCount; i++) {
|
||||
binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||
while(!binaryDictionary.needsToRunGC()) {
|
||||
final String word = CodePointUtils.generateWord(random, codePointSet);
|
||||
words.add(word);
|
||||
final int unigramProbability = random.nextInt(0xFF);
|
||||
unigramProbabilities.put(word, unigramProbability);
|
||||
binaryDictionary.addUnigramWord(word, unigramProbability);
|
||||
}
|
||||
|
||||
for (int j = 0; j < words.size(); j++) {
|
||||
final String word = words.get(j);
|
||||
final int unigramProbability = unigramProbabilities.get(word);
|
||||
assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word));
|
||||
}
|
||||
|
||||
binaryDictionary.flushWithGC();
|
||||
binaryDictionary.close();
|
||||
}
|
||||
|
||||
dictFile.delete();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue