Implement GC for terminal Id lookup table.

Bug: 11073222
Change-Id: I8889ba808755ac39ae0abcfe2f6f0c4ee3a01a03
This commit is contained in:
Keisuke Kuroyanagi 2013-11-22 20:25:40 +09:00
parent b87971d66b
commit 4095177c4f
3 changed files with 116 additions and 53 deletions

View file

@ -100,7 +100,8 @@ LATIN_IME_CORE_SRC_FILES := \
$(addprefix suggest/policyimpl/dictionary/structure/v4/content/, \
bigram_dict_content.cpp \
shortcut_dict_content.cpp \
sparse_table_dict_content.cpp) \
sparse_table_dict_content.cpp \
terminal_position_lookup_table.cpp) \
$(addprefix suggest/policyimpl/dictionary/utils/, \
buffer_with_extendable_buffer.cpp \
byte_array_utils.cpp \

View file

@ -0,0 +1,106 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
// Looks up the PtNode position stored for terminalId.
//
// Returns NOT_A_DICT_POS when terminalId is outside [0, mSize) or when the stored entry is
// Ver4DictConstants::NOT_A_TERMINAL_ADDRESS. Otherwise returns the stored value with
// mHeaderRegionSize subtracted.
int TerminalPositionLookupTable::getTerminalPtNodePosition(const int terminalId) const {
    const bool isKnownId = (terminalId >= 0) && (terminalId < mSize);
    if (!isKnownId) {
        return NOT_A_DICT_POS;
    }
    const int storedAddress = getBuffer()->readUint(
            Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
    if (storedAddress == Ver4DictConstants::NOT_A_TERMINAL_ADDRESS) {
        // The slot exists but no terminal is registered in it.
        return NOT_A_DICT_POS;
    }
    // Entries are stored with the header region size added (see setTerminalPtNodePosition).
    return storedAddress - mHeaderRegionSize;
}
// Stores terminalPtNodePos as the PtNode position of terminalId.
//
// If terminalId is beyond the current table size, the table is grown one entry at a time and
// each new entry is initialized to Ver4DictConstants::NOT_A_TERMINAL_ADDRESS. Passing
// NOT_A_DICT_POS as terminalPtNodePos writes NOT_A_TERMINAL_ADDRESS into the entry, which
// clears an already registered terminal. Any other position is stored with mHeaderRegionSize
// added.
//
// Returns false when terminalId is negative or when a write to the buffer fails, and true
// otherwise.
bool TerminalPositionLookupTable::setTerminalPtNodePosition(
        const int terminalId, const int terminalPtNodePos) {
    if (terminalId < 0) {
        // The return type is bool. Returning NOT_A_DICT_POS here would be converted to true
        // for any non-zero value and would report an invalid id as success.
        return false;
    }
    while (terminalId >= mSize) {
        // Write new entry.
        if (!getWritableBuffer()->writeUint(Ver4DictConstants::NOT_A_TERMINAL_ADDRESS,
                Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(mSize))) {
            return false;
        }
        mSize++;
    }
    // Do not return early for NOT_A_DICT_POS: an existing entry has to be overwritten with
    // NOT_A_TERMINAL_ADDRESS so that readers and runGCTerminalIds() see it as unused.
    const int terminalPos = (terminalPtNodePos != NOT_A_DICT_POS) ?
            terminalPtNodePos + mHeaderRegionSize : Ver4DictConstants::NOT_A_TERMINAL_ADDRESS;
    return getWritableBuffer()->writeUint(terminalPos,
            Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
}
bool TerminalPositionLookupTable::flushToFile(const char *const dictDirPath,
const int newHeaderRegionSize) const {
const int headerRegionSizeDiff = newHeaderRegionSize - mHeaderRegionSize;
// If header region size has been changed, terminal PtNode positions have to be adjusted
// depending on the new header region size.
if (headerRegionSizeDiff != 0) {
TerminalPositionLookupTable lookupTableToWrite;
for (int i = 0; i < mSize; ++i) {
const int terminalPtNodePosition = getTerminalPtNodePosition(i)
+ headerRegionSizeDiff;
if (!lookupTableToWrite.setTerminalPtNodePosition(i, terminalPtNodePosition)) {
AKLOGE("Cannot set terminal position to lookupTableToWrite."
" terminalId: %d, position: %d", i, terminalPtNodePosition);
return false;
}
}
return lookupTableToWrite.flush(dictDirPath,
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
} else {
// We can simply use this lookup table because the header region size has not been
// changed.
return flush(dictDirPath, Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
}
}
// Compacts the table by dropping entries that hold Ver4DictConstants::NOT_A_TERMINAL_ADDRESS.
//
// Surviving entries are moved towards the front of the table in their original order and are
// given consecutive new terminal ids starting from 0. For each surviving entry, the pair
// (old terminal id, new terminal id) is inserted into terminalIdMap. mSize is updated to the
// number of surviving entries.
//
// Returns false if a write to the buffer fails, true otherwise.
bool TerminalPositionLookupTable::runGCTerminalIds(TerminalIdMap *const terminalIdMap) {
    int nextNewTerminalId = 0;
    for (int i = 0; i < mSize; ++i) {
        const int terminalPos = getBuffer()->readUint(
                Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(i));
        if (terminalPos == Ver4DictConstants::NOT_A_TERMINAL_ADDRESS) {
            // This entry is garbage; it does not get a new terminal id.
            continue;
        }
        // Give a new terminal id to the entry. nextNewTerminalId never exceeds i, so this
        // write cannot clobber an entry that has not been read yet.
        if (!getWritableBuffer()->writeUint(terminalPos,
                Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE,
                getEntryPos(nextNewTerminalId))) {
            return false;
        }
        // Memorize the mapping from the old terminal id to the new terminal id.
        terminalIdMap->insert(TerminalIdMap::value_type(i, nextNewTerminalId));
        nextNewTerminalId++;
    }
    mSize = nextNewTerminalId;
    return true;
}
} // namespace latinime

View file

@ -17,18 +17,17 @@
#ifndef LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
#define LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
#include <stdint.h>
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "utils/hash_map_compat.h"
namespace latinime {
class TerminalPositionLookupTable : public SingleDictContent {
public:
typedef hash_map_compat<int, int> TerminalIdMap;
// TODO: Quit using headerRegionSize.
TerminalPositionLookupTable(const char *const dictDirPath, const bool isUpdatable,
const int headerRegionSize)
@ -40,60 +39,17 @@ class TerminalPositionLookupTable : public SingleDictContent {
TerminalPositionLookupTable() : mSize(0), mHeaderRegionSize(0) {}
int getTerminalPtNodePosition(const int terminalId) const {
if (terminalId < 0 || terminalId >= mSize) {
return NOT_A_DICT_POS;
}
return getBuffer()->readUint(Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE,
getEntryPos(terminalId)) - mHeaderRegionSize;
}
int getTerminalPtNodePosition(const int terminalId) const;
bool setTerminalPtNodePosition(const int terminalId, const int terminalPtNodePos) {
if (terminalId < 0) {
return NOT_A_DICT_POS;
}
if (terminalId >= mSize) {
int writingPos = getBuffer()->getTailPosition();
while(writingPos <= getEntryPos(terminalId)) {
// Write new entry.
getWritableBuffer()->writeUintAndAdvancePosition(
Ver4DictConstants::NOT_A_TERMINAL_ADDRESS,
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, &writingPos);
}
mSize = getBuffer()->getTailPosition()
/ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
}
return getWritableBuffer()->writeUint(terminalPtNodePos + mHeaderRegionSize,
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
}
bool setTerminalPtNodePosition(const int terminalId, const int terminalPtNodePos);
int getNextTerminalId() const {
return mSize;
}
bool flushToFile(const char *const dictDirPath, const int newHeaderRegionSize) const {
const int headerRegionSizeDiff = newHeaderRegionSize - mHeaderRegionSize;
// If header region size has been changed, terminal PtNode positions have to be adjusted
// depending on the new header region size.
if (headerRegionSizeDiff != 0) {
TerminalPositionLookupTable lookupTableToWrite;
for (int i = 0; i < mSize; ++i) {
const int terminalPtNodePosition = getTerminalPtNodePosition(i)
+ headerRegionSizeDiff;
if (!lookupTableToWrite.setTerminalPtNodePosition(i, terminalPtNodePosition)) {
AKLOGE("Cannot set terminal position to lookupTableToWrite."
" terminalId: %d, position: %d", i, terminalPtNodePosition);
return false;
}
}
return lookupTableToWrite.flush(dictDirPath,
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
} else {
// We can simply use this lookup table because the header region size has not been
// changed.
return flush(dictDirPath, Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
}
}
bool flushToFile(const char *const dictDirPath, const int newHeaderRegionSize) const;
bool runGCTerminalIds(TerminalIdMap *const terminalIdMap);
private:
DISALLOW_COPY_AND_ASSIGN(TerminalPositionLookupTable);