Merge "Remove BinaryDictionaryInfo and introduce MmapedBuffer."

This commit is contained in:
Keisuke Kuroyanagi 2013-08-15 09:34:28 +00:00 committed by Android (Google) Code Review
commit 0663bc047b
10 changed files with 168 additions and 174 deletions

View file

@ -18,42 +18,24 @@
#include "com_android_inputmethod_latin_BinaryDictionary.h"
#include <cerrno>
#include <cstring> // for memset()
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include "defines.h"
#include "jni.h"
#include "jni_common.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h"
#include "utils/autocorrection_threshold_utils.h"
namespace latinime {
class ProximityInfo;
// Helper method
static void releaseDictBuf(const void *dictBuf, const size_t length, const int fd) {
int ret = munmap(const_cast<void *>(dictBuf), length);
if (ret != 0) {
AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno);
}
ret = close(fd);
if (ret != 0) {
AKLOGE("DICT: Failure in close. ret=%d errno=%d", ret, errno);
}
}
static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir,
jlong dictOffset, jlong dictSize, jboolean isUpdatable) {
PROF_OPEN;
PROF_START(66);
// TODO: Move dictionary buffer handling to policyimpl.
const jsize sourceDirUtf8Length = env->GetStringUTFLength(sourceDir);
if (sourceDirUtf8Length <= 0) {
AKLOGE("DICT: Can't get sourceDir string");
@ -62,41 +44,16 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s
char sourceDirChars[sourceDirUtf8Length + 1];
env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars);
sourceDirChars[sourceDirUtf8Length] = '\0';
int fd = 0;
void *dictBuf = 0;
int offset = 0;
const bool updatableMmap = (isUpdatable == JNI_TRUE);
const int openMode = updatableMmap ? O_RDWR : O_RDONLY;
fd = open(sourceDirChars, openMode);
if (fd < 0) {
AKLOGE("DICT: Can't open sourceDir. sourceDirChars=%s errno=%d", sourceDirChars, errno);
DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy =
DictionaryStructureWithBufferPolicyFactory::newDictionaryStructureWithBufferPolicy(
sourceDirChars, static_cast<int>(sourceDirUtf8Length),
static_cast<int>(dictOffset), static_cast<int>(dictSize),
isUpdatable == JNI_TRUE);
if (!dictionaryStructureWithBufferPolicy) {
return 0;
}
int pagesize = getpagesize();
offset = static_cast<int>(dictOffset) % pagesize;
int adjDictOffset = static_cast<int>(dictOffset) - offset;
int adjDictSize = static_cast<int>(dictSize) + offset;
const int protMode = updatableMmap ? PROT_READ | PROT_WRITE : PROT_READ;
dictBuf = mmap(0, adjDictSize, protMode, MAP_PRIVATE, fd, adjDictOffset);
if (dictBuf == MAP_FAILED) {
AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
return 0;
}
dictBuf = static_cast<char *>(dictBuf) + offset;
if (!dictBuf) {
AKLOGE("DICT: dictBuf is null");
return 0;
}
Dictionary *dictionary = 0;
if (FormatUtils::UNKNOWN_VERSION
== FormatUtils::detectFormatVersion(static_cast<uint8_t *>(dictBuf),
static_cast<int>(dictSize))) {
AKLOGE("DICT: dictionary format is unknown, bad magic number");
releaseDictBuf(static_cast<const char *>(dictBuf) - offset, adjDictSize, fd);
} else {
dictionary = new Dictionary(env, dictBuf, static_cast<int>(dictSize), fd, offset,
updatableMmap);
}
Dictionary *const dictionary = new Dictionary(env, dictionaryStructureWithBufferPolicy);
PROF_END(66);
PROF_CLOSE;
return reinterpret_cast<jlong>(dictionary);
@ -105,13 +62,6 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s
static void latinime_BinaryDictionary_close(JNIEnv *env, jclass clazz, jlong dict) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return;
const BinaryDictionaryInfo *const binaryDictionaryInfo = dictionary->getBinaryDictionaryInfo();
const int dictBufOffset = binaryDictionaryInfo->getDictBufOffset();
const void *dictBuf = binaryDictionaryInfo->getDictBuf();
if (!dictBuf) return;
releaseDictBuf(static_cast<const char *>(dictBuf) - dictBufOffset,
binaryDictionaryInfo->getDictSize() + dictBufOffset,
binaryDictionaryInfo->getMmapFd());
delete dictionary;
}

View file

@ -1,65 +0,0 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_BINARY_DICTIONARY_INFO_H
#define LATINIME_BINARY_DICTIONARY_INFO_H
#include <stdint.h>
#include "defines.h"
namespace latinime {
// Immutable, non-copyable record of the values describing a dictionary buffer: the buffer
// pointer, its size, the file descriptor used for the mmap, the offset of the buffer, and
// whether the dictionary may be updated. Every field is fixed at construction and only
// exposed through the read-only accessors below; the class itself releases nothing.
class BinaryDictionaryInfo {
 public:
    // Stores the given values verbatim; no validation is performed here.
    AK_FORCE_INLINE BinaryDictionaryInfo(const uint8_t *const dictBuf,
            const int dictSize, const int mmapFd, const int dictBufOffset, const bool isUpdatable)
            : mDictBuf(dictBuf),
              mDictSize(dictSize),
              mMmapFd(mmapFd),
              mDictBufOffset(dictBufOffset),
              mIsUpdatable(isUpdatable) {}

    // Intentionally empty: this class does not unmap the buffer or close the descriptor.
    ~BinaryDictionaryInfo() {}

    // Pointer to the dictionary buffer passed to the constructor.
    AK_FORCE_INLINE const uint8_t *getDictBuf() const { return mDictBuf; }

    // Size of the dictionary buffer passed to the constructor.
    AK_FORCE_INLINE int getDictSize() const { return mDictSize; }

    // File descriptor passed to the constructor.
    AK_FORCE_INLINE int getMmapFd() const { return mMmapFd; }

    // Buffer offset passed to the constructor.
    AK_FORCE_INLINE int getDictBufOffset() const { return mDictBufOffset; }

    // True when the dictionary was flagged as updatable at construction.
    AK_FORCE_INLINE bool isDynamicallyUpdatable() const { return mIsUpdatable; }

 private:
    DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryInfo);

    const uint8_t *const mDictBuf;
    const int mDictSize;
    const int mMmapFd;
    const int mDictBufOffset;
    const bool mIsUpdatable;
};
}
#endif /* LATINIME_BINARY_DICTIONARY_INFO_H */

View file

@ -23,23 +23,19 @@
#include "defines.h"
#include "suggest/core/dictionary/bigram_dictionary.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/core/session/dic_traverse_session.h"
#include "suggest/core/suggest.h"
#include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h"
#include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
#include "suggest/policyimpl/typing/typing_suggest_policy_factory.h"
#include "utils/log_utils.h"
namespace latinime {
Dictionary::Dictionary(JNIEnv *env, void *dict, int dictSize, int mmapFd,
int dictBufOffset, bool isUpdatable)
: mBinaryDictionaryInfo(static_cast<const uint8_t *>(dict), dictSize, mmapFd,
dictBufOffset, isUpdatable),
mDictionaryStructureWithBufferPolicy(DictionaryStructureWithBufferPolicyFactory
::newDictionaryStructureWithBufferPolicy(
static_cast<const uint8_t *>(dict), dictSize)),
Dictionary::Dictionary(JNIEnv *env,
DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy)
: mDictionaryStructureWithBufferPolicy(dictionaryStructureWithBufferPolicy),
mBigramDictionary(new BigramDictionary(mDictionaryStructureWithBufferPolicy)),
mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) {
@ -102,32 +98,17 @@ bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1,
}
void Dictionary::addUnigramWord(const int *const word, const int length, const int probability) {
if (!mBinaryDictionaryInfo.isDynamicallyUpdatable()) {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: Dictionary::addUnigramWord() is called for non-updatable dictionary.");
return;
}
mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, probability);
}
void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1, const int probability) {
if (!mBinaryDictionaryInfo.isDynamicallyUpdatable()) {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: Dictionary::addBigramWords() is called for non-updatable dictionary.");
return;
}
mDictionaryStructureWithBufferPolicy->addBigramWords(word0, length0, word1, length1,
probability);
}
void Dictionary::removeBigramWords(const int *const word0, const int length0,
const int *const word1, const int length1) {
if (!mBinaryDictionaryInfo.isDynamicallyUpdatable()) {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: Dictionary::removeBigramWords() is called for non-updatable dictionary.");
return;
}
mDictionaryStructureWithBufferPolicy->removeBigramWords(word0, length0, word1, length1);
}
@ -155,9 +136,8 @@ void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
dateStringCharBuffer, BUFFER_SIZE);
LogUtils::logToJava(env,
"Dictionary info: dictionary = %s ; version = %s ; date = %s ; filesize = %i",
dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer,
mBinaryDictionaryInfo.getDictSize());
"Dictionary info: dictionary = %s ; version = %s ; date = %s",
dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer);
}
} // namespace latinime

View file

@ -21,7 +21,6 @@
#include "defines.h"
#include "jni.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"
namespace latinime {
@ -54,8 +53,8 @@ class Dictionary {
static const int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000;
static const int KIND_FLAG_EXACT_MATCH = 0x40000000;
Dictionary(JNIEnv *env, void *dict, int dictSize, int mmapFd, int dictBufOffset,
bool isUpdatable);
// Builds a Dictionary on top of an already-created structure policy. The pointer is stored
// in mDictionaryStructureWithBufferPolicy.
// NOTE(review): ownership of the policy is presumably transferred to the Dictionary - confirm
// against the destructor, which is not visible here.
Dictionary(JNIEnv *env,
        DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy);
int getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
@ -78,11 +77,6 @@ class Dictionary {
void removeBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1);
// TODO: Remove.
const BinaryDictionaryInfo *getBinaryDictionaryInfo() const {
return &mBinaryDictionaryInfo;
}
const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
return mDictionaryStructureWithBufferPolicy;
}
@ -92,7 +86,6 @@ class Dictionary {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary);
const BinaryDictionaryInfo mBinaryDictionaryInfo;
DictionaryStructureWithBufferPolicy *const mDictionaryStructureWithBufferPolicy;
const BigramDictionary *const mBigramDictionary;
const SuggestInterface *const mGestureSuggest;

View file

@ -22,18 +22,29 @@
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h"
#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmaped_buffer.h"
namespace latinime {
/* static */ DictionaryStructureWithBufferPolicy *DictionaryStructureWithBufferPolicyFactory
::newDictionaryStructureWithBufferPolicy(const uint8_t *const dictBuf,
const int dictSize) {
switch (FormatUtils::detectFormatVersion(dictBuf, dictSize)) {
::newDictionaryStructureWithBufferPolicy(const char *const path, const int pathLength,
const int bufOffset, const int size, const bool isUpdatable) {
// Allocated buffer in MmapedBuffer::openBuffer() will be freed in the destructor of
// impl classes of DictionaryStructureWithBufferPolicy.
const MmapedBuffer *const mmapedBuffer = MmapedBuffer::openBuffer(path, pathLength, bufOffset,
size, isUpdatable);
if (!mmapedBuffer) {
return 0;
}
switch (FormatUtils::detectFormatVersion(mmapedBuffer->getBuffer(),
mmapedBuffer->getBufferSize())) {
case FormatUtils::VERSION_2:
return new PatriciaTriePolicy(dictBuf);
return new PatriciaTriePolicy(mmapedBuffer);
case FormatUtils::VERSION_3:
return new DynamicPatriciaTriePolicy(dictBuf);
return new DynamicPatriciaTriePolicy(mmapedBuffer);
default:
AKLOGE("DICT: dictionary format is unknown, bad magic number");
delete mmapedBuffer;
ASSERT(false);
return 0;
}

View file

@ -27,7 +27,8 @@ namespace latinime {
class DictionaryStructureWithBufferPolicyFactory {
public:
static DictionaryStructureWithBufferPolicy *newDictionaryStructureWithBufferPolicy(
const uint8_t *const dictBuf, const int dictSize);
const char *const path, const int pathLength, const int bufOffset, const int size,
const bool isUpdatable);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructureWithBufferPolicyFactory);

View file

@ -233,18 +233,30 @@ int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const
// Adds a unigram to the dictionary. Insertion is not implemented yet, so false is returned
// on every path; when the underlying buffer is not updatable a warning is emitted first.
bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
        const int probability) {
    const bool canUpdate = mBuffer->isUpdatable();
    if (canUpdate) {
        // TODO: Implement.
        return false;
    }
    AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
    return false;
}
// Adds a bigram (word0 -> word1) to the dictionary. Insertion is not implemented yet, so
// false is returned on every path; when the underlying buffer is not updatable a warning is
// emitted first.
bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
        const int *const word1, const int length1, const int probability) {
    const bool canUpdate = mBuffer->isUpdatable();
    if (canUpdate) {
        // TODO: Implement.
        return false;
    }
    AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
    return false;
}
// Removes a bigram (word0 -> word1) from the dictionary. Removal is not implemented yet, so
// false is returned on every path; when the underlying buffer is not updatable a warning is
// emitted first.
bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
        const int *const word1, const int length1) {
    const bool canUpdate = mBuffer->isUpdatable();
    if (canUpdate) {
        // TODO: Implement.
        return false;
    }
    AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary.");
    return false;
}

View file

@ -24,6 +24,7 @@
#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/utils/mmaped_buffer.h"
namespace latinime {
@ -32,11 +33,14 @@ class DicNodeVector;
class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
DynamicPatriciaTriePolicy(const uint8_t *const dictBuf)
: mHeaderPolicy(dictBuf), mDictRoot(dictBuf + mHeaderPolicy.getSize()),
DynamicPatriciaTriePolicy(const MmapedBuffer *const buffer)
: mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer()),
mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()),
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {}
~DynamicPatriciaTriePolicy() {}
~DynamicPatriciaTriePolicy() {
delete mBuffer;
}
AK_FORCE_INLINE int getRootPosition() const {
return 0;
@ -82,6 +86,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
const MmapedBuffer *const mBuffer;
const HeaderPolicy mHeaderPolicy;
// TODO: Consolidate mDictRoot.
const uint8_t *const mDictRoot;

View file

@ -24,6 +24,7 @@
#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/utils/mmaped_buffer.h"
namespace latinime {
@ -32,11 +33,14 @@ class DicNodeVector;
class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
PatriciaTriePolicy(const uint8_t *const dictBuf)
: mHeaderPolicy(dictBuf), mDictRoot(dictBuf + mHeaderPolicy.getSize()),
PatriciaTriePolicy(const MmapedBuffer *const buffer)
: mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer()),
mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()),
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {}
~PatriciaTriePolicy() {}
~PatriciaTriePolicy() {
delete mBuffer;
}
AK_FORCE_INLINE int getRootPosition() const {
return 0;
@ -71,25 +75,29 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
}
bool addUnigramWord(const int *const word, const int length, const int probability) {
// This dictionary format is not updatable.
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
return false;
}
bool addBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1, const int probability) {
// This dictionary format is not updatable.
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
return false;
}
bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1) {
// This dictionary format is not updatable.
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary.");
return false;
}
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
const MmapedBuffer *const mBuffer;
const HeaderPolicy mHeaderPolicy;
const uint8_t *const mDictRoot;
const BigramListPolicy mBigramListPolicy;

View file

@ -0,0 +1,99 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_MMAPED_BUFFER_H
#define LATINIME_MMAPED_BUFFER_H
#include <cerrno>
#include <fcntl.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>
#include "defines.h"
namespace latinime {
class MmapedBuffer {
public:
static MmapedBuffer* openBuffer(const char *const path, const int pathLength,
const int bufOffset, const int size, const bool isUpdatable) {
const int openMode = isUpdatable ? O_RDWR : O_RDONLY;
const int fd = open(path, openMode);
if (fd < 0) {
AKLOGE("DICT: Can't open the source. path=%s errno=%d", path, errno);
return 0;
}
const int pagesize = getpagesize();
const int offset = bufOffset % pagesize;
int adjOffset = bufOffset - offset;
int adjSize = size + offset;
const int protMode = isUpdatable ? PROT_READ | PROT_WRITE : PROT_READ;
void *const mmapedBuffer = mmap(0, adjSize, protMode, MAP_PRIVATE, fd, adjOffset);
if (mmapedBuffer == MAP_FAILED) {
AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
close(fd);
return 0;
}
uint8_t *const buffer = static_cast<uint8_t *>(mmapedBuffer) + bufOffset;
if (!buffer) {
AKLOGE("DICT: buffer is null");
close(fd);
return 0;
}
return new MmapedBuffer(buffer, adjSize, fd, adjOffset, isUpdatable);
}
~MmapedBuffer() {
int ret = munmap(static_cast<void *>(mBuffer - mBufferOffset),
mBufferSize + mBufferOffset);
if (ret != 0) {
AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno);
}
ret = close(mMmapFd);
if (ret != 0) {
AKLOGE("DICT: Failure in close. ret=%d errno=%d", ret, errno);
}
}
AK_FORCE_INLINE uint8_t *getBuffer() const {
return mBuffer;
}
AK_FORCE_INLINE int getBufferSize() const {
return mBufferSize;
}
AK_FORCE_INLINE bool isUpdatable() const {
return mIsUpdatable;
}
private:
AK_FORCE_INLINE MmapedBuffer(uint8_t *const buffer, const int bufferSize, const int mmapFd,
const int bufferOffset, const bool isUpdatable)
: mBuffer(buffer), mBufferSize(bufferSize), mMmapFd(mmapFd),
mBufferOffset(bufferOffset), mIsUpdatable(isUpdatable) {}
DISALLOW_IMPLICIT_CONSTRUCTORS(MmapedBuffer);
uint8_t *const mBuffer;
const int mBufferSize;
const int mMmapFd;
const int mBufferOffset;
const bool mIsUpdatable;
};
}
#endif /* LATINIME_MMAPED_BUFFER_H */