am 7eba0198: Dump binary dictionary information when opening

* commit '7eba0198c0344ce7aac9867711d7944811dd3ad5':
  Dump binary dictionary information when opening
main
Jean Chalard 2013-07-01 02:45:16 -07:00 committed by Android Git Automerger
commit eb7cd63858
9 changed files with 180 additions and 40 deletions

View File

@ -93,8 +93,8 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s
AKLOGE("DICT: dictionary format is unknown, bad magic number"); AKLOGE("DICT: dictionary format is unknown, bad magic number");
releaseDictBuf(static_cast<const char *>(dictBuf) - offset, adjDictSize, fd); releaseDictBuf(static_cast<const char *>(dictBuf) - offset, adjDictSize, fd);
} else { } else {
dictionary = new Dictionary( dictionary = new Dictionary(env, dictBuf, static_cast<int>(dictSize), fd, offset,
dictBuf, static_cast<int>(dictSize), fd, offset, updatableMmap); updatableMmap);
} }
PROF_END(66); PROF_END(66);
PROF_CLOSE; PROF_CLOSE;

View File

@ -35,6 +35,56 @@
// Must be equal to ProximityInfo.MAX_PROXIMITY_CHARS_SIZE in Java // Must be equal to ProximityInfo.MAX_PROXIMITY_CHARS_SIZE in Java
#define MAX_PROXIMITY_CHARS_SIZE 16 #define MAX_PROXIMITY_CHARS_SIZE 16
#define ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE 2 #define ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE 2
#define NELEMS(x) (sizeof(x) / sizeof((x)[0]))
/**
 * Converts a sequence of Unicode code points to a 0-terminated UTF-8 string.
 *
 * Conversion stops at the first 0 code point, after sourceSize elements have been read, or as
 * soon as the next encoded sequence would not fit in dest while leaving room for the
 * terminator. A multi-byte sequence is never written partially. Negative values are not code
 * points and are skipped. Values above 0x1FFFFF use the legacy five- and six-byte forms.
 *
 * @param source the code points to convert; a null pointer yields an empty string
 * @param sourceSize the maximum number of elements to read from source
 * @param dest the output buffer
 * @param destSize the capacity of dest in bytes, including room for the 0 terminator
 * @return the number of bytes written to dest, not counting the 0 terminator
 */
AK_FORCE_INLINE static int intArrayToCharArray(const int *const source, const int sourceSize,
        char *dest, const int destSize) {
    // Without room for at least the terminator, nothing can be written safely.
    if (!dest || destSize <= 0) {
        return 0;
    }
    // We want to always terminate with a 0 char, so stop one short of the length to make
    // sure there is room.
    const int destLimit = destSize - 1;
    int si = 0;
    int di = 0;
    while (source && si < sourceSize && di < destLimit && 0 != source[si]) {
        const int codePoint = source[si++];
        if (codePoint < 0) {
            // Not a code point... skip.
            continue;
        }
        // The upper bounds are inclusive: 0x7F, 0x7FF and 0xFFFF are the largest values that
        // fit in one, two and three bytes. Using the next longer form would be an overlong
        // (invalid) encoding.
        int byteCount;
        int leadMarker;
        if (codePoint <= 0x7F) {
            byteCount = 1;
            leadMarker = 0x00;
        } else if (codePoint <= 0x7FF) {
            byteCount = 2;
            leadMarker = 0xC0;
        } else if (codePoint <= 0xFFFF) {
            byteCount = 3;
            leadMarker = 0xE0;
        } else if (codePoint <= 0x1FFFFF) {
            byteCount = 4;
            leadMarker = 0xF0;
        } else if (codePoint <= 0x3FFFFFF) {
            byteCount = 5;
            leadMarker = 0xF8;
        } else {
            byteCount = 6;
            leadMarker = 0xFC;
        }
        // Never emit a truncated sequence: give up if the whole sequence does not fit.
        if (di + byteCount > destLimit) break;
        int shift = 6 * (byteCount - 1);
        dest[di++] = static_cast<char>(leadMarker + (codePoint >> shift));
        // Every trailing byte carries six payload bits behind the 10xxxxxx marker.
        while (shift > 0) {
            shift -= 6;
            dest[di++] = static_cast<char>(0x80 + ((codePoint >> shift) & 0x3F));
        }
    }
    dest[di] = 0;
    return di;
}
#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) #if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
#include <android/log.h> #include <android/log.h>
@ -46,35 +96,13 @@
#define DUMP_RESULT(words, frequencies) do { dumpResult(words, frequencies); } while (0) #define DUMP_RESULT(words, frequencies) do { dumpResult(words, frequencies); } while (0)
#define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0) #define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0)
#define INTS_TO_CHARS(input, length, output) do { \ #define INTS_TO_CHARS(input, length, output, outlength) do { \
intArrayToCharArray(input, length, output); } while (0) intArrayToCharArray(input, length, output, outlength); } while (0)
// TODO: Support full UTF-8 conversion
/**
 * Converts code points to a 0-terminated UTF-8 string of at most MAX_WORD_LENGTH - 1 bytes.
 *
 * Conversion stops at the first 0 code point, after sourceSize elements have been read, or as
 * soon as the next encoded sequence would push the output past MAX_WORD_LENGTH - 1 bytes.
 * Only code points up to 0xFFFF are encoded; larger values and negative values are skipped.
 *
 * @param source the code points to convert
 * @param sourceSize the maximum number of elements to read from source
 * @param dest the output buffer; it must hold at least MAX_WORD_LENGTH bytes
 * @return the number of bytes written to dest, not counting the 0 terminator
 */
AK_FORCE_INLINE static int intArrayToCharArray(const int *source, const int sourceSize,
        char *dest) {
    // Keep one byte in reserve for the 0 terminator.
    const int destLimit = MAX_WORD_LENGTH - 1;
    int si = 0;
    int di = 0;
    while (si < sourceSize && di < destLimit && 0 != source[si]) {
        const int codePoint = source[si++];
        if (codePoint < 0) {
            // Not a code point... skip.
            continue;
        }
        // The upper bounds are inclusive; encoding 0x7F or 0x7FF with the next longer form
        // would be an overlong (invalid) sequence.
        if (codePoint <= 0x7F) {
            dest[di++] = static_cast<char>(codePoint);
        } else if (codePoint <= 0x7FF) {
            // The loop guard only guarantees one free byte, so check multi-byte room here.
            if (di + 2 > destLimit) break;
            dest[di++] = static_cast<char>(0xC0 + (codePoint >> 6));
            dest[di++] = static_cast<char>(0x80 + (codePoint & 0x3F));
        } else if (codePoint <= 0xFFFF) {
            if (di + 3 > destLimit) break;
            dest[di++] = static_cast<char>(0xE0 + (codePoint >> 12));
            dest[di++] = static_cast<char>(0x80 + ((codePoint >> 6) & 0x3F));
            dest[di++] = static_cast<char>(0x80 + (codePoint & 0x3F));
        }
        // Code points above 0xFFFF are not supported here and are dropped.
    }
    dest[di] = 0;
    return di;
}
static inline void dumpWordInfo(const int *word, const int length, const int rank, static inline void dumpWordInfo(const int *word, const int length, const int rank,
const int probability) { const int probability) {
static char charBuf[50]; static char charBuf[50];
const int N = intArrayToCharArray(word, length, charBuf); const int N = intArrayToCharArray(word, length, charBuf, NELEMS(charBuf));
if (N > 1) { if (N > 1) {
AKLOGI("%2d [ %s ] (%d)", rank, charBuf, probability); AKLOGI("%2d [ %s ] (%d)", rank, charBuf, probability);
} }
@ -90,7 +118,7 @@ static inline void dumpResult(const int *outWords, const int *frequencies) {
static AK_FORCE_INLINE void dumpWord(const int *word, const int length) { static AK_FORCE_INLINE void dumpWord(const int *word, const int length) {
static char charBuf[50]; static char charBuf[50];
const int N = intArrayToCharArray(word, length, charBuf); const int N = intArrayToCharArray(word, length, charBuf, NELEMS(charBuf));
if (N > 1) { if (N > 1) {
AKLOGI("[ %s ]", charBuf); AKLOGI("[ %s ]", charBuf);
} }
@ -304,8 +332,6 @@ static inline void prof_out(void) {
template<typename T> AK_FORCE_INLINE const T &min(const T &a, const T &b) { return a < b ? a : b; } template<typename T> AK_FORCE_INLINE const T &min(const T &a, const T &b) { return a < b ? a : b; }
template<typename T> AK_FORCE_INLINE const T &max(const T &a, const T &b) { return a > b ? a : b; } template<typename T> AK_FORCE_INLINE const T &max(const T &a, const T &b) { return a > b ? a : b; }
#define NELEMS(x) (sizeof(x) / sizeof((x)[0]))
// DEBUG // DEBUG
#define INPUTLENGTH_FOR_DEBUG (-1) #define INPUTLENGTH_FOR_DEBUG (-1)
#define MIN_OUTPUT_INDEX_FOR_DEBUG (-1) #define MIN_OUTPUT_INDEX_FOR_DEBUG (-1)

View File

@ -28,15 +28,16 @@
#if DEBUG_DICT #if DEBUG_DICT
#define LOGI_SHOW_ADD_COST_PROP \ #define LOGI_SHOW_ADD_COST_PROP \
do { char charBuf[50]; \ do { char charBuf[50]; \
INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf); \ INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \
AKLOGI("%20s, \"%c\", size = %03d, total = %03d, index(0) = %02d, dist = %.4f, %s,,", \ AKLOGI("%20s, \"%c\", size = %03d, total = %03d, index(0) = %02d, dist = %.4f, %s,,", \
__FUNCTION__, getNodeCodePoint(), inputSize, getTotalInputIndex(), \ __FUNCTION__, getNodeCodePoint(), inputSize, getTotalInputIndex(), \
getInputIndex(0), getNormalizedCompoundDistance(), charBuf); } while (0) getInputIndex(0), getNormalizedCompoundDistance(), charBuf); } while (0)
#define DUMP_WORD_AND_SCORE(header) \ #define DUMP_WORD_AND_SCORE(header) \
do { char charBuf[50]; char prevWordCharBuf[50]; \ do { char charBuf[50]; char prevWordCharBuf[50]; \
INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf); \ INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \
INTS_TO_CHARS(mDicNodeState.mDicNodeStatePrevWord.mPrevWord, \ INTS_TO_CHARS(mDicNodeState.mDicNodeStatePrevWord.mPrevWord, \
mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf); \ mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf, \
NELEMS(prevWordCharBuf)); \
AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %s, %d,,", header, \ AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %s, %d,,", header, \
getSpatialDistanceForScoring(), getLanguageDistanceForScoring(), \ getSpatialDistanceForScoring(), getLanguageDistanceForScoring(), \
getNormalizedCompoundDistance(), getRawLength(), prevWordCharBuf, charBuf, \ getNormalizedCompoundDistance(), getRawLength(), prevWordCharBuf, charBuf, \

View File

@ -53,6 +53,20 @@ class BinaryDictionaryHeader {
return mMultiWordCostMultiplier; return mMultiWordCostMultiplier;
} }
// Looks up `key` through BinaryDictionaryHeaderReadingUtils::readHeaderValue and stores the
// result in outValue. When the lookup reports failure, outValue receives the string "?".
// A buffer of exactly one element receives an empty string without any lookup, and a
// non-positive outValueSize leaves outValue untouched.
AK_FORCE_INLINE void readHeaderValueOrQuestionMark(const char *const key,
        int *outValue, int outValueSize) const {
    if (outValueSize <= 0) {
        return;
    }
    if (outValueSize > 1) {
        if (BinaryDictionaryHeaderReadingUtils::readHeaderValue(mBinaryDictionaryInfo,
                key, outValue, outValueSize)) {
            return;
        }
        // Lookup failed: fall back to a single question mark.
        outValue[0] = '?';
        outValue[1] = '\0';
        return;
    }
    // Only the terminator fits.
    outValue[0] = '\0';
}
private: private:
DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryHeader); DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryHeader);

View File

@ -82,8 +82,10 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
if(ByteArrayUtils::compareStringInBufferWithCharArray( if(ByteArrayUtils::compareStringInBufferWithCharArray(
binaryDictionaryInfo->getDictBuf(), key, headerSize - pos, &pos) == 0) { binaryDictionaryInfo->getDictBuf(), key, headerSize - pos, &pos) == 0) {
// The key was found. // The key was found.
ByteArrayUtils::readStringAndAdvancePosition( const int length = ByteArrayUtils::readStringAndAdvancePosition(
binaryDictionaryInfo->getDictBuf(), outValueSize, outValue, &pos); binaryDictionaryInfo->getDictBuf(), outValueSize, outValue, &pos);
// Add a 0 terminator to the string.
outValue[length < outValueSize ? length : outValueSize - 1] = '\0';
return true; return true;
} }
ByteArrayUtils::advancePositionToBehindString( ByteArrayUtils::advancePositionToBehindString(

View File

@ -20,23 +20,27 @@
#include <stdint.h> #include <stdint.h>
#include "defines.h" #include "defines.h"
#include "jni.h"
#include "suggest/core/dictionary/binary_dictionary_format_utils.h" #include "suggest/core/dictionary/binary_dictionary_format_utils.h"
#include "suggest/core/dictionary/binary_dictionary_header.h" #include "suggest/core/dictionary/binary_dictionary_header.h"
#include "suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h" #include "suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h"
#include "utils/log_utils.h"
namespace latinime { namespace latinime {
class BinaryDictionaryInfo { class BinaryDictionaryInfo {
public: public:
BinaryDictionaryInfo(const uint8_t *const dictBuf, const int dictSize, const int mmapFd, BinaryDictionaryInfo(JNIEnv *env, const uint8_t *const dictBuf,
const int dictBufOffset, const bool isUpdatable) const int dictSize, const int mmapFd, const int dictBufOffset, const bool isUpdatable)
: mDictBuf(dictBuf), mDictSize(dictSize), mMmapFd(mmapFd), : mDictBuf(dictBuf), mDictSize(dictSize), mMmapFd(mmapFd),
mDictBufOffset(dictBufOffset), mIsUpdatable(isUpdatable), mDictBufOffset(dictBufOffset), mIsUpdatable(isUpdatable),
mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion( mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion(
mDictBuf, mDictSize)), mDictBuf, mDictSize)),
mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()), mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()),
mStructurePolicy(DictionaryStructurePolicyFactory::getDictionaryStructurePolicy( mStructurePolicy(DictionaryStructurePolicyFactory::getDictionaryStructurePolicy(
mDictionaryFormat)) {} mDictionaryFormat)) {
logDictionaryInfo(env);
}
AK_FORCE_INLINE const uint8_t *getDictBuf() const { AK_FORCE_INLINE const uint8_t *getDictBuf() const {
return mDictBuf; return mDictBuf;
@ -88,6 +92,33 @@ class BinaryDictionaryInfo {
const BinaryDictionaryHeader mDictionaryHeader; const BinaryDictionaryHeader mDictionaryHeader;
const uint8_t *const mDictRoot; const uint8_t *const mDictRoot;
const DictionaryStructurePolicy *const mStructurePolicy; const DictionaryStructurePolicy *const mStructurePolicy;
// Reads the "dictionary", "version" and "date" header attributes, converts each of them with
// intArrayToCharArray, and sends them together with mDictSize to LogUtils::logToJava.
// Every attribute goes through fixed 16-element buffers, so longer values are cut short.
AK_FORCE_INLINE void logDictionaryInfo(JNIEnv *const env) const {
    const int BUFFER_SIZE = 16;

    // Dictionary id.
    int idCodePoints[BUFFER_SIZE];
    char idChars[BUFFER_SIZE];
    mDictionaryHeader.readHeaderValueOrQuestionMark("dictionary", idCodePoints, BUFFER_SIZE);
    intArrayToCharArray(idCodePoints, BUFFER_SIZE, idChars, BUFFER_SIZE);

    // Version string.
    int versionCodePoints[BUFFER_SIZE];
    char versionChars[BUFFER_SIZE];
    mDictionaryHeader.readHeaderValueOrQuestionMark("version", versionCodePoints,
            BUFFER_SIZE);
    intArrayToCharArray(versionCodePoints, BUFFER_SIZE, versionChars, BUFFER_SIZE);

    // Date string.
    int dateCodePoints[BUFFER_SIZE];
    char dateChars[BUFFER_SIZE];
    mDictionaryHeader.readHeaderValueOrQuestionMark("date", dateCodePoints, BUFFER_SIZE);
    intArrayToCharArray(dateCodePoints, BUFFER_SIZE, dateChars, BUFFER_SIZE);

    LogUtils::logToJava(env,
            "Dictionary info: dictionary = %s ; version = %s ; date = %s ; filesize = %i",
            idChars, versionChars, dateChars, mDictSize);
}
}; };
} }
#endif /* LATINIME_BINARY_DICTIONARY_INFO_H */ #endif /* LATINIME_BINARY_DICTIONARY_INFO_H */

View File

@ -22,6 +22,7 @@
#include <stdint.h> #include <stdint.h>
#include "defines.h" #include "defines.h"
#include "jni.h"
#include "suggest/core/dictionary/bigram_dictionary.h" #include "suggest/core/dictionary/bigram_dictionary.h"
#include "suggest/core/dictionary/binary_format.h" #include "suggest/core/dictionary/binary_format.h"
#include "suggest/core/session/dic_traverse_session.h" #include "suggest/core/session/dic_traverse_session.h"
@ -32,8 +33,9 @@
namespace latinime { namespace latinime {
Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufOffset, bool isUpdatable) Dictionary::Dictionary(JNIEnv *env, void *dict, int dictSize, int mmapFd,
: mBinaryDictionaryInfo(static_cast<const uint8_t *>(dict), dictSize, mmapFd, int dictBufOffset, bool isUpdatable)
: mBinaryDictionaryInfo(env, static_cast<const uint8_t *>(dict), dictSize, mmapFd,
dictBufOffset, isUpdatable), dictBufOffset, isUpdatable),
mBigramDictionary(new BigramDictionary(&mBinaryDictionaryInfo)), mBigramDictionary(new BigramDictionary(&mBinaryDictionaryInfo)),
mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())), mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),

View File

@ -20,6 +20,7 @@
#include <stdint.h> #include <stdint.h>
#include "defines.h" #include "defines.h"
#include "jni.h"
#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/binary_dictionary_info.h"
namespace latinime { namespace latinime {
@ -52,7 +53,8 @@ class Dictionary {
static const int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000; static const int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000;
static const int KIND_FLAG_EXACT_MATCH = 0x40000000; static const int KIND_FLAG_EXACT_MATCH = 0x40000000;
Dictionary(void *dict, int dictSize, int mmapFd, int dictBufOffset, bool isUpdatable); Dictionary(JNIEnv *env, void *dict, int dictSize, int mmapFd, int dictBufOffset,
bool isUpdatable);
int getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession, int getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints, int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,

View File

@ -0,0 +1,62 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_LOG_UTILS_H
#define LATINIME_LOG_UTILS_H
#include <cstdio>
#include <stdarg.h>
#include <vector>
#include "defines.h"
#include "jni.h"
namespace latinime {
class LogUtils {
public:
static void logToJava(JNIEnv *const env, const char *const format, ...)
#ifdef __GNUC__
__attribute__ ((format (printf, 2, 3)))
#endif // __GNUC__
{
static const char *TAG = "LatinIME:LogUtils";
const jclass androidUtilLogClass = env->FindClass("android/util/Log");
const jmethodID logDotIMethodId = env->GetStaticMethodID(androidUtilLogClass, "i",
"(Ljava/lang/String;Ljava/lang/String;)I");
const jstring javaTag = env->NewStringUTF(TAG);
va_list argList;
va_start(argList, format);
// Get the necessary size. Add 1 for the 0 terminator.
const int size = vsnprintf(0, 0, format, argList) + 1;
va_end(argList);
char cString[size];
va_start(argList, format);
vsnprintf(cString, size, format, argList);
va_end(argList);
jstring javaString = env->NewStringUTF(cString);
env->CallStaticIntMethod(androidUtilLogClass, logDotIMethodId, javaTag, javaString);
env->DeleteLocalRef(javaString);
env->DeleteLocalRef(javaTag);
env->DeleteLocalRef(androidUtilLogClass);
}
private:
DISALLOW_COPY_AND_ASSIGN(LogUtils);
};
} // namespace latinime
#endif // LATINIME_LOG_UTILS_H