Merge "Pass words as int[] to the native code."
commit
87f7fa12db
|
@ -83,11 +83,11 @@ public class BinaryDictionary extends Dictionary {
|
||||||
private native long openNative(String sourceDir, long dictOffset, long dictSize,
|
private native long openNative(String sourceDir, long dictOffset, long dictSize,
|
||||||
int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords);
|
int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords);
|
||||||
private native void closeNative(long dict);
|
private native void closeNative(long dict);
|
||||||
private native boolean isValidWordNative(long dict, char[] word, int wordLength);
|
private native boolean isValidWordNative(long dict, int[] word, int wordLength);
|
||||||
private native int getSuggestionsNative(long dict, long proximityInfo, int[] xCoordinates,
|
private native int getSuggestionsNative(long dict, long proximityInfo, int[] xCoordinates,
|
||||||
int[] yCoordinates, int[] inputCodes, int codesSize, int[] prevWordForBigrams,
|
int[] yCoordinates, int[] inputCodes, int codesSize, int[] prevWordForBigrams,
|
||||||
boolean useFullEditDistance, char[] outputChars, int[] scores);
|
boolean useFullEditDistance, char[] outputChars, int[] scores);
|
||||||
private native int getBigramsNative(long dict, char[] prevWord, int prevWordLength,
|
private native int getBigramsNative(long dict, int[] prevWord, int prevWordLength,
|
||||||
int[] inputCodes, int inputCodesLength, char[] outputChars, int[] scores,
|
int[] inputCodes, int inputCodesLength, char[] outputChars, int[] scores,
|
||||||
int maxWordLength, int maxBigrams);
|
int maxWordLength, int maxBigrams);
|
||||||
private static native double calcNormalizedScoreNative(
|
private static native double calcNormalizedScoreNative(
|
||||||
|
@ -105,7 +105,7 @@ public class BinaryDictionary extends Dictionary {
|
||||||
final WordCallback callback) {
|
final WordCallback callback) {
|
||||||
if (mNativeDict == 0) return;
|
if (mNativeDict == 0) return;
|
||||||
|
|
||||||
char[] chars = previousWord.toString().toCharArray();
|
int[] codePoints = StringUtils.toCodePointArray(previousWord.toString());
|
||||||
Arrays.fill(mOutputChars_bigrams, (char) 0);
|
Arrays.fill(mOutputChars_bigrams, (char) 0);
|
||||||
Arrays.fill(mBigramScores, 0);
|
Arrays.fill(mBigramScores, 0);
|
||||||
|
|
||||||
|
@ -115,8 +115,8 @@ public class BinaryDictionary extends Dictionary {
|
||||||
mInputCodes[0] = codes.getCodeAt(0);
|
mInputCodes[0] = codes.getCodeAt(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
int count = getBigramsNative(mNativeDict, chars, chars.length, mInputCodes, codesSize,
|
int count = getBigramsNative(mNativeDict, codePoints, codePoints.length, mInputCodes,
|
||||||
mOutputChars_bigrams, mBigramScores, MAX_WORD_LENGTH, MAX_BIGRAMS);
|
codesSize, mOutputChars_bigrams, mBigramScores, MAX_WORD_LENGTH, MAX_BIGRAMS);
|
||||||
if (count > MAX_BIGRAMS) {
|
if (count > MAX_BIGRAMS) {
|
||||||
count = MAX_BIGRAMS;
|
count = MAX_BIGRAMS;
|
||||||
}
|
}
|
||||||
|
@ -200,7 +200,7 @@ public class BinaryDictionary extends Dictionary {
|
||||||
@Override
|
@Override
|
||||||
public boolean isValidWord(CharSequence word) {
|
public boolean isValidWord(CharSequence word) {
|
||||||
if (word == null) return false;
|
if (word == null) return false;
|
||||||
char[] chars = word.toString().toCharArray();
|
int[] chars = StringUtils.toCodePointArray(word.toString());
|
||||||
return isValidWordNative(mNativeDict, chars, chars.length);
|
return isValidWordNative(mNativeDict, chars, chars.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -153,30 +153,30 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object,
|
||||||
}
|
}
|
||||||
|
|
||||||
static int latinime_BinaryDictionary_getBigrams(JNIEnv *env, jobject object, jlong dict,
|
static int latinime_BinaryDictionary_getBigrams(JNIEnv *env, jobject object, jlong dict,
|
||||||
jcharArray prevWordArray, jint prevWordLength, jintArray inputArray, jint inputArraySize,
|
jintArray prevWordArray, jint prevWordLength, jintArray inputArray, jint inputArraySize,
|
||||||
jcharArray outputArray, jintArray frequencyArray, jint maxWordLength, jint maxBigrams) {
|
jcharArray outputArray, jintArray frequencyArray, jint maxWordLength, jint maxBigrams) {
|
||||||
Dictionary *dictionary = (Dictionary*)dict;
|
Dictionary *dictionary = (Dictionary*)dict;
|
||||||
if (!dictionary) return 0;
|
if (!dictionary) return 0;
|
||||||
jchar *prevWord = env->GetCharArrayElements(prevWordArray, 0);
|
jint *prevWord = env->GetIntArrayElements(prevWordArray, 0);
|
||||||
int *inputCodes = env->GetIntArrayElements(inputArray, 0);
|
int *inputCodes = env->GetIntArrayElements(inputArray, 0);
|
||||||
jchar *outputChars = env->GetCharArrayElements(outputArray, 0);
|
jchar *outputChars = env->GetCharArrayElements(outputArray, 0);
|
||||||
int *frequencies = env->GetIntArrayElements(frequencyArray, 0);
|
int *frequencies = env->GetIntArrayElements(frequencyArray, 0);
|
||||||
int count = dictionary->getBigrams((unsigned short*) prevWord, prevWordLength, inputCodes,
|
int count = dictionary->getBigrams(prevWord, prevWordLength, inputCodes,
|
||||||
inputArraySize, (unsigned short*) outputChars, frequencies, maxWordLength, maxBigrams);
|
inputArraySize, (unsigned short*) outputChars, frequencies, maxWordLength, maxBigrams);
|
||||||
env->ReleaseIntArrayElements(frequencyArray, frequencies, 0);
|
env->ReleaseIntArrayElements(frequencyArray, frequencies, 0);
|
||||||
env->ReleaseCharArrayElements(outputArray, outputChars, 0);
|
env->ReleaseCharArrayElements(outputArray, outputChars, 0);
|
||||||
env->ReleaseIntArrayElements(inputArray, inputCodes, JNI_ABORT);
|
env->ReleaseIntArrayElements(inputArray, inputCodes, JNI_ABORT);
|
||||||
env->ReleaseCharArrayElements(prevWordArray, prevWord, JNI_ABORT);
|
env->ReleaseIntArrayElements(prevWordArray, prevWord, JNI_ABORT);
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject object, jlong dict,
|
static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject object, jlong dict,
|
||||||
jcharArray wordArray, jint wordLength) {
|
jintArray wordArray, jint wordLength) {
|
||||||
Dictionary *dictionary = (Dictionary*)dict;
|
Dictionary *dictionary = (Dictionary*)dict;
|
||||||
if (!dictionary) return (jboolean) false;
|
if (!dictionary) return (jboolean) false;
|
||||||
jchar *word = env->GetCharArrayElements(wordArray, 0);
|
jint *word = env->GetIntArrayElements(wordArray, 0);
|
||||||
jboolean result = dictionary->isValidWord((unsigned short*) word, wordLength);
|
jboolean result = dictionary->isValidWord(word, wordLength);
|
||||||
env->ReleaseCharArrayElements(wordArray, word, JNI_ABORT);
|
env->ReleaseIntArrayElements(wordArray, word, JNI_ABORT);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -236,8 +236,8 @@ static JNINativeMethod sMethods[] = {
|
||||||
{"closeNative", "(J)V", (void*)latinime_BinaryDictionary_close},
|
{"closeNative", "(J)V", (void*)latinime_BinaryDictionary_close},
|
||||||
{"getSuggestionsNative", "(JJ[I[I[II[IZ[C[I)I",
|
{"getSuggestionsNative", "(JJ[I[I[II[IZ[C[I)I",
|
||||||
(void*)latinime_BinaryDictionary_getSuggestions},
|
(void*)latinime_BinaryDictionary_getSuggestions},
|
||||||
{"isValidWordNative", "(J[CI)Z", (void*)latinime_BinaryDictionary_isValidWord},
|
{"isValidWordNative", "(J[II)Z", (void*)latinime_BinaryDictionary_isValidWord},
|
||||||
{"getBigramsNative", "(J[CI[II[C[III)I", (void*)latinime_BinaryDictionary_getBigrams},
|
{"getBigramsNative", "(J[II[II[C[III)I", (void*)latinime_BinaryDictionary_getBigrams},
|
||||||
{"calcNormalizedScoreNative", "([CI[CII)D",
|
{"calcNormalizedScoreNative", "([CI[CII)D",
|
||||||
(void*)latinime_BinaryDictionary_calcNormalizedScore},
|
(void*)latinime_BinaryDictionary_calcNormalizedScore},
|
||||||
{"editDistanceNative", "([CI[CI)I", (void*)latinime_BinaryDictionary_editDistance}
|
{"editDistanceNative", "([CI[CI)I", (void*)latinime_BinaryDictionary_editDistance}
|
||||||
|
|
|
@ -96,7 +96,7 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
|
||||||
* and the bigrams are used to boost unigram result scores, it makes little sense to
|
* and the bigrams are used to boost unigram result scores, it makes little sense to
|
||||||
* reduce their scope to the ones that match the first letter.
|
* reduce their scope to the ones that match the first letter.
|
||||||
*/
|
*/
|
||||||
int BigramDictionary::getBigrams(unsigned short *prevWord, int prevWordLength, int *codes,
|
int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, int *codes,
|
||||||
int codesSize, unsigned short *bigramChars, int *bigramFreq, int maxWordLength,
|
int codesSize, unsigned short *bigramChars, int *bigramFreq, int maxWordLength,
|
||||||
int maxBigrams) {
|
int maxBigrams) {
|
||||||
// TODO: remove unused arguments, and refrain from storing stuff in members of this class
|
// TODO: remove unused arguments, and refrain from storing stuff in members of this class
|
||||||
|
@ -134,7 +134,7 @@ int BigramDictionary::getBigrams(unsigned short *prevWord, int prevWordLength, i
|
||||||
// Returns a pointer to the start of the bigram list.
|
// Returns a pointer to the start of the bigram list.
|
||||||
// If the word is not found or has no bigrams, this function returns 0.
|
// If the word is not found or has no bigrams, this function returns 0.
|
||||||
int BigramDictionary::getBigramListForWord(const uint8_t* const root,
|
int BigramDictionary::getBigramListForWord(const uint8_t* const root,
|
||||||
const unsigned short *prevWord, const int prevWordLength) {
|
const int32_t *prevWord, const int prevWordLength) {
|
||||||
int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength);
|
int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength);
|
||||||
|
|
||||||
if (NOT_VALID_WORD == pos) return 0;
|
if (NOT_VALID_WORD == pos) return 0;
|
||||||
|
|
|
@ -25,10 +25,10 @@ class Dictionary;
|
||||||
class BigramDictionary {
|
class BigramDictionary {
|
||||||
public:
|
public:
|
||||||
BigramDictionary(const unsigned char *dict, int maxWordLength, Dictionary *parentDictionary);
|
BigramDictionary(const unsigned char *dict, int maxWordLength, Dictionary *parentDictionary);
|
||||||
int getBigrams(unsigned short *word, int length, int *codes, int codesSize,
|
int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
|
||||||
unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams);
|
unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams);
|
||||||
int getBigramListForWord(const uint8_t* const root,
|
int getBigramListForWord(const uint8_t* const root,
|
||||||
const unsigned short *prevWord, const int prevWordLength);
|
const int32_t *prevWord, const int prevWordLength);
|
||||||
~BigramDictionary();
|
~BigramDictionary();
|
||||||
private:
|
private:
|
||||||
bool addWordBigram(unsigned short *word, int length, int frequency);
|
bool addWordBigram(unsigned short *word, int length, int frequency);
|
||||||
|
|
|
@ -62,7 +62,7 @@ class BinaryFormat {
|
||||||
static bool hasChildrenInFlags(const uint8_t flags);
|
static bool hasChildrenInFlags(const uint8_t flags);
|
||||||
static int getAttributeAddressAndForwardPointer(const uint8_t* const dict, const uint8_t flags,
|
static int getAttributeAddressAndForwardPointer(const uint8_t* const dict, const uint8_t flags,
|
||||||
int *pos);
|
int *pos);
|
||||||
static int getTerminalPosition(const uint8_t* const root, const uint16_t* const inWord,
|
static int getTerminalPosition(const uint8_t* const root, const int32_t* const inWord,
|
||||||
const int length);
|
const int length);
|
||||||
static int getWordAtAddress(const uint8_t* const root, const int address, const int maxDepth,
|
static int getWordAtAddress(const uint8_t* const root, const int address, const int maxDepth,
|
||||||
uint16_t* outWord);
|
uint16_t* outWord);
|
||||||
|
@ -304,7 +304,7 @@ inline int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t* con
|
||||||
// This function gets the byte position of the last chargroup of the exact matching word in the
|
// This function gets the byte position of the last chargroup of the exact matching word in the
|
||||||
// dictionary. If no match is found, it returns NOT_VALID_WORD.
|
// dictionary. If no match is found, it returns NOT_VALID_WORD.
|
||||||
inline int BinaryFormat::getTerminalPosition(const uint8_t* const root,
|
inline int BinaryFormat::getTerminalPosition(const uint8_t* const root,
|
||||||
const uint16_t* const inWord, const int length) {
|
const int32_t* const inWord, const int length) {
|
||||||
int pos = 0;
|
int pos = 0;
|
||||||
int wordPos = 0;
|
int wordPos = 0;
|
||||||
|
|
||||||
|
@ -313,7 +313,7 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t* const root,
|
||||||
// there was no match (or we would have found it).
|
// there was no match (or we would have found it).
|
||||||
if (wordPos > length) return NOT_VALID_WORD;
|
if (wordPos > length) return NOT_VALID_WORD;
|
||||||
int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos);
|
int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos);
|
||||||
const uint16_t wChar = inWord[wordPos];
|
const int32_t wChar = inWord[wordPos];
|
||||||
while (true) {
|
while (true) {
|
||||||
// If there are no more character groups in this node, it means we could not
|
// If there are no more character groups in this node, it means we could not
|
||||||
// find a matching character for this depth, therefore there is no match.
|
// find a matching character for this depth, therefore there is no match.
|
||||||
|
|
|
@ -54,7 +54,7 @@ Dictionary::~Dictionary() {
|
||||||
delete mBigramDictionary;
|
delete mBigramDictionary;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Dictionary::isValidWord(unsigned short *word, int length) {
|
bool Dictionary::isValidWord(const int32_t *word, int length) {
|
||||||
return mUnigramDictionary->isValidWord(word, length);
|
return mUnigramDictionary->isValidWord(word, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -40,13 +40,13 @@ class Dictionary {
|
||||||
codesSize, useFullEditDistance, outWords, frequencies);
|
codesSize, useFullEditDistance, outWords, frequencies);
|
||||||
}
|
}
|
||||||
|
|
||||||
int getBigrams(unsigned short *word, int length, int *codes, int codesSize,
|
int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
|
||||||
unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams) {
|
unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams) {
|
||||||
return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies,
|
return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies,
|
||||||
maxWordLength, maxBigrams);
|
maxWordLength, maxBigrams);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isValidWord(unsigned short *word, int length);
|
bool isValidWord(const int32_t *word, int length);
|
||||||
void *getDict() { return (void *)mDict; }
|
void *getDict() { return (void *)mDict; }
|
||||||
int getDictSize() { return mDictSize; }
|
int getDictSize() { return mDictSize; }
|
||||||
int getMmapFd() { return mMmapFd; }
|
int getMmapFd() { return mMmapFd; }
|
||||||
|
|
|
@ -730,7 +730,7 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWor
|
||||||
return maxFreq;
|
return maxFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool UnigramDictionary::isValidWord(const uint16_t* const inWord, const int length) const {
|
bool UnigramDictionary::isValidWord(const int32_t* const inWord, const int length) const {
|
||||||
return NOT_VALID_WORD != BinaryFormat::getTerminalPosition(DICT_ROOT, inWord, length);
|
return NOT_VALID_WORD != BinaryFormat::getTerminalPosition(DICT_ROOT, inWord, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -71,7 +71,7 @@ class UnigramDictionary {
|
||||||
|
|
||||||
UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler,
|
UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler,
|
||||||
int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags);
|
int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags);
|
||||||
bool isValidWord(const uint16_t* const inWord, const int length) const;
|
bool isValidWord(const int32_t* const inWord, const int length) const;
|
||||||
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
|
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
|
||||||
int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool,
|
int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool,
|
||||||
Correction *correction, const int *xcoordinates,
|
Correction *correction, const int *xcoordinates,
|
||||||
|
|
Loading…
Reference in New Issue