* commit '40ab25cab0e723f34dc2f1442f414761149b2338': Add a JNI to get the frequency
This commit is contained in:
commit
9a989492db
6 changed files with 25 additions and 11 deletions
|
@ -84,7 +84,7 @@ public class BinaryDictionary extends Dictionary {
|
||||||
private native long openNative(String sourceDir, long dictOffset, long dictSize,
|
private native long openNative(String sourceDir, long dictOffset, long dictSize,
|
||||||
int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords);
|
int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords);
|
||||||
private native void closeNative(long dict);
|
private native void closeNative(long dict);
|
||||||
private native boolean isValidWordNative(long dict, int[] word, int wordLength);
|
private native int getFrequencyNative(long dict, int[] word, int wordLength);
|
||||||
private native boolean isValidBigramNative(long dict, int[] word1, int[] word2);
|
private native boolean isValidBigramNative(long dict, int[] word1, int[] word2);
|
||||||
private native int getSuggestionsNative(long dict, long proximityInfo, int[] xCoordinates,
|
private native int getSuggestionsNative(long dict, long proximityInfo, int[] xCoordinates,
|
||||||
int[] yCoordinates, int[] inputCodes, int codesSize, int[] prevWordForBigrams,
|
int[] yCoordinates, int[] inputCodes, int codesSize, int[] prevWordForBigrams,
|
||||||
|
@ -203,7 +203,8 @@ public class BinaryDictionary extends Dictionary {
|
||||||
public boolean isValidWord(CharSequence word) {
|
public boolean isValidWord(CharSequence word) {
|
||||||
if (word == null) return false;
|
if (word == null) return false;
|
||||||
int[] chars = StringUtils.toCodePointArray(word.toString());
|
int[] chars = StringUtils.toCodePointArray(word.toString());
|
||||||
return isValidWordNative(mNativeDict, chars, chars.length);
|
final int freq = getFrequencyNative(mNativeDict, chars, chars.length);
|
||||||
|
return freq >= 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni
|
// TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni
|
||||||
|
|
|
@ -173,12 +173,12 @@ static int latinime_BinaryDictionary_getBigrams(JNIEnv *env, jobject object, jlo
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject object, jlong dict,
|
static jint latinime_BinaryDictionary_getFrequency(JNIEnv *env, jobject object, jlong dict,
|
||||||
jintArray wordArray, jint wordLength) {
|
jintArray wordArray, jint wordLength) {
|
||||||
Dictionary *dictionary = (Dictionary*)dict;
|
Dictionary *dictionary = (Dictionary*)dict;
|
||||||
if (!dictionary) return (jboolean) false;
|
if (!dictionary) return (jboolean) false;
|
||||||
jint *word = env->GetIntArrayElements(wordArray, 0);
|
jint *word = env->GetIntArrayElements(wordArray, 0);
|
||||||
jboolean result = dictionary->isValidWord(word, wordLength);
|
jint result = dictionary->getFrequency(word, wordLength);
|
||||||
env->ReleaseIntArrayElements(wordArray, word, JNI_ABORT);
|
env->ReleaseIntArrayElements(wordArray, word, JNI_ABORT);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -253,7 +253,7 @@ static JNINativeMethod sMethods[] = {
|
||||||
{"closeNative", "(J)V", (void*)latinime_BinaryDictionary_close},
|
{"closeNative", "(J)V", (void*)latinime_BinaryDictionary_close},
|
||||||
{"getSuggestionsNative", "(JJ[I[I[II[IZ[C[I)I",
|
{"getSuggestionsNative", "(JJ[I[I[II[IZ[C[I)I",
|
||||||
(void*)latinime_BinaryDictionary_getSuggestions},
|
(void*)latinime_BinaryDictionary_getSuggestions},
|
||||||
{"isValidWordNative", "(J[II)Z", (void*)latinime_BinaryDictionary_isValidWord},
|
{"getFrequencyNative", "(J[II)I", (void*)latinime_BinaryDictionary_getFrequency},
|
||||||
{"isValidBigramNative", "(J[I[I)Z", (void*)latinime_BinaryDictionary_isValidBigram},
|
{"isValidBigramNative", "(J[I[I)Z", (void*)latinime_BinaryDictionary_isValidBigram},
|
||||||
{"getBigramsNative", "(J[II[II[C[III)I", (void*)latinime_BinaryDictionary_getBigrams},
|
{"getBigramsNative", "(J[II[II[C[III)I", (void*)latinime_BinaryDictionary_getBigrams},
|
||||||
{"calcNormalizedScoreNative", "([CI[CII)F",
|
{"calcNormalizedScoreNative", "([CI[CII)F",
|
||||||
|
|
|
@ -55,8 +55,8 @@ Dictionary::~Dictionary() {
|
||||||
delete mBigramDictionary;
|
delete mBigramDictionary;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Dictionary::isValidWord(const int32_t *word, int length) {
|
int Dictionary::getFrequency(const int32_t *word, int length) {
|
||||||
return mUnigramDictionary->isValidWord(word, length);
|
return mUnigramDictionary->getFrequency(word, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Dictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2,
|
bool Dictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2,
|
||||||
|
|
|
@ -52,7 +52,7 @@ class Dictionary {
|
||||||
maxWordLength, maxBigrams);
|
maxWordLength, maxBigrams);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isValidWord(const int32_t *word, int length);
|
int getFrequency(const int32_t *word, int length);
|
||||||
bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2);
|
bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2);
|
||||||
void *getDict() { return (void *)mDict; }
|
void *getDict() { return (void *)mDict; }
|
||||||
int getDictSize() { return mDictSize; }
|
int getDictSize() { return mDictSize; }
|
||||||
|
|
|
@ -748,8 +748,21 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWor
|
||||||
return maxFreq;
|
return maxFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool UnigramDictionary::isValidWord(const int32_t* const inWord, const int length) const {
|
int UnigramDictionary::getFrequency(const int32_t* const inWord, const int length) const {
|
||||||
return NOT_VALID_WORD != BinaryFormat::getTerminalPosition(DICT_ROOT, inWord, length);
|
const uint8_t* const root = DICT_ROOT;
|
||||||
|
int pos = BinaryFormat::getTerminalPosition(root, inWord, length);
|
||||||
|
if (NOT_VALID_WORD == pos) {
|
||||||
|
return NOT_A_PROBABILITY;
|
||||||
|
}
|
||||||
|
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
|
||||||
|
const bool hasMultipleChars = (0 != (FLAG_HAS_MULTIPLE_CHARS & flags));
|
||||||
|
if (hasMultipleChars) {
|
||||||
|
pos = BinaryFormat::skipOtherCharacters(root, pos);
|
||||||
|
} else {
|
||||||
|
BinaryFormat::getCharCodeAndForwardPointer(DICT_ROOT, &pos);
|
||||||
|
}
|
||||||
|
const int unigramFreq = BinaryFormat::readFrequencyWithoutMovingPointer(root, pos);
|
||||||
|
return unigramFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: remove this function.
|
// TODO: remove this function.
|
||||||
|
|
|
@ -72,7 +72,7 @@ class UnigramDictionary {
|
||||||
|
|
||||||
UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler,
|
UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler,
|
||||||
int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags);
|
int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags);
|
||||||
bool isValidWord(const int32_t* const inWord, const int length) const;
|
int getFrequency(const int32_t* const inWord, const int length) const;
|
||||||
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
|
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
|
||||||
int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool,
|
int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool,
|
||||||
Correction *correction, const int *xcoordinates, const int *ycoordinates,
|
Correction *correction, const int *xcoordinates, const int *ycoordinates,
|
||||||
|
|
Loading…
Reference in a new issue