From 94e4cd25a8f7417d30a0832f7476d39ece1df788 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Wed, 3 Sep 2014 18:55:31 +0900 Subject: [PATCH] Use word id to get code points of the word. Bug: 14425059 Change-Id: I81accffcdf5abe447c33ffc3a8e8315f9a4cde7f --- .../src/suggest/core/dictionary/dictionary.cpp | 8 ++++---- .../src/suggest/core/dictionary/dictionary.h | 2 +- .../dictionary_structure_with_buffer_policy.h | 2 +- .../backward/v402/ver4_patricia_trie_policy.cpp | 10 ++++++---- .../backward/v402/ver4_patricia_trie_policy.h | 2 +- .../structure/v2/patricia_trie_policy.cpp | 17 +++++++++-------- .../structure/v2/patricia_trie_policy.h | 2 +- .../structure/v4/ver4_patricia_trie_policy.cpp | 15 +++++++-------- .../structure/v4/ver4_patricia_trie_policy.h | 2 +- 9 files changed, 31 insertions(+), 29 deletions(-) diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 956243161..8f9b2aa12 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -67,8 +67,8 @@ Dictionary::NgramListenerForPrediction::NgramListenerForPrediction( mDictStructurePolicy(dictStructurePolicy) {} void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbability, - const int targetPtNodePos) { - if (targetPtNodePos == NOT_A_DICT_POS) { + const int targetWordId) { + if (targetWordId == NOT_A_WORD_ID) { return; } if (mPrevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */) @@ -78,8 +78,8 @@ void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbabi int targetWordCodePoints[MAX_WORD_LENGTH]; int unigramProbability = 0; const int codePointCount = mDictStructurePolicy-> - getCodePointsAndProbabilityAndReturnCodePointCount(targetPtNodePos, - MAX_WORD_LENGTH, targetWordCodePoints, &unigramProbability); + getCodePointsAndProbabilityAndReturnCodePointCount(targetWordId, MAX_WORD_LENGTH, + targetWordCodePoints, 
&unigramProbability); if (codePointCount <= 0) { return; } diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 732d3b199..50951fbc1 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -120,7 +120,7 @@ class Dictionary { NgramListenerForPrediction(const PrevWordsInfo *const prevWordsInfo, SuggestionResults *const suggestionResults, const DictionaryStructureWithBufferPolicy *const dictStructurePolicy); - virtual void onVisitEntry(const int ngramProbability, const int targetPtNodePos); + virtual void onVisitEntry(const int ngramProbability, const int targetWordId); private: DISALLOW_IMPLICIT_CONSTRUCTORS(NgramListenerForPrediction); diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index 36fa6e854..aeeb66f93 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -50,7 +50,7 @@ class DictionaryStructureWithBufferPolicy { DicNodeVector *const childDicNodes) const = 0; virtual int getCodePointsAndProbabilityAndReturnCodePointCount( - const int ptNodePos, const int maxCodePointCount, int *const outCodePoints, + const int wordId, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const = 0; virtual int getWordId(const CodePointArrayView wordCodePoints, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp index 28c5eb105..929dc3dc1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp +++ 
b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp @@ -91,9 +91,10 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d } int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( - const int ptNodePos, const int maxCodePointCount, int *const outCodePoints, + const int wordId, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const { DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); + const int ptNodePos = getTerminalPtNodePosFromWordId(wordId); readingHelper.initWithPtNodePos(ptNodePos); const int codePointCount = readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount( maxCodePointCount, outCodePoints, outUnigramProbability); @@ -492,8 +493,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty( // Word (unigram) probability int word1Probability = NOT_A_PROBABILITY; const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( - word1TerminalPtNodePos, MAX_WORD_LENGTH, bigramWord1CodePoints, - &word1Probability); + getWordIdFromTerminalPtNodePos(word1TerminalPtNodePos), MAX_WORD_LENGTH, + bigramWord1CodePoints, &word1Probability); const std::vector word1(bigramWord1CodePoints, bigramWord1CodePoints + codePointCount); const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo(); @@ -550,7 +551,8 @@ int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token]; int unigramProbability = NOT_A_PROBABILITY; *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( - terminalPtNodePos, MAX_WORD_LENGTH, outCodePoints, &unigramProbability); + getWordIdFromTerminalPtNodePos(terminalPtNodePos), MAX_WORD_LENGTH, outCodePoints, + &unigramProbability); const int nextToken = token + 1; if (nextToken >= terminalPtNodePositionsVectorSize) { // All words 
have been iterated. diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h index bead2ff23..562c219f4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h @@ -86,7 +86,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { DicNodeVector *const childDicNodes) const; int getCodePointsAndProbabilityAndReturnCodePointCount( - const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints, + const int wordId, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const; int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 88fe3efd3..8aa7234c0 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -57,7 +57,7 @@ void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNo } } -// This retrieves code points and the probability of the word by its terminal position. +// This retrieves code points and the probability of the word by its id. // Due to the fact that words are ordered in the dictionary in a strict breadth-first order, // it is possible to check for this with advantageous complexity. For each PtNode array, we search // for PtNodes with children and compare the children position with the position we look for. 
@@ -68,16 +68,16 @@ void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNo // with a z, it's the last PtNode of the root array, so all children addresses will be smaller // than the position we look for, and we have to descend the z PtNode). /* Parameters : - * ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is - * what is stored as the "bigram position" in each bigram) + * wordId: Id of the word we are searching for. * outCodePoints: an array to write the found word, with MAX_WORD_LENGTH size. * outUnigramProbability: a pointer to an int to write the probability into. * Return value : the code point count, of 0 if the word was not found. */ // TODO: Split this function to be more readable int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( - const int ptNodePos, const int maxCodePointCount, int *const outCodePoints, + const int wordId, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const { + const int ptNodePos = getTerminalPtNodePosFromWordId(wordId); int pos = getRootPosition(); int wordPos = 0; // One iteration of the outer loop iterates through PtNode arrays. 
As stated above, we will @@ -410,8 +410,8 @@ const WordProperty PatriciaTriePolicy::getWordProperty( if (bigramsIt.getBigramPos() != NOT_A_DICT_POS) { int word1Probability = NOT_A_PROBABILITY; const int word1CodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( - bigramsIt.getBigramPos(), MAX_WORD_LENGTH, bigramWord1CodePoints, - &word1Probability); + getWordIdFromTerminalPtNodePos(bigramsIt.getBigramPos()), MAX_WORD_LENGTH, + bigramWord1CodePoints, &word1Probability); const std::vector word1(bigramWord1CodePoints, bigramWord1CodePoints + word1CodePointCount); const int probability = getProbability(word1Probability, bigramsIt.getProbability()); @@ -465,8 +465,9 @@ int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outC } const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token]; int unigramProbability = NOT_A_PROBABILITY; - *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(terminalPtNodePos, - MAX_WORD_LENGTH, outCodePoints, &unigramProbability); + *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( + getWordIdFromTerminalPtNodePos(terminalPtNodePos), MAX_WORD_LENGTH, outCodePoints, + &unigramProbability); const int nextToken = token + 1; if (nextToken >= terminalPtNodePositionsVectorSize) { // All words have been iterated. 
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 08c3e1ddc..66df52779 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -61,7 +61,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { DicNodeVector *const childDicNodes) const; int getCodePointsAndProbabilityAndReturnCodePointCount( - const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints, + const int wordId, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const; int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index e11b94cdc..094ce4292 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -81,9 +81,11 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d } int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( - const int ptNodePos, const int maxCodePointCount, int *const outCodePoints, + const int wordId, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const { DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); + const int ptNodePos = + mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(wordId); readingHelper.initWithPtNodePos(ptNodePos); const int codePointCount = readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount( maxCodePointCount, 
outCodePoints, outUnigramProbability); @@ -488,18 +490,13 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty( // TODO: Support n-gram. std::vector bigrams; const WordIdArrayView prevWordIds = WordIdArrayView::fromObject(&wordId); - const TerminalPositionLookupTable *const terminalPositionLookupTable = - mBuffers->getTerminalPositionLookupTable(); int bigramWord1CodePoints[MAX_WORD_LENGTH]; for (const auto entry : mBuffers->getLanguageModelDictContent()->getProbabilityEntries( prevWordIds)) { - const int word1TerminalPtNodePos = - terminalPositionLookupTable->getTerminalPtNodePosition(entry.getWordId()); // Word (unigram) probability int word1Probability = NOT_A_PROBABILITY; const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( - word1TerminalPtNodePos, MAX_WORD_LENGTH, bigramWord1CodePoints, - &word1Probability); + entry.getWordId(), MAX_WORD_LENGTH, bigramWord1CodePoints, &word1Probability); const std::vector word1(bigramWord1CodePoints, bigramWord1CodePoints + codePointCount); const ProbabilityEntry probabilityEntry = entry.getProbabilityEntry(); @@ -553,9 +550,11 @@ int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const return 0; } const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token]; + const PtNodeParams ptNodeParams = + mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(terminalPtNodePos); int unigramProbability = NOT_A_PROBABILITY; *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( - terminalPtNodePos, MAX_WORD_LENGTH, outCodePoints, &unigramProbability); + ptNodeParams.getTerminalId(), MAX_WORD_LENGTH, outCodePoints, &unigramProbability); const int nextToken = token + 1; if (nextToken >= terminalPtNodePositionsVectorSize) { // All words have been iterated. 
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index 9dc83d8da..0b8eec40b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -63,7 +63,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { DicNodeVector *const childDicNodes) const; int getCodePointsAndProbabilityAndReturnCodePointCount( - const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints, + const int wordId, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const; int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const;