am 41b77116: Merge "Refactoring: Move some methods to the reading helper."
* commit '41b77116d30e38314a20daf5694f4d2d32b24002': Refactoring: Move some methods to the reading helper.
main
commit
6f48fe456f
|
@ -77,95 +77,17 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *cons
|
||||||
int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
|
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||||
int *const outUnigramProbability) const {
|
int *const outUnigramProbability) const {
|
||||||
// This method traverses parent nodes from the terminal by following parent pointers; thus,
|
|
||||||
// node code points are stored in the buffer in the reverse order.
|
|
||||||
int reverseCodePoints[maxCodePointCount];
|
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
|
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
|
||||||
// First, read the terminal node and get its probability.
|
|
||||||
readingHelper.initWithPtNodePos(ptNodePos);
|
readingHelper.initWithPtNodePos(ptNodePos);
|
||||||
|
return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(maxCodePointCount,
|
||||||
const PtNodeParams terminalPtNodeParams(readingHelper.getPtNodeParams());
|
outCodePoints, outUnigramProbability);
|
||||||
if (!readingHelper.isValidTerminalNode(terminalPtNodeParams)) {
|
|
||||||
// Node at the ptNodePos is not a valid terminal node.
|
|
||||||
*outUnigramProbability = NOT_A_PROBABILITY;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
// Store terminal node probability.
|
|
||||||
*outUnigramProbability = terminalPtNodeParams.getProbability();
|
|
||||||
// Then, following parent node link to the dictionary root and fetch node code points.
|
|
||||||
int totalCodePointCount = 0;
|
|
||||||
while (!readingHelper.isEnd()) {
|
|
||||||
const PtNodeParams ptNodeParams(readingHelper.getPtNodeParams());
|
|
||||||
totalCodePointCount = readingHelper.getTotalCodePointCount(ptNodeParams);
|
|
||||||
if (!ptNodeParams.isValid() || totalCodePointCount > maxCodePointCount) {
|
|
||||||
// The ptNodePos is not a valid terminal node position in the dictionary.
|
|
||||||
*outUnigramProbability = NOT_A_PROBABILITY;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
// Store node code points to buffer in the reverse order.
|
|
||||||
readingHelper.fetchMergedNodeCodePointsInReverseOrder(ptNodeParams,
|
|
||||||
readingHelper.getPrevTotalCodePointCount(), reverseCodePoints);
|
|
||||||
// Follow parent node toward the root node.
|
|
||||||
readingHelper.readParentNode(ptNodeParams);
|
|
||||||
}
|
|
||||||
if (readingHelper.isError()) {
|
|
||||||
// The node position or the dictionary is invalid.
|
|
||||||
*outUnigramProbability = NOT_A_PROBABILITY;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
// Reverse the stored code points to output them.
|
|
||||||
for (int i = 0; i < totalCodePointCount; ++i) {
|
|
||||||
outCodePoints[i] = reverseCodePoints[totalCodePointCount - i - 1];
|
|
||||||
}
|
|
||||||
return totalCodePointCount;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int DynamicPatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
|
int DynamicPatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
|
||||||
const int length, const bool forceLowerCaseSearch) const {
|
const int length, const bool forceLowerCaseSearch) const {
|
||||||
int searchCodePoints[length];
|
|
||||||
for (int i = 0; i < length; ++i) {
|
|
||||||
searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
|
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
|
||||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||||
while (!readingHelper.isEnd()) {
|
return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
|
||||||
const PtNodeParams ptNodeParams(readingHelper.getPtNodeParams());
|
|
||||||
if (!ptNodeParams.isValid()) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount();
|
|
||||||
if (readingHelper.getTotalCodePointCount(ptNodeParams) > length
|
|
||||||
|| !readingHelper.isMatchedCodePoint(ptNodeParams, 0 /* index */,
|
|
||||||
searchCodePoints[matchedCodePointCount])) {
|
|
||||||
// Current node has too many code points or its first code point is different from
|
|
||||||
// target code point. Skip this node and read the next sibling node.
|
|
||||||
readingHelper.readNextSiblingNode(ptNodeParams);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Check following merged node code points.
|
|
||||||
const int nodeCodePointCount = ptNodeParams.getCodePointCount();
|
|
||||||
for (int j = 1; j < nodeCodePointCount; ++j) {
|
|
||||||
if (!readingHelper.isMatchedCodePoint(ptNodeParams,
|
|
||||||
j, searchCodePoints[matchedCodePointCount + j])) {
|
|
||||||
// Different code point is found. The given word is not included in the dictionary.
|
|
||||||
return NOT_A_DICT_POS;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// All characters are matched.
|
|
||||||
if (length == readingHelper.getTotalCodePointCount(ptNodeParams)) {
|
|
||||||
// Terminal position is found.
|
|
||||||
return ptNodeParams.getHeadPos();
|
|
||||||
}
|
|
||||||
if (!ptNodeParams.hasChildren()) {
|
|
||||||
return NOT_A_DICT_POS;
|
|
||||||
}
|
|
||||||
// Advance to the children nodes.
|
|
||||||
readingHelper.readChildNode(ptNodeParams);
|
|
||||||
}
|
|
||||||
// If we already traversed the tree further than the word is long, this means
|
|
||||||
// there was no match (or we would have found it).
|
|
||||||
return NOT_A_DICT_POS;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability,
|
int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability,
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
||||||
|
#include "utils/char_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -168,6 +169,89 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor
|
||||||
return !isError();
|
return !isError();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int DynamicPatriciaTrieReadingHelper::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
|
const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) {
|
||||||
|
// This method traverses parent nodes from the terminal by following parent pointers; thus,
|
||||||
|
// node code points are stored in the buffer in the reverse order.
|
||||||
|
int reverseCodePoints[maxCodePointCount];
|
||||||
|
const PtNodeParams terminalPtNodeParams(getPtNodeParams());
|
||||||
|
// First, read the terminal node and get its probability.
|
||||||
|
if (!isValidTerminalNode(terminalPtNodeParams)) {
|
||||||
|
// Node at the ptNodePos is not a valid terminal node.
|
||||||
|
*outUnigramProbability = NOT_A_PROBABILITY;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
// Store terminal node probability.
|
||||||
|
*outUnigramProbability = terminalPtNodeParams.getProbability();
|
||||||
|
// Then, following parent node link to the dictionary root and fetch node code points.
|
||||||
|
int totalCodePointCount = 0;
|
||||||
|
while (!isEnd()) {
|
||||||
|
const PtNodeParams ptNodeParams(getPtNodeParams());
|
||||||
|
totalCodePointCount = getTotalCodePointCount(ptNodeParams);
|
||||||
|
if (!ptNodeParams.isValid() || totalCodePointCount > maxCodePointCount) {
|
||||||
|
// The ptNodePos is not a valid terminal node position in the dictionary.
|
||||||
|
*outUnigramProbability = NOT_A_PROBABILITY;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
// Store node code points to buffer in the reverse order.
|
||||||
|
fetchMergedNodeCodePointsInReverseOrder(ptNodeParams, getPrevTotalCodePointCount(),
|
||||||
|
reverseCodePoints);
|
||||||
|
// Follow parent node toward the root node.
|
||||||
|
readParentNode(ptNodeParams);
|
||||||
|
}
|
||||||
|
if (isError()) {
|
||||||
|
// The node position or the dictionary is invalid.
|
||||||
|
*outUnigramProbability = NOT_A_PROBABILITY;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
// Reverse the stored code points to output them.
|
||||||
|
for (int i = 0; i < totalCodePointCount; ++i) {
|
||||||
|
outCodePoints[i] = reverseCodePoints[totalCodePointCount - i - 1];
|
||||||
|
}
|
||||||
|
return totalCodePointCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
int DynamicPatriciaTrieReadingHelper::getTerminalPtNodePositionOfWord(const int *const inWord,
|
||||||
|
const int length, const bool forceLowerCaseSearch) {
|
||||||
|
int searchCodePoints[length];
|
||||||
|
for (int i = 0; i < length; ++i) {
|
||||||
|
searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
|
||||||
|
}
|
||||||
|
while (!isEnd()) {
|
||||||
|
const PtNodeParams ptNodeParams(getPtNodeParams());
|
||||||
|
const int matchedCodePointCount = getPrevTotalCodePointCount();
|
||||||
|
if (getTotalCodePointCount(ptNodeParams) > length
|
||||||
|
|| !isMatchedCodePoint(ptNodeParams, 0 /* index */,
|
||||||
|
searchCodePoints[matchedCodePointCount])) {
|
||||||
|
// Current node has too many code points or its first code point is different from
|
||||||
|
// target code point. Skip this node and read the next sibling node.
|
||||||
|
readNextSiblingNode(ptNodeParams);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Check following merged node code points.
|
||||||
|
const int nodeCodePointCount = ptNodeParams.getCodePointCount();
|
||||||
|
for (int j = 1; j < nodeCodePointCount; ++j) {
|
||||||
|
if (!isMatchedCodePoint(ptNodeParams, j, searchCodePoints[matchedCodePointCount + j])) {
|
||||||
|
// Different code point is found. The given word is not included in the dictionary.
|
||||||
|
return NOT_A_DICT_POS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// All characters are matched.
|
||||||
|
if (length == getTotalCodePointCount(ptNodeParams)) {
|
||||||
|
// Terminal position is found.
|
||||||
|
return ptNodeParams.getHeadPos();
|
||||||
|
}
|
||||||
|
if (!ptNodeParams.hasChildren()) {
|
||||||
|
return NOT_A_DICT_POS;
|
||||||
|
}
|
||||||
|
// Advance to the children nodes.
|
||||||
|
readChildNode(ptNodeParams);
|
||||||
|
}
|
||||||
|
// If we already traversed the tree further than the word is long, this means
|
||||||
|
// there was no match (or we would have found it).
|
||||||
|
return NOT_A_DICT_POS;
|
||||||
|
}
|
||||||
|
|
||||||
// Read node array size and process empty node arrays. Nodes and arrays are counted up in this
|
// Read node array size and process empty node arrays. Nodes and arrays are counted up in this
|
||||||
// method to avoid an infinite loop.
|
// method to avoid an infinite loop.
|
||||||
void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() {
|
void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() {
|
||||||
|
|
|
@ -198,6 +198,12 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
bool traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
|
bool traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
|
||||||
TraversingEventListener *const listener);
|
TraversingEventListener *const listener);
|
||||||
|
|
||||||
|
int getCodePointsAndProbabilityAndReturnCodePointCount(const int maxCodePointCount,
|
||||||
|
int *const outCodePoints, int *const outUnigramProbability);
|
||||||
|
|
||||||
|
int getTerminalPtNodePositionOfWord(const int *const inWord, const int length,
|
||||||
|
const bool forceLowerCaseSearch);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);
|
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue