Refactoring: Move some methods to the reading helper.

To handle multiple dictionary formats in helpers.

Bug: 11073222
Change-Id: I376bbdea934e13588feca02b9865105742f1a058
This commit is contained in:
Keisuke Kuroyanagi 2013-10-24 18:02:39 -07:00
parent 8e9df884aa
commit 3f0a8e3fb0
3 changed files with 93 additions and 81 deletions

View file

@ -77,95 +77,17 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *cons
int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const {
// This method traverses parent nodes from the terminal by following parent pointers; thus,
// node code points are stored in the buffer in the reverse order.
int reverseCodePoints[maxCodePointCount];
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
// First, read the terminal node and get its probability.
readingHelper.initWithPtNodePos(ptNodePos);
const PtNodeParams terminalPtNodeParams(readingHelper.getPtNodeParams());
if (!readingHelper.isValidTerminalNode(terminalPtNodeParams)) {
// Node at the ptNodePos is not a valid terminal node.
*outUnigramProbability = NOT_A_PROBABILITY;
return 0;
}
// Store terminal node probability.
*outUnigramProbability = terminalPtNodeParams.getProbability();
// Then, following parent node link to the dictionary root and fetch node code points.
int totalCodePointCount = 0;
while (!readingHelper.isEnd()) {
const PtNodeParams ptNodeParams(readingHelper.getPtNodeParams());
totalCodePointCount = readingHelper.getTotalCodePointCount(ptNodeParams);
if (!ptNodeParams.isValid() || totalCodePointCount > maxCodePointCount) {
// The ptNodePos is not a valid terminal node position in the dictionary.
*outUnigramProbability = NOT_A_PROBABILITY;
return 0;
}
// Store node code points to buffer in the reverse order.
readingHelper.fetchMergedNodeCodePointsInReverseOrder(ptNodeParams,
readingHelper.getPrevTotalCodePointCount(), reverseCodePoints);
// Follow parent node toward the root node.
readingHelper.readParentNode(ptNodeParams);
}
if (readingHelper.isError()) {
// The node position or the dictionary is invalid.
*outUnigramProbability = NOT_A_PROBABILITY;
return 0;
}
// Reverse the stored code points to output them.
for (int i = 0; i < totalCodePointCount; ++i) {
outCodePoints[i] = reverseCodePoints[totalCodePointCount - i - 1];
}
return totalCodePointCount;
return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(maxCodePointCount,
outCodePoints, outUnigramProbability);
}
int DynamicPatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const {
int searchCodePoints[length];
for (int i = 0; i < length; ++i) {
searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
}
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition());
while (!readingHelper.isEnd()) {
const PtNodeParams ptNodeParams(readingHelper.getPtNodeParams());
if (!ptNodeParams.isValid()) {
break;
}
const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount();
if (readingHelper.getTotalCodePointCount(ptNodeParams) > length
|| !readingHelper.isMatchedCodePoint(ptNodeParams, 0 /* index */,
searchCodePoints[matchedCodePointCount])) {
// Current node has too many code points or its first code point is different from
// target code point. Skip this node and read the next sibling node.
readingHelper.readNextSiblingNode(ptNodeParams);
continue;
}
// Check following merged node code points.
const int nodeCodePointCount = ptNodeParams.getCodePointCount();
for (int j = 1; j < nodeCodePointCount; ++j) {
if (!readingHelper.isMatchedCodePoint(ptNodeParams,
j, searchCodePoints[matchedCodePointCount + j])) {
// Different code point is found. The given word is not included in the dictionary.
return NOT_A_DICT_POS;
}
}
// All characters are matched.
if (length == readingHelper.getTotalCodePointCount(ptNodeParams)) {
// Terminal position is found.
return ptNodeParams.getHeadPos();
}
if (!ptNodeParams.hasChildren()) {
return NOT_A_DICT_POS;
}
// Advance to the children nodes.
readingHelper.readChildNode(ptNodeParams);
}
// If we already traversed the tree further than the word is long, there means
// there was no match (or we would have found it).
return NOT_A_DICT_POS;
return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
}
int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability,

View file

@ -19,6 +19,7 @@
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
#include "utils/char_utils.h"
namespace latinime {
@ -168,6 +169,89 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor
return !isError();
}
int DynamicPatriciaTrieReadingHelper::getCodePointsAndProbabilityAndReturnCodePointCount(
const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) {
// This method traverses parent nodes from the terminal by following parent pointers; thus,
// node code points are stored in the buffer in the reverse order.
int reverseCodePoints[maxCodePointCount];
const PtNodeParams terminalPtNodeParams(getPtNodeParams());
// First, read the terminal node and get its probability.
if (!isValidTerminalNode(terminalPtNodeParams)) {
// Node at the ptNodePos is not a valid terminal node.
*outUnigramProbability = NOT_A_PROBABILITY;
return 0;
}
// Store terminal node probability.
*outUnigramProbability = terminalPtNodeParams.getProbability();
// Then, following parent node link to the dictionary root and fetch node code points.
int totalCodePointCount = 0;
while (!isEnd()) {
const PtNodeParams ptNodeParams(getPtNodeParams());
totalCodePointCount = getTotalCodePointCount(ptNodeParams);
if (!ptNodeParams.isValid() || totalCodePointCount > maxCodePointCount) {
// The ptNodePos is not a valid terminal node position in the dictionary.
*outUnigramProbability = NOT_A_PROBABILITY;
return 0;
}
// Store node code points to buffer in the reverse order.
fetchMergedNodeCodePointsInReverseOrder(ptNodeParams, getPrevTotalCodePointCount(),
reverseCodePoints);
// Follow parent node toward the root node.
readParentNode(ptNodeParams);
}
if (isError()) {
// The node position or the dictionary is invalid.
*outUnigramProbability = NOT_A_PROBABILITY;
return 0;
}
// Reverse the stored code points to output them.
for (int i = 0; i < totalCodePointCount; ++i) {
outCodePoints[i] = reverseCodePoints[totalCodePointCount - i - 1];
}
return totalCodePointCount;
}
int DynamicPatriciaTrieReadingHelper::getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) {
int searchCodePoints[length];
for (int i = 0; i < length; ++i) {
searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
}
while (!isEnd()) {
const PtNodeParams ptNodeParams(getPtNodeParams());
const int matchedCodePointCount = getPrevTotalCodePointCount();
if (getTotalCodePointCount(ptNodeParams) > length
|| !isMatchedCodePoint(ptNodeParams, 0 /* index */,
searchCodePoints[matchedCodePointCount])) {
// Current node has too many code points or its first code point is different from
// target code point. Skip this node and read the next sibling node.
readNextSiblingNode(ptNodeParams);
continue;
}
// Check following merged node code points.
const int nodeCodePointCount = ptNodeParams.getCodePointCount();
for (int j = 1; j < nodeCodePointCount; ++j) {
if (!isMatchedCodePoint(ptNodeParams, j, searchCodePoints[matchedCodePointCount + j])) {
// Different code point is found. The given word is not included in the dictionary.
return NOT_A_DICT_POS;
}
}
// All characters are matched.
if (length == getTotalCodePointCount(ptNodeParams)) {
// Terminal position is found.
return ptNodeParams.getHeadPos();
}
if (!ptNodeParams.hasChildren()) {
return NOT_A_DICT_POS;
}
// Advance to the children nodes.
readChildNode(ptNodeParams);
}
// If we already traversed the tree further than the word is long, there means
// there was no match (or we would have found it).
return NOT_A_DICT_POS;
}
// Read node array size and process empty node arrays. Nodes and arrays are counted up in this
// method to avoid an infinite loop.
void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() {

View file

@ -198,6 +198,12 @@ class DynamicPatriciaTrieReadingHelper {
bool traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
TraversingEventListener *const listener);
int getCodePointsAndProbabilityAndReturnCodePointCount(const int maxCodePointCount,
int *const outCodePoints, int *const outUnigramProbability);
int getTerminalPtNodePositionOfWord(const int *const inWord, const int length,
const bool forceLowerCaseSearch);
private:
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);