Rename BigramProperty to NgramProperty.

The remaining work is to change bigram to ngram in order to support
ngram entry counting, dumping, and migration.

Bug: 14425059
Change-Id: Ifba288a1166996d62a5e57698f63537ea0a2a8ee
This commit is contained in:
Keisuke Kuroyanagi 2014-09-29 19:10:39 +09:00
parent 6c7a85cb35
commit 79bb37d499
26 changed files with 96 additions and 101 deletions

View file

@ -403,10 +403,10 @@ static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, j
jsize wordLength = env->GetArrayLength(word); jsize wordLength = env->GetArrayLength(word);
int wordCodePoints[wordLength]; int wordCodePoints[wordLength];
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints); env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
// Use 1 for count to indicate the bigram has inputted. // Use 1 for count to indicate the ngram has inputted.
const BigramProperty bigramProperty(CodePointArrayView(wordCodePoints, wordLength).toVector(), const NgramProperty ngramProperty(CodePointArrayView(wordCodePoints, wordLength).toVector(),
probability, timestamp, 0 /* level */, 1 /* count */); probability, timestamp, 0 /* level */, 1 /* count */);
return dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty); return dictionary->addNgramEntry(&prevWordsInfo, &ngramProperty);
} }
static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz, jlong dict, static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
@ -501,12 +501,12 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
if (word0) { if (word0) {
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId); jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
// Use 1 for count to indicate the bigram has inputted. // Use 1 for count to indicate the bigram has inputted.
const BigramProperty bigramProperty( const NgramProperty ngramProperty(
CodePointArrayView(word1CodePoints, word1Length).toVector(), CodePointArrayView(word1CodePoints, word1Length).toVector(),
bigramProbability, timestamp, 0 /* level */, 1 /* count */); bigramProbability, timestamp, 0 /* level */, 1 /* count */);
const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length,
false /* isBeginningOfSentence */); false /* isBeginningOfSentence */);
dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty); dictionary->addNgramEntry(&prevWordsInfo, &ngramProperty);
} }
if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) { if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) {
return i + 1; return i + 1;
@ -603,6 +603,7 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
} while (token != 0); } while (token != 0);
// Add bigrams. // Add bigrams.
// TODO: Support ngrams.
do { do {
token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount); token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
const WordProperty wordProperty = dictionary->getWordProperty( const WordProperty wordProperty = dictionary->getWordProperty(
@ -617,10 +618,10 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
} }
const PrevWordsInfo prevWordsInfo(wordCodePoints, wordCodePointCount, const PrevWordsInfo prevWordsInfo(wordCodePoints, wordCodePointCount,
wordProperty.getUnigramProperty()->representsBeginningOfSentence()); wordProperty.getUnigramProperty()->representsBeginningOfSentence());
for (const BigramProperty &bigramProperty : *wordProperty.getBigramProperties()) { for (const NgramProperty &ngramProperty : *wordProperty.getNgramProperties()) {
if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&prevWordsInfo, if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&prevWordsInfo,
&bigramProperty)) { &ngramProperty)) {
LogUtils::logToJava(env, "Cannot add bigram to the new dict."); LogUtils::logToJava(env, "Cannot add ngram to the new dict.");
return false; return false;
} }
} }

View file

@ -144,9 +144,9 @@ bool Dictionary::removeUnigramEntry(const CodePointArrayView codePoints) {
} }
bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const BigramProperty *const bigramProperty) { const NgramProperty *const ngramProperty) {
TimeKeeper::setCurrentTime(); TimeKeeper::setCurrentTime();
return mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, bigramProperty); return mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, ngramProperty);
} }
bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,

View file

@ -85,7 +85,7 @@ class Dictionary {
bool removeUnigramEntry(const CodePointArrayView codePoints); bool removeUnigramEntry(const CodePointArrayView codePoints);
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const BigramProperty *const bigramProperty); const NgramProperty *const ngramProperty);
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const CodePointArrayView codePoints); const CodePointArrayView codePoints);

View file

@ -14,8 +14,8 @@
* limitations under the License. * limitations under the License.
*/ */
#ifndef LATINIME_BIGRAM_PROPERTY_H #ifndef LATINIME_NGRAM_PROPERTY_H
#define LATINIME_BIGRAM_PROPERTY_H #define LATINIME_NGRAM_PROPERTY_H
#include <vector> #include <vector>
@ -23,10 +23,9 @@
namespace latinime { namespace latinime {
// TODO: Change to NgramProperty. class NgramProperty {
class BigramProperty {
public: public:
BigramProperty(const std::vector<int> &&targetCodePoints, const int probability, NgramProperty(const std::vector<int> &&targetCodePoints, const int probability,
const int timestamp, const int level, const int count) const int timestamp, const int level, const int count)
: mTargetCodePoints(std::move(targetCodePoints)), mProbability(probability), : mTargetCodePoints(std::move(targetCodePoints)), mProbability(probability),
mTimestamp(timestamp), mLevel(level), mCount(count) {} mTimestamp(timestamp), mLevel(level), mCount(count) {}
@ -53,7 +52,7 @@ class BigramProperty {
private: private:
// Default copy constructor and assign operator are used for using in std::vector. // Default copy constructor and assign operator are used for using in std::vector.
DISALLOW_DEFAULT_CONSTRUCTOR(BigramProperty); DISALLOW_DEFAULT_CONSTRUCTOR(NgramProperty);
// TODO: Make members const. // TODO: Make members const.
std::vector<int> mTargetCodePoints; std::vector<int> mTargetCodePoints;
@ -63,4 +62,4 @@ class BigramProperty {
int mCount; int mCount;
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_WORD_PROPERTY_H #endif // LATINIME_NGRAM_PROPERTY_H

View file

@ -28,7 +28,7 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
MAX_WORD_LENGTH /* maxLength */, mCodePoints.data(), mCodePoints.size(), MAX_WORD_LENGTH /* maxLength */, mCodePoints.data(), mCodePoints.size(),
false /* needsNullTermination */); false /* needsNullTermination */);
jboolean flags[] = {mUnigramProperty.isNotAWord(), mUnigramProperty.isBlacklisted(), jboolean flags[] = {mUnigramProperty.isNotAWord(), mUnigramProperty.isBlacklisted(),
!mBigrams.empty(), mUnigramProperty.hasShortcuts(), !mNgrams.empty(), mUnigramProperty.hasShortcuts(),
mUnigramProperty.representsBeginningOfSentence()}; mUnigramProperty.representsBeginningOfSentence()};
env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags); env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
int probabilityInfo[] = {mUnigramProperty.getProbability(), mUnigramProperty.getTimestamp(), int probabilityInfo[] = {mUnigramProperty.getProbability(), mUnigramProperty.getTimestamp(),
@ -42,8 +42,9 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z"); jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
// Output bigrams. // Output bigrams.
for (const auto &bigramProperty : mBigrams) { // TODO: Support n-gram
const std::vector<int> *const word1CodePoints = bigramProperty.getTargetCodePoints(); for (const auto &ngramProperty : mNgrams) {
const std::vector<int> *const word1CodePoints = ngramProperty.getTargetCodePoints();
jintArray bigramWord1CodePointArray = env->NewIntArray(word1CodePoints->size()); jintArray bigramWord1CodePointArray = env->NewIntArray(word1CodePoints->size());
JniDataUtils::outputCodePoints(env, bigramWord1CodePointArray, 0 /* start */, JniDataUtils::outputCodePoints(env, bigramWord1CodePointArray, 0 /* start */,
word1CodePoints->size(), word1CodePoints->data(), word1CodePoints->size(), word1CodePoints->size(), word1CodePoints->data(), word1CodePoints->size(),
@ -51,9 +52,9 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
env->CallBooleanMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray); env->CallBooleanMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray);
env->DeleteLocalRef(bigramWord1CodePointArray); env->DeleteLocalRef(bigramWord1CodePointArray);
int bigramProbabilityInfo[] = {bigramProperty.getProbability(), int bigramProbabilityInfo[] = {ngramProperty.getProbability(),
bigramProperty.getTimestamp(), bigramProperty.getLevel(), ngramProperty.getTimestamp(), ngramProperty.getLevel(),
bigramProperty.getCount()}; ngramProperty.getCount()};
jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo)); jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo));
env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */, env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */,
NELEMS(bigramProbabilityInfo), bigramProbabilityInfo); NELEMS(bigramProbabilityInfo), bigramProbabilityInfo);

View file

@ -21,7 +21,7 @@
#include "defines.h" #include "defines.h"
#include "jni.h" #include "jni.h"
#include "suggest/core/dictionary/property/bigram_property.h" #include "suggest/core/dictionary/property/ngram_property.h"
#include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/core/dictionary/property/unigram_property.h"
namespace latinime { namespace latinime {
@ -31,12 +31,12 @@ class WordProperty {
public: public:
// Default constructor is used to create an instance that indicates an invalid word. // Default constructor is used to create an instance that indicates an invalid word.
WordProperty() WordProperty()
: mCodePoints(), mUnigramProperty(), mBigrams() {} : mCodePoints(), mUnigramProperty(), mNgrams() {}
WordProperty(const std::vector<int> &&codePoints, const UnigramProperty *const unigramProperty, WordProperty(const std::vector<int> &&codePoints, const UnigramProperty *const unigramProperty,
const std::vector<BigramProperty> *const bigrams) const std::vector<NgramProperty> *const bigrams)
: mCodePoints(std::move(codePoints)), mUnigramProperty(*unigramProperty), : mCodePoints(std::move(codePoints)), mUnigramProperty(*unigramProperty),
mBigrams(*bigrams) {} mNgrams(*bigrams) {}
void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags, void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags,
jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities, jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities,
@ -46,8 +46,8 @@ class WordProperty {
return &mUnigramProperty; return &mUnigramProperty;
} }
const std::vector<BigramProperty> *getBigramProperties() const { const std::vector<NgramProperty> *getNgramProperties() const {
return &mBigrams; return &mNgrams;
} }
private: private:
@ -56,7 +56,7 @@ class WordProperty {
const std::vector<int> mCodePoints; const std::vector<int> mCodePoints;
const UnigramProperty mUnigramProperty; const UnigramProperty mUnigramProperty;
const std::vector<BigramProperty> mBigrams; const std::vector<NgramProperty> mNgrams;
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_WORD_PROPERTY_H #endif // LATINIME_WORD_PROPERTY_H

View file

@ -81,7 +81,7 @@ class DictionaryStructureWithBufferPolicy {
// Returns whether the update was success or not. // Returns whether the update was success or not.
virtual bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, virtual bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const BigramProperty *const bigramProperty) = 0; const NgramProperty *const ngramProperty) = 0;
// Returns whether the update was success or not. // Returns whether the update was success or not.
virtual bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, virtual bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,

View file

@ -24,7 +24,7 @@
#include "suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h"
#include "suggest/core/dictionary/property/bigram_property.h" #include "suggest/core/dictionary/property/ngram_property.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
#include "suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h" #include "suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h"
@ -60,7 +60,7 @@ void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const out
} }
bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId, bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) { const NgramProperty *const ngramProperty, bool *const outAddedNewEntry) {
// 1. The word has no bigrams yet. // 1. The word has no bigrams yet.
// 2. The word has bigrams, and there is the target in the list. // 2. The word has bigrams, and there is the target in the list.
// 3. The word has bigrams, and there is an invalid entry that can be reclaimed. // 3. The word has bigrams, and there is an invalid entry that can be reclaimed.
@ -79,7 +79,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
newTargetTerminalId); newTargetTerminalId);
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry, const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
bigramProperty); ngramProperty);
// Write an entry. // Write an entry.
const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) { if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) {
@ -112,7 +112,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
newTargetTerminalId); newTargetTerminalId);
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom( const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
&newBigramEntry, bigramProperty); &newBigramEntry, ngramProperty);
if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) { if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) {
return false; return false;
} }
@ -138,7 +138,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
const BigramEntry updatedBigramEntry = const BigramEntry updatedBigramEntry =
originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId); originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom( const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
&updatedBigramEntry, bigramProperty); &updatedBigramEntry, ngramProperty);
return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate); return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
} }
@ -264,18 +264,18 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom( const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
const BigramEntry *const originalBigramEntry, const BigramEntry *const originalBigramEntry,
const BigramProperty *const bigramProperty) const { const NgramProperty *const ngramProperty) const {
// TODO: Consolidate historical info and probability. // TODO: Consolidate historical info and probability.
if (mHeaderPolicy->hasHistoricalInfoOfWords()) { if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
const HistoricalInfo historicalInfoForUpdate(bigramProperty->getTimestamp(), const HistoricalInfo historicalInfoForUpdate(ngramProperty->getTimestamp(),
bigramProperty->getLevel(), bigramProperty->getCount()); ngramProperty->getLevel(), ngramProperty->getCount());
const HistoricalInfo updatedHistoricalInfo = const HistoricalInfo updatedHistoricalInfo =
ForgettingCurveUtils::createUpdatedHistoricalInfo( ForgettingCurveUtils::createUpdatedHistoricalInfo(
originalBigramEntry->getHistoricalInfo(), bigramProperty->getProbability(), originalBigramEntry->getHistoricalInfo(), ngramProperty->getProbability(),
&historicalInfoForUpdate, mHeaderPolicy); &historicalInfoForUpdate, mHeaderPolicy);
return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo); return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
} else { } else {
return originalBigramEntry->updateProbabilityAndGetEntry(bigramProperty->getProbability()); return originalBigramEntry->updateProbabilityAndGetEntry(ngramProperty->getProbability());
} }
} }

View file

@ -36,7 +36,7 @@ namespace v402 {
class BigramDictContent; class BigramDictContent;
} // namespace v402 } // namespace v402
} // namespace backward } // namespace backward
class BigramProperty; class NgramProperty;
namespace backward { namespace backward {
namespace v402 { namespace v402 {
} // namespace v402 } // namespace v402
@ -64,7 +64,7 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
} }
bool addNewEntry(const int terminalId, const int newTargetTerminalId, bool addNewEntry(const int terminalId, const int newTargetTerminalId,
const BigramProperty *const bigramProperty, bool *const outAddedNewEntry); const NgramProperty *const ngramProperty, bool *const outAddedNewEntry);
bool removeEntry(const int terminalId, const int targetTerminalId); bool removeEntry(const int terminalId, const int targetTerminalId);
@ -80,7 +80,7 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
int *const outTailEntryPos) const; int *const outTailEntryPos) const;
const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry, const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
const BigramProperty *const bigramProperty) const; const NgramProperty *const ngramProperty) const;
bool updateHasNextFlag(const bool hasNext, const int bigramEntryPos); bool updateHasNextFlag(const bool hasNext, const int bigramEntryPos);

View file

@ -232,8 +232,8 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
} }
bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId, bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) { const NgramProperty *const ngramProperty, bool *const outAddedNewEntry) {
if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, bigramProperty, outAddedNewEntry)) { if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, ngramProperty, outAddedNewEntry)) {
AKLOGE("Cannot add new bigram entry. prevWordId: %d, wordId: %d", AKLOGE("Cannot add new bigram entry. prevWordId: %d, wordId: %d",
prevWordIds[0], wordId); prevWordIds[0], wordId);
return false; return false;

View file

@ -94,7 +94,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos); const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId, virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
const BigramProperty *const bigramProperty, bool *const outAddedNewEntry); const NgramProperty *const ngramProperty, bool *const outAddedNewEntry);
virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId); virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId);

View file

@ -30,7 +30,7 @@
#include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/multi_bigram_map.h" #include "suggest/core/dictionary/multi_bigram_map.h"
#include "suggest/core/dictionary/ngram_listener.h" #include "suggest/core/dictionary/ngram_listener.h"
#include "suggest/core/dictionary/property/bigram_property.h" #include "suggest/core/dictionary/property/ngram_property.h"
#include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/core/dictionary/property/word_property.h" #include "suggest/core/dictionary/property/word_property.h"
#include "suggest/core/session/prev_words_info.h" #include "suggest/core/session/prev_words_info.h"
@ -312,7 +312,7 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCod
} }
bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const BigramProperty *const bigramProperty) { const NgramProperty *const ngramProperty) {
if (!mBuffers->isUpdatable()) { if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
return false; return false;
@ -326,9 +326,9 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary."); AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary.");
return false; return false;
} }
if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) { if (ngramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
AKLOGE("The word is too long to insert the ngram to the dictionary. " AKLOGE("The word is too long to insert the ngram to the dictionary. "
"length: %zd", bigramProperty->getTargetCodePoints()->size()); "length: %zd", ngramProperty->getTargetCodePoints()->size());
return false; return false;
} }
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray; WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
@ -356,7 +356,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
} }
} }
const int wordPos = getTerminalPtNodePosFromWordId(getWordId( const int wordPos = getTerminalPtNodePosFromWordId(getWordId(
CodePointArrayView(*bigramProperty->getTargetCodePoints()), CodePointArrayView(*ngramProperty->getTargetCodePoints()),
false /* forceLowerCaseSearch */)); false /* forceLowerCaseSearch */));
if (wordPos == NOT_A_DICT_POS) { if (wordPos == NOT_A_DICT_POS) {
return false; return false;
@ -364,7 +364,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
bool addedNewBigram = false; bool addedNewBigram = false;
const int prevWordPtNodePos = getTerminalPtNodePosFromWordId(prevWordIds[0]); const int prevWordPtNodePos = getTerminalPtNodePosFromWordId(prevWordIds[0]);
if (mUpdatingHelper.addNgramEntry(PtNodePosArrayView::singleElementView(&prevWordPtNodePos), if (mUpdatingHelper.addNgramEntry(PtNodePosArrayView::singleElementView(&prevWordPtNodePos),
wordPos, bigramProperty, &addedNewBigram)) { wordPos, ngramProperty, &addedNewBigram)) {
if (addedNewBigram) { if (addedNewBigram) {
mBigramCount++; mBigramCount++;
} }
@ -499,7 +499,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
ptNodeParams.getTerminalId()); ptNodeParams.getTerminalId());
const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo(); const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
// Fetch bigram information. // Fetch bigram information.
std::vector<BigramProperty> bigrams; std::vector<NgramProperty> ngrams;
const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos); const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
if (bigramListPos != NOT_A_DICT_POS) { if (bigramListPos != NOT_A_DICT_POS) {
int bigramWord1CodePoints[MAX_WORD_LENGTH]; int bigramWord1CodePoints[MAX_WORD_LENGTH];
@ -526,7 +526,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
ForgettingCurveUtils::decodeProbability( ForgettingCurveUtils::decodeProbability(
bigramEntry.getHistoricalInfo(), mHeaderPolicy) : bigramEntry.getHistoricalInfo(), mHeaderPolicy) :
bigramEntry.getProbability(); bigramEntry.getProbability();
bigrams.emplace_back( ngrams.emplace_back(
CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(), CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(),
probability, historicalInfo->getTimeStamp(), historicalInfo->getLevel(), probability, historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
historicalInfo->getCount()); historicalInfo->getCount());
@ -554,7 +554,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
historicalInfo->getTimeStamp(), historicalInfo->getLevel(), historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
historicalInfo->getCount(), &shortcuts); historicalInfo->getCount(), &shortcuts);
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &bigrams); return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
} }
int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,

View file

@ -59,6 +59,7 @@ namespace backward {
namespace v402 { namespace v402 {
// Word id = Position of a PtNode that represents the word. // Word id = Position of a PtNode that represents the word.
// Max supported n-gram is bigram.
class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public: public:
Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers) Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers)
@ -112,7 +113,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
bool removeUnigramEntry(const CodePointArrayView wordCodePoints); bool removeUnigramEntry(const CodePointArrayView wordCodePoints);
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const BigramProperty *const bigramProperty); const NgramProperty *const ngramProperty);
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const CodePointArrayView wordCodePoints); const CodePointArrayView wordCodePoints);

View file

@ -76,6 +76,7 @@ class DynamicPtGcEventListeners {
int mValidUnigramCount; int mValidUnigramCount;
}; };
// TODO: Remove when we stop supporting v402 format.
// Updates all bigram entries that are held by valid PtNodes. This removes useless bigram // Updates all bigram entries that are held by valid PtNodes. This removes useless bigram
// entries. // entries.
class TraversePolicyToUpdateBigramProbability class TraversePolicyToUpdateBigramProbability

View file

@ -82,7 +82,7 @@ bool DynamicPtUpdatingHelper::addUnigramWord(DynamicPtReadingHelper *const readi
} }
bool DynamicPtUpdatingHelper::addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, bool DynamicPtUpdatingHelper::addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos,
const int wordPos, const BigramProperty *const bigramProperty, const int wordPos, const NgramProperty *const ngramProperty,
bool *const outAddedNewEntry) { bool *const outAddedNewEntry) {
if (prevWordsPtNodePos.empty()) { if (prevWordsPtNodePos.empty()) {
return false; return false;
@ -96,7 +96,7 @@ bool DynamicPtUpdatingHelper::addNgramEntry(const PtNodePosArrayView prevWordsPt
const WordIdArrayView prevWordIds(prevWordTerminalIds, prevWordsPtNodePos.size()); const WordIdArrayView prevWordIds(prevWordTerminalIds, prevWordsPtNodePos.size());
const int wordId = const int wordId =
mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos).getTerminalId(); mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos).getTerminalId();
return mPtNodeWriter->addNgramEntry(prevWordIds, wordId, bigramProperty, outAddedNewEntry); return mPtNodeWriter->addNgramEntry(prevWordIds, wordId, ngramProperty, outAddedNewEntry);
} }
bool DynamicPtUpdatingHelper::removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, bool DynamicPtUpdatingHelper::removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos,

View file

@ -23,7 +23,7 @@
namespace latinime { namespace latinime {
class BigramProperty; class NgramProperty;
class BufferWithExtendableBuffer; class BufferWithExtendableBuffer;
class DynamicPtReadingHelper; class DynamicPtReadingHelper;
class PtNodeReader; class PtNodeReader;
@ -46,7 +46,7 @@ class DynamicPtUpdatingHelper {
// TODO: Remove after stopping supporting v402. // TODO: Remove after stopping supporting v402.
// Add an n-gram entry. // Add an n-gram entry.
bool addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos, bool addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos,
const BigramProperty *const bigramProperty, bool *const outAddedNewEntry); const NgramProperty *const ngramProperty, bool *const outAddedNewEntry);
// TODO: Remove after stopping supporting v402. // TODO: Remove after stopping supporting v402.
// Remove an n-gram entry. // Remove an n-gram entry.

View file

@ -25,7 +25,7 @@
namespace latinime { namespace latinime {
class BigramProperty; class NgramProperty;
class UnigramProperty; class UnigramProperty;
// Interface class used to write PtNode information. // Interface class used to write PtNode information.
@ -72,7 +72,7 @@ class PtNodeWriter {
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) = 0; const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) = 0;
virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId, virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) = 0; const NgramProperty *const ngramProperty, bool *const outAddedNewEntry) = 0;
virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId) = 0; virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId) = 0;

View file

@ -436,7 +436,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty(
const PtNodeParams ptNodeParams = const PtNodeParams ptNodeParams =
mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos); mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
// Fetch bigram information. // Fetch bigram information.
std::vector<BigramProperty> bigrams; std::vector<NgramProperty> ngrams;
const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos); const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
int bigramWord1CodePoints[MAX_WORD_LENGTH]; int bigramWord1CodePoints[MAX_WORD_LENGTH];
BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramListPos); BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramListPos);
@ -450,7 +450,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty(
getWordIdFromTerminalPtNodePos(bigramsIt.getBigramPos()), MAX_WORD_LENGTH, getWordIdFromTerminalPtNodePos(bigramsIt.getBigramPos()), MAX_WORD_LENGTH,
bigramWord1CodePoints, &word1Probability); bigramWord1CodePoints, &word1Probability);
const int probability = getProbability(word1Probability, bigramsIt.getProbability()); const int probability = getProbability(word1Probability, bigramsIt.getProbability());
bigrams.emplace_back( ngrams.emplace_back(
CodePointArrayView(bigramWord1CodePoints, word1CodePointCount).toVector(), CodePointArrayView(bigramWord1CodePoints, word1CodePointCount).toVector(),
probability, NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */); probability, NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */);
} }
@ -478,7 +478,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty(
const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(), const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts); NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &bigrams); return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
} }
int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,

View file

@ -38,6 +38,7 @@ class DicNode;
class DicNodeVector; class DicNodeVector;
// Word id = Position of a PtNode that represents the word. // Word id = Position of a PtNode that represents the word.
// Max supported n-gram is bigram.
class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public: public:
PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer) PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer)
@ -93,7 +94,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
} }
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const BigramProperty *const bigramProperty) { const NgramProperty *const ngramProperty) {
// This method should not be called for non-updatable dictionary. // This method should not be called for non-updatable dictionary.
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
return false; return false;

View file

@ -21,7 +21,7 @@
#include <cstdint> #include <cstdint>
#include "defines.h" #include "defines.h"
#include "suggest/core/dictionary/property/bigram_property.h" #include "suggest/core/dictionary/property/ngram_property.h"
#include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/historical_info.h" #include "suggest/policyimpl/dictionary/utils/historical_info.h"
@ -56,12 +56,12 @@ class ProbabilityEntry {
mHistoricalInfo(unigramProperty->getTimestamp(), unigramProperty->getLevel(), mHistoricalInfo(unigramProperty->getTimestamp(), unigramProperty->getLevel(),
unigramProperty->getCount()) {} unigramProperty->getCount()) {}
// Create from bigram property. // Create from ngram property.
// TODO: Set flags. // TODO: Set flags.
ProbabilityEntry(const BigramProperty *const bigramProperty) ProbabilityEntry(const NgramProperty *const ngramProperty)
: mFlags(0), mProbability(bigramProperty->getProbability()), : mFlags(0), mProbability(ngramProperty->getProbability()),
mHistoricalInfo(bigramProperty->getTimestamp(), bigramProperty->getLevel(), mHistoricalInfo(ngramProperty->getTimestamp(), ngramProperty->getLevel(),
bigramProperty->getCount()) {} ngramProperty->getCount()) {}
bool isValid() const { bool isValid() const {
return (mFlags & Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY) == 0; return (mFlags & Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY) == 0;

View file

@ -61,6 +61,7 @@ bool Ver4PatriciaTrieNodeWriter::markPtNodeAsDeleted(
} }
} }
// TODO: Quit using bigramLinkedNodePos.
bool Ver4PatriciaTrieNodeWriter::markPtNodeAsMoved( bool Ver4PatriciaTrieNodeWriter::markPtNodeAsMoved(
const PtNodeParams *const toBeUpdatedPtNodeParams, const PtNodeParams *const toBeUpdatedPtNodeParams,
const int movedPos, const int bigramLinkedNodePos) { const int movedPos, const int bigramLinkedNodePos) {
@ -208,15 +209,16 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
terminalId, &probabilityEntryToWrite); terminalId, &probabilityEntryToWrite);
} }
// TODO: Support counting ngram entries.
bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId, bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) { const NgramProperty *const ngramProperty, bool *const outAddedNewBigram) {
LanguageModelDictContent *const languageModelDictContent = LanguageModelDictContent *const languageModelDictContent =
mBuffers->getMutableLanguageModelDictContent(); mBuffers->getMutableLanguageModelDictContent();
const ProbabilityEntry probabilityEntry = const ProbabilityEntry probabilityEntry =
languageModelDictContent->getNgramProbabilityEntry(prevWordIds, wordId); languageModelDictContent->getNgramProbabilityEntry(prevWordIds, wordId);
const ProbabilityEntry probabilityEntryOfBigramProperty(bigramProperty); const ProbabilityEntry probabilityEntryOfNgramProperty(ngramProperty);
const ProbabilityEntry updatedProbabilityEntry = createUpdatedEntryFrom( const ProbabilityEntry updatedProbabilityEntry = createUpdatedEntryFrom(
&probabilityEntry, &probabilityEntryOfBigramProperty); &probabilityEntry, &probabilityEntryOfNgramProperty);
if (!languageModelDictContent->setNgramProbabilityEntry( if (!languageModelDictContent->setNgramProbabilityEntry(
prevWordIds, wordId, &updatedProbabilityEntry)) { prevWordIds, wordId, &updatedProbabilityEntry)) {
AKLOGE("Cannot add new ngram entry. prevWordId[0]: %d, prevWordId.size(): %zd, wordId: %d", AKLOGE("Cannot add new ngram entry. prevWordId[0]: %d, prevWordId.size(): %zd, wordId: %d",

View file

@ -74,7 +74,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos); const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId, virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
const BigramProperty *const bigramProperty, bool *const outAddedNewEntry); const NgramProperty *const ngramProperty, bool *const outAddedNewEntry);
virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId); virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId);

View file

@ -23,7 +23,7 @@
#include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/multi_bigram_map.h" #include "suggest/core/dictionary/multi_bigram_map.h"
#include "suggest/core/dictionary/ngram_listener.h" #include "suggest/core/dictionary/ngram_listener.h"
#include "suggest/core/dictionary/property/bigram_property.h" #include "suggest/core/dictionary/property/ngram_property.h"
#include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/core/dictionary/property/word_property.h" #include "suggest/core/dictionary/property/word_property.h"
#include "suggest/core/session/prev_words_info.h" #include "suggest/core/session/prev_words_info.h"
@ -266,7 +266,7 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCod
} }
bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const BigramProperty *const bigramProperty) { const NgramProperty *const ngramProperty) {
if (!mBuffers->isUpdatable()) { if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
return false; return false;
@ -280,9 +280,9 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary."); AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary.");
return false; return false;
} }
if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) { if (ngramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
AKLOGE("The word is too long to insert the ngram to the dictionary. " AKLOGE("The word is too long to insert the ngram to the dictionary. "
"length: %zd", bigramProperty->getTargetCodePoints()->size()); "length: %zd", ngramProperty->getTargetCodePoints()->size());
return false; return false;
} }
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray; WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
@ -311,13 +311,13 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
// Refresh word ids. // Refresh word ids.
prevWordsInfo->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */); prevWordsInfo->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
} }
const int wordId = getWordId(CodePointArrayView(*bigramProperty->getTargetCodePoints()), const int wordId = getWordId(CodePointArrayView(*ngramProperty->getTargetCodePoints()),
false /* forceLowerCaseSearch */); false /* forceLowerCaseSearch */);
if (wordId == NOT_A_WORD_ID) { if (wordId == NOT_A_WORD_ID) {
return false; return false;
} }
bool addedNewEntry = false; bool addedNewEntry = false;
if (mNodeWriter.addNgramEntry(prevWordIds, wordId, bigramProperty, &addedNewEntry)) { if (mNodeWriter.addNgramEntry(prevWordIds, wordId, ngramProperty, &addedNewEntry)) {
if (addedNewEntry) { if (addedNewEntry) {
mBigramCount++; mBigramCount++;
} }
@ -451,7 +451,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo(); const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
// Fetch bigram information. // Fetch bigram information.
// TODO: Support n-gram. // TODO: Support n-gram.
std::vector<BigramProperty> bigrams; std::vector<NgramProperty> ngrams;
const WordIdArrayView prevWordIds = WordIdArrayView::singleElementView(&wordId); const WordIdArrayView prevWordIds = WordIdArrayView::singleElementView(&wordId);
int bigramWord1CodePoints[MAX_WORD_LENGTH]; int bigramWord1CodePoints[MAX_WORD_LENGTH];
for (const auto entry : mBuffers->getLanguageModelDictContent()->getProbabilityEntries( for (const auto entry : mBuffers->getLanguageModelDictContent()->getProbabilityEntries(
@ -463,7 +463,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
const int probability = probabilityEntry.hasHistoricalInfo() ? const int probability = probabilityEntry.hasHistoricalInfo() ?
ForgettingCurveUtils::decodeProbability(historicalInfo, mHeaderPolicy) : ForgettingCurveUtils::decodeProbability(historicalInfo, mHeaderPolicy) :
probabilityEntry.getProbability(); probabilityEntry.getProbability();
bigrams.emplace_back(CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(), ngrams.emplace_back(CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(),
probability, historicalInfo->getTimeStamp(), historicalInfo->getLevel(), probability, historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
historicalInfo->getCount()); historicalInfo->getCount());
} }
@ -489,7 +489,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
probabilityEntry.isNotAWord(), probabilityEntry.isBlacklisted(), probabilityEntry.isNotAWord(), probabilityEntry.isBlacklisted(),
probabilityEntry.getProbability(), historicalInfo->getTimeStamp(), probabilityEntry.getProbability(), historicalInfo->getTimeStamp(),
historicalInfo->getLevel(), historicalInfo->getCount(), &shortcuts); historicalInfo->getLevel(), historicalInfo->getCount(), &shortcuts);
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &bigrams); return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
} }
int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,

View file

@ -37,6 +37,7 @@ namespace latinime {
class DicNode; class DicNode;
class DicNodeVector; class DicNodeVector;
// TODO: Support counting ngram entries.
// Word id = Artificial id that is stored in the PtNode looked up by the word. // Word id = Artificial id that is stored in the PtNode looked up by the word.
class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public: public:
@ -92,7 +93,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
bool removeUnigramEntry(const CodePointArrayView wordCodePoints); bool removeUnigramEntry(const CodePointArrayView wordCodePoints);
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const BigramProperty *const bigramProperty); const NgramProperty *const ngramProperty);
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const CodePointArrayView wordCodePoints); const CodePointArrayView wordCodePoints);

View file

@ -114,14 +114,6 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
return false; return false;
} }
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability
traversePolicyToUpdateBigramProbability(&ptNodeWriter);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
&traversePolicyToUpdateBigramProbability)) {
return false;
}
// Mapping from positions in mBuffer to positions in bufferToWrite. // Mapping from positions in mBuffer to positions in bufferToWrite.
PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap; PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);

View file

@ -33,6 +33,7 @@ class Ver4PatriciaTrieWritingHelper {
Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers) Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers)
: mBuffers(buffers) {} : mBuffers(buffers) {}
// TODO: Support counting ngram entries.
bool writeToDictFile(const char *const dictDirPath, const int unigramCount, bool writeToDictFile(const char *const dictDirPath, const int unigramCount,
const int bigramCount) const; const int bigramCount) const;
@ -70,11 +71,6 @@ class Ver4PatriciaTrieWritingHelper {
Ver4DictBuffers *const buffersToWrite, int *const outUnigramCount, Ver4DictBuffers *const buffersToWrite, int *const outUnigramCount,
int *const outBigramCount); int *const outBigramCount);
bool truncateUnigrams(const Ver4PatriciaTrieNodeReader *const ptNodeReader,
Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount);
bool truncateBigrams(const int maxBigramCount);
Ver4DictBuffers *const mBuffers; Ver4DictBuffers *const mBuffers;
}; };
} // namespace latinime } // namespace latinime