LatinIME/native/jni/src/dictionary/property/ngram_context.h

79 lines
2.9 KiB
C++

/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_NGRAM_CONTEXT_H
#define LATINIME_NGRAM_CONTEXT_H
#include <array>
#include "defines.h"
#include "utils/int_array_view.h"
namespace latinime {
class DictionaryStructureWithBufferPolicy;
class NgramContext {
public:
// No prev word information.
NgramContext();
// Copy constructor to use this class with std::vector and use this class as a return value.
NgramContext(const NgramContext &ngramContext);
// Construct from previous words.
NgramContext(const int prevWordCodePoints[][MAX_WORD_LENGTH],
const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence,
const size_t prevWordCount);
// Construct from a previous word.
NgramContext(const int *const prevWordCodePoints, const int prevWordCodePointCount,
const bool isBeginningOfSentence);
size_t getPrevWordCount() const {
return mPrevWordCount;
}
bool isValid() const;
template<size_t N>
const WordIdArrayView getPrevWordIds(
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
WordIdArray<N> *const prevWordIdBuffer, const bool tryLowerCaseSearch) const {
for (size_t i = 0; i < std::min(mPrevWordCount, N); ++i) {
prevWordIdBuffer->at(i) = getWordId(dictStructurePolicy, mPrevWordCodePoints[i],
mPrevWordCodePointCount[i], mIsBeginningOfSentence[i], tryLowerCaseSearch);
}
return WordIdArrayView::fromArray(*prevWordIdBuffer).limit(mPrevWordCount);
}
// n is 1-indexed.
const CodePointArrayView getNthPrevWordCodePoints(const size_t n) const;
// n is 1-indexed.
bool isNthPrevWordBeginningOfSentence(const size_t n) const;
private:
DISALLOW_ASSIGNMENT_OPERATOR(NgramContext);
static int getWordId(const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
const int *const wordCodePoints, const int wordCodePointCount,
const bool isBeginningOfSentence, const bool tryLowerCaseSearch);
void clear();
const size_t mPrevWordCount;
int mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
int mPrevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
};
} // namespace latinime
#endif // LATINIME_NGRAM_CONTEXT_H