LatinIME/native/jni/src/dictionary/property/ngram_context.h

/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LATINIME_NGRAM_CONTEXT_H
#define LATINIME_NGRAM_CONTEXT_H

#include <array>

#include "defines.h"
#include "utils/int_array_view.h"

namespace latinime {

class DictionaryStructureWithBufferPolicy;

// Holds the code points, lengths and beginning-of-sentence flags of the words that precede
// the current word, for at most MAX_PREV_WORD_COUNT_FOR_N_GRAM previous words.
class NgramContext {
 public:
    // Creates a context that carries no previous word information.
    NgramContext();
    // Copying is allowed so that instances can be kept in std::vector and returned by value.
    NgramContext(const NgramContext &ngramContext);
    // Creates a context from several previous words.
    NgramContext(const int prevWordCodePoints[][MAX_WORD_LENGTH],
            const int *const prevWordCodePointCount,
            const bool *const isBeginningOfSentence,
            const size_t prevWordCount);
    // Creates a context from one previous word.
    NgramContext(const int *const prevWordCodePoints,
            const int prevWordCodePointCount,
            const bool isBeginningOfSentence);

    // Returns the number of previous words stored in this context.
    size_t getPrevWordCount() const { return mPrevWordCount; }

    bool isValid() const;

    // Resolves the stored previous words to word ids using getWordId() and writes them into
    // prevWordIdBuffer. At most N ids are written, where N is the capacity of the buffer.
    // The returned view is built from the buffer and then limited to mPrevWordCount.
    template<size_t N>
    const WordIdArrayView getPrevWordIds(
            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
            WordIdArray<N> *const prevWordIdBuffer, const bool tryLowerCaseSearch) const {
        // Never look up more words than the buffer can hold.
        const size_t lookupCount = std::min(mPrevWordCount, N);
        for (size_t wordIndex = 0; wordIndex < lookupCount; ++wordIndex) {
            const int wordId = getWordId(dictStructurePolicy,
                    mPrevWordCodePoints[wordIndex], mPrevWordCodePointCount[wordIndex],
                    mIsBeginningOfSentence[wordIndex], tryLowerCaseSearch);
            prevWordIdBuffer->at(wordIndex) = wordId;
        }
        const WordIdArrayView wholeBufferView = WordIdArrayView::fromArray(*prevWordIdBuffer);
        return wholeBufferView.limit(mPrevWordCount);
    }

    // Code points of the n-th previous word; n starts from 1.
    const CodePointArrayView getNthPrevWordCodePoints(const size_t n) const;
    // Beginning-of-sentence flag of the n-th previous word; n starts from 1.
    bool isNthPrevWordBeginningOfSentence(const size_t n) const;

 private:
    DISALLOW_ASSIGNMENT_OPERATOR(NgramContext);

    // Helper used by getPrevWordIds() to obtain the id for a single word.
    static int getWordId(
            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
            const int *const wordCodePoints, const int wordCodePointCount,
            const bool isBeginningOfSentence, const bool tryLowerCaseSearch);
    void clear();

    // Number of previous words held in the arrays below.
    const size_t mPrevWordCount;
    // Per-word storage; the first index selects the previous word.
    int mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
    int mPrevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
    bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
};
} // namespace latinime
#endif // LATINIME_NGRAM_CONTEXT_H
Refactoring: Move prev word information into PrevWordsInfo. Bug: 14119293 Bug: 14425059 Change-Id: I5a5f81c6b189e3ad1da093343a3121463f87c21c 2014-05-15 09:39:23 +00:00			`/*`
			`* Copyright (C) 2014 The Android Open Source Project`
			`*`
			`* Licensed under the Apache License, Version 2.0 (the "License");`
			`* you may not use this file except in compliance with the License.`
			`* You may obtain a copy of the License at`
			`*`
			`* http://www.apache.org/licenses/LICENSE-2.0`
			`*`
			`* Unless required by applicable law or agreed to in writing, software`
			`* distributed under the License is distributed on an "AS IS" BASIS,`
			`* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`* See the License for the specific language governing permissions and`
			`* limitations under the License.`
			`*/`

Rename PrevWordsInfo to NgramContext. Bug: 14425059 Change-Id: I30703fc80e9450d4e2dbfec965e7f9f4468f6a11 2014-10-08 03:05:13 +00:00			`#ifndef LATINIME_NGRAM_CONTEXT_H`
			`#define LATINIME_NGRAM_CONTEXT_H`
Refactoring: Move prev word information into PrevWordsInfo. Bug: 14119293 Bug: 14425059 Change-Id: I5a5f81c6b189e3ad1da093343a3121463f87c21c 2014-05-15 09:39:23 +00:00
Use passed previous word count in PrevWordsInfo. Bug: 14425059 Change-Id: I04007bdacf0176a05be7a27ef1c20c5b851d8bed 2014-09-14 08:29:38 +00:00			`#include <array>`

Refactoring: Move prev word information into PrevWordsInfo. Bug: 14119293 Bug: 14425059 Change-Id: I5a5f81c6b189e3ad1da093343a3121463f87c21c 2014-05-15 09:39:23 +00:00			`#include "defines.h"`
Use IntArrayView for dictionary structure policy. Change-Id: I0dc94908259d70d5085ff22abf422d90affb1452 2014-08-27 11:28:43 +00:00			`#include "utils/int_array_view.h"`
Refactoring: Move prev word information into PrevWordsInfo. Bug: 14119293 Bug: 14425059 Change-Id: I5a5f81c6b189e3ad1da093343a3121463f87c21c 2014-05-15 09:39:23 +00:00
			`namespace latinime {`

Create .cpp file for NgramContext. Bug: 14425059 Change-Id: Ie950878817b9c80cc9c970e1a84880c9b9ab228a 2014-10-21 08:04:56 +00:00			`class DictionaryStructureWithBufferPolicy;`

Rename PrevWordsInfo to NgramContext. Bug: 14425059 Change-Id: I30703fc80e9450d4e2dbfec965e7f9f4468f6a11 2014-10-08 03:05:13 +00:00			`class NgramContext {`
Refactoring: Move prev word information into PrevWordsInfo. Bug: 14119293 Bug: 14425059 Change-Id: I5a5f81c6b189e3ad1da093343a3121463f87c21c 2014-05-15 09:39:23 +00:00			`public:`
			`// No prev word information.`
Create .cpp file for NgramContext. Bug: 14425059 Change-Id: Ie950878817b9c80cc9c970e1a84880c9b9ab228a 2014-10-21 08:04:56 +00:00			`NgramContext();`
			`// Copy constructor to use this class with std::vector and use this class as a return value.`
			`NgramContext(const NgramContext &ngramContext);`
Extend jni method to use multiple previous words. Bug: 14425059 Change-Id: I8150e8812ee32f8c713315a234b52e5b3a79d7a0 2014-06-26 09:47:25 +00:00			`// Construct from previous words.`
Rename PrevWordsInfo to NgramContext. Bug: 14425059 Change-Id: I30703fc80e9450d4e2dbfec965e7f9f4468f6a11 2014-10-08 03:05:13 +00:00			`NgramContext(const int prevWordCodePoints[][MAX_WORD_LENGTH],`
Extend jni method to use multiple previous words. Bug: 14425059 Change-Id: I8150e8812ee32f8c713315a234b52e5b3a79d7a0 2014-06-26 09:47:25 +00:00			`const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence,`
Create .cpp file for NgramContext. Bug: 14425059 Change-Id: Ie950878817b9c80cc9c970e1a84880c9b9ab228a 2014-10-21 08:04:56 +00:00			`const size_t prevWordCount);`
Extend jni method to use multiple previous words. Bug: 14425059 Change-Id: I8150e8812ee32f8c713315a234b52e5b3a79d7a0 2014-06-26 09:47:25 +00:00			`// Construct from a previous word.`
Rename PrevWordsInfo to NgramContext. Bug: 14425059 Change-Id: I30703fc80e9450d4e2dbfec965e7f9f4468f6a11 2014-10-08 03:05:13 +00:00			`NgramContext(const int *const prevWordCodePoints, const int prevWordCodePointCount,`
Create .cpp file for NgramContext. Bug: 14425059 Change-Id: Ie950878817b9c80cc9c970e1a84880c9b9ab228a 2014-10-21 08:04:56 +00:00			`const bool isBeginningOfSentence);`
Move prev word related logic to PrevWordsInfo. Bug: 14119293 Bug: 14425059 Change-Id: I1bbd7ab4ace2c475f27bc468cb7b4d67e1ae2f9f 2014-05-16 13:31:45 +00:00
Implement updateCounter() by using existing entry adding methods. Bug: 14425059 Change-Id: I0b6cb80e1fb8f738e9c7d9e80fbc0c479546b879 2014-10-01 10:59:39 +00:00			`size_t getPrevWordCount() const {`
			`return mPrevWordCount;`
			`}`
Create .cpp file for NgramContext. Bug: 14425059 Change-Id: Ie950878817b9c80cc9c970e1a84880c9b9ab228a 2014-10-21 08:04:56 +00:00			`bool isValid() const;`
Use PrevWordsInfo to add/remove n(bi)-gram in native code. Bug: 14119293 Bug: 14425059 Change-Id: I4b9a46bfd670b35195418eaee51456d44fb91b6d 2014-05-21 09:30:34 +00:00
Use passed previous word count in PrevWordsInfo. Bug: 14425059 Change-Id: I04007bdacf0176a05be7a27ef1c20c5b851d8bed 2014-09-14 08:29:38 +00:00			`template<size_t N>`
			`const WordIdArrayView getPrevWordIds(`
			`const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,`
Create .cpp file for NgramContext. Bug: 14425059 Change-Id: Ie950878817b9c80cc9c970e1a84880c9b9ab228a 2014-10-21 08:04:56 +00:00			`WordIdArray<N> *const prevWordIdBuffer, const bool tryLowerCaseSearch) const {`
Use passed previous word count in PrevWordsInfo. Bug: 14425059 Change-Id: I04007bdacf0176a05be7a27ef1c20c5b851d8bed 2014-09-14 08:29:38 +00:00			`for (size_t i = 0; i < std::min(mPrevWordCount, N); ++i) {`
Create .cpp file for NgramContext. Bug: 14425059 Change-Id: Ie950878817b9c80cc9c970e1a84880c9b9ab228a 2014-10-21 08:04:56 +00:00			`prevWordIdBuffer->at(i) = getWordId(dictStructurePolicy, mPrevWordCodePoints[i],`
			`mPrevWordCodePointCount[i], mIsBeginningOfSentence[i], tryLowerCaseSearch);`
Move prev word related logic to PrevWordsInfo. Bug: 14119293 Bug: 14425059 Change-Id: I1bbd7ab4ace2c475f27bc468cb7b4d67e1ae2f9f 2014-05-16 13:31:45 +00:00			`}`
Use passed previous word count in PrevWordsInfo. Bug: 14425059 Change-Id: I04007bdacf0176a05be7a27ef1c20c5b851d8bed 2014-09-14 08:29:38 +00:00			`return WordIdArrayView::fromArray(*prevWordIdBuffer).limit(mPrevWordCount);`
Refactoring: Move prev word information into PrevWordsInfo. Bug: 14119293 Bug: 14425059 Change-Id: I5a5f81c6b189e3ad1da093343a3121463f87c21c 2014-05-15 09:39:23 +00:00			`}`

Use PrevWordsInfo to add/remove n(bi)-gram in native code. Bug: 14119293 Bug: 14425059 Change-Id: I4b9a46bfd670b35195418eaee51456d44fb91b6d 2014-05-21 09:30:34 +00:00			`// n is 1-indexed.`
Create .cpp file for NgramContext. Bug: 14425059 Change-Id: Ie950878817b9c80cc9c970e1a84880c9b9ab228a 2014-10-21 08:04:56 +00:00			`const CodePointArrayView getNthPrevWordCodePoints(const size_t n) const;`
Enable Beginning-of-Sentence prediction for contextual dict. Bug: 14161647 Bug: 14119293 Change-Id: I0c00f13966db88e4de85e245e7bced43c9d474b2 2014-06-12 03:26:18 +00:00			`// n is 1-indexed.`
Create .cpp file for NgramContext. Bug: 14425059 Change-Id: Ie950878817b9c80cc9c970e1a84880c9b9ab228a 2014-10-21 08:04:56 +00:00			`bool isNthPrevWordBeginningOfSentence(const size_t n) const;`
Enable Beginning-of-Sentence prediction for contextual dict. Bug: 14161647 Bug: 14119293 Change-Id: I0c00f13966db88e4de85e245e7bced43c9d474b2 2014-06-12 03:26:18 +00:00
Refactoring: Move prev word information into PrevWordsInfo. Bug: 14119293 Bug: 14425059 Change-Id: I5a5f81c6b189e3ad1da093343a3121463f87c21c 2014-05-15 09:39:23 +00:00			`private:`
Rename PrevWordsInfo to NgramContext. Bug: 14425059 Change-Id: I30703fc80e9450d4e2dbfec965e7f9f4468f6a11 2014-10-08 03:05:13 +00:00			`DISALLOW_ASSIGNMENT_OPERATOR(NgramContext);`
Refactoring: Move prev word information into PrevWordsInfo. Bug: 14119293 Bug: 14425059 Change-Id: I5a5f81c6b189e3ad1da093343a3121463f87c21c 2014-05-15 09:39:23 +00:00
Use word id for methods related to n-grams. Bug: 14425059 Change-Id: I81e5d3793527776d3c9faa5594005ddbd4a71354 2014-09-03 07:32:43 +00:00			`static int getWordId(const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,`
Move prev word related logic to PrevWordsInfo. Bug: 14119293 Bug: 14425059 Change-Id: I1bbd7ab4ace2c475f27bc468cb7b4d67e1ae2f9f 2014-05-16 13:31:45 +00:00			`const int *const wordCodePoints, const int wordCodePointCount,`
Create .cpp file for NgramContext. Bug: 14425059 Change-Id: Ie950878817b9c80cc9c970e1a84880c9b9ab228a 2014-10-21 08:04:56 +00:00			`const bool isBeginningOfSentence, const bool tryLowerCaseSearch);`
			`void clear();`
Introduce MAX_PREV_WORD_COUNT_FOR_N_GRAM. Bug: 14425059 Change-Id: I587f90df026a14359d2ee452040bbfaf02c1ae51 2014-05-15 09:45:49 +00:00
Use passed previous word count in PrevWordsInfo. Bug: 14425059 Change-Id: I04007bdacf0176a05be7a27ef1c20c5b851d8bed 2014-09-14 08:29:38 +00:00			`const size_t mPrevWordCount;`
Extend jni method to use multiple previous words. Bug: 14425059 Change-Id: I8150e8812ee32f8c713315a234b52e5b3a79d7a0 2014-06-26 09:47:25 +00:00			`int mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];`
Introduce MAX_PREV_WORD_COUNT_FOR_N_GRAM. Bug: 14425059 Change-Id: I587f90df026a14359d2ee452040bbfaf02c1ae51 2014-05-15 09:45:49 +00:00			`int mPrevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];`
			`bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];`
Refactoring: Move prev word information into PrevWordsInfo. Bug: 14119293 Bug: 14425059 Change-Id: I5a5f81c6b189e3ad1da093343a3121463f87c21c 2014-05-15 09:39:23 +00:00			`};`
			`} // namespace latinime`
Rename PrevWordsInfo to NgramContext. Bug: 14425059 Change-Id: I30703fc80e9450d4e2dbfec965e7f9f4468f6a11 2014-10-08 03:05:13 +00:00			`#endif // LATINIME_NGRAM_CONTEXT_H`