2011-07-15 04:49:00 +00:00
|
|
|
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
|
|
|
|
#ifndef LATINIME_CORRECTION_STATE_H
|
|
|
|
#define LATINIME_CORRECTION_STATE_H
|
|
|
|
|
|
|
|
#include <stdint.h>
|
|
|
|
|
|
|
|
#include "defines.h"
|
|
|
|
|
|
|
|
namespace latinime {
|
|
|
|
|
|
|
|
class ProximityInfo;
|
|
|
|
|
|
|
|
class CorrectionState {
|
2011-08-01 10:35:27 +00:00
|
|
|
|
2011-07-15 04:49:00 +00:00
|
|
|
public:
|
2011-08-03 14:27:32 +00:00
|
|
|
typedef enum {
|
2011-08-04 09:31:57 +00:00
|
|
|
TRAVERSE_ALL_ON_TERMINAL,
|
|
|
|
TRAVERSE_ALL_NOT_ON_TERMINAL,
|
2011-08-03 14:27:32 +00:00
|
|
|
UNRELATED,
|
2011-08-04 09:31:57 +00:00
|
|
|
ON_TERMINAL,
|
|
|
|
NOT_ON_TERMINAL
|
2011-08-03 14:27:32 +00:00
|
|
|
} CorrectionStateType;
|
|
|
|
|
2011-08-01 10:35:27 +00:00
|
|
|
CorrectionState(const int typedLetterMultiplier, const int fullWordMultiplier);
|
2011-08-04 09:31:57 +00:00
|
|
|
void initCorrectionState(
|
|
|
|
const ProximityInfo *pi, const int inputLength, const int maxWordLength);
|
2011-08-01 10:35:27 +00:00
|
|
|
void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos,
|
|
|
|
const int spaceProximityPos, const int missingSpacePos);
|
2011-07-15 04:49:00 +00:00
|
|
|
void checkState();
|
2011-08-04 09:31:57 +00:00
|
|
|
void initProcessState(const int matchCount, const int inputIndex, const int outputIndex,
|
|
|
|
const bool traverseAllNodes, const int diffs);
|
|
|
|
void getProcessState(int *matchedCount, int *inputIndex, int *outputIndex,
|
|
|
|
bool *traverseAllNodes, int *diffs);
|
2011-08-03 14:27:32 +00:00
|
|
|
int getOutputIndex();
|
|
|
|
int getInputIndex();
|
2011-08-04 09:31:57 +00:00
|
|
|
bool needsToTraverseAll();
|
2011-08-03 14:27:32 +00:00
|
|
|
|
2011-07-15 04:49:00 +00:00
|
|
|
virtual ~CorrectionState();
|
2011-08-04 09:31:57 +00:00
|
|
|
int getSpaceProximityPos() const {
|
|
|
|
return mSpaceProximityPos;
|
|
|
|
}
|
|
|
|
int getMissingSpacePos() const {
|
|
|
|
return mMissingSpacePos;
|
|
|
|
}
|
|
|
|
|
2011-07-15 04:49:00 +00:00
|
|
|
int getSkipPos() const {
|
|
|
|
return mSkipPos;
|
|
|
|
}
|
2011-08-04 09:31:57 +00:00
|
|
|
|
2011-07-15 04:49:00 +00:00
|
|
|
int getExcessivePos() const {
|
|
|
|
return mExcessivePos;
|
|
|
|
}
|
2011-08-04 09:31:57 +00:00
|
|
|
|
2011-07-15 04:49:00 +00:00
|
|
|
int getTransposedPos() const {
|
|
|
|
return mTransposedPos;
|
|
|
|
}
|
2011-08-04 09:31:57 +00:00
|
|
|
|
|
|
|
bool needsToPrune() const;
|
|
|
|
|
2011-08-01 10:35:27 +00:00
|
|
|
int getFreqForSplitTwoWords(const int firstFreq, const int secondFreq);
|
2011-08-04 09:31:57 +00:00
|
|
|
int getFinalFreq(const int freq, unsigned short **word, int* wordLength);
|
|
|
|
|
|
|
|
CorrectionStateType processCharAndCalcState(const int32_t c, const bool isTerminal);
|
2011-08-01 10:35:27 +00:00
|
|
|
|
2011-08-04 09:31:57 +00:00
|
|
|
int getDiffs() const {
|
|
|
|
return mDiffs;
|
|
|
|
}
|
2011-07-15 04:49:00 +00:00
|
|
|
private:
|
2011-08-04 09:31:57 +00:00
|
|
|
void charMatched();
|
|
|
|
void incrementInputIndex();
|
|
|
|
void incrementOutputIndex();
|
|
|
|
void startTraverseAll();
|
|
|
|
|
|
|
|
// TODO: remove
|
|
|
|
|
|
|
|
void incrementDiffs() {
|
|
|
|
++mDiffs;
|
|
|
|
}
|
2011-08-01 10:35:27 +00:00
|
|
|
|
|
|
|
const int TYPED_LETTER_MULTIPLIER;
|
|
|
|
const int FULL_WORD_MULTIPLIER;
|
2011-08-02 17:19:44 +00:00
|
|
|
|
2011-07-15 04:49:00 +00:00
|
|
|
const ProximityInfo *mProximityInfo;
|
2011-08-04 09:31:57 +00:00
|
|
|
|
|
|
|
int mMaxEditDistance;
|
|
|
|
int mMaxDepth;
|
2011-07-15 04:49:00 +00:00
|
|
|
int mInputLength;
|
|
|
|
int mSkipPos;
|
|
|
|
int mExcessivePos;
|
|
|
|
int mTransposedPos;
|
2011-08-01 10:35:27 +00:00
|
|
|
int mSpaceProximityPos;
|
|
|
|
int mMissingSpacePos;
|
|
|
|
|
2011-08-02 17:19:44 +00:00
|
|
|
int mMatchedCharCount;
|
2011-08-03 14:27:32 +00:00
|
|
|
int mInputIndex;
|
|
|
|
int mOutputIndex;
|
2011-08-04 09:31:57 +00:00
|
|
|
int mDiffs;
|
|
|
|
bool mTraverseAllNodes;
|
|
|
|
CorrectionStateType mCurrentStateType;
|
|
|
|
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
|
|
|
|
|
|
|
|
inline bool needsToSkipCurrentNode(const unsigned short c);
|
2011-08-02 17:19:44 +00:00
|
|
|
|
2011-08-01 10:35:27 +00:00
|
|
|
class RankingAlgorithm {
|
|
|
|
public:
|
|
|
|
static int calculateFinalFreq(const int inputIndex, const int depth,
|
|
|
|
const int matchCount, const int freq, const bool sameLength,
|
|
|
|
const CorrectionState* correctionState);
|
|
|
|
static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
|
|
|
|
const CorrectionState* correctionState);
|
|
|
|
};
|
2011-07-15 04:49:00 +00:00
|
|
|
};
|
|
|
|
} // namespace latinime
|
|
|
|
#endif // LATINIME_CORRECTION_STATE_H
|