2013-12-13 08:09:16 +00:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2013, The Android Open Source Project
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef LATINIME_PROBABILITY_ENTRY_H
|
|
|
|
#define LATINIME_PROBABILITY_ENTRY_H
|
|
|
|
|
2014-08-05 03:38:55 +00:00
|
|
|
#include <climits>
|
|
|
|
#include <cstdint>
|
|
|
|
|
2013-12-13 08:09:16 +00:00
|
|
|
#include "defines.h"
|
2014-12-17 07:02:09 +00:00
|
|
|
#include "dictionary/property/historical_info.h"
|
|
|
|
#include "dictionary/property/ngram_property.h"
|
|
|
|
#include "dictionary/property/unigram_property.h"
|
|
|
|
#include "dictionary/structure/v4/ver4_dict_constants.h"
|
2013-12-13 08:09:16 +00:00
|
|
|
|
|
|
|
namespace latinime {
|
|
|
|
|
|
|
|
class ProbabilityEntry {
|
|
|
|
public:
|
|
|
|
ProbabilityEntry(const ProbabilityEntry &probabilityEntry)
|
|
|
|
: mFlags(probabilityEntry.mFlags), mProbability(probabilityEntry.mProbability),
|
|
|
|
mHistoricalInfo(probabilityEntry.mHistoricalInfo) {}
|
|
|
|
|
2020-07-31 15:33:35 +00:00
|
|
|
// Placeholder entry
|
2013-12-13 08:09:16 +00:00
|
|
|
ProbabilityEntry()
|
2014-09-16 09:10:56 +00:00
|
|
|
: mFlags(Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY), mProbability(NOT_A_PROBABILITY),
|
|
|
|
mHistoricalInfo() {}
|
2013-12-13 08:09:16 +00:00
|
|
|
|
|
|
|
// Entry without historical information
|
|
|
|
ProbabilityEntry(const int flags, const int probability)
|
|
|
|
: mFlags(flags), mProbability(probability), mHistoricalInfo() {}
|
|
|
|
|
|
|
|
// Entry with historical information.
|
2014-08-19 02:49:05 +00:00
|
|
|
ProbabilityEntry(const int flags, const HistoricalInfo *const historicalInfo)
|
|
|
|
: mFlags(flags), mProbability(NOT_A_PROBABILITY), mHistoricalInfo(*historicalInfo) {}
|
2013-12-13 08:09:16 +00:00
|
|
|
|
2014-08-12 11:32:42 +00:00
|
|
|
// Create from unigram property.
|
|
|
|
ProbabilityEntry(const UnigramProperty *const unigramProperty)
|
2014-09-24 05:15:34 +00:00
|
|
|
: mFlags(createFlags(unigramProperty->representsBeginningOfSentence(),
|
|
|
|
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
|
|
|
|
unigramProperty->isPossiblyOffensive())),
|
2014-08-19 02:49:05 +00:00
|
|
|
mProbability(unigramProperty->getProbability()),
|
2014-10-01 02:39:33 +00:00
|
|
|
mHistoricalInfo(unigramProperty->getHistoricalInfo()) {}
|
2014-08-12 11:32:42 +00:00
|
|
|
|
2014-09-29 10:10:39 +00:00
|
|
|
// Create from ngram property.
|
2014-08-12 11:32:42 +00:00
|
|
|
// TODO: Set flags.
|
2014-09-29 10:10:39 +00:00
|
|
|
ProbabilityEntry(const NgramProperty *const ngramProperty)
|
|
|
|
: mFlags(0), mProbability(ngramProperty->getProbability()),
|
2014-10-01 02:39:33 +00:00
|
|
|
mHistoricalInfo(ngramProperty->getHistoricalInfo()) {}
|
2014-08-12 11:32:42 +00:00
|
|
|
|
|
|
|
bool isValid() const {
|
2014-09-16 09:10:56 +00:00
|
|
|
return (mFlags & Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY) == 0;
|
2014-08-12 11:32:42 +00:00
|
|
|
}
|
|
|
|
|
2013-12-13 08:09:16 +00:00
|
|
|
bool hasHistoricalInfo() const {
|
|
|
|
return mHistoricalInfo.isValid();
|
|
|
|
}
|
|
|
|
|
2014-08-19 02:49:05 +00:00
|
|
|
uint8_t getFlags() const {
|
2013-12-13 08:09:16 +00:00
|
|
|
return mFlags;
|
|
|
|
}
|
|
|
|
|
|
|
|
int getProbability() const {
|
|
|
|
return mProbability;
|
|
|
|
}
|
|
|
|
|
|
|
|
const HistoricalInfo *getHistoricalInfo() const {
|
|
|
|
return &mHistoricalInfo;
|
|
|
|
}
|
|
|
|
|
2014-08-19 02:49:05 +00:00
|
|
|
bool representsBeginningOfSentence() const {
|
|
|
|
return (mFlags & Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE) != 0;
|
|
|
|
}
|
|
|
|
|
2014-09-24 05:15:34 +00:00
|
|
|
bool isNotAWord() const {
|
|
|
|
return (mFlags & Ver4DictConstants::FLAG_NOT_A_WORD) != 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool isBlacklisted() const {
|
|
|
|
return (mFlags & Ver4DictConstants::FLAG_BLACKLISTED) != 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool isPossiblyOffensive() const {
|
|
|
|
return (mFlags & Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE) != 0;
|
|
|
|
}
|
|
|
|
|
2014-08-05 03:38:55 +00:00
|
|
|
uint64_t encode(const bool hasHistoricalInfo) const {
|
2014-10-15 09:23:00 +00:00
|
|
|
uint64_t encodedEntry = static_cast<uint8_t>(mFlags);
|
2014-08-05 03:38:55 +00:00
|
|
|
if (hasHistoricalInfo) {
|
|
|
|
encodedEntry = (encodedEntry << (Ver4DictConstants::TIME_STAMP_FIELD_SIZE * CHAR_BIT))
|
2014-10-15 09:23:00 +00:00
|
|
|
| static_cast<uint32_t>(mHistoricalInfo.getTimestamp());
|
2014-08-05 03:38:55 +00:00
|
|
|
encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_LEVEL_FIELD_SIZE * CHAR_BIT))
|
2014-10-15 09:23:00 +00:00
|
|
|
| static_cast<uint8_t>(mHistoricalInfo.getLevel());
|
2014-08-05 03:38:55 +00:00
|
|
|
encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT))
|
2014-10-30 00:56:36 +00:00
|
|
|
| static_cast<uint16_t>(mHistoricalInfo.getCount());
|
2014-08-05 03:38:55 +00:00
|
|
|
} else {
|
|
|
|
encodedEntry = (encodedEntry << (Ver4DictConstants::PROBABILITY_SIZE * CHAR_BIT))
|
2014-10-15 09:23:00 +00:00
|
|
|
| static_cast<uint8_t>(mProbability);
|
2014-08-05 03:38:55 +00:00
|
|
|
}
|
|
|
|
return encodedEntry;
|
|
|
|
}
|
|
|
|
|
|
|
|
static ProbabilityEntry decode(const uint64_t encodedEntry, const bool hasHistoricalInfo) {
|
|
|
|
if (hasHistoricalInfo) {
|
|
|
|
const int flags = readFromEncodedEntry(encodedEntry,
|
2014-08-12 11:32:42 +00:00
|
|
|
Ver4DictConstants::FLAGS_IN_LANGUAGE_MODEL_SIZE,
|
2014-08-05 03:38:55 +00:00
|
|
|
Ver4DictConstants::TIME_STAMP_FIELD_SIZE
|
|
|
|
+ Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
|
|
|
|
+ Ver4DictConstants::WORD_COUNT_FIELD_SIZE);
|
|
|
|
const int timestamp = readFromEncodedEntry(encodedEntry,
|
|
|
|
Ver4DictConstants::TIME_STAMP_FIELD_SIZE,
|
|
|
|
Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
|
|
|
|
+ Ver4DictConstants::WORD_COUNT_FIELD_SIZE);
|
|
|
|
const int level = readFromEncodedEntry(encodedEntry,
|
|
|
|
Ver4DictConstants::WORD_LEVEL_FIELD_SIZE,
|
|
|
|
Ver4DictConstants::WORD_COUNT_FIELD_SIZE);
|
|
|
|
const int count = readFromEncodedEntry(encodedEntry,
|
|
|
|
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, 0 /* pos */);
|
|
|
|
const HistoricalInfo historicalInfo(timestamp, level, count);
|
2014-08-19 02:49:05 +00:00
|
|
|
return ProbabilityEntry(flags, &historicalInfo);
|
2014-08-05 03:38:55 +00:00
|
|
|
} else {
|
|
|
|
const int flags = readFromEncodedEntry(encodedEntry,
|
2014-08-12 11:32:42 +00:00
|
|
|
Ver4DictConstants::FLAGS_IN_LANGUAGE_MODEL_SIZE,
|
2014-08-05 03:38:55 +00:00
|
|
|
Ver4DictConstants::PROBABILITY_SIZE);
|
|
|
|
const int probability = readFromEncodedEntry(encodedEntry,
|
|
|
|
Ver4DictConstants::PROBABILITY_SIZE, 0 /* pos */);
|
|
|
|
return ProbabilityEntry(flags, probability);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-12-13 08:09:16 +00:00
|
|
|
private:
|
|
|
|
// Copy constructor is public to use this class as a type of return value.
|
|
|
|
DISALLOW_ASSIGNMENT_OPERATOR(ProbabilityEntry);
|
|
|
|
|
2014-08-19 02:49:05 +00:00
|
|
|
const uint8_t mFlags;
|
2013-12-13 08:09:16 +00:00
|
|
|
const int mProbability;
|
|
|
|
const HistoricalInfo mHistoricalInfo;
|
2014-08-05 03:38:55 +00:00
|
|
|
|
|
|
|
static int readFromEncodedEntry(const uint64_t encodedEntry, const int size, const int pos) {
|
|
|
|
return static_cast<int>(
|
|
|
|
(encodedEntry >> (pos * CHAR_BIT)) & ((1ull << (size * CHAR_BIT)) - 1));
|
|
|
|
}
|
2014-08-19 02:49:05 +00:00
|
|
|
|
2014-09-24 05:15:34 +00:00
|
|
|
static uint8_t createFlags(const bool representsBeginningOfSentence,
|
|
|
|
const bool isNotAWord, const bool isBlacklisted, const bool isPossiblyOffensive) {
|
2014-08-19 02:49:05 +00:00
|
|
|
uint8_t flags = 0;
|
|
|
|
if (representsBeginningOfSentence) {
|
2014-09-24 05:15:34 +00:00
|
|
|
flags |= Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE;
|
|
|
|
}
|
|
|
|
if (isNotAWord) {
|
|
|
|
flags |= Ver4DictConstants::FLAG_NOT_A_WORD;
|
|
|
|
}
|
|
|
|
if (isBlacklisted) {
|
|
|
|
flags |= Ver4DictConstants::FLAG_BLACKLISTED;
|
|
|
|
}
|
|
|
|
if (isPossiblyOffensive) {
|
|
|
|
flags |= Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE;
|
2014-08-19 02:49:05 +00:00
|
|
|
}
|
|
|
|
return flags;
|
|
|
|
}
|
2013-12-13 08:09:16 +00:00
|
|
|
};
|
|
|
|
} // namespace latinime
|
|
|
|
#endif /* LATINIME_PROBABILITY_ENTRY_H */
|