am 99927f86: am de8a9a82: Small cleanups

* commit '99927f865faff5a9f31de315620447fd963f3dff': Small cleanups
2012-08-16 21:18:58 -07:00 · 2012-08-16 21:18:58 -07:00 · aca6bef515
commit aca6bef515
parent 667108d71d 99927f865f
4 changed files with 17 additions and 16 deletions
--- a/native/jni/src/bigram_dictionary.h
+++ b/native/jni/src/bigram_dictionary.h
@@ -29,8 +29,6 @@ class BigramDictionary {
    BigramDictionary(const unsigned char *dict, int maxWordLength, int maxPredictions);
    int getBigrams(const int32_t *word, int length, int *inputCodes, int codesSize,
            unsigned short *outWords, int *frequencies, int *outputTypes) const;
-    int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength,
-            const bool forceLowerCaseSearch) const;
    void fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord, const int prevWordLength,
            std::map<int, int> *map, uint8_t *filter) const;
    bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const;
@@ -45,6 +43,8 @@ class BigramDictionary {
    bool getFirstBitOfByte(int *pos) { return (DICT[*pos] & 0x80) > 0; }
    bool getSecondBitOfByte(int *pos) { return (DICT[*pos] & 0x40) > 0; }
    bool checkFirstCharacter(unsigned short *word, int *inputCodes) const;
+    int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength,
+            const bool forceLowerCaseSearch) const;
    const unsigned char *DICT;
    const int MAX_WORD_LENGTH;
--- a/native/jni/src/binary_format.h
+++ b/native/jni/src/binary_format.h
@@ -61,13 +61,6 @@ class BinaryFormat {
    static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
    static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
- private:
-    DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryFormat);
-    const static int32_t MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
-    const static int32_t CHARACTER_ARRAY_TERMINATOR = 0x1F;
-    const static int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2;
- public:
    const static int UNKNOWN_FORMAT = -1;
    // Originally, format version 1 had a 16-bit magic number, then the version number `01'
    // then options that must be 0. Hence the first 32-bits of the format are always as follow
@@ -94,7 +87,6 @@ class BinaryFormat {
    static int skipFrequency(const uint8_t flags, const int pos);
    static int skipShortcuts(const uint8_t *const dict, const uint8_t flags, const int pos);
    static int skipBigrams(const uint8_t *const dict, const uint8_t flags, const int pos);
-    static int skipAllAttributes(const uint8_t *const dict, const uint8_t flags, const int pos);
    static int skipChildrenPosAndAttributes(const uint8_t *const dict, const uint8_t flags,
            const int pos);
    static int readChildrenPosition(const uint8_t *const dict, const uint8_t flags, const int pos);
@@ -118,6 +110,13 @@ class BinaryFormat {
        REQUIRES_FRENCH_LIGATURES_PROCESSING = 0x4
    };
    const static unsigned int NO_FLAGS = 0;
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryFormat);
+    const static int32_t MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
+    const static int32_t CHARACTER_ARRAY_TERMINATOR = 0x1F;
+    const static int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2;
+    static int skipAllAttributes(const uint8_t *const dict, const uint8_t flags, const int pos);
 };
 inline int BinaryFormat::detectFormat(const uint8_t *const dict) {
--- a/native/jni/src/char_utils.cpp
+++ b/native/jni/src/char_utils.cpp
@@ -889,7 +889,7 @@ static int compare_pair_capital(const void *a, const void *b) {
            - static_cast<int>((static_cast<const struct LatinCapitalSmallPair *>(b))->capital);
 }
-unsigned short latin_tolower(unsigned short c) {
+unsigned short latin_tolower(const unsigned short c) {
    struct LatinCapitalSmallPair *p =
            static_cast<struct LatinCapitalSmallPair *>(bsearch(&c, SORTED_CHAR_MAP,
                    sizeof(SORTED_CHAR_MAP) / sizeof(SORTED_CHAR_MAP[0]),
--- a/native/jni/src/char_utils.h
+++ b/native/jni/src/char_utils.h
@@ -17,21 +17,23 @@
 #ifndef LATINIME_CHAR_UTILS_H
 #define LATINIME_CHAR_UTILS_H
+#include <cctype>
 namespace latinime {
-inline static int isAsciiUpper(unsigned short c) {
+inline static bool isAsciiUpper(unsigned short c) {
-    return c >= 'A' && c <= 'Z';
+    return c >= 'A' && c <= 'Z'; // not isupper(): UB above UCHAR_MAX, locale-dependent
 }
 inline static unsigned short toAsciiLower(unsigned short c) {
    return c - 'A' + 'a';
 }
-inline static int isAscii(unsigned short c) {
+inline static bool isAscii(unsigned short c) {
-    return c <= 127;
+    return c <= 127; // not isascii(): POSIX-only extension, not part of ISO C++ <cctype>
 }
-unsigned short latin_tolower(unsigned short c);
+unsigned short latin_tolower(const unsigned short c);
 /**
 * Table mapping most combined Latin, Greek, and Cyrillic characters