Add a flag for bigram presence in the header

This is a cherry-pick of Icb602762 onto jb-dev. Bug: 6355745 Change-Id: Icb602762bb0d81472f024fa491571062ec1fc4e9
2012-04-25 18:49:31 +09:00 · 2012-04-25 18:49:31 +09:00 · 20a6dea1ca
commit 20a6dea1ca
parent 329c8d7bcc
2 changed files with 34 additions and 4 deletions
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@ -131,6 +131,7 @@ public class BinaryDictInputOutput {
    // These options need to be the same numeric values as the one in the native reading code.
    private static final int GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
    private static final int FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
+    private static final int CONTAINS_BIGRAMS_FLAG = 0x8;

    // TODO: Make this value adaptative to content data, store it in the header, and
    // use it in the reading code.
@ -752,9 +753,12 @@ public class BinaryDictInputOutput {
    /**
     * Makes the 2-byte value for options flags.
     */
-    private static final int makeOptionsValue(final DictionaryOptions options) {
+    private static final int makeOptionsValue(final FusionDictionary dictionary) {
+        final DictionaryOptions options = dictionary.mOptions;
+        final boolean hasBigrams = dictionary.hasBigrams(); // Searches the trie; can be slow.
        return (options.mFrenchLigatureProcessing ? FRENCH_LIGATURE_PROCESSING_FLAG : 0)
-                + (options.mGermanUmlautProcessing ? GERMAN_UMLAUT_PROCESSING_FLAG : 0);
+                + (options.mGermanUmlautProcessing ? GERMAN_UMLAUT_PROCESSING_FLAG : 0)
+                + (hasBigrams ? CONTAINS_BIGRAMS_FLAG : 0); // Distinct bits, so + acts like |.
    }

    /**
@ -970,7 +974,7 @@ public class BinaryDictInputOutput {
            headerBuffer.write((byte) (0xFF & version));
        }
        // Options flags
-        final int options = makeOptionsValue(dict.mOptions);
+        final int options = makeOptionsValue(dict);
        headerBuffer.write((byte) (0xFF & (options >> 8)));
        headerBuffer.write((byte) (0xFF & options));
        if (version >= FIRST_VERSION_WITH_HEADER_SIZE) {
--- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
@ -563,7 +563,7 @@ public class FusionDictionary implements Iterable<Word> {
     * Recursively count the number of nodes in a given branch of the trie.
     *
     * @param node the node to count.
-     * @result the number of nodes in this branch.
+     * @return the number of nodes in this branch.
     */
    public static int countNodes(final Node node) {
        int size = 1;
@ -575,6 +575,32 @@ public class FusionDictionary implements Iterable<Word> {
        return size;
    }

+    // Recursively finds out whether the given branch of the trie has any bigrams.
+    // Returns on the first hit, so it is cheap when bigrams exist; otherwise it visits
+    // every char group. Any non-null mBigrams is a hit (TODO: confirm never empty).
+    private static boolean hasBigramsInternal(final Node node) {
+        if (null == node) return false; // Nothing to search.
+        for (int i = node.mData.size() - 1; i >= 0; --i) {
+            CharGroup group = node.mData.get(i);
+            if (null != group.mBigrams) return true;
+            if (hasBigramsInternal(group.mChildren)) return true;
+        }
+        return false;
+    }
+
+    /**
+     * Finds out whether this dictionary has any bigrams by searching the trie from its root.
+     *
+     * @return true if a char group with bigrams is found, false otherwise.
+     */
+    // TODO: this is expensive, especially for large dictionaries without any bigrams, as
+    // the whole trie is searched. The upside is that the result is always accurate and
+    // needs no stored state. We should find a more efficient way of doing this, without
+    // compromising too much on memory and ease of use.
+    public boolean hasBigrams() {
+        return hasBigramsInternal(mRoot);
+    }
+
    // Historically, the tails of the words were going to be merged to save space.
    // However, that would prevent the code to search for a specific address in log(n)
    // time so this was abandoned.