From 20a6dea1cabfd8822824f7dca828d898e5b91cbc Mon Sep 17 00:00:00 2001 From: Jean Chalard Date: Wed, 25 Apr 2012 18:49:31 +0900 Subject: [PATCH] Add a flag for bigram presence in the header This is a cherry-pick of Icb602762 onto jb-dev. Bug: 6355745 Change-Id: Icb602762bb0d81472f024fa491571062ec1fc4e9 --- .../latin/makedict/BinaryDictInputOutput.java | 10 +++++-- .../latin/makedict/FusionDictionary.java | 28 ++++++++++++++++++- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java index 88da7b0d8..d82d503c4 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java @@ -131,6 +131,7 @@ public class BinaryDictInputOutput { // These options need to be the same numeric values as the one in the native reading code. private static final int GERMAN_UMLAUT_PROCESSING_FLAG = 0x1; private static final int FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; + private static final int CONTAINS_BIGRAMS_FLAG = 0x8; // TODO: Make this value adaptative to content data, store it in the header, and // use it in the reading code. @@ -752,9 +753,12 @@ public class BinaryDictInputOutput { /** * Makes the 2-byte value for options flags. */ - private static final int makeOptionsValue(final DictionaryOptions options) { + private static final int makeOptionsValue(final FusionDictionary dictionary) { + final DictionaryOptions options = dictionary.mOptions; + final boolean hasBigrams = dictionary.hasBigrams(); return (options.mFrenchLigatureProcessing ? FRENCH_LIGATURE_PROCESSING_FLAG : 0) - + (options.mGermanUmlautProcessing ? GERMAN_UMLAUT_PROCESSING_FLAG : 0); + + (options.mGermanUmlautProcessing ? GERMAN_UMLAUT_PROCESSING_FLAG : 0) + + (hasBigrams ? CONTAINS_BIGRAMS_FLAG : 0); } /** @@ -970,7 +974,7 @@ public class BinaryDictInputOutput { headerBuffer.write((byte) (0xFF & version)); } // Options flags - final int options = makeOptionsValue(dict.mOptions); + final int options = makeOptionsValue(dict); headerBuffer.write((byte) (0xFF & (options >> 8))); headerBuffer.write((byte) (0xFF & options)); if (version >= FIRST_VERSION_WITH_HEADER_SIZE) { diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java index c293b2ba4..b08702e47 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java +++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java @@ -563,7 +563,7 @@ public class FusionDictionary implements Iterable { * Recursively count the number of nodes in a given branch of the trie. * * @param node the node to count. - * @result the number of nodes in this branch. + * @return the number of nodes in this branch. */ public static int countNodes(final Node node) { int size = 1; @@ -575,6 +575,32 @@ public class FusionDictionary implements Iterable { return size; } + // Recursively find out whether there are any bigrams. + // This can be pretty expensive especially if there aren't any (we return as soon + // as we find one, so it's much cheaper if there are bigrams) + private static boolean hasBigramsInternal(final Node node) { + if (null == node) return false; + for (int i = node.mData.size() - 1; i >= 0; --i) { + CharGroup group = node.mData.get(i); + if (null != group.mBigrams) return true; + if (hasBigramsInternal(group.mChildren)) return true; + } + return false; + } + + /** + * Finds out whether there are any bigrams in this dictionary. + * + * @return true if there is any bigram, false otherwise. + */ + // TODO: this is expensive especially for large dictionaries without any bigram. + // The up side is, this is always accurate and correct and uses no memory. We should + // find a more efficient way of doing this, without compromising too much on memory + // and ease of use. + public boolean hasBigrams() { + return hasBigramsInternal(mRoot); + } + // Historically, the tails of the words were going to be merged to save space. // However, that would prevent the code to search for a specific address in log(n) // time so this was abandoned.