Add a flag for bigram presence in the header

This is a cherry-pick of Icb602762 onto jb-dev.

Bug: 6355745
Change-Id: Icb602762bb0d81472f024fa491571062ec1fc4e9
main
Jean Chalard 2012-04-25 18:49:31 +09:00
parent 329c8d7bcc
commit 20a6dea1ca
2 changed files with 34 additions and 4 deletions

View File

@ -131,6 +131,7 @@ public class BinaryDictInputOutput {
// Bit values for the options flags written into the dictionary header.
// These options need to have the same numeric values as the ones in the native reading code.
// Set when the dictionary options request German umlaut processing.
private static final int GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
// Set when the dictionary options request French ligature processing.
private static final int FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
// Set when the dictionary contains at least one bigram.
private static final int CONTAINS_BIGRAMS_FLAG = 0x8;
// TODO: Make this value adaptive to content data, store it in the header, and
// use it in the reading code.
@ -752,9 +753,12 @@ public class BinaryDictInputOutput {
/**
 * Makes the 2-byte value for options flags.
 *
 * Each flag constant is added in when its condition holds: the French ligature and
 * German umlaut flags follow the corresponding fields of the dictionary options, and
 * the bigram flag is added when the dictionary reports that it has bigrams.
 *
 * @param dictionary the dictionary whose options and bigram presence are encoded.
 * @return the combined options flags.
 */
private static final int makeOptionsValue(final DictionaryOptions options) {
private static final int makeOptionsValue(final FusionDictionary dictionary) {
final DictionaryOptions options = dictionary.mOptions;
final boolean hasBigrams = dictionary.hasBigrams();
return (options.mFrenchLigatureProcessing ? FRENCH_LIGATURE_PROCESSING_FLAG : 0)
+ (options.mGermanUmlautProcessing ? GERMAN_UMLAUT_PROCESSING_FLAG : 0);
+ (options.mGermanUmlautProcessing ? GERMAN_UMLAUT_PROCESSING_FLAG : 0)
+ (hasBigrams ? CONTAINS_BIGRAMS_FLAG : 0);
}
/**
@ -970,7 +974,7 @@ public class BinaryDictInputOutput {
headerBuffer.write((byte) (0xFF & version));
}
// Options flags
final int options = makeOptionsValue(dict.mOptions);
final int options = makeOptionsValue(dict);
headerBuffer.write((byte) (0xFF & (options >> 8)));
headerBuffer.write((byte) (0xFF & options));
if (version >= FIRST_VERSION_WITH_HEADER_SIZE) {

View File

@ -563,7 +563,7 @@ public class FusionDictionary implements Iterable<Word> {
* Recursively count the number of nodes in a given branch of the trie.
*
* @param node the node to count.
* @result the number of nodes in this branch.
* @return the number of nodes in this branch.
*/
public static int countNodes(final Node node) {
int size = 1;
@ -575,6 +575,32 @@ public class FusionDictionary implements Iterable<Word> {
return size;
}
/**
 * Walks a branch of the trie depth-first, looking for any character group with bigrams.
 *
 * The search stops at the first hit, so it is cheap when bigrams exist. When there
 * are none, every group in the branch is visited, which can be pretty expensive.
 *
 * @param node the root of the branch to inspect; may be null.
 * @return true if some group in this branch has a non-null bigram list, false otherwise.
 */
private static boolean hasBigramsInternal(final Node node) {
    if (node == null) {
        return false;
    }
    // Scan the groups from last to first, descending into each group's children.
    int index = node.mData.size();
    while (--index >= 0) {
        final CharGroup current = node.mData.get(index);
        if (current.mBigrams != null || hasBigramsInternal(current.mChildren)) {
            return true;
        }
    }
    return false;
}
/**
 * Tells whether this dictionary contains at least one bigram.
 *
 * The answer is computed on demand by searching the trie from the root.
 *
 * @return true if there is any bigram, false otherwise.
 */
// TODO: this search is expensive, especially for large dictionaries without any bigram.
// On the plus side, it is always accurate and correct and uses no memory. A more
// efficient approach should be found, without compromising too much on memory and
// ease of use.
public boolean hasBigrams() {
    final boolean found = hasBigramsInternal(mRoot);
    return found;
}
// Historically, the tails of the words were going to be merged to save space.
// However, that would prevent the code from searching for a specific address in log(n)
// time, so this was abandoned.