Merge "Return the correct bigram frequency"

main
Jean Chalard 2012-10-23 01:19:27 -07:00 committed by Android (Google) Code Review
commit 5e5be5a2d2
1 changed file with 21 additions and 12 deletions

View File

@@ -1374,7 +1374,8 @@ public final class BinaryDictInputOutput {
// of this method. Since it performs direct, unbuffered random access to the file and // of this method. Since it performs direct, unbuffered random access to the file and
// may be called hundreds of thousands of times, the resulting performance is not // may be called hundreds of thousands of times, the resulting performance is not
// reasonable without some kind of cache. Thus: // reasonable without some kind of cache. Thus:
private static TreeMap<Integer, String> wordCache = new TreeMap<Integer, String>(); private static TreeMap<Integer, WeightedString> wordCache =
new TreeMap<Integer, WeightedString>();
/** /**
* Finds, as a string, the word at the address passed as an argument. * Finds, as a string, the word at the address passed as an argument.
* *
@@ -1382,15 +1383,15 @@ public final class BinaryDictInputOutput {
* @param headerSize the size of the header. * @param headerSize the size of the header.
* @param address the address to seek. * @param address the address to seek.
* @param formatOptions file format options. * @param formatOptions file format options.
* @return the word, as a string. * @return the word with its frequency, as a weighted string.
*/ */
/* packages for tests */ static String getWordAtAddress( /* package for tests */ static WeightedString getWordAtAddress(
final FusionDictionaryBufferInterface buffer, final int headerSize, final int address, final FusionDictionaryBufferInterface buffer, final int headerSize, final int address,
final FormatOptions formatOptions) { final FormatOptions formatOptions) {
final String cachedString = wordCache.get(address); final WeightedString cachedString = wordCache.get(address);
if (null != cachedString) return cachedString; if (null != cachedString) return cachedString;
final String result; final WeightedString result;
final int originalPointer = buffer.position(); final int originalPointer = buffer.position();
buffer.position(address); buffer.position(address);
@@ -1406,14 +1407,17 @@ public final class BinaryDictInputOutput {
return result; return result;
} }
// TODO: static!? This will behave erratically when used in multi-threaded code.
// We need to fix this
private static int[] sGetWordBuffer = new int[FormatSpec.MAX_WORD_LENGTH]; private static int[] sGetWordBuffer = new int[FormatSpec.MAX_WORD_LENGTH];
private static String getWordAtAddressWithParentAddress( private static WeightedString getWordAtAddressWithParentAddress(
final FusionDictionaryBufferInterface buffer, final int headerSize, final int address, final FusionDictionaryBufferInterface buffer, final int headerSize, final int address,
final FormatOptions options) { final FormatOptions options) {
final StringBuilder builder = new StringBuilder(); final StringBuilder builder = new StringBuilder();
int currentAddress = address; int currentAddress = address;
int index = FormatSpec.MAX_WORD_LENGTH - 1; int index = FormatSpec.MAX_WORD_LENGTH - 1;
int frequency = Integer.MIN_VALUE;
// the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH // the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH
for (int count = 0; count < FormatSpec.MAX_WORD_LENGTH; ++count) { for (int count = 0; count < FormatSpec.MAX_WORD_LENGTH; ++count) {
CharGroupInfo currentInfo; CharGroupInfo currentInfo;
@@ -1428,6 +1432,7 @@ public final class BinaryDictInputOutput {
MakedictLog.d("Too many jumps - probably a bug"); MakedictLog.d("Too many jumps - probably a bug");
} }
} while (isMovedGroup(currentInfo.mFlags, options)); } while (isMovedGroup(currentInfo.mFlags, options));
if (Integer.MIN_VALUE == frequency) frequency = currentInfo.mFrequency;
for (int i = 0; i < currentInfo.mCharacters.length; ++i) { for (int i = 0; i < currentInfo.mCharacters.length; ++i) {
sGetWordBuffer[index--] = sGetWordBuffer[index--] =
currentInfo.mCharacters[currentInfo.mCharacters.length - i - 1]; currentInfo.mCharacters[currentInfo.mCharacters.length - i - 1];
@@ -1436,17 +1441,19 @@ public final class BinaryDictInputOutput {
currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress; currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
} }
return new String(sGetWordBuffer, index + 1, FormatSpec.MAX_WORD_LENGTH - index - 1); return new WeightedString(
new String(sGetWordBuffer, index + 1, FormatSpec.MAX_WORD_LENGTH - index - 1),
frequency);
} }
private static String getWordAtAddressWithoutParentAddress( private static WeightedString getWordAtAddressWithoutParentAddress(
final FusionDictionaryBufferInterface buffer, final int headerSize, final int address, final FusionDictionaryBufferInterface buffer, final int headerSize, final int address,
final FormatOptions options) { final FormatOptions options) {
buffer.position(headerSize); buffer.position(headerSize);
final int count = readCharGroupCount(buffer); final int count = readCharGroupCount(buffer);
int groupOffset = getGroupCountSize(count); int groupOffset = getGroupCountSize(count);
final StringBuilder builder = new StringBuilder(); final StringBuilder builder = new StringBuilder();
String result = null; WeightedString result = null;
CharGroupInfo last = null; CharGroupInfo last = null;
for (int i = count - 1; i >= 0; --i) { for (int i = count - 1; i >= 0; --i) {
@@ -1454,7 +1461,7 @@ public final class BinaryDictInputOutput {
groupOffset = info.mEndAddress; groupOffset = info.mEndAddress;
if (info.mOriginalAddress == address) { if (info.mOriginalAddress == address) {
builder.append(new String(info.mCharacters, 0, info.mCharacters.length)); builder.append(new String(info.mCharacters, 0, info.mCharacters.length));
result = builder.toString(); result = new WeightedString(builder.toString(), info.mFrequency);
break; // and return break; // and return
} }
if (hasChildrenAddress(info.mChildrenAddress)) { if (hasChildrenAddress(info.mChildrenAddress)) {
@@ -1515,9 +1522,11 @@ public final class BinaryDictInputOutput {
if (null != info.mBigrams) { if (null != info.mBigrams) {
bigrams = new ArrayList<WeightedString>(); bigrams = new ArrayList<WeightedString>();
for (PendingAttribute bigram : info.mBigrams) { for (PendingAttribute bigram : info.mBigrams) {
final String word = getWordAtAddress( final WeightedString word = getWordAtAddress(
buffer, headerSize, bigram.mAddress, options); buffer, headerSize, bigram.mAddress, options);
bigrams.add(new WeightedString(word, bigram.mFrequency)); final int reconstructedFrequency =
reconstructBigramFrequency(word.mFrequency, bigram.mFrequency);
bigrams.add(new WeightedString(word.mWord, reconstructedFrequency));
} }
} }
if (hasChildrenAddress(info.mChildrenAddress)) { if (hasChildrenAddress(info.mChildrenAddress)) {