Merge "Get stats from dictionary."

main
Keisuke Kuroyanagi 2014-09-25 08:24:54 +00:00 committed by Android (Google) Code Review
commit 951252bf47
5 changed files with 60 additions and 26 deletions

View File

@ -598,7 +598,7 @@ public final class BinaryDictionary extends Dictionary {
} }
@UsedForTesting @UsedForTesting
public String getPropertyForTest(final String query) { public String getPropertyForGettingStats(final String query) {
if (!isValidDictionary()) return ""; if (!isValidDictionary()) return "";
return getPropertyNative(mNativeDict, query); return getPropertyNative(mNativeDict, query);
} }

View File

@ -20,16 +20,24 @@ import java.io.File;
import java.util.Locale; import java.util.Locale;
public class DictionaryStats { public class DictionaryStats {
public static final int NOT_AN_ENTRY_COUNT = -1;
public final Locale mLocale; public final Locale mLocale;
public final String mDictName; public final String mDictName;
public final String mDictFilePath; public final String mDictFilePath;
public final long mDictFileSize; public final long mDictFileSize;
public final int mUnigramCount;
public final int mNgramCount;
// TODO: Add more members. // TODO: Add more members.
public DictionaryStats(final Locale locale, final String dictName, final File dictFile) { public DictionaryStats(final Locale locale, final String dictName, final File dictFile,
final int unigramCount, final int ngramCount) {
mLocale = locale; mLocale = locale;
mDictName = dictName; mDictName = dictName;
mDictFilePath = dictFile.getAbsolutePath(); mDictFilePath = dictFile.getAbsolutePath();
mDictFileSize = dictFile.length(); mDictFileSize = dictFile.length();
mUnigramCount = unigramCount;
mNgramCount = ngramCount;
} }
} }

View File

@ -644,14 +644,36 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
}); });
} }
/**
 * Converts an entry count that the dictionary reports as text into an {@code int}.
 *
 * @param entryCountStr the textual count to parse.
 * @return the parsed value, or {@link DictionaryStats#NOT_AN_ENTRY_COUNT} when
 *         {@link Integer#parseInt(String)} rejects the text.
 */
private static int parseEntryCount(final String entryCountStr) {
    try {
        return Integer.parseInt(entryCountStr);
    } catch (final NumberFormatException e) {
        // Not a number: hand back the sentinel instead of propagating the failure.
        return DictionaryStats.NOT_AN_ENTRY_COUNT;
    }
}
public DictionaryStats getDictionaryStats() { public DictionaryStats getDictionaryStats() {
reloadDictionaryIfRequired(); reloadDictionaryIfRequired();
final AsyncResultHolder<DictionaryStats> result = new AsyncResultHolder<>(); final AsyncResultHolder<DictionaryStats> result = new AsyncResultHolder<>();
asyncExecuteTaskWithLock(mLock.readLock(), mDictName /* executorName */, new Runnable() { asyncExecuteTaskWithLock(mLock.readLock(), mDictName /* executorName */, new Runnable() {
@Override @Override
public void run() { public void run() {
// TODO: Get stats from the dictionary. if (mBinaryDictionary == null) {
result.set(new DictionaryStats(mLocale, mDictName, mDictFile)); result.set(new DictionaryStats(mLocale, mDictName, mDictFile,
DictionaryStats.NOT_AN_ENTRY_COUNT,
DictionaryStats.NOT_AN_ENTRY_COUNT));
}
final int unigramCount = parseEntryCount(
mBinaryDictionary.getPropertyForGettingStats(
BinaryDictionary.MAX_UNIGRAM_COUNT_QUERY));
// TODO: Get dedicated entry counts for bigram, trigram, and so on.
final int ngramCount = parseEntryCount(mBinaryDictionary.getPropertyForGettingStats(
BinaryDictionary.MAX_BIGRAM_COUNT_QUERY));
// TODO: Get more information from dictionary.
result.set(new DictionaryStats(mLocale, mDictName, mDictFile, unigramCount,
ngramCount));
} }
}); });
return result.get(null /* defaultValue */, TIMEOUT_FOR_READ_OPS_IN_MILLISECONDS); return result.get(null /* defaultValue */, TIMEOUT_FOR_READ_OPS_IN_MILLISECONDS);

View File

@ -342,31 +342,31 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
} }
final int maxUnigramCount = Integer.parseInt( final int maxUnigramCount = Integer.parseInt(
binaryDictionary.getPropertyForTest(BinaryDictionary.MAX_UNIGRAM_COUNT_QUERY)); binaryDictionary.getPropertyForGettingStats(BinaryDictionary.MAX_UNIGRAM_COUNT_QUERY));
for (int i = 0; i < unigramTypedCount; i++) { for (int i = 0; i < unigramTypedCount; i++) {
final String word = words.get(random.nextInt(words.size())); final String word = words.get(random.nextInt(words.size()));
addUnigramWord(binaryDictionary, word, DUMMY_PROBABILITY); addUnigramWord(binaryDictionary, word, DUMMY_PROBABILITY);
if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
final int unigramCountBeforeGC = final int unigramCountBeforeGC =
Integer.parseInt(binaryDictionary.getPropertyForTest( Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
BinaryDictionary.UNIGRAM_COUNT_QUERY)); BinaryDictionary.UNIGRAM_COUNT_QUERY));
while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
forcePassingShortTime(binaryDictionary); forcePassingShortTime(binaryDictionary);
} }
final int unigramCountAfterGC = final int unigramCountAfterGC =
Integer.parseInt(binaryDictionary.getPropertyForTest( Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
BinaryDictionary.UNIGRAM_COUNT_QUERY)); BinaryDictionary.UNIGRAM_COUNT_QUERY));
assertTrue(unigramCountBeforeGC > unigramCountAfterGC); assertTrue(unigramCountBeforeGC > unigramCountAfterGC);
} }
} }
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTest( assertTrue(Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
BinaryDictionary.UNIGRAM_COUNT_QUERY)) > 0); BinaryDictionary.UNIGRAM_COUNT_QUERY)) > 0);
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTest( assertTrue(Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
BinaryDictionary.UNIGRAM_COUNT_QUERY)) <= maxUnigramCount); BinaryDictionary.UNIGRAM_COUNT_QUERY)) <= maxUnigramCount);
forcePassingLongTime(binaryDictionary); forcePassingLongTime(binaryDictionary);
assertEquals(0, Integer.parseInt(binaryDictionary.getPropertyForTest( assertEquals(0, Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
BinaryDictionary.UNIGRAM_COUNT_QUERY))); BinaryDictionary.UNIGRAM_COUNT_QUERY)));
} }
@ -415,13 +415,13 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
} }
if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
final int unigramCountBeforeGC = final int unigramCountBeforeGC =
Integer.parseInt(binaryDictionary.getPropertyForTest( Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
BinaryDictionary.UNIGRAM_COUNT_QUERY)); BinaryDictionary.UNIGRAM_COUNT_QUERY));
assertTrue(binaryDictionary.isValidWord(strong)); assertTrue(binaryDictionary.isValidWord(strong));
assertTrue(binaryDictionary.isValidWord(weak)); assertTrue(binaryDictionary.isValidWord(weak));
binaryDictionary.flushWithGC(); binaryDictionary.flushWithGC();
final int unigramCountAfterGC = final int unigramCountAfterGC =
Integer.parseInt(binaryDictionary.getPropertyForTest( Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
BinaryDictionary.UNIGRAM_COUNT_QUERY)); BinaryDictionary.UNIGRAM_COUNT_QUERY));
assertTrue(unigramCountBeforeGC > unigramCountAfterGC); assertTrue(unigramCountBeforeGC > unigramCountAfterGC);
assertFalse(binaryDictionary.isValidWord(weak)); assertFalse(binaryDictionary.isValidWord(weak));
@ -477,7 +477,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
} }
final int maxBigramCount = Integer.parseInt( final int maxBigramCount = Integer.parseInt(
binaryDictionary.getPropertyForTest(BinaryDictionary.MAX_BIGRAM_COUNT_QUERY)); binaryDictionary.getPropertyForGettingStats(BinaryDictionary.MAX_BIGRAM_COUNT_QUERY));
for (int i = 0; i < bigramTypedCount; ++i) { for (int i = 0; i < bigramTypedCount; ++i) {
final Pair<String, String> bigram = bigrams.get(random.nextInt(bigrams.size())); final Pair<String, String> bigram = bigrams.get(random.nextInt(bigrams.size()));
addUnigramWord(binaryDictionary, bigram.first, DUMMY_PROBABILITY); addUnigramWord(binaryDictionary, bigram.first, DUMMY_PROBABILITY);
@ -486,24 +486,24 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
final int bigramCountBeforeGC = final int bigramCountBeforeGC =
Integer.parseInt(binaryDictionary.getPropertyForTest( Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
BinaryDictionary.BIGRAM_COUNT_QUERY)); BinaryDictionary.BIGRAM_COUNT_QUERY));
while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
forcePassingShortTime(binaryDictionary); forcePassingShortTime(binaryDictionary);
} }
final int bigramCountAfterGC = final int bigramCountAfterGC =
Integer.parseInt(binaryDictionary.getPropertyForTest( Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
BinaryDictionary.BIGRAM_COUNT_QUERY)); BinaryDictionary.BIGRAM_COUNT_QUERY));
assertTrue(bigramCountBeforeGC > bigramCountAfterGC); assertTrue(bigramCountBeforeGC > bigramCountAfterGC);
} }
} }
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTest( assertTrue(Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
BinaryDictionary.BIGRAM_COUNT_QUERY)) > 0); BinaryDictionary.BIGRAM_COUNT_QUERY)) > 0);
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTest( assertTrue(Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
BinaryDictionary.BIGRAM_COUNT_QUERY)) <= maxBigramCount); BinaryDictionary.BIGRAM_COUNT_QUERY)) <= maxBigramCount);
forcePassingLongTime(binaryDictionary); forcePassingLongTime(binaryDictionary);
assertEquals(0, Integer.parseInt(binaryDictionary.getPropertyForTest( assertEquals(0, Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
BinaryDictionary.BIGRAM_COUNT_QUERY))); BinaryDictionary.BIGRAM_COUNT_QUERY)));
} }
@ -574,11 +574,11 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
} }
if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
final int bigramCountBeforeGC = final int bigramCountBeforeGC =
Integer.parseInt(binaryDictionary.getPropertyForTest( Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
BinaryDictionary.BIGRAM_COUNT_QUERY)); BinaryDictionary.BIGRAM_COUNT_QUERY));
binaryDictionary.flushWithGC(); binaryDictionary.flushWithGC();
final int bigramCountAfterGC = final int bigramCountAfterGC =
Integer.parseInt(binaryDictionary.getPropertyForTest( Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
BinaryDictionary.BIGRAM_COUNT_QUERY)); BinaryDictionary.BIGRAM_COUNT_QUERY));
assertTrue(bigramCountBeforeGC > bigramCountAfterGC); assertTrue(bigramCountBeforeGC > bigramCountAfterGC);
assertTrue(isValidBigram(binaryDictionary, strong, target)); assertTrue(isValidBigram(binaryDictionary, strong, target));

View File

@ -968,14 +968,18 @@ public class BinaryDictionaryTests extends AndroidTestCase {
addBigramWords(binaryDictionary, word0, word1, bigramProbability); addBigramWords(binaryDictionary, word0, word1, bigramProbability);
} }
assertEquals(new HashSet<>(words).size(), Integer.parseInt( assertEquals(new HashSet<>(words).size(), Integer.parseInt(
binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY))); binaryDictionary.getPropertyForGettingStats(
BinaryDictionary.UNIGRAM_COUNT_QUERY)));
assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt( assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt(
binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY))); binaryDictionary.getPropertyForGettingStats(
BinaryDictionary.BIGRAM_COUNT_QUERY)));
binaryDictionary.flushWithGC(); binaryDictionary.flushWithGC();
assertEquals(new HashSet<>(words).size(), Integer.parseInt( assertEquals(new HashSet<>(words).size(), Integer.parseInt(
binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY))); binaryDictionary.getPropertyForGettingStats(
BinaryDictionary.UNIGRAM_COUNT_QUERY)));
assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt( assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt(
binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY))); binaryDictionary.getPropertyForGettingStats(
BinaryDictionary.BIGRAM_COUNT_QUERY)));
binaryDictionary.close(); binaryDictionary.close();
} }
@ -1510,7 +1514,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertEquals((int)unigramProbabilities.get(word), binaryDictionary.getFrequency(word)); assertEquals((int)unigramProbabilities.get(word), binaryDictionary.getFrequency(word));
} }
assertEquals(unigramProbabilities.size(), Integer.parseInt( assertEquals(unigramProbabilities.size(), Integer.parseInt(
binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY))); binaryDictionary.getPropertyForGettingStats(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
for (final Pair<String, String> bigram : bigrams) { for (final Pair<String, String> bigram : bigrams) {
if (canCheckBigramProbability(toFormatVersion)) { if (canCheckBigramProbability(toFormatVersion)) {
@ -1520,7 +1524,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertTrue(isValidBigram(binaryDictionary, bigram.first, bigram.second)); assertTrue(isValidBigram(binaryDictionary, bigram.first, bigram.second));
} }
assertEquals(bigramProbabilities.size(), Integer.parseInt( assertEquals(bigramProbabilities.size(), Integer.parseInt(
binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY))); binaryDictionary.getPropertyForGettingStats(BinaryDictionary.BIGRAM_COUNT_QUERY)));
} }
public void testBeginningOfSentence() { public void testBeginningOfSentence() {