am 951252bf
: Merge "Get stats from dictionary."
* commit '951252bf478c924372397ecfc0fdf5a7d6445bff': Get stats from dictionary.
This commit is contained in:
commit
54357ccb72
5 changed files with 60 additions and 26 deletions
|
@ -598,7 +598,7 @@ public final class BinaryDictionary extends Dictionary {
|
||||||
}
|
}
|
||||||
|
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
public String getPropertyForTest(final String query) {
|
public String getPropertyForGettingStats(final String query) {
|
||||||
if (!isValidDictionary()) return "";
|
if (!isValidDictionary()) return "";
|
||||||
return getPropertyNative(mNativeDict, query);
|
return getPropertyNative(mNativeDict, query);
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,16 +20,24 @@ import java.io.File;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
|
||||||
/**
 * Immutable snapshot of basic information about one dictionary: its locale, its name,
 * where its file lives, how large that file is, and its entry counts.
 */
public class DictionaryStats {
    /** Placeholder stored in a count field when no real entry count is available. */
    public static final int NOT_AN_ENTRY_COUNT = -1;

    /** Locale passed to the constructor. */
    public final Locale mLocale;
    /** Dictionary name passed to the constructor. */
    public final String mDictName;
    /** Absolute path of the dictionary file, resolved at construction time. */
    public final String mDictFilePath;
    /** Value of {@link File#length()} for the dictionary file at construction time. */
    public final long mDictFileSize;

    /** Unigram count supplied by the caller; may be {@link #NOT_AN_ENTRY_COUNT}. */
    public final int mUnigramCount;
    /** N-gram count supplied by the caller; may be {@link #NOT_AN_ENTRY_COUNT}. */
    public final int mNgramCount;
    // TODO: Add more members.

    /**
     * @param locale locale to record
     * @param dictName dictionary name to record
     * @param dictFile dictionary file; its absolute path and current length are captured
     * @param unigramCount unigram count to record
     * @param ngramCount n-gram count to record
     */
    public DictionaryStats(final Locale locale, final String dictName, final File dictFile,
            final int unigramCount, final int ngramCount) {
        // Values copied through unchanged.
        mLocale = locale;
        mDictName = dictName;
        mUnigramCount = unigramCount;
        mNgramCount = ngramCount;
        // Values derived from the file itself.
        mDictFilePath = dictFile.getAbsolutePath();
        mDictFileSize = dictFile.length();
    }
}
|
||||||
|
|
|
@ -644,14 +644,36 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static int parseEntryCount(final String entryCountStr) {
|
||||||
|
int entryCount;
|
||||||
|
try {
|
||||||
|
entryCount = Integer.parseInt(entryCountStr);
|
||||||
|
} catch (final NumberFormatException e) {
|
||||||
|
entryCount = DictionaryStats.NOT_AN_ENTRY_COUNT;
|
||||||
|
}
|
||||||
|
return entryCount;
|
||||||
|
}
|
||||||
|
|
||||||
public DictionaryStats getDictionaryStats() {
|
public DictionaryStats getDictionaryStats() {
|
||||||
reloadDictionaryIfRequired();
|
reloadDictionaryIfRequired();
|
||||||
final AsyncResultHolder<DictionaryStats> result = new AsyncResultHolder<>();
|
final AsyncResultHolder<DictionaryStats> result = new AsyncResultHolder<>();
|
||||||
asyncExecuteTaskWithLock(mLock.readLock(), mDictName /* executorName */, new Runnable() {
|
asyncExecuteTaskWithLock(mLock.readLock(), mDictName /* executorName */, new Runnable() {
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
// TODO: Get stats from the dictionary.
|
if (mBinaryDictionary == null) {
|
||||||
result.set(new DictionaryStats(mLocale, mDictName, mDictFile));
|
result.set(new DictionaryStats(mLocale, mDictName, mDictFile,
|
||||||
|
DictionaryStats.NOT_AN_ENTRY_COUNT,
|
||||||
|
DictionaryStats.NOT_AN_ENTRY_COUNT));
|
||||||
|
}
|
||||||
|
final int unigramCount = parseEntryCount(
|
||||||
|
mBinaryDictionary.getPropertyForGettingStats(
|
||||||
|
BinaryDictionary.MAX_UNIGRAM_COUNT_QUERY));
|
||||||
|
// TODO: Get dedicated entry counts for bigram, trigram, and so on.
|
||||||
|
final int ngramCount = parseEntryCount(mBinaryDictionary.getPropertyForGettingStats(
|
||||||
|
BinaryDictionary.MAX_BIGRAM_COUNT_QUERY));
|
||||||
|
// TODO: Get more information from dictionary.
|
||||||
|
result.set(new DictionaryStats(mLocale, mDictName, mDictFile, unigramCount,
|
||||||
|
ngramCount));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
return result.get(null /* defaultValue */, TIMEOUT_FOR_READ_OPS_IN_MILLISECONDS);
|
return result.get(null /* defaultValue */, TIMEOUT_FOR_READ_OPS_IN_MILLISECONDS);
|
||||||
|
|
|
@ -342,31 +342,31 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
final int maxUnigramCount = Integer.parseInt(
|
final int maxUnigramCount = Integer.parseInt(
|
||||||
binaryDictionary.getPropertyForTest(BinaryDictionary.MAX_UNIGRAM_COUNT_QUERY));
|
binaryDictionary.getPropertyForGettingStats(BinaryDictionary.MAX_UNIGRAM_COUNT_QUERY));
|
||||||
for (int i = 0; i < unigramTypedCount; i++) {
|
for (int i = 0; i < unigramTypedCount; i++) {
|
||||||
final String word = words.get(random.nextInt(words.size()));
|
final String word = words.get(random.nextInt(words.size()));
|
||||||
addUnigramWord(binaryDictionary, word, DUMMY_PROBABILITY);
|
addUnigramWord(binaryDictionary, word, DUMMY_PROBABILITY);
|
||||||
|
|
||||||
if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
||||||
final int unigramCountBeforeGC =
|
final int unigramCountBeforeGC =
|
||||||
Integer.parseInt(binaryDictionary.getPropertyForTest(
|
Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||||
BinaryDictionary.UNIGRAM_COUNT_QUERY));
|
BinaryDictionary.UNIGRAM_COUNT_QUERY));
|
||||||
while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
||||||
forcePassingShortTime(binaryDictionary);
|
forcePassingShortTime(binaryDictionary);
|
||||||
}
|
}
|
||||||
final int unigramCountAfterGC =
|
final int unigramCountAfterGC =
|
||||||
Integer.parseInt(binaryDictionary.getPropertyForTest(
|
Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||||
BinaryDictionary.UNIGRAM_COUNT_QUERY));
|
BinaryDictionary.UNIGRAM_COUNT_QUERY));
|
||||||
assertTrue(unigramCountBeforeGC > unigramCountAfterGC);
|
assertTrue(unigramCountBeforeGC > unigramCountAfterGC);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTest(
|
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||||
BinaryDictionary.UNIGRAM_COUNT_QUERY)) > 0);
|
BinaryDictionary.UNIGRAM_COUNT_QUERY)) > 0);
|
||||||
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTest(
|
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||||
BinaryDictionary.UNIGRAM_COUNT_QUERY)) <= maxUnigramCount);
|
BinaryDictionary.UNIGRAM_COUNT_QUERY)) <= maxUnigramCount);
|
||||||
forcePassingLongTime(binaryDictionary);
|
forcePassingLongTime(binaryDictionary);
|
||||||
assertEquals(0, Integer.parseInt(binaryDictionary.getPropertyForTest(
|
assertEquals(0, Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||||
BinaryDictionary.UNIGRAM_COUNT_QUERY)));
|
BinaryDictionary.UNIGRAM_COUNT_QUERY)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -415,13 +415,13 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
||||||
final int unigramCountBeforeGC =
|
final int unigramCountBeforeGC =
|
||||||
Integer.parseInt(binaryDictionary.getPropertyForTest(
|
Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||||
BinaryDictionary.UNIGRAM_COUNT_QUERY));
|
BinaryDictionary.UNIGRAM_COUNT_QUERY));
|
||||||
assertTrue(binaryDictionary.isValidWord(strong));
|
assertTrue(binaryDictionary.isValidWord(strong));
|
||||||
assertTrue(binaryDictionary.isValidWord(weak));
|
assertTrue(binaryDictionary.isValidWord(weak));
|
||||||
binaryDictionary.flushWithGC();
|
binaryDictionary.flushWithGC();
|
||||||
final int unigramCountAfterGC =
|
final int unigramCountAfterGC =
|
||||||
Integer.parseInt(binaryDictionary.getPropertyForTest(
|
Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||||
BinaryDictionary.UNIGRAM_COUNT_QUERY));
|
BinaryDictionary.UNIGRAM_COUNT_QUERY));
|
||||||
assertTrue(unigramCountBeforeGC > unigramCountAfterGC);
|
assertTrue(unigramCountBeforeGC > unigramCountAfterGC);
|
||||||
assertFalse(binaryDictionary.isValidWord(weak));
|
assertFalse(binaryDictionary.isValidWord(weak));
|
||||||
|
@ -477,7 +477,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
final int maxBigramCount = Integer.parseInt(
|
final int maxBigramCount = Integer.parseInt(
|
||||||
binaryDictionary.getPropertyForTest(BinaryDictionary.MAX_BIGRAM_COUNT_QUERY));
|
binaryDictionary.getPropertyForGettingStats(BinaryDictionary.MAX_BIGRAM_COUNT_QUERY));
|
||||||
for (int i = 0; i < bigramTypedCount; ++i) {
|
for (int i = 0; i < bigramTypedCount; ++i) {
|
||||||
final Pair<String, String> bigram = bigrams.get(random.nextInt(bigrams.size()));
|
final Pair<String, String> bigram = bigrams.get(random.nextInt(bigrams.size()));
|
||||||
addUnigramWord(binaryDictionary, bigram.first, DUMMY_PROBABILITY);
|
addUnigramWord(binaryDictionary, bigram.first, DUMMY_PROBABILITY);
|
||||||
|
@ -486,24 +486,24 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
|
|
||||||
if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
||||||
final int bigramCountBeforeGC =
|
final int bigramCountBeforeGC =
|
||||||
Integer.parseInt(binaryDictionary.getPropertyForTest(
|
Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||||
BinaryDictionary.BIGRAM_COUNT_QUERY));
|
BinaryDictionary.BIGRAM_COUNT_QUERY));
|
||||||
while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
||||||
forcePassingShortTime(binaryDictionary);
|
forcePassingShortTime(binaryDictionary);
|
||||||
}
|
}
|
||||||
final int bigramCountAfterGC =
|
final int bigramCountAfterGC =
|
||||||
Integer.parseInt(binaryDictionary.getPropertyForTest(
|
Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||||
BinaryDictionary.BIGRAM_COUNT_QUERY));
|
BinaryDictionary.BIGRAM_COUNT_QUERY));
|
||||||
assertTrue(bigramCountBeforeGC > bigramCountAfterGC);
|
assertTrue(bigramCountBeforeGC > bigramCountAfterGC);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTest(
|
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||||
BinaryDictionary.BIGRAM_COUNT_QUERY)) > 0);
|
BinaryDictionary.BIGRAM_COUNT_QUERY)) > 0);
|
||||||
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTest(
|
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||||
BinaryDictionary.BIGRAM_COUNT_QUERY)) <= maxBigramCount);
|
BinaryDictionary.BIGRAM_COUNT_QUERY)) <= maxBigramCount);
|
||||||
forcePassingLongTime(binaryDictionary);
|
forcePassingLongTime(binaryDictionary);
|
||||||
assertEquals(0, Integer.parseInt(binaryDictionary.getPropertyForTest(
|
assertEquals(0, Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||||
BinaryDictionary.BIGRAM_COUNT_QUERY)));
|
BinaryDictionary.BIGRAM_COUNT_QUERY)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -574,11 +574,11 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
||||||
final int bigramCountBeforeGC =
|
final int bigramCountBeforeGC =
|
||||||
Integer.parseInt(binaryDictionary.getPropertyForTest(
|
Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||||
BinaryDictionary.BIGRAM_COUNT_QUERY));
|
BinaryDictionary.BIGRAM_COUNT_QUERY));
|
||||||
binaryDictionary.flushWithGC();
|
binaryDictionary.flushWithGC();
|
||||||
final int bigramCountAfterGC =
|
final int bigramCountAfterGC =
|
||||||
Integer.parseInt(binaryDictionary.getPropertyForTest(
|
Integer.parseInt(binaryDictionary.getPropertyForGettingStats(
|
||||||
BinaryDictionary.BIGRAM_COUNT_QUERY));
|
BinaryDictionary.BIGRAM_COUNT_QUERY));
|
||||||
assertTrue(bigramCountBeforeGC > bigramCountAfterGC);
|
assertTrue(bigramCountBeforeGC > bigramCountAfterGC);
|
||||||
assertTrue(isValidBigram(binaryDictionary, strong, target));
|
assertTrue(isValidBigram(binaryDictionary, strong, target));
|
||||||
|
|
|
@ -968,14 +968,18 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
addBigramWords(binaryDictionary, word0, word1, bigramProbability);
|
addBigramWords(binaryDictionary, word0, word1, bigramProbability);
|
||||||
}
|
}
|
||||||
assertEquals(new HashSet<>(words).size(), Integer.parseInt(
|
assertEquals(new HashSet<>(words).size(), Integer.parseInt(
|
||||||
binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
|
binaryDictionary.getPropertyForGettingStats(
|
||||||
|
BinaryDictionary.UNIGRAM_COUNT_QUERY)));
|
||||||
assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt(
|
assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt(
|
||||||
binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
|
binaryDictionary.getPropertyForGettingStats(
|
||||||
|
BinaryDictionary.BIGRAM_COUNT_QUERY)));
|
||||||
binaryDictionary.flushWithGC();
|
binaryDictionary.flushWithGC();
|
||||||
assertEquals(new HashSet<>(words).size(), Integer.parseInt(
|
assertEquals(new HashSet<>(words).size(), Integer.parseInt(
|
||||||
binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
|
binaryDictionary.getPropertyForGettingStats(
|
||||||
|
BinaryDictionary.UNIGRAM_COUNT_QUERY)));
|
||||||
assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt(
|
assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt(
|
||||||
binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
|
binaryDictionary.getPropertyForGettingStats(
|
||||||
|
BinaryDictionary.BIGRAM_COUNT_QUERY)));
|
||||||
binaryDictionary.close();
|
binaryDictionary.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1510,7 +1514,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
assertEquals((int)unigramProbabilities.get(word), binaryDictionary.getFrequency(word));
|
assertEquals((int)unigramProbabilities.get(word), binaryDictionary.getFrequency(word));
|
||||||
}
|
}
|
||||||
assertEquals(unigramProbabilities.size(), Integer.parseInt(
|
assertEquals(unigramProbabilities.size(), Integer.parseInt(
|
||||||
binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
|
binaryDictionary.getPropertyForGettingStats(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
|
||||||
|
|
||||||
for (final Pair<String, String> bigram : bigrams) {
|
for (final Pair<String, String> bigram : bigrams) {
|
||||||
if (canCheckBigramProbability(toFormatVersion)) {
|
if (canCheckBigramProbability(toFormatVersion)) {
|
||||||
|
@ -1520,7 +1524,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
assertTrue(isValidBigram(binaryDictionary, bigram.first, bigram.second));
|
assertTrue(isValidBigram(binaryDictionary, bigram.first, bigram.second));
|
||||||
}
|
}
|
||||||
assertEquals(bigramProbabilities.size(), Integer.parseInt(
|
assertEquals(bigramProbabilities.size(), Integer.parseInt(
|
||||||
binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
|
binaryDictionary.getPropertyForGettingStats(BinaryDictionary.BIGRAM_COUNT_QUERY)));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testBeginningOfSentence() {
|
public void testBeginningOfSentence() {
|
||||||
|
|
Loading…
Reference in a new issue