add reconstructBigramFrequency

Change-Id: Iff20dcb9ca0d6064bb118247887fe24b812c0c61
main
Yuichiro Hanada 2012-08-16 18:40:14 +09:00
parent e2ac3932e3
commit c0a75c8ecb
2 changed files with 25 additions and 8 deletions

View File

@ -52,14 +52,14 @@ public class UserHistoryDictionary extends ExpandableDictionary {
private static final int FREQUENCY_FOR_TYPED = 2; private static final int FREQUENCY_FOR_TYPED = 2;
/** Maximum number of pairs. Pruning will start when databases goes above this number. */ /** Maximum number of pairs. Pruning will start when databases goes above this number. */
private static int sMaxHistoryBigrams = 10000; public static final int sMaxHistoryBigrams = 10000;
/** /**
* When it hits maximum bigram pair, it will delete until you are left with * When it hits maximum bigram pair, it will delete until you are left with
* only (sMaxHistoryBigrams - sDeleteHistoryBigrams) pairs. * only (sMaxHistoryBigrams - sDeleteHistoryBigrams) pairs.
* Do not keep this number small to avoid deleting too often. * Do not keep this number small to avoid deleting too often.
*/ */
private static int sDeleteHistoryBigrams = 1000; public static final int sDeleteHistoryBigrams = 1000;
/** /**
* Database version should increase if the database structure changes * Database version should increase if the database structure changes
@ -109,12 +109,8 @@ public class UserHistoryDictionary extends ExpandableDictionary {
private static DatabaseHelper sOpenHelper = null; private static DatabaseHelper sOpenHelper = null;
public void setDatabaseMax(int maxHistoryBigram) { public String getLocale() {
sMaxHistoryBigrams = maxHistoryBigram; return mLocale;
}
public void setDatabaseDelete(int deleteHistoryBigram) {
sDeleteHistoryBigrams = deleteHistoryBigram;
} }
public synchronized static UserHistoryDictionary getInstance( public synchronized static UserHistoryDictionary getInstance(
@ -502,9 +498,11 @@ public class UserHistoryDictionary extends ExpandableDictionary {
needsToSave(fc, isValid, addLevel0Bigram)) { needsToSave(fc, isValid, addLevel0Bigram)) {
freq = fc; freq = fc;
} else { } else {
// Delete this entry
freq = -1; freq = -1;
} }
} else { } else {
// Delete this entry
freq = -1; freq = -1;
} }
} }
@ -541,6 +539,7 @@ public class UserHistoryDictionary extends ExpandableDictionary {
getContentValues(word1, word2, mLocale)); getContentValues(word1, word2, mLocale));
pairId = pairIdLong.intValue(); pairId = pairIdLong.intValue();
} }
// Eliminate freq == 0 because that word is profanity.
if (freq > 0) { if (freq > 0) {
if (PROFILE_SAVE_RESTORE) { if (PROFILE_SAVE_RESTORE) {
++profInsert; ++profInsert;

View File

@ -1410,4 +1410,22 @@ public class BinaryDictInputOutput {
return false; return false;
} }
} }
/**
 * Expands a compressed bigram frequency back into an approximate frequency value.
 *
 * The distance from the unigram frequency up to MAX_TERMINAL_FREQUENCY is cut into
 * steps of size (MAX_TERMINAL_FREQUENCY - unigramFrequency) / (1.5 + MAX_BIGRAM_FREQUENCY).
 * The result is the unigram frequency plus (bigramFrequency + 1) such steps, with the
 * fractional part dropped by the final cast to int.
 *
 * @see #makeBigramFlags
 *
 * @param unigramFrequency the unigram frequency used as the base of the reconstruction
 * @param bigramFrequency the compressed bigram frequency
 * @return the approximate bigram frequency
 */
public static int reconstructBigramFrequency(final int unigramFrequency,
        final int bigramFrequency) {
    // Denominator of the step size: the bigram frequency ceiling plus 1.5.
    final float divisor = 1.5f + MAX_BIGRAM_FREQUENCY;
    // Width of a single step between the unigram frequency and the terminal ceiling.
    final float perStep = (MAX_TERMINAL_FREQUENCY - unigramFrequency) / divisor;
    // The compressed value is offset by one before being scaled by the step width.
    final float stepCount = bigramFrequency + 1.0f;
    return (int) (unigramFrequency + perStep * stepCount);
}
} }