From cea80fd9554a9db2a8421d267a57999f4f3c53b4 Mon Sep 17 00:00:00 2001 From: Jean Chalard Date: Thu, 4 Jul 2013 16:59:40 +0900 Subject: [PATCH] Have random words stick to a restricted (random) charset Change-Id: Ib4045ebc9659f1b60183f2356e60e449d62c5be9 --- .../latin/makedict/BinaryDictIOTests.java | 40 ++++++++++++------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java index 87acafee6..d667db298 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java @@ -52,6 +52,7 @@ import java.util.Set; public class BinaryDictIOTests extends AndroidTestCase { private static final String TAG = BinaryDictIOTests.class.getSimpleName(); private static final int DEFAULT_MAX_UNIGRAMS = 100; + private static final int DEFAULT_CODE_POINT_SET_SIZE = 50; private static final int UNIGRAM_FREQ = 10; private static final int BIGRAM_FREQ = 50; private static final int TOLERANCE_OF_BIGRAM_FREQ = 5; @@ -81,7 +82,8 @@ public class BinaryDictIOTests extends AndroidTestCase { Log.e(TAG, "Testing dictionary: seed is " + seed); final Random random = new Random(seed); sWords.clear(); - generateWords(maxUnigrams, random); + final int[] codePointSet = generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE, random); + generateWords(maxUnigrams, random, codePointSet); for (int i = 0; i < sWords.size(); ++i) { sChainBigrams.put(i, new ArrayList()); @@ -96,6 +98,23 @@ public class BinaryDictIOTests extends AndroidTestCase { } } + private int[] generateCodePointSet(final int codePointSetSize, final Random random) { + final int[] codePointSet = new int[codePointSetSize]; + for (int i = codePointSet.length - 1; i >= 0; ) { + final int r = Math.abs(random.nextInt()); + if (r < 0) continue; + // Don't insert 0~0x20, but insert any other code point. + // Code points are in the range 0~0x10FFFF. + final int candidateCodePoint = (int)(0x20 + r % (Character.MAX_CODE_POINT - 0x20)); + // Code points between MIN_ and MAX_SURROGATE are not valid on their own. + if (candidateCodePoint >= Character.MIN_SURROGATE + && candidateCodePoint <= Character.MAX_SURROGATE) continue; + codePointSet[i] = candidateCodePoint; + --i; + } + return codePointSet; + } + // Utilities for test /** @@ -131,28 +150,20 @@ public class BinaryDictIOTests extends AndroidTestCase { /** * Generates a random word. */ - private String generateWord(final Random random) { + private String generateWord(final Random random, final int[] codePointSet) { StringBuilder builder = new StringBuilder("a"); int count = random.nextInt() % 30; // Arbitrarily 30 chars max while (count > 0) { - final long r = Math.abs(random.nextInt()); - if (r < 0) continue; - // Don't insert 0~0x20, but insert any other code point. - // Code points are in the range 0~0x10FFFF. - final int candidateCodePoint = (int)(0x20 + r % (Character.MAX_CODE_POINT - 0x20)); - // Code points between MIN_ and MAX_SURROGATE are not valid on their own. - if (candidateCodePoint >= Character.MIN_SURROGATE - && candidateCodePoint <= Character.MAX_SURROGATE) continue; - builder.appendCodePoint(candidateCodePoint); + builder.appendCodePoint(codePointSet[Math.abs(random.nextInt()) % codePointSet.length]); --count; } return builder.toString(); } - private void generateWords(final int number, final Random random) { + private void generateWords(final int number, final Random random, final int[] codePointSet) { final Set wordSet = CollectionUtils.newHashSet(); while (wordSet.size() < number) { - wordSet.add(generateWord(random)); + wordSet.add(generateWord(random, codePointSet)); } sWords.addAll(wordSet); } @@ -560,8 +571,9 @@ public class BinaryDictIOTests extends AndroidTestCase { // Test a word that isn't contained within the dictionary. final Random random = new Random((int)System.currentTimeMillis()); + final int[] codePointSet = generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE, random); for (int i = 0; i < 1000; ++i) { - final String word = generateWord(random); + final String word = generateWord(random, codePointSet); if (sWords.indexOf(word) != -1) continue; runGetTerminalPosition(buffer, word, i, false); }