From 88b22e255291c708005f2fce70310a7a4b036bcb Mon Sep 17 00:00:00 2001 From: Jean Chalard Date: Mon, 15 Aug 2011 14:24:37 +0900 Subject: [PATCH] Remove the old dictionary maker ...and also rename the new one into "makedict". Bug: 5151604 Change-Id: I44a8ab640d408f630f20f502787a967791796dc6 --- tools/makedict/Android.mk | 5 +- tools/makedict/etc/Android.mk | 5 +- tools/makedict/etc/makedict | 4 +- tools/makedict/etc/manifest.txt | 2 +- .../latin/BinaryDictInputOutput.java | 0 .../inputmethod/latin/CharGroupInfo.java | 0 .../inputmethod/latin/DictionaryMaker.java | 2 +- .../inputmethod/latin/FusionDictionary.java | 0 .../inputmethod/latin/MakedictLog.java | 0 .../inputmethod/latin/PendingAttribute.java | 0 .../latin/UnsupportedFormatException.java | 0 .../com/android/inputmethod/latin/Word.java | 0 .../inputmethod/latin/XmlDictInputOutput.java | 0 .../android/tools/dict/BigramDictionary.java | 286 ----------- .../tools/dict/MakeBinaryDictionary.java | 445 ------------------ .../latin/BinaryDictInputOutputTest.java | 0 tools/makedict2/Android.mk | 27 -- tools/makedict2/etc/Android.mk | 21 - tools/makedict2/etc/makedict2 | 63 --- tools/makedict2/etc/manifest.txt | 1 - 20 files changed, 11 insertions(+), 850 deletions(-) rename tools/{makedict2 => makedict}/src/com/android/inputmethod/latin/BinaryDictInputOutput.java (100%) rename tools/{makedict2 => makedict}/src/com/android/inputmethod/latin/CharGroupInfo.java (100%) rename tools/{makedict2 => makedict}/src/com/android/inputmethod/latin/DictionaryMaker.java (99%) rename tools/{makedict2 => makedict}/src/com/android/inputmethod/latin/FusionDictionary.java (100%) rename tools/{makedict2 => makedict}/src/com/android/inputmethod/latin/MakedictLog.java (100%) rename tools/{makedict2 => makedict}/src/com/android/inputmethod/latin/PendingAttribute.java (100%) rename tools/{makedict2 => makedict}/src/com/android/inputmethod/latin/UnsupportedFormatException.java (100%) rename tools/{makedict2 => makedict}/src/com/android/inputmethod/latin/Word.java (100%) rename tools/{makedict2 => makedict}/src/com/android/inputmethod/latin/XmlDictInputOutput.java (100%) delete mode 100644 tools/makedict/src/com/android/tools/dict/BigramDictionary.java delete mode 100644 tools/makedict/src/com/android/tools/dict/MakeBinaryDictionary.java rename tools/{makedict2 => makedict}/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java (100%) delete mode 100644 tools/makedict2/Android.mk delete mode 100644 tools/makedict2/etc/Android.mk delete mode 100755 tools/makedict2/etc/makedict2 delete mode 100644 tools/makedict2/etc/manifest.txt diff --git a/tools/makedict/Android.mk b/tools/makedict/Android.mk index b9fc5533d..6832b1cb6 100644 --- a/tools/makedict/Android.mk +++ b/tools/makedict/Android.mk @@ -1,5 +1,5 @@ # -# Copyright (C) 2009 The Android Open Source Project +# Copyright (C) 2011 The Android Open Source Project # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,8 +17,11 @@ LOCAL_PATH := $(call my-dir) include $(CLEAR_VARS) LOCAL_SRC_FILES := $(call all-java-files-under,src) +LOCAL_SRC_FILES += $(call all-java-files-under,tests) LOCAL_JAR_MANIFEST := etc/manifest.txt +LOCAL_MODULE_TAGS := eng LOCAL_MODULE := makedict +LOCAL_JAVA_LIBRARIES := junit include $(BUILD_HOST_JAVA_LIBRARY) include $(LOCAL_PATH)/etc/Android.mk diff --git a/tools/makedict/etc/Android.mk b/tools/makedict/etc/Android.mk index da162868a..96a90cbe0 100644 --- a/tools/makedict/etc/Android.mk +++ b/tools/makedict/etc/Android.mk @@ -1,4 +1,4 @@ -# Copyright (C) 2009 The Android Open Source Project +# Copyright (C) 2011 The Android Open Source Project # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ LOCAL_PATH := $(call my-dir) include $(CLEAR_VARS) +LOCAL_MODULE_TAGS := eng + LOCAL_PREBUILT_EXECUTABLES := makedict include $(BUILD_HOST_PREBUILT) - diff --git a/tools/makedict/etc/makedict b/tools/makedict/etc/makedict index 8420d6e5e..7c1c02e85 100755 --- a/tools/makedict/etc/makedict +++ b/tools/makedict/etc/makedict @@ -1,5 +1,5 @@ #!/bin/sh -# Copyright 2009, The Android Open Source Project +# Copyright 2011, The Android Open Source Project # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -60,4 +60,4 @@ fi # need to use "java.ext.dirs" because "-jar" causes classpath to be ignored # might need more memory, e.g. -Xmx128M -exec java -Djava.ext.dirs="$frameworkdir" -jar "$jarpath" "$@" +exec java -ea -Djava.ext.dirs="$frameworkdir" -jar "$jarpath" "$@" diff --git a/tools/makedict/etc/manifest.txt b/tools/makedict/etc/manifest.txt index aa3a3e84c..948609da6 100644 --- a/tools/makedict/etc/manifest.txt +++ b/tools/makedict/etc/manifest.txt @@ -1 +1 @@ -Main-Class: com.android.tools.dict.MakeBinaryDictionary +Main-Class: com.android.inputmethod.latin.DictionaryMaker diff --git a/tools/makedict2/src/com/android/inputmethod/latin/BinaryDictInputOutput.java b/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java similarity index 100% rename from tools/makedict2/src/com/android/inputmethod/latin/BinaryDictInputOutput.java rename to tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java diff --git a/tools/makedict2/src/com/android/inputmethod/latin/CharGroupInfo.java b/tools/makedict/src/com/android/inputmethod/latin/CharGroupInfo.java similarity index 100% rename from tools/makedict2/src/com/android/inputmethod/latin/CharGroupInfo.java rename to tools/makedict/src/com/android/inputmethod/latin/CharGroupInfo.java diff --git a/tools/makedict2/src/com/android/inputmethod/latin/DictionaryMaker.java b/tools/makedict/src/com/android/inputmethod/latin/DictionaryMaker.java similarity index 99% rename from tools/makedict2/src/com/android/inputmethod/latin/DictionaryMaker.java rename to tools/makedict/src/com/android/inputmethod/latin/DictionaryMaker.java index 61944545a..1ba01075e 100644 --- a/tools/makedict2/src/com/android/inputmethod/latin/DictionaryMaker.java +++ b/tools/makedict/src/com/android/inputmethod/latin/DictionaryMaker.java @@ -71,7 +71,7 @@ public class DictionaryMaker { } private void displayHelp() { - MakedictLog.i("Usage: makedict2 " + MakedictLog.i("Usage: makedict " + "[-s [-b ] | -s ] " + " [-d ] [-x ] [-2]\n" + "\n" diff --git a/tools/makedict2/src/com/android/inputmethod/latin/FusionDictionary.java b/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java similarity index 100% rename from tools/makedict2/src/com/android/inputmethod/latin/FusionDictionary.java rename to tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java diff --git a/tools/makedict2/src/com/android/inputmethod/latin/MakedictLog.java b/tools/makedict/src/com/android/inputmethod/latin/MakedictLog.java similarity index 100% rename from tools/makedict2/src/com/android/inputmethod/latin/MakedictLog.java rename to tools/makedict/src/com/android/inputmethod/latin/MakedictLog.java diff --git a/tools/makedict2/src/com/android/inputmethod/latin/PendingAttribute.java b/tools/makedict/src/com/android/inputmethod/latin/PendingAttribute.java similarity index 100% rename from tools/makedict2/src/com/android/inputmethod/latin/PendingAttribute.java rename to tools/makedict/src/com/android/inputmethod/latin/PendingAttribute.java diff --git a/tools/makedict2/src/com/android/inputmethod/latin/UnsupportedFormatException.java b/tools/makedict/src/com/android/inputmethod/latin/UnsupportedFormatException.java similarity index 100% rename from tools/makedict2/src/com/android/inputmethod/latin/UnsupportedFormatException.java rename to tools/makedict/src/com/android/inputmethod/latin/UnsupportedFormatException.java diff --git a/tools/makedict2/src/com/android/inputmethod/latin/Word.java b/tools/makedict/src/com/android/inputmethod/latin/Word.java similarity index 100% rename from tools/makedict2/src/com/android/inputmethod/latin/Word.java rename to tools/makedict/src/com/android/inputmethod/latin/Word.java diff --git a/tools/makedict2/src/com/android/inputmethod/latin/XmlDictInputOutput.java b/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java similarity index 100% rename from tools/makedict2/src/com/android/inputmethod/latin/XmlDictInputOutput.java rename to tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java diff --git a/tools/makedict/src/com/android/tools/dict/BigramDictionary.java b/tools/makedict/src/com/android/tools/dict/BigramDictionary.java deleted file mode 100644 index 35115bf2c..000000000 --- a/tools/makedict/src/com/android/tools/dict/BigramDictionary.java +++ /dev/null @@ -1,286 +0,0 @@ -/* - * Copyright (C) 2010 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.tools.dict; - -import org.xml.sax.Attributes; -import org.xml.sax.helpers.DefaultHandler; - -import java.io.File; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; -import java.util.Set; - -import javax.xml.parsers.SAXParser; -import javax.xml.parsers.SAXParserFactory; - -/** - * Helper for MakeBinaryDictionary - * Deals with all the bigram data - */ -public class BigramDictionary { - - /* - * Must match the values in the client side which is located in dictionary.cpp & dictionary.h - * Changing these values will generate totally different structure which must be also reflected - * on the client side. - */ - public static final int FLAG_BIGRAM_READ = 0x80; - public static final int FLAG_BIGRAM_CHILDEXIST = 0x40; - public static final int FLAG_BIGRAM_CONTINUED = 0x80; - public static final int FLAG_BIGRAM_FREQ = 0x7F; - - public static final int FOR_REVERSE_LOOKUPALL = -99; - - public ArrayList mBigramToFill = new ArrayList(); - public ArrayList mBigramToFillAddress = new ArrayList(); - - public HashMap mBi; - - public boolean mHasBigram; - - public BigramDictionary(String bigramSrcFilename, boolean hasBigram) { - mHasBigram = hasBigram; - loadBigram(bigramSrcFilename); - } - - private void loadBigram(String filename) { - mBi = new HashMap(); - if (!mHasBigram) { - System.out.println("Number of bigrams = " + Bigram.sBigramNum); - return; - } - try { - SAXParser parser = SAXParserFactory.newInstance().newSAXParser(); - parser.parse(new File(filename), new DefaultHandler() { - String w1 = null; - boolean inWord1 = false; - boolean inWord2 = false; - int freq = 0, counter = 0; - Bigram tempBigram = null; - - @Override - public void startElement(String uri, String localName, - String qName, Attributes attributes) { - if (qName.equals("bi")) { - inWord1 = true; - w1 = attributes.getValue(0); - int count = Integer.parseInt(attributes.getValue(1)); - tempBigram = new Bigram(count); - counter = 0; - } else if (qName.equals("w")) { - inWord2 = true; - String word2 = attributes.getValue(0); - int freq = Integer.parseInt(attributes.getValue(1)); - tempBigram.setWord2(counter, word2, freq); - counter++; - Bigram.sBigramNum++; - } - } - - @Override - public void endElement(String uri, String localName, - String qName) { - if (inWord2) { - inWord2 = false; - } else if (inWord1) { - inWord1 = false; - mBi.put(w1, tempBigram); - } - } - }); - } catch (Exception ioe) { - System.err.println("Exception in parsing bigram\n" + ioe); - ioe.printStackTrace(); - } - System.out.println("Number of bigrams = " + Bigram.sBigramNum); - } - - byte[] writeBigrams(byte[] dict, Map mDictionary) { - for (int i = 0; i < mBigramToFill.size(); i++) { - String w1 = mBigramToFill.get(i); - int address = mBigramToFillAddress.get(i); - - Bigram temp = mBi.get(w1); - int word2Count = temp.count; - int j4; - for (int j = 0; j < word2Count; j++) { - if (!mDictionary.containsKey(temp.word2[j])) { - System.out.println("Not in dictionary: " + temp.word2[j]); - System.exit(0); - } else { - j4 = (j * 4); - int addressOfWord2 = mDictionary.get(temp.word2[j]); - dict[address + j4 + 0] = (byte) (((addressOfWord2 & 0x3F0000) >> 16) - | FLAG_BIGRAM_READ); - dict[address + j4 + 1] = (byte) ((addressOfWord2 & 0x00FF00) >> 8); - dict[address + j4 + 2] = (byte) ((addressOfWord2 & 0x0000FF)); - - if (j == (word2Count - 1)) { - dict[address + j4 + 3] = (byte) (temp.freq[j] & FLAG_BIGRAM_FREQ); - } else { - dict[address + j4 + 3] = (byte) ((temp.freq[j] & FLAG_BIGRAM_FREQ) - | FLAG_BIGRAM_CONTINUED); - } - } - } - } - - return dict; - } - - void reverseLookupAll(Map mDictionary, byte[] dict) { - Set st = mDictionary.keySet(); - for (String s : st) { - searchForTerminalNode(mDictionary.get(s), FOR_REVERSE_LOOKUPALL, dict); - } - } - - void searchForTerminalNode(int bigramAddress, int frequency, byte[] dict) { - StringBuilder sb = new StringBuilder(48); - int pos; - boolean found = false; - int followDownBranchAddress = 2; - char followingChar = ' '; - int depth = 0; - int totalLoopCount = 0; - - while (!found) { - boolean followDownAddressSearchStop = false; - boolean firstAddress = true; - boolean haveToSearchAll = true; - - if (depth > 0) { - sb.append(followingChar); - } - pos = followDownBranchAddress; // pos start at count - int count = dict[pos] & 0xFF; - pos++; - for (int i = 0; i < count; i++) { - totalLoopCount++; - // pos at data - pos++; - // pos now at flag - if (!MakeBinaryDictionary.getFirstBitOfByte(pos, dict)) { // non-terminal - if (!followDownAddressSearchStop) { - int addr = MakeBinaryDictionary.get22BitAddress(pos, dict); - if (addr > bigramAddress) { - followDownAddressSearchStop = true; - if (firstAddress) { - firstAddress = false; - haveToSearchAll = true; - } else if (!haveToSearchAll) { - break; - } - } else { - followDownBranchAddress = addr; - followingChar = (char) (0xFF & dict[pos-1]); - if(firstAddress) { - firstAddress = false; - haveToSearchAll = false; - } - } - } - pos += 3; - } else if (MakeBinaryDictionary.getFirstBitOfByte(pos, dict)) { // terminal - // found !! - if (bigramAddress == (pos-1)) { - sb.append((char) (0xFF & dict[pos-1])); - found = true; - break; - } - - // address + freq (4 byte) - if (MakeBinaryDictionary.getSecondBitOfByte(pos, dict)) { - if (!followDownAddressSearchStop) { - int addr = MakeBinaryDictionary.get22BitAddress(pos, dict); - if (addr > bigramAddress) { - followDownAddressSearchStop = true; - if (firstAddress) { - firstAddress = false; - haveToSearchAll = true; - } else if (!haveToSearchAll) { - break; - } - } else { - followDownBranchAddress = addr; - followingChar = (char) (0xFF & dict[pos-1]); - if(firstAddress) { - firstAddress = false; - haveToSearchAll = true; - } - } - } - pos += 4; - } else { // freq only (2 byte) - pos += 2; - } - // skipping bigram - int bigramExist = (dict[pos] & FLAG_BIGRAM_READ); - if (bigramExist > 0) { - int nextBigramExist = 1; - while (nextBigramExist > 0) { - pos += 3; - nextBigramExist = (dict[pos++] & FLAG_BIGRAM_CONTINUED); - } - } else { - pos++; - } - } - } - depth++; - if (followDownBranchAddress == 2) { - System.out.println("ERROR!!! Cannot find bigram!!"); - System.exit(0); - } - } - - if (frequency == FOR_REVERSE_LOOKUPALL) { - System.out.println("Reverse: " + sb.toString() + " (" + bigramAddress + ")" - + " Loop: " + totalLoopCount); - } else { - System.out.println(" bigram: " + sb.toString() + " (" + bigramAddress + ") freq: " - + frequency + " Loop: " + totalLoopCount); - } - } - - static class Bigram { - String[] word2; - int[] freq; - int count; - static int sBigramNum = 0; - - String getSecondWord(int i) { - return word2[i]; - } - - int getFrequency(int i) { - return (freq[i] == 0) ? 1 : freq[i]; - } - - void setWord2(int index, String word2, int freq) { - this.word2[index] = word2; - this.freq[index] = freq; - } - - public Bigram(int word2Count) { - count = word2Count; - word2 = new String[word2Count]; - freq = new int[word2Count]; - } - } -} diff --git a/tools/makedict/src/com/android/tools/dict/MakeBinaryDictionary.java b/tools/makedict/src/com/android/tools/dict/MakeBinaryDictionary.java deleted file mode 100644 index 4a285ff07..000000000 --- a/tools/makedict/src/com/android/tools/dict/MakeBinaryDictionary.java +++ /dev/null @@ -1,445 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.tools.dict; - -import org.xml.sax.Attributes; -import org.xml.sax.helpers.DefaultHandler; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import javax.xml.parsers.SAXParser; -import javax.xml.parsers.SAXParserFactory; - -/** - * Compresses a list of words, frequencies, and bigram data - * into a tree structured binary dictionary. - * Dictionary Version: 200 (may contain bigrams) - * Version number started from 200 rather than 1 because we wanted to prevent number of roots in - * any old dictionaries being mistaken as the version number. There is not a chance that there - * will be more than 200 roots. Version number should be increased when there is structural change - * in the data. There is no need to increase the version when only the words in the data changes. - */ -public class MakeBinaryDictionary { - private static final int VERSION_NUM = 200; - - private static final String TAG_WORD = "w"; - private static final String ATTR_FREQ = "f"; - - private static final int FLAG_ADDRESS_MASK = 0x400000; - private static final int FLAG_TERMINAL_MASK = 0x800000; - private static final int ADDRESS_MASK = 0x3FFFFF; - - private static final int INITIAL_STRING_BUILDER_CAPACITY = 48; - - /** - * Unit for this variable is in bytes - * If destination file name is main.dict and file limit causes dictionary to be separated into - * multiple file, it will generate main0.dict, main1.dict, and so forth. - */ - private static int sOutputFileSize; - private static boolean sSplitOutput; - - private static final CharNode EMPTY_NODE = new CharNode(); - - private List mRoots; - private Map mDictionary; - private int mWordCount; - - private BigramDictionary mBigramDict; - - private static class CharNode { - char data; - int freq; - boolean terminal; - List children; - static int sNodes; - - public CharNode() { - sNodes++; - } - } - - private static void usage() { - System.err.println("Usage: makedict -s [-b ] " - + "-d [--size filesize]"); - System.exit(-1); - } - - public static void main(String[] args) { - int checkSource = -1; - int checkBigram = -1; - int checkDest = -1; - int checkFileSize = -1; - for (int i = 0; i < args.length; i+=2) { - if (args[i].equals("-s")) checkSource = (i + 1); - if (args[i].equals("-b")) checkBigram = (i + 1); - if (args[i].equals("-d")) checkDest = (i + 1); - if (args[i].equals("--size")) checkFileSize = (i + 1); - } - if (checkFileSize >= 0) { - sSplitOutput = true; - sOutputFileSize = Integer.parseInt(args[checkFileSize]); - } else { - sSplitOutput = false; - } - if (checkDest >= 0 && !args[checkDest].endsWith(".dict")) { - System.err.println("Error: Dictionary output file extension should be \".dict\""); - usage(); - } else if (checkSource >= 0 && checkBigram >= 0 && checkDest >= 0 && - ((!sSplitOutput && args.length == 6) || (sSplitOutput && args.length == 8))) { - new MakeBinaryDictionary(args[checkSource], args[checkBigram], args[checkDest]); - } else if (checkSource >= 0 && checkDest >= 0 && - ((!sSplitOutput && args.length == 4) || (sSplitOutput && args.length == 6))) { - new MakeBinaryDictionary(args[checkSource], null, args[checkDest]); - } else { - usage(); - } - } - - private MakeBinaryDictionary(String srcFilename, String bigramSrcFilename, - String destFilename) { - System.out.println("Generating dictionary version " + VERSION_NUM); - mBigramDict = new BigramDictionary(bigramSrcFilename, (bigramSrcFilename != null)); - populateDictionary(srcFilename); - writeToDict(destFilename); - - // Enable the code below to verify that the generated tree is traversable - // and bigram data is stored correctly. - if (false) { - mBigramDict.reverseLookupAll(mDictionary, mDict); - traverseDict(2, new char[32], 0); - } - } - - private void populateDictionary(String filename) { - mRoots = new ArrayList(); - mDictionary = new HashMap(); - try { - SAXParser parser = SAXParserFactory.newInstance().newSAXParser(); - parser.parse(new File(filename), new DefaultHandler() { - boolean inWord; - int freq; - StringBuilder wordBuilder = new StringBuilder(INITIAL_STRING_BUILDER_CAPACITY); - - @Override - public void startElement(String uri, String localName, - String qName, Attributes attributes) { - if (qName.equals(TAG_WORD)) { - inWord = true; - freq = Integer.parseInt(attributes.getValue(ATTR_FREQ)); - wordBuilder.setLength(0); - } - } - - @Override - public void characters(char[] data, int offset, int length) { - // Ignore other whitespace - if (!inWord) return; - wordBuilder.append(data, offset, length); - } - - @Override - public void endElement(String uri, String localName, - String qName) { - if (qName.equals(TAG_WORD)) { - if (wordBuilder.length() >= 1) { - addWordTop(wordBuilder.toString(), freq); - mWordCount++; - } - inWord = false; - } - } - }); - } catch (Exception ioe) { - System.err.println("Exception in parsing\n" + ioe); - ioe.printStackTrace(); - } - System.out.println("Nodes = " + CharNode.sNodes); - } - - private static int indexOf(List children, char c) { - if (children == null) { - return -1; - } - for (int i = 0; i < children.size(); i++) { - if (children.get(i).data == c) { - return i; - } - } - return -1; - } - - private void addWordTop(String word, int freq) { - if (freq < 0) { - freq = 0; - } else if (freq > 255) { - freq = 255; - } - char firstChar = word.charAt(0); - int index = indexOf(mRoots, firstChar); - if (index == -1) { - CharNode newNode = new CharNode(); - newNode.data = firstChar; - index = mRoots.size(); - mRoots.add(newNode); - } - final CharNode node = mRoots.get(index); - if (word.length() > 1) { - addWordRec(node, word, 1, freq); - } else { - node.terminal = true; - node.freq = freq; - } - } - - private void addWordRec(CharNode parent, String word, int charAt, int freq) { - CharNode child = null; - char data = word.charAt(charAt); - if (parent.children == null) { - parent.children = new ArrayList(); - } else { - for (int i = 0; i < parent.children.size(); i++) { - CharNode node = parent.children.get(i); - if (node.data == data) { - child = node; - break; - } - } - } - if (child == null) { - child = new CharNode(); - parent.children.add(child); - } - child.data = data; - if (word.length() > charAt + 1) { - addWordRec(child, word, charAt + 1, freq); - } else { - child.terminal = true; - child.freq = freq; - } - } - - private byte[] mDict; - private int mDictSize; - private static final int CHAR_WIDTH = 8; - private static final int FLAGS_WIDTH = 1; // Terminal flag (word end) - private static final int ADDR_WIDTH = 23; // Offset to children - private static final int FREQ_WIDTH_BYTES = 1; - private static final int COUNT_WIDTH_BYTES = 1; - - private void addCount(int count) { - mDict[mDictSize++] = (byte) (0xFF & count); - } - - private void addNode(CharNode node, String word1) { - if (node.terminal) { // store address of each word1 for bigram dic generation - mDictionary.put(word1, mDictSize); - } - - int charData = 0xFFFF & node.data; - if (charData > 254) { - mDict[mDictSize++] = (byte) 255; - mDict[mDictSize++] = (byte) ((node.data >> 8) & 0xFF); - mDict[mDictSize++] = (byte) (node.data & 0xFF); - } else { - mDict[mDictSize++] = (byte) (0xFF & node.data); - } - if (node.children != null) { - mDictSize += 3; // Space for children address - } else { - mDictSize += 1; // Space for just the terminal/address flags - } - if ((0xFFFFFF & node.freq) > 255) { - node.freq = 255; - } - if (node.terminal) { - byte freq = (byte) (0xFF & node.freq); - mDict[mDictSize++] = freq; - // bigram - if (mBigramDict.mBi.containsKey(word1)) { - int count = mBigramDict.mBi.get(word1).count; - mBigramDict.mBigramToFill.add(word1); - mBigramDict.mBigramToFillAddress.add(mDictSize); - mDictSize += (4 * count); - } else { - mDict[mDictSize++] = (byte) (0x00); - } - } - } - - private int mNullChildrenCount = 0; - private int mNotTerminalCount = 0; - - private void updateNodeAddress(int nodeAddress, CharNode node, - int childrenAddress) { - if ((mDict[nodeAddress] & 0xFF) == 0xFF) { // 3 byte character - nodeAddress += 2; - } - childrenAddress = ADDRESS_MASK & childrenAddress; - if (childrenAddress == 0) { - mNullChildrenCount++; - } else { - childrenAddress |= FLAG_ADDRESS_MASK; - } - if (node.terminal) { - childrenAddress |= FLAG_TERMINAL_MASK; - } else { - mNotTerminalCount++; - } - mDict[nodeAddress + 1] = (byte) (childrenAddress >> 16); - if ((childrenAddress & FLAG_ADDRESS_MASK) != 0) { - mDict[nodeAddress + 2] = (byte) ((childrenAddress & 0xFF00) >> 8); - mDict[nodeAddress + 3] = (byte) ((childrenAddress & 0xFF)); - } - } - - private void writeWordsRec(List children, StringBuilder word) { - if (children == null || children.size() == 0) { - return; - } - final int childCount = children.size(); - addCount(childCount); - int[] childrenAddresses = new int[childCount]; - for (int j = 0; j < childCount; j++) { - CharNode child = children.get(j); - childrenAddresses[j] = mDictSize; - word.append(child.data); - addNode(child, word.toString()); - word.setLength(word.length() - 1); - } - for (int j = 0; j < childCount; j++) { - CharNode child = children.get(j); - int nodeAddress = childrenAddresses[j]; - int cacheDictSize = mDictSize; - word.append(child.data); - writeWordsRec(child.children, word); - word.setLength(word.length() - 1); - updateNodeAddress(nodeAddress, child, child.children != null ? cacheDictSize : 0); - } - } - - private void writeToDict(String dictFilename) { - // 4MB max, 22-bit offsets - mDict = new byte[4 * 1024 * 1024]; // 4MB upper limit. Actual is probably - // < 1MB in most cases, as there is a limit in the - // resource size in apks. - mDictSize = 0; - - mDict[mDictSize++] = (byte) (0xFF & VERSION_NUM); // version info - mDict[mDictSize++] = (byte) (0xFF & (mBigramDict.mHasBigram ? 1 : 0)); - - final StringBuilder word = new StringBuilder(INITIAL_STRING_BUILDER_CAPACITY); - writeWordsRec(mRoots, word); - mDict = mBigramDict.writeBigrams(mDict, mDictionary); - System.out.println("Dict Size = " + mDictSize); - if (!sSplitOutput) { - sOutputFileSize = mDictSize; - } - try { - int currentLoc = 0; - int i = 0; - int extension = dictFilename.indexOf(".dict"); - String filename = dictFilename.substring(0, extension); - while (mDictSize > 0) { - FileOutputStream fos; - if (sSplitOutput) { - fos = new FileOutputStream(filename + i + ".dict"); - } else { - fos = new FileOutputStream(filename + ".dict"); - } - if (mDictSize > sOutputFileSize) { - fos.write(mDict, currentLoc, sOutputFileSize); - mDictSize -= sOutputFileSize; - currentLoc += sOutputFileSize; - } else { - fos.write(mDict, currentLoc, mDictSize); - mDictSize = 0; - } - fos.close(); - i++; - } - } catch (IOException ioe) { - System.err.println("Error writing dict file:" + ioe); - } - } - - private void traverseDict(int pos, char[] word, int depth) { - int count = mDict[pos++] & 0xFF; - for (int i = 0; i < count; i++) { - char c = (char) (mDict[pos++] & 0xFF); - if (c == 0xFF) { // two byte character - c = (char) (((mDict[pos] & 0xFF) << 8) | (mDict[pos+1] & 0xFF)); - pos += 2; - } - word[depth] = c; - boolean terminal = getFirstBitOfByte(pos, mDict); - int address = 0; - if ((mDict[pos] & (FLAG_ADDRESS_MASK >> 16)) > 0) { // address check - address = get22BitAddress(pos, mDict); - pos += 3; - } else { - pos += 1; - } - if (terminal) { - showWord(word, depth + 1, mDict[pos] & 0xFF); - pos++; - - int bigramExist = (mDict[pos] & mBigramDict.FLAG_BIGRAM_READ); - if (bigramExist > 0) { - int nextBigramExist = 1; - while (nextBigramExist > 0) { - int bigramAddress = get22BitAddress(pos, mDict); - pos += 3; - int frequency = (mBigramDict.FLAG_BIGRAM_FREQ & mDict[pos]); - mBigramDict.searchForTerminalNode(bigramAddress, frequency, mDict); - nextBigramExist = (mDict[pos++] & mBigramDict.FLAG_BIGRAM_CONTINUED); - } - } else { - pos++; - } - } - if (address != 0) { - traverseDict(address, word, depth + 1); - } - } - } - - private static void showWord(char[] word, int size, int freq) { - System.out.print(new String(word, 0, size) + " " + freq + "\n"); - } - - /* package */ static int get22BitAddress(int pos, byte[] dict) { - return ((dict[pos + 0] & 0x3F) << 16) - | ((dict[pos + 1] & 0xFF) << 8) - | ((dict[pos + 2] & 0xFF)); - } - - /* package */ static boolean getFirstBitOfByte(int pos, byte[] dict) { - return (dict[pos] & 0x80) > 0; - } - - /* package */ static boolean getSecondBitOfByte(int pos, byte[] dict) { - return (dict[pos] & 0x40) > 0; - } -} diff --git a/tools/makedict2/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java b/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java similarity index 100% rename from tools/makedict2/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java rename to tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java diff --git a/tools/makedict2/Android.mk b/tools/makedict2/Android.mk deleted file mode 100644 index e056168e9..000000000 --- a/tools/makedict2/Android.mk +++ /dev/null @@ -1,27 +0,0 @@ -# -# Copyright (C) 2011 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_SRC_FILES := $(call all-java-files-under,src) -LOCAL_SRC_FILES += $(call all-java-files-under,tests) -LOCAL_JAR_MANIFEST := etc/manifest.txt -LOCAL_MODULE_TAGS := eng -LOCAL_MODULE := makedict2 -LOCAL_JAVA_LIBRARIES := junit - -include $(BUILD_HOST_JAVA_LIBRARY) -include $(LOCAL_PATH)/etc/Android.mk diff --git a/tools/makedict2/etc/Android.mk b/tools/makedict2/etc/Android.mk deleted file mode 100644 index c71377cdf..000000000 --- a/tools/makedict2/etc/Android.mk +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (C) 2011 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_MODULE_TAGS := eng - -LOCAL_PREBUILT_EXECUTABLES := makedict2 -include $(BUILD_HOST_PREBUILT) diff --git a/tools/makedict2/etc/makedict2 b/tools/makedict2/etc/makedict2 deleted file mode 100755 index 1bad825e1..000000000 --- a/tools/makedict2/etc/makedict2 +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/sh -# Copyright 2011, The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Set up prog to be the path of this script, including following symlinks, -# and set up progdir to be the fully-qualified pathname of its directory. -prog="$0" -while [ -h "${prog}" ]; do - newProg=`/bin/ls -ld "${prog}"` - newProg=`expr "${newProg}" : ".* -> \(.*\)$"` - if expr "x${newProg}" : 'x/' >/dev/null; then - prog="${newProg}" - else - progdir=`dirname "${prog}"` - prog="${progdir}/${newProg}" - fi -done -oldwd=`pwd` -progdir=`dirname "${prog}"` -cd "${progdir}" -progdir=`pwd` -prog="${progdir}"/`basename "${prog}"` -cd "${oldwd}" - -jarfile=makedict2.jar -frameworkdir="$progdir" -if [ ! -r "$frameworkdir/$jarfile" ] -then - frameworkdir=`dirname "$progdir"`/tools/lib - libdir=`dirname "$progdir"`/tools/lib -fi -if [ ! -r "$frameworkdir/$jarfile" ] -then - frameworkdir=`dirname "$progdir"`/framework - libdir=`dirname "$progdir"`/lib -fi -if [ ! -r "$frameworkdir/$jarfile" ] -then - echo `basename "$prog"`": can't find $jarfile" - exit 1 -fi - -if [ "$OSTYPE" = "cygwin" ] ; then - jarpath=`cygpath -w "$frameworkdir/$jarfile"` - progdir=`cygpath -w "$progdir"` -else - jarpath="$frameworkdir/$jarfile" -fi - -# need to use "java.ext.dirs" because "-jar" causes classpath to be ignored -# might need more memory, e.g. -Xmx128M -exec java -ea -Djava.ext.dirs="$frameworkdir" -jar "$jarpath" "$@" diff --git a/tools/makedict2/etc/manifest.txt b/tools/makedict2/etc/manifest.txt deleted file mode 100644 index 948609da6..000000000 --- a/tools/makedict2/etc/manifest.txt +++ /dev/null @@ -1 +0,0 @@ -Main-Class: com.android.inputmethod.latin.DictionaryMaker