am 36dd126a: Add ShortcutContentReader.
* commit '36dd126ab41ad9d95ce6844be89a8e430b5f51ff': Add ShortcutContentReader.main
commit
104ed74b6e
|
@ -51,9 +51,8 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
|
||||||
protected DictBuffer mDictBuffer;
|
protected DictBuffer mDictBuffer;
|
||||||
private DictBuffer mFrequencyBuffer;
|
private DictBuffer mFrequencyBuffer;
|
||||||
private DictBuffer mTerminalAddressTableBuffer;
|
private DictBuffer mTerminalAddressTableBuffer;
|
||||||
private DictBuffer mShortcutBuffer;
|
|
||||||
private BigramContentReader mBigramReader;
|
private BigramContentReader mBigramReader;
|
||||||
private SparseTable mShortcutAddressTable;
|
private ShortcutContentReader mShortcutReader;
|
||||||
|
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
/* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) {
|
/* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) {
|
||||||
|
@ -110,8 +109,9 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
|
||||||
mBigramReader = new BigramContentReader(mDictDirectory.getName(),
|
mBigramReader = new BigramContentReader(mDictDirectory.getName(),
|
||||||
mDictDirectory, mBufferFactory, false);
|
mDictDirectory, mBufferFactory, false);
|
||||||
mBigramReader.openBuffers();
|
mBigramReader.openBuffers();
|
||||||
mShortcutBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_SHORTCUT));
|
mShortcutReader = new ShortcutContentReader(mDictDirectory.getName(), mDictDirectory,
|
||||||
loadShortcutAddressSparseTable();
|
mBufferFactory);
|
||||||
|
mShortcutReader.openBuffers();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -136,21 +136,6 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
|
||||||
return header;
|
return header;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Let's have something like SparseTableContentsReader in this class.
|
|
||||||
private void loadShortcutAddressSparseTable() throws IOException {
|
|
||||||
final File lookupIndexFile = new File(mDictDirectory, mDictDirectory.getName()
|
|
||||||
+ FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX);
|
|
||||||
final File contentFile = new File(mDictDirectory, mDictDirectory.getName()
|
|
||||||
+ FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX
|
|
||||||
+ FormatSpec.SHORTCUT_CONTENT_ID);
|
|
||||||
final File timestampsFile = new File(mDictDirectory, mDictDirectory.getName()
|
|
||||||
+ FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX
|
|
||||||
+ FormatSpec.SHORTCUT_CONTENT_ID);
|
|
||||||
mShortcutAddressTable = SparseTable.readFromFiles(lookupIndexFile,
|
|
||||||
new File[] { contentFile, timestampsFile },
|
|
||||||
FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An auxiliary class for reading bigrams.
|
* An auxiliary class for reading bigrams.
|
||||||
*/
|
*/
|
||||||
|
@ -194,34 +179,71 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
|
||||||
final ArrayList<PendingAttribute> bigrams = CollectionUtils.newArrayList();
|
final ArrayList<PendingAttribute> bigrams = CollectionUtils.newArrayList();
|
||||||
read(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId,
|
read(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId,
|
||||||
new SparseTableContentReaderInterface() {
|
new SparseTableContentReaderInterface() {
|
||||||
@Override
|
@Override
|
||||||
public void read(final DictBuffer buffer) {
|
public void read(final DictBuffer buffer) {
|
||||||
while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
|
while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
|
||||||
// If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE,
|
// If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE,
|
||||||
// remaining bigram entries are ignored.
|
// remaining bigram entries are ignored.
|
||||||
final int bigramFlags = buffer.readUnsignedByte();
|
final int bigramFlags = buffer.readUnsignedByte();
|
||||||
final int targetTerminalId = buffer.readUnsignedInt24();
|
final int targetTerminalId = buffer.readUnsignedInt24();
|
||||||
terminalAddressTableBuffer.position(
|
terminalAddressTableBuffer.position(targetTerminalId
|
||||||
targetTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
|
* FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
|
||||||
final int targetAddress = terminalAddressTableBuffer.readUnsignedInt24();
|
final int targetAddress =
|
||||||
bigrams.add(new PendingAttribute(
|
terminalAddressTableBuffer.readUnsignedInt24();
|
||||||
bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY,
|
bigrams.add(new PendingAttribute(bigramFlags
|
||||||
targetAddress));
|
& FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY,
|
||||||
if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) {
|
targetAddress));
|
||||||
break;
|
if (0 == (bigramFlags
|
||||||
|
& FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
|
||||||
|
throw new RuntimeException("Too many bigrams in a PtNode ("
|
||||||
|
+ bigrams.size() + " but max is "
|
||||||
|
+ FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
|
|
||||||
throw new RuntimeException("Too many bigrams in a PtNode (" + bigrams.size()
|
|
||||||
+ " but max is " + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
if (bigrams.isEmpty()) return null;
|
if (bigrams.isEmpty()) return null;
|
||||||
return bigrams;
|
return bigrams;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An auxiliary class for reading shortcuts.
|
||||||
|
*/
|
||||||
|
protected static class ShortcutContentReader extends SparseTableContentReader {
|
||||||
|
public ShortcutContentReader(final String name, final File baseDir,
|
||||||
|
final DictionaryBufferFactory factory) {
|
||||||
|
super(name + FormatSpec.SHORTCUT_FILE_EXTENSION,
|
||||||
|
FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
|
||||||
|
new String[] { name + FormatSpec.SHORTCUT_FILE_EXTENSION },
|
||||||
|
new String[] { FormatSpec.SHORTCUT_CONTENT_ID }, factory);
|
||||||
|
}
|
||||||
|
|
||||||
|
public ArrayList<WeightedString> readShortcuts(final int terminalId) {
|
||||||
|
final ArrayList<WeightedString> shortcuts = CollectionUtils.newArrayList();
|
||||||
|
read(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId,
|
||||||
|
new SparseTableContentReaderInterface() {
|
||||||
|
@Override
|
||||||
|
public void read(final DictBuffer buffer) {
|
||||||
|
while (true) {
|
||||||
|
final int flags = buffer.readUnsignedByte();
|
||||||
|
final String word = CharEncoding.readString(buffer);
|
||||||
|
shortcuts.add(new WeightedString(word,
|
||||||
|
flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY));
|
||||||
|
if (0 == (flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
if (shortcuts.isEmpty()) return null;
|
||||||
|
return shortcuts;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader {
|
protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader {
|
||||||
protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) {
|
protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) {
|
||||||
frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1);
|
frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1);
|
||||||
|
@ -233,23 +255,6 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private ArrayList<WeightedString> readShortcuts(final int terminalId) {
|
|
||||||
if (mShortcutAddressTable.get(0, terminalId) == SparseTable.NOT_EXIST) return null;
|
|
||||||
|
|
||||||
final ArrayList<WeightedString> ret = CollectionUtils.newArrayList();
|
|
||||||
final int posOfShortcuts = mShortcutAddressTable.get(FormatSpec.SHORTCUT_CONTENT_INDEX,
|
|
||||||
terminalId);
|
|
||||||
mShortcutBuffer.position(posOfShortcuts);
|
|
||||||
while (true) {
|
|
||||||
final int flags = mShortcutBuffer.readUnsignedByte();
|
|
||||||
final String word = CharEncoding.readString(mShortcutBuffer);
|
|
||||||
ret.add(new WeightedString(word,
|
|
||||||
flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY));
|
|
||||||
if (0 == (flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: Make this buffer thread safe.
|
// TODO: Make this buffer thread safe.
|
||||||
// TODO: Support words longer than FormatSpec.MAX_WORD_LENGTH.
|
// TODO: Support words longer than FormatSpec.MAX_WORD_LENGTH.
|
||||||
private final int[] mCharacterBuffer = new int[FormatSpec.MAX_WORD_LENGTH];
|
private final int[] mCharacterBuffer = new int[FormatSpec.MAX_WORD_LENGTH];
|
||||||
|
@ -300,7 +305,7 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
|
||||||
childrenAddress += addressPointer;
|
childrenAddress += addressPointer;
|
||||||
}
|
}
|
||||||
addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
|
addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
|
||||||
final ArrayList<WeightedString> shortcutTargets = readShortcuts(terminalId);
|
final ArrayList<WeightedString> shortcutTargets = mShortcutReader.readShortcuts(terminalId);
|
||||||
final ArrayList<PendingAttribute> bigrams =
|
final ArrayList<PendingAttribute> bigrams =
|
||||||
mBigramReader.readTargetsAndFrequencies(terminalId,
|
mBigramReader.readTargetsAndFrequencies(terminalId,
|
||||||
mTerminalAddressTableBuffer);
|
mTerminalAddressTableBuffer);
|
||||||
|
|
Loading…
Reference in New Issue