From 25de86a6a26232964937872f203f065dd9f9faa4 Mon Sep 17 00:00:00 2001
From: Jean Chalard <jchalard@google.com>
Date: Wed, 10 Jul 2013 20:16:13 +0900
Subject: [PATCH] [FD4] Separate cached address before/after update for groups

This should fix bug#8526576 for good.

Bug: 8526576
Change-Id: I473aad26b69d64efa09d2ec9d8e69f29f5cf4819
---
 .../latin/makedict/BinaryDictInputOutput.java | 122 ++++++++++--------
 .../latin/makedict/FusionDictionary.java      |  12 +-
 2 files changed, 80 insertions(+), 54 deletions(-)

diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
index a54344ab5..1b187d85d 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@@ -518,14 +518,56 @@ public final class BinaryDictInputOutput {
     }
 
     /**
-     * Finds the absolute address of a word in the dictionary.
+     * Get the offset from a position inside a current node to a target node, during update.
      *
-     * @param dict the dictionary in which to search.
-     * @param word the word we are searching for.
-     * @return the word address. If it is not found, an exception is thrown.
+     * If the current node is before the target node, the target node has not been updated yet,
+     * so we should return the offset from the old position of the current node to the old position
+     * of the target node. If on the other hand the target is before the current node, it already
+     * has been updated, so we should return the offset from the new position in the current node
+     * to the new position in the target node.
+     * @param currentNode the node containing the CharGroup where the offset will be written
+     * @param offsetFromStartOfCurrentNode the offset, in bytes, from the start of currentNode
+     * @param targetNode the target node to get the offset to
+     * @return the offset to the target node
      */
-    private static int findAddressOfWord(final FusionDictionary dict, final String word) {
-        return FusionDictionary.findWordInTree(dict.mRoot, word).mCachedAddress;
+    private static int getOffsetToTargetNodeDuringUpdate(final Node currentNode,
+            final int offsetFromStartOfCurrentNode, final Node targetNode) {
+        final boolean isTargetBeforeCurrent = (targetNode.mCachedAddressBeforeUpdate
+                < currentNode.mCachedAddressBeforeUpdate);
+        if (isTargetBeforeCurrent) {
+            return targetNode.mCachedAddressAfterUpdate
+                    - (currentNode.mCachedAddressAfterUpdate + offsetFromStartOfCurrentNode);
+        } else {
+            return targetNode.mCachedAddressBeforeUpdate
+                    - (currentNode.mCachedAddressBeforeUpdate + offsetFromStartOfCurrentNode);
+        }
+    }
+
+    /**
+     * Get the offset from a position inside a current node to a target CharGroup, during update.
+     * @param currentNode the node containing the CharGroup where the offset will be written
+     * @param offsetFromStartOfCurrentNode the offset, in bytes, from the start of currentNode
+     * @param targetCharGroup the target CharGroup to get the offset to
+     * @return the offset to the target CharGroup
+     */
+    // TODO: is there any way to factorize this method with the one above?
+    private static int getOffsetToTargetCharGroupDuringUpdate(final Node currentNode,
+            final int offsetFromStartOfCurrentNode, final CharGroup targetCharGroup) {
+        final int oldOffsetBasePoint = currentNode.mCachedAddressBeforeUpdate
+                + offsetFromStartOfCurrentNode;
+        final boolean isTargetBeforeCurrent = (targetCharGroup.mCachedAddressBeforeUpdate
+                < oldOffsetBasePoint);
+        // If the target is before the current node, then its address has already been updated.
+        // We can use the AfterUpdate member, and compare it to our own member after update.
+        // Otherwise, the AfterUpdate member is not updated yet, so we need to use the BeforeUpdate
+        // member, and of course we have to compare this to our own address before update.
+        if (isTargetBeforeCurrent) {
+            final int newOffsetBasePoint = currentNode.mCachedAddressAfterUpdate
+                    + offsetFromStartOfCurrentNode;
+            return targetCharGroup.mCachedAddressAfterUpdate - newOffsetBasePoint;
+        } else {
+            return targetCharGroup.mCachedAddressBeforeUpdate - oldOffsetBasePoint;
+        }
     }
 
     /**
@@ -548,30 +590,28 @@ public final class BinaryDictInputOutput {
         boolean changed = false;
         int size = getGroupCountSize(node);
         for (CharGroup group : node.mData) {
-            if (group.mCachedAddress != node.mCachedAddressBeforeUpdate + size) {
+            group.mCachedAddressAfterUpdate = node.mCachedAddressAfterUpdate + size;
+            if (group.mCachedAddressAfterUpdate != group.mCachedAddressBeforeUpdate) {
                 changed = true;
-                group.mCachedAddress = node.mCachedAddressBeforeUpdate + size;
             }
             int groupSize = getGroupHeaderSize(group, formatOptions);
             if (group.isTerminal()) groupSize += FormatSpec.GROUP_FREQUENCY_SIZE;
             if (null == group.mChildren && formatOptions.mSupportsDynamicUpdate) {
                 groupSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
             } else if (null != group.mChildren) {
-                final int offsetBasePoint = groupSize + node.mCachedAddressBeforeUpdate + size;
-                final int offset = group.mChildren.mCachedAddressBeforeUpdate - offsetBasePoint;
                 if (formatOptions.mSupportsDynamicUpdate) {
                     groupSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
                 } else {
-                    groupSize += getByteSize(offset);
+                    groupSize += getByteSize(getOffsetToTargetNodeDuringUpdate(node,
+                            groupSize + size, group.mChildren));
                 }
             }
             groupSize += getShortcutListSize(group.mShortcutTargets);
             if (null != group.mBigrams) {
                 for (WeightedString bigram : group.mBigrams) {
-                    final int offsetBasePoint = groupSize + node.mCachedAddressBeforeUpdate + size
-                            + FormatSpec.GROUP_FLAGS_SIZE;
-                    final int addressOfBigram = findAddressOfWord(dict, bigram.mWord);
-                    final int offset = addressOfBigram - offsetBasePoint;
+                    final int offset = getOffsetToTargetCharGroupDuringUpdate(node,
+                            groupSize + size + FormatSpec.GROUP_FLAGS_SIZE,
+                            FusionDictionary.findWordInTree(dict.mRoot, bigram.mWord));
                     groupSize += getByteSize(offset) + FormatSpec.GROUP_FLAGS_SIZE;
                 }
             }
@@ -603,7 +643,8 @@ public final class BinaryDictInputOutput {
             int groupCountSize = getGroupCountSize(n);
             int groupOffset = 0;
             for (final CharGroup g : n.mData) {
-                g.mCachedAddress = groupCountSize + nodeOffset + groupOffset;
+                g.mCachedAddressBeforeUpdate = g.mCachedAddressAfterUpdate =
+                        groupCountSize + nodeOffset + groupOffset;
                 groupOffset += g.mCachedSize;
             }
             final int nodeSize = groupCountSize + groupOffset
@@ -618,36 +659,14 @@ public final class BinaryDictInputOutput {
      * Updates the cached addresses of nodes after recomputing their new positions.
      *
      * @param flatNodes the array of nodes.
-     * @param formatOptions file format options.
-     * @return the byte size of the entire stack.
      */
-    private static int updateNodeCachedAddresses(final ArrayList<Node> flatNodes,
-            final FormatOptions formatOptions) {
-        int nodeOffset = 0;
+    private static void updateNodeCachedAddresses(final ArrayList<Node> flatNodes) {
         for (final Node n : flatNodes) {
             n.mCachedAddressBeforeUpdate = n.mCachedAddressAfterUpdate;
-            int groupCountSize = getGroupCountSize(n);
-            int groupOffset = 0;
             for (final CharGroup g : n.mData) {
-                // TODO: just copy cached address after update into cached address before update
-                // when the two fields are separated.
-                g.mCachedAddress = groupCountSize + nodeOffset + groupOffset;
-                groupOffset += g.mCachedSize;
+                g.mCachedAddressBeforeUpdate = g.mCachedAddressAfterUpdate;
             }
-            final int nodeSize = groupCountSize + groupOffset
-                    + (formatOptions.mSupportsDynamicUpdate
-                            ? FormatSpec.FORWARD_LINK_ADDRESS_SIZE : 0);
-            if (nodeSize != n.mCachedSize) {
-                // TODO: remove this test when the addresses are separated
-                throw new RuntimeException("Bug : Stored and computed node size differ");
-            }
-            if (nodeOffset != n.mCachedAddressAfterUpdate) {
-                // TODO: remove this test when the code is well tested
-                throw new RuntimeException("Bug : Stored and computed node address differ");
-            }
-            nodeOffset += n.mCachedSize;
         }
-        return nodeOffset;
     }
 
     /**
@@ -665,7 +684,7 @@ public final class BinaryDictInputOutput {
                     // Assign my address to children's parent address
                     // Here BeforeUpdate and AfterUpdate addresses have the same value, so it
                     // does not matter which we use.
-                    group.mChildren.mCachedParentAddress = group.mCachedAddress
+                    group.mChildren.mCachedParentAddress = group.mCachedAddressAfterUpdate
                             - group.mChildren.mCachedAddressAfterUpdate;
                 }
             }
@@ -710,7 +729,7 @@ public final class BinaryDictInputOutput {
                 nodeStartOffset += newNodeSize;
                 changesDone |= changed;
             }
-            updateNodeCachedAddresses(flatNodes, formatOptions);
+            updateNodeCachedAddresses(flatNodes);
             ++passes;
             if (passes > MAX_PASSES) throw new RuntimeException("Too many passes - probably a bug");
         } while (changesDone);
@@ -1003,10 +1022,11 @@ public final class BinaryDictInputOutput {
         }
         int groupAddress = index;
         for (int i = 0; i < groupCount; ++i) {
-            CharGroup group = node.mData.get(i);
-            if (index != group.mCachedAddress) throw new RuntimeException("Bug: write index is not "
-                    + "the same as the cached address of the group : "
-                    + index + " <> " + group.mCachedAddress);
+            final CharGroup group = node.mData.get(i);
+            if (index != group.mCachedAddressAfterUpdate) {
+                throw new RuntimeException("Bug: write index is not the same as the cached address "
+                        + "of the group : " + index + " <> " + group.mCachedAddressAfterUpdate);
+            }
             groupAddress += getGroupHeaderSize(group, formatOptions);
             // Sanity checks.
             if (DBG && group.mFrequency > FormatSpec.MAX_TERMINAL_FREQUENCY) {
@@ -1018,14 +1038,14 @@ public final class BinaryDictInputOutput {
             final int childrenOffset = null == group.mChildren
                     ? FormatSpec.NO_CHILDREN_ADDRESS
                             : group.mChildren.mCachedAddressAfterUpdate - groupAddress;
-            byte flags = makeCharGroupFlags(group, groupAddress, childrenOffset, formatOptions);
-            buffer[index++] = flags;
+            buffer[index++] =
+                    makeCharGroupFlags(group, groupAddress, childrenOffset, formatOptions);
 
             if (parentAddress == FormatSpec.NO_PARENT_ADDRESS) {
                 index = writeParentAddress(buffer, index, parentAddress, formatOptions);
             } else {
-                index = writeParentAddress(buffer, index,
-                        parentAddress + (node.mCachedAddressAfterUpdate - group.mCachedAddress),
+                index = writeParentAddress(buffer, index, parentAddress
+                        + (node.mCachedAddressAfterUpdate - group.mCachedAddressAfterUpdate),
                         formatOptions);
             }
 
@@ -1076,7 +1096,7 @@ public final class BinaryDictInputOutput {
                     final WeightedString bigram = bigramIterator.next();
                     final CharGroup target =
                             FusionDictionary.findWordInTree(dict.mRoot, bigram.mWord);
-                    final int addressOfBigram = target.mCachedAddress;
+                    final int addressOfBigram = target.mCachedAddressAfterUpdate;
                     final int unigramFrequencyForThisWord = target.mFrequency;
                     ++groupAddress;
                     final int offset = addressOfBigram - groupAddress;
diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
index 5530b7a74..1132e4a4a 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
@@ -111,9 +111,15 @@ public final class FusionDictionary implements Iterable<Word> {
         Node mChildren;
         boolean mIsNotAWord; // Only a shortcut
         boolean mIsBlacklistEntry;
-        // The two following members to help with binary generation
-        int mCachedSize;
-        int mCachedAddress;
+        // mCachedSize and mCachedAddressBefore/AfterUpdate are helpers for binary dictionary
+        // generation. Before and After always hold the same value except during dictionary
+        // address compression, where the update process needs to know about both values at the
+        // same time. Updating will update the AfterUpdate value, and the code will move them
+        // to BeforeUpdate before the next update pass.
+        // The update process does not need two versions of mCachedSize.
+        int mCachedSize; // The size, in bytes, of this char group.
+        int mCachedAddressBeforeUpdate; // The address of this char group (before update)
+        int mCachedAddressAfterUpdate; // The address of this char group (after update)
 
         public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
                 final ArrayList<WeightedString> bigrams, final int frequency,