Split the sentences into words ourselves

The framework's default word splitting is not suitable for all
languages. It also performs poorly when a space is mistyped
as a period.

Bug: 9063355
Bug: 10780091
Change-Id: I400d790ff1c29f221697fd94d79bbf67c61c7b8a
This commit is contained in:
Jean Chalard 2014-06-09 14:51:17 +09:00
parent a2a4ee2765
commit 7a6bc607ca
2 changed files with 243 additions and 2 deletions

View file

@ -16,6 +16,7 @@
package com.android.inputmethod.latin.spellcheck; package com.android.inputmethod.latin.spellcheck;
import android.content.res.Resources;
import android.os.Binder; import android.os.Binder;
import android.text.TextUtils; import android.text.TextUtils;
import android.util.Log; import android.util.Log;
@ -26,14 +27,18 @@ import android.view.textservice.TextInfo;
import com.android.inputmethod.latin.PrevWordsInfo; import com.android.inputmethod.latin.PrevWordsInfo;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Locale;
public final class AndroidSpellCheckerSession extends AndroidWordLevelSpellCheckerSession { public final class AndroidSpellCheckerSession extends AndroidWordLevelSpellCheckerSession {
private static final String TAG = AndroidSpellCheckerSession.class.getSimpleName(); private static final String TAG = AndroidSpellCheckerSession.class.getSimpleName();
private static final boolean DBG = false; private static final boolean DBG = false;
private final static String[] EMPTY_STRING_ARRAY = new String[0]; private final static String[] EMPTY_STRING_ARRAY = new String[0];
private final Resources mResources;
private SentenceLevelAdapter mSentenceLevelAdapter;
public AndroidSpellCheckerSession(AndroidSpellCheckerService service) { public AndroidSpellCheckerSession(AndroidSpellCheckerService service) {
super(service); super(service);
mResources = service.getResources();
} }
private SentenceSuggestionsInfo fixWronglyInvalidatedWordWithSingleQuote(TextInfo ti, private SentenceSuggestionsInfo fixWronglyInvalidatedWordWithSingleQuote(TextInfo ti,
@ -115,8 +120,7 @@ public final class AndroidSpellCheckerSession extends AndroidWordLevelSpellCheck
@Override @Override
public SentenceSuggestionsInfo[] onGetSentenceSuggestionsMultiple(TextInfo[] textInfos, public SentenceSuggestionsInfo[] onGetSentenceSuggestionsMultiple(TextInfo[] textInfos,
int suggestionsLimit) { int suggestionsLimit) {
final SentenceSuggestionsInfo[] retval = final SentenceSuggestionsInfo[] retval = splitAndSuggest(textInfos, suggestionsLimit);
super.onGetSentenceSuggestionsMultiple(textInfos, suggestionsLimit);
if (retval == null || retval.length != textInfos.length) { if (retval == null || retval.length != textInfos.length) {
return retval; return retval;
} }
@ -130,6 +134,58 @@ public final class AndroidSpellCheckerSession extends AndroidWordLevelSpellCheck
return retval; return retval;
} }
/**
 * Get sentence suggestions for specified texts in an array of TextInfo. This is taken from
 * SpellCheckerService#onGetSentenceSuggestionsMultiple that we can't use because it's
 * using private variables.
 * Each input text is split into words by a {@link SentenceLevelAdapter}, the words are
 * checked through {@link #onGetSuggestionsMultiple(TextInfo[], int, boolean)}, and the
 * word-level results are reassembled into one {@link SentenceSuggestionsInfo} per input.
 * This function will run on the incoming IPC thread.
 * So, this is not called on the main thread,
 * but will be called in series on another thread.
 * @param textInfos an array of the text metadata
 * @param suggestionsLimit the maximum number of suggestions to be returned
 * @return an array of {@link SentenceSuggestionsInfo} with one entry per element of
 * {@code textInfos}; an entry is whatever
 * {@link SentenceLevelAdapter#reconstructSuggestions} returned for that text and may be
 * {@code null}. {@link SentenceLevelAdapter#EMPTY_SENTENCE_SUGGESTIONS_INFOS} is returned
 * when {@code textInfos} is null or empty, or when the session locale string is empty.
 */
private SentenceSuggestionsInfo[] splitAndSuggest(TextInfo[] textInfos, int suggestionsLimit) {
    if (textInfos == null || textInfos.length == 0) {
        return SentenceLevelAdapter.EMPTY_SENTENCE_SUGGESTIONS_INFOS;
    }
    SentenceLevelAdapter sentenceLevelAdapter;
    // The adapter is created lazily on first use and then cached in mSentenceLevelAdapter.
    synchronized(this) {
        sentenceLevelAdapter = mSentenceLevelAdapter;
        if (sentenceLevelAdapter == null) {
            final String localeStr = getLocale();
            if (!TextUtils.isEmpty(localeStr)) {
                // new Locale(String) treats its whole argument as a language code, so a
                // string such as "en_US" must be split into language/country/variant first.
                // NOTE(review): assumes underscore-separated locale strings; a string
                // without any underscore yields exactly the same Locale as before.
                final String[] localeParts = localeStr.split("_", 3);
                final Locale locale;
                if (localeParts.length == 1) {
                    locale = new Locale(localeParts[0]);
                } else if (localeParts.length == 2) {
                    locale = new Locale(localeParts[0], localeParts[1]);
                } else {
                    locale = new Locale(localeParts[0], localeParts[1], localeParts[2]);
                }
                sentenceLevelAdapter = new SentenceLevelAdapter(mResources, locale);
                mSentenceLevelAdapter = sentenceLevelAdapter;
            }
        }
    }
    if (sentenceLevelAdapter == null) {
        // No usable locale, hence no adapter: nothing can be split.
        return SentenceLevelAdapter.EMPTY_SENTENCE_SUGGESTIONS_INFOS;
    }
    final int infosSize = textInfos.length;
    final SentenceSuggestionsInfo[] retval = new SentenceSuggestionsInfo[infosSize];
    for (int i = 0; i < infosSize; ++i) {
        final SentenceLevelAdapter.SentenceTextInfoParams textInfoParams =
                sentenceLevelAdapter.getSplitWords(textInfos[i]);
        final ArrayList<SentenceLevelAdapter.SentenceWordItem> items =
                textInfoParams.mItems;
        final int itemsSize = items.size();
        // Flatten the split words into the array form expected by the word-level checker.
        final TextInfo[] splitTextInfos = new TextInfo[itemsSize];
        for (int j = 0; j < itemsSize; ++j) {
            splitTextInfos[j] = items.get(j).mTextInfo;
        }
        retval[i] = SentenceLevelAdapter.reconstructSuggestions(
                textInfoParams, onGetSuggestionsMultiple(
                        splitTextInfos, suggestionsLimit, true));
    }
    return retval;
}
@Override @Override
public SuggestionsInfo[] onGetSuggestionsMultiple(TextInfo[] textInfos, public SuggestionsInfo[] onGetSuggestionsMultiple(TextInfo[] textInfos,
int suggestionsLimit, boolean sequentialWords) { int suggestionsLimit, boolean sequentialWords) {

View file

@ -0,0 +1,185 @@
/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.inputmethod.latin.spellcheck;
import android.content.res.Resources;
import android.view.textservice.SentenceSuggestionsInfo;
import android.view.textservice.SuggestionsInfo;
import android.view.textservice.TextInfo;
import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
import com.android.inputmethod.latin.utils.RunInLocale;
import java.util.ArrayList;
import java.util.Locale;
/**
 * Splits a sentence-level {@link TextInfo} into word-level queries and reassembles the
 * word-level {@link SuggestionsInfo} results into a {@link SentenceSuggestionsInfo}.
 *
 * This code is mostly lifted directly from android.service.textservice.SpellCheckerService in
 * the framework; maybe that should be protected instead, so that implementers don't have to
 * rewrite everything for any small change.
 */
public class SentenceLevelAdapter {
    /** Zero-length array that callers can use as an empty sentence-level result. */
    public static final SentenceSuggestionsInfo[] EMPTY_SENTENCE_SUGGESTIONS_INFOS =
            new SentenceSuggestionsInfo[] {};
    /** Placeholder stored for words that have no matching word-level result. */
    private static final SuggestionsInfo EMPTY_SUGGESTIONS_INFO = new SuggestionsInfo(0, null);

    /**
     * Container for split TextInfo parameters
     */
    public static class SentenceWordItem {
        /** The word-level query. */
        public final TextInfo mTextInfo;
        /** Offset of the word in the original text, in chars. */
        public final int mStart;
        /** Length of the word, in chars. */
        public final int mLength;

        /**
         * @param ti the word-level query
         * @param start offset of the first char of the word in the original text
         * @param end offset just past the last char of the word in the original text
         */
        public SentenceWordItem(TextInfo ti, int start, int end) {
            mTextInfo = ti;
            mStart = start;
            mLength = end - start;
        }
    }

    /**
     * Container for originally queried TextInfo and parameters
     */
    public static class SentenceTextInfoParams {
        final TextInfo mOriginalTextInfo;
        final ArrayList<SentenceWordItem> mItems;
        // Snapshot of the number of items taken at construction time.
        final int mSize;

        /**
         * @param ti the sentence-level query as originally received
         * @param items the words the query was split into
         */
        public SentenceTextInfoParams(TextInfo ti, ArrayList<SentenceWordItem> items) {
            mOriginalTextInfo = ti;
            mItems = items;
            mSize = items.size();
        }
    }

    /**
     * Finds word boundaries in a char sequence, using the word separators reported by a
     * {@link SpacingAndPunctuations} created for the requested locale.
     */
    private static class WordIterator {
        private final SpacingAndPunctuations mSpacingAndPunctuations;

        public WordIterator(final Resources res, final Locale locale) {
            final RunInLocale<SpacingAndPunctuations> job
                    = new RunInLocale<SpacingAndPunctuations>() {
                @Override
                protected SpacingAndPunctuations job(final Resources resources) {
                    return new SpacingAndPunctuations(resources);
                }
            };
            mSpacingAndPunctuations = job.runInLocale(res, locale);
        }

        /**
         * Returns the index at which the word containing {@code fromIndex} ends.
         *
         * The scan starts one code point after {@code fromIndex} (or at 0 when
         * {@code fromIndex} is negative) and stops at the first word separator. A period
         * is special: it only ends the word when the code point right after it is also a
         * word separator, so a period followed by a letter or by the end of the text stays
         * part of the word.
         *
         * @param sequence the text to scan
         * @param fromIndex an index inside the word; must be less than the text length
         * @return the index of the separator ending the word, or the text length
         */
        public int getEndOfWord(final CharSequence sequence, final int fromIndex) {
            final int length = sequence.length();
            int index = fromIndex < 0 ? 0 : Character.offsetByCodePoints(sequence, fromIndex, 1);
            while (index < length) {
                final int codePoint = Character.codePointAt(sequence, index);
                if (mSpacingAndPunctuations.isWordSeparator(codePoint)) {
                    if (Constants.CODE_PERIOD != codePoint) {
                        return index;
                    }
                    // If it's a period, we want to stop here only if it's followed by
                    // another word separator.
                    final int indexOfNextCodePoint =
                            index + Character.charCount(Constants.CODE_PERIOD);
                    if (indexOfNextCodePoint < length
                            && mSpacingAndPunctuations.isWordSeparator(
                                    Character.codePointAt(sequence, indexOfNextCodePoint))) {
                        return index;
                    }
                }
                index += Character.charCount(codePoint);
            }
            return index;
        }

        /**
         * Returns the index of the first non-separator code point strictly after
         * {@code fromIndex} (or from 0 when {@code fromIndex} is negative).
         *
         * @param sequence the text to scan
         * @param fromIndex the index to start after
         * @return the start of the next word, or -1 when there is none
         */
        public int getBeginningOfNextWord(final CharSequence sequence, final int fromIndex) {
            final int length = sequence.length();
            if (fromIndex >= length) {
                return -1;
            }
            int index = fromIndex < 0 ? 0 : Character.offsetByCodePoints(sequence, fromIndex, 1);
            while (index < length) {
                final int codePoint = Character.codePointAt(sequence, index);
                if (!mSpacingAndPunctuations.isWordSeparator(codePoint)) {
                    return index;
                }
                index += Character.charCount(codePoint);
            }
            return -1;
        }
    }

    private final WordIterator mWordIterator;

    /**
     * @param res the resources handed to {@link RunInLocale} to build the separator tables
     * @param locale the locale in which those resources are read
     */
    public SentenceLevelAdapter(final Resources res, final Locale locale) {
        mWordIterator = new WordIterator(res, locale);
    }

    /**
     * Splits the text of a sentence-level query into word-level queries.
     *
     * Each word gets its own {@link TextInfo} carrying the cookie of the original query and
     * the hash code of the word as its sequence number.
     *
     * @param originalTextInfo the sentence-level query
     * @return the original query together with the words found, in text order
     */
    public SentenceTextInfoParams getSplitWords(TextInfo originalTextInfo) {
        final WordIterator wordIterator = mWordIterator;
        final CharSequence originalText = originalTextInfo.getText();
        final int cookie = originalTextInfo.getCookie();
        final ArrayList<SentenceWordItem> wordItems = new ArrayList<SentenceWordItem>();
        // A negative index makes the iterator start scanning at the very first code point.
        int wordStart = wordIterator.getBeginningOfNextWord(originalText, -1);
        while (wordStart != -1) {
            final int wordEnd = wordIterator.getEndOfWord(originalText, wordStart);
            if (wordEnd > wordStart) {
                final String query = originalText.subSequence(wordStart, wordEnd).toString();
                final TextInfo ti = new TextInfo(query, cookie, query.hashCode());
                wordItems.add(new SentenceWordItem(ti, wordStart, wordEnd));
            }
            wordStart = wordIterator.getBeginningOfNextWord(originalText, wordEnd);
        }
        return new SentenceTextInfoParams(originalTextInfo, wordItems);
    }

    /**
     * Builds the sentence-level result from word-level results.
     *
     * Results are paired with words by sequence number. Every matched result is re-stamped
     * with the cookie and sequence of the original query, and words without a matching
     * result get an empty {@link SuggestionsInfo}.
     *
     * @param originalTextInfoParams the split produced by {@link #getSplitWords(TextInfo)}
     * @param results the word-level results; entries may be null
     * @return the sentence-level result, or null when {@code results} is null or empty or
     * when {@code originalTextInfoParams} is null
     */
    public static SentenceSuggestionsInfo reconstructSuggestions(
            SentenceTextInfoParams originalTextInfoParams, SuggestionsInfo[] results) {
        if (results == null || results.length == 0 || originalTextInfoParams == null) {
            return null;
        }
        final int originalCookie = originalTextInfoParams.mOriginalTextInfo.getCookie();
        final int originalSequence =
                originalTextInfoParams.mOriginalTextInfo.getSequence();
        final int querySize = originalTextInfoParams.mSize;
        final int[] offsets = new int[querySize];
        final int[] lengths = new int[querySize];
        final SuggestionsInfo[] reconstructedSuggestions = new SuggestionsInfo[querySize];
        // Matched results are re-stamped with the original sequence below. Without this
        // bookkeeping, a later word whose sequence happens to equal the original sequence
        // would be paired with a result that already belongs to another word.
        final boolean[] claimed = new boolean[results.length];
        for (int i = 0; i < querySize; ++i) {
            final SentenceWordItem item = originalTextInfoParams.mItems.get(i);
            final int wordSequence = item.mTextInfo.getSequence();
            SuggestionsInfo result = null;
            for (int j = 0; j < results.length; ++j) {
                final SuggestionsInfo cur = results[j];
                if (claimed[j] || cur == null) {
                    continue;
                }
                if (cur.getSequence() == wordSequence) {
                    claimed[j] = true;
                    cur.setCookieAndSequence(originalCookie, originalSequence);
                    result = cur;
                    break;
                }
            }
            offsets[i] = item.mStart;
            lengths[i] = item.mLength;
            reconstructedSuggestions[i] = result != null ? result : EMPTY_SUGGESTIONS_INFO;
        }
        return new SentenceSuggestionsInfo(reconstructedSuggestions, offsets, lengths);
    }
}