LatinIME/java/src/com/android/inputmethod/latin/StringUtils.java

/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin;

import android.text.TextUtils;

import java.util.ArrayList;
import java.util.Locale;

/**
 * Static helpers for string handling: comma-separated value manipulation, code point
 * conversion, capitalization detection and de-duplication of string lists.
 */
public final class StringUtils {
    public static final int CAPITALIZE_NONE = 0;  // No caps, or mixed case
    public static final int CAPITALIZE_FIRST = 1; // First only
    public static final int CAPITALIZE_ALL = 2;   // All caps

    private StringUtils() {
        // This utility class is not publicly instantiable.
    }

    /**
     * Count the Unicode code points in a string.
     *
     * @param text the string to measure. May be null or empty.
     * @return the number of code points in the text, or 0 if the text is null or empty.
     */
    public static int codePointCount(final String text) {
        if (TextUtils.isEmpty(text)) return 0;
        return text.codePointCount(0, text.length());
    }

    /**
     * Test whether an array contains a string equal to the specified key.
     *
     * @param key the string to look for. Must not be null.
     * @param array the array to be searched. Must not be null, but may contain null elements.
     * @return true if at least one element of the array equals the key.
     */
    public static boolean containsInArray(final String key, final String[] array) {
        for (final String element : array) {
            if (key.equals(element)) return true;
        }
        return false;
    }

    /**
     * Test whether a comma separated list contains an entry equal to the specified key.
     *
     * @param key the string to look for. Must not be null.
     * @param csv the comma separated list. May be null or empty.
     * @return true if one of the entries of the list equals the key. Always false if the
     * list is null or empty.
     */
    public static boolean containsInCsv(final String key, final String csv) {
        if (TextUtils.isEmpty(csv)) return false;
        return containsInArray(key, csv.split(","));
    }

    /**
     * Append a key to a comma separated list unless the list already contains it.
     *
     * @param key the string to append.
     * @param csv the comma separated list. May be null or empty.
     * @return the key alone if the list is null or empty, the list itself if it already
     * contains the key, and the list followed by a comma and the key otherwise.
     */
    public static String appendToCsvIfNotExists(final String key, final String csv) {
        if (TextUtils.isEmpty(csv)) return key;
        if (containsInCsv(key, csv)) return csv;
        return csv + "," + key;
    }

    /**
     * Remove every entry equal to the key from a comma separated list.
     *
     * @param key the string to remove. Must not be null.
     * @param csv the comma separated list. May be null or empty.
     * @return an empty string if the list is null or empty, the list itself if it does not
     * contain the key, and otherwise the remaining entries joined with commas.
     */
    public static String removeFromCsvIfExists(final String key, final String csv) {
        if (TextUtils.isEmpty(csv)) return "";
        final String[] elements = csv.split(",");
        if (!containsInArray(key, elements)) return csv;
        // The key is known to be present at least once, so length - 1 is a non-negative
        // upper bound for the number of remaining entries.
        final ArrayList<String> result = CollectionUtils.newArrayList(elements.length - 1);
        for (final String element : elements) {
            if (!key.equals(element)) result.add(element);
        }
        return TextUtils.join(",", result);
    }

    /**
     * Find a string that starts with the specified prefix in an array.
     *
     * @param prefix a prefix string to find.
     * @param array a string array to be searched.
     * @return the remaining part, after the prefix, of the first string that starts with the
     * prefix. Returns null if no such string could be found.
     */
    public static String findPrefixedString(final String prefix, final String[] array) {
        for (final String element : array) {
            if (element.startsWith(prefix)) {
                return element.substring(prefix.length());
            }
        }
        return null;
    }

    /**
     * Remove duplicates from a list of strings, in place.
     *
     * This method will always keep the first occurrence of all strings at their position
     * in the list, removing the subsequent ones. Null entries are considered equal to
     * each other.
     *
     * @param suggestions the list to remove duplicates from. It is modified by this method.
     */
    public static void removeDupes(final ArrayList<String> suggestions) {
        if (suggestions.size() < 2) return;
        int i = 1;
        // Don't cache suggestions.size(), since we may be removing items
        while (i < suggestions.size()) {
            // Compare each suggestion with each previous suggestion
            if (suggestions.subList(0, i).contains(suggestions.get(i))) {
                // The next candidate has shifted into index i, so i must not advance.
                suggestions.remove(i);
            } else {
                i++;
            }
        }
    }

    /**
     * Capitalize the first code point of a string, leaving the rest untouched.
     *
     * The first code point is converted with {@link String#toUpperCase(Locale)}, so a
     * supplementary character is handled as a whole and a character whose upper case
     * form is several characters long is fully preserved.
     *
     * @param s the string to capitalize. Must not be null.
     * @param locale the locale whose case mapping rules are used.
     * @return the string with its first code point in upper case. A string that is zero
     * or one char long is returned unchanged.
     */
    public static String toTitleCase(final String s, final Locale locale) {
        if (s.length() <= 1) {
            // TODO: is this really correct? Shouldn't this be s.toUpperCase()?
            return s;
        }
        // TODO: fix the bugs below
        // - This does not work for Greek, because it returns upper case instead of title case.
        // - It does not work for Serbian, because it fails to account for the "lj" character,
        // which should be "Lj" in title case and "LJ" in upper case.
        // - It does not work for Dutch, because it fails to account for the "ij" digraph, which
        // are two different characters but both should be capitalized as "IJ" as if they were
        // a single letter.
        // Cut after the first code point rather than after the first char, so that a
        // surrogate pair is never split.
        final int cutoff = s.offsetByCodePoints(0, 1);
        return s.substring(0, cutoff).toUpperCase(locale) + s.substring(cutoff);
    }

    private static final int[] EMPTY_CODEPOINTS = {};

    /**
     * Convert a string to the array of its Unicode code points.
     *
     * @param string the string to convert. Must not be null.
     * @return an array holding one int per code point, in order. An empty string yields
     * an empty array.
     */
    public static int[] toCodePointArray(final String string) {
        final int length = string.length();
        if (length <= 0) {
            return EMPTY_CODEPOINTS;
        }
        final int[] codePoints = new int[string.codePointCount(0, length)];
        int destIndex = 0;
        for (int index = 0; index < length; index = string.offsetByCodePoints(index, 1)) {
            codePoints[destIndex] = string.codePointAt(index);
            destIndex++;
        }
        return codePoints;
    }

    /**
     * Split a string on {@link Constants#CSV_SEPARATOR}, dropping empty entries.
     *
     * A character following {@link Constants#CSV_ESCAPE} is never treated as a separator.
     * The escape character itself is left in place in the returned entries.
     *
     * @param text the string to split. Must not be null.
     * @return the non-empty entries in order, or null if the text is empty or has no
     * non-empty entry.
     */
    public static String[] parseCsvString(final String text) {
        final int size = text.length();
        if (size == 0) {
            return null;
        }
        if (codePointCount(text) == 1) {
            return text.codePointAt(0) == Constants.CSV_SEPARATOR ? null : new String[] { text };
        }

        // Allocated lazily, only once a separator terminates a non-empty entry.
        ArrayList<String> list = null;
        int start = 0;
        for (int pos = 0; pos < size; pos++) {
            final char c = text.charAt(pos);
            if (c == Constants.CSV_SEPARATOR) {
                // Skip empty entry.
                if (pos - start > 0) {
                    if (list == null) {
                        list = CollectionUtils.newArrayList();
                    }
                    list.add(text.substring(start, pos));
                }
                // Skip comma
                start = pos + 1;
            } else if (c == Constants.CSV_ESCAPE) {
                // Skip escape character and escaped character.
                pos++;
            }
        }
        // Whatever follows the last separator is the final entry, if it is not empty.
        final String remain = (size - start > 0) ? text.substring(start) : null;
        if (list == null) {
            return remain != null ? new String[] { remain } : null;
        }
        if (remain != null) {
            list.add(remain);
        }
        return list.toArray(new String[list.size()]);
    }

    /**
     * Determine how a word is capitalized.
     *
     * @param text the word to examine.
     * @return {@link #CAPITALIZE_FIRST} if only the first code point is upper case,
     * {@link #CAPITALIZE_ALL} if the word has several upper case letters and every letter
     * is upper case, and {@link #CAPITALIZE_NONE} otherwise, including when the text is
     * null or empty.
     */
    public static int getCapitalizationType(final String text) {
        if (TextUtils.isEmpty(text)) return CAPITALIZE_NONE;
        // If the first char is not uppercase, then the word is either all lower case or
        // camel case, and in either case we return CAPITALIZE_NONE.
        if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE;
        final int len = text.length();
        int capsCount = 1;
        int letterCount = 1;
        // Start at the second code point, which is not necessarily the second char.
        for (int i = text.offsetByCodePoints(0, 1); i < len; i = text.offsetByCodePoints(i, 1)) {
            // Once a lower case letter has followed a second upper case one, the word is
            // mixed case and nothing further can change the result.
            if (1 != capsCount && letterCount != capsCount) break;
            final int codePoint = text.codePointAt(i);
            if (Character.isUpperCase(codePoint)) {
                ++capsCount;
                ++letterCount;
            } else if (Character.isLetter(codePoint)) {
                // We need to discount non-letters since they may not be upper-case, but may
                // still be part of a word (e.g. single quote or dash, as in "IT'S" or "FULL-TIME")
                ++letterCount;
            }
        }
        // We know the first char is upper case. So we want to test if either every letter other
        // than the first is lower case, or if they are all upper case. If the string is exactly
        // one char long, then we will arrive here with letterCount 1, and this is correct, too.
        if (1 == capsCount) return CAPITALIZE_FIRST;
        return (letterCount == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE);
    }
}
Split Utils class to StringUtils, SubtypeUtils, and JniUtils Change-Id: I09e91675fe7d573dad8c933ad513b21d7e409144 2012-03-08 08:07:02 +00:00			`/*`
			`* Copyright (C) 2012 The Android Open Source Project`
			`*`
			`* Licensed under the Apache License, Version 2.0 (the "License");`
			`* you may not use this file except in compliance with the License.`
			`* You may obtain a copy of the License at`
			`*`
			`* http://www.apache.org/licenses/LICENSE-2.0`
			`*`
			`* Unless required by applicable law or agreed to in writing, software`
			`* distributed under the License is distributed on an "AS IS" BASIS,`
			`* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`* See the License for the specific language governing permissions and`
			`* limitations under the License.`
			`*/`

			`package com.android.inputmethod.latin;`

			`import android.text.TextUtils;`

			`import java.util.ArrayList;`
Use keyboardSet extra value of subtype to specify layout type Change-Id: Ice1f345a08a8d760e3b847c885c4072e3e142c97 2012-04-04 05:30:42 +00:00			`import java.util.Locale;`
Split Utils class to StringUtils, SubtypeUtils, and JniUtils Change-Id: I09e91675fe7d573dad8c933ad513b21d7e409144 2012-03-08 08:07:02 +00:00
Make utility classes final Change-Id: Ic4f69b4c8da33c9fca50d6829724179310c2f875 2012-08-29 08:26:00 +00:00			`public final class StringUtils {`
Move a generic string utility to StringUtils Change-Id: I9cc2e9a7ac0b3346af40bcb083f939333336cf09 2013-04-04 12:14:37 +00:00			`public static final int CAPITALIZE_NONE = 0; // No caps, or mixed case`
			`public static final int CAPITALIZE_FIRST = 1; // First only`
			`public static final int CAPITALIZE_ALL = 2; // All caps`

Split Utils class to StringUtils, SubtypeUtils, and JniUtils Change-Id: I09e91675fe7d573dad8c933ad513b21d7e409144 2012-03-08 08:07:02 +00:00			`private StringUtils() {`
			`// This utility class is not publicly instantiable.`
			`}`

Replace useless CharSequence to String Change-Id: Idc478f901185ee1b4912acc82d0cbc54fee4e991 2012-10-03 06:19:43 +00:00			`public static int codePointCount(final String text) {`
Split Utils class to StringUtils, SubtypeUtils, and JniUtils Change-Id: I09e91675fe7d573dad8c933ad513b21d7e409144 2012-03-08 08:07:02 +00:00			`if (TextUtils.isEmpty(text)) return 0;`
			`return text.codePointCount(0, text.length());`
			`}`

Replace useless CharSequence to String Change-Id: Idc478f901185ee1b4912acc82d0cbc54fee4e991 2012-10-03 06:19:43 +00:00			`public static boolean containsInArray(final String key, final String[] array) {`
Change predefined additional subtype format in preference This change also refactor StringUtils class Change-Id: Ie0b4d169b21c260bf238d6fcc9ab0ee8bfd6b508 2012-04-19 07:39:25 +00:00			`for (final String element : array) {`
			`if (key.equals(element)) return true;`
Split Utils class to StringUtils, SubtypeUtils, and JniUtils Change-Id: I09e91675fe7d573dad8c933ad513b21d7e409144 2012-03-08 08:07:02 +00:00			`}`
			`return false;`
			`}`

Replace useless CharSequence to String Change-Id: Idc478f901185ee1b4912acc82d0cbc54fee4e991 2012-10-03 06:19:43 +00:00			`public static boolean containsInCsv(final String key, final String csv) {`
Change predefined additional subtype format in preference This change also refactor StringUtils class Change-Id: Ie0b4d169b21c260bf238d6fcc9ab0ee8bfd6b508 2012-04-19 07:39:25 +00:00			`if (TextUtils.isEmpty(csv)) return false;`
			`return containsInArray(key, csv.split(","));`
			`}`

Replace useless CharSequence to String Change-Id: Idc478f901185ee1b4912acc82d0cbc54fee4e991 2012-10-03 06:19:43 +00:00			`public static String appendToCsvIfNotExists(final String key, final String csv) {`
Change predefined additional subtype format in preference This change also refactor StringUtils class Change-Id: Ie0b4d169b21c260bf238d6fcc9ab0ee8bfd6b508 2012-04-19 07:39:25 +00:00			`if (TextUtils.isEmpty(csv)) return key;`
			`if (containsInCsv(key, csv)) return csv;`
			`return csv + "," + key;`
			`}`

Replace useless CharSequence to String Change-Id: Idc478f901185ee1b4912acc82d0cbc54fee4e991 2012-10-03 06:19:43 +00:00			`public static String removeFromCsvIfExists(final String key, final String csv) {`
Change predefined additional subtype format in preference This change also refactor StringUtils class Change-Id: Ie0b4d169b21c260bf238d6fcc9ab0ee8bfd6b508 2012-04-19 07:39:25 +00:00			`if (TextUtils.isEmpty(csv)) return "";`
			`final String[] elements = csv.split(",");`
			`if (!containsInArray(key, elements)) return csv;`
Add CollectionUtils class to create generic collection easily Change-Id: I6b4de9187e122298e5e9cd8ddc9070d062df6a89 2012-08-21 07:34:55 +00:00			`final ArrayList<String> result = CollectionUtils.newArrayList(elements.length - 1);`
Change predefined additional subtype format in preference This change also refactor StringUtils class Change-Id: Ie0b4d169b21c260bf238d6fcc9ab0ee8bfd6b508 2012-04-19 07:39:25 +00:00			`for (final String element : elements) {`
			`if (!key.equals(element)) result.add(element);`
			`}`
			`return TextUtils.join(",", result);`
Split Utils class to StringUtils, SubtypeUtils, and JniUtils Change-Id: I09e91675fe7d573dad8c933ad513b21d7e409144 2012-03-08 08:07:02 +00:00			`}`

Set default audio and haptic feedback settings - Default keypress volume is set to 0.2f in resource. - Default keypress vibration duration is set to 10 msec in resource. Bug: 7055329 Change-Id: I83bd6288d171d9787d52e2b02e4e5305f1435681 2012-12-27 05:48:47 +00:00			`/**`
			`* Find a string that start with specified prefix from an array.`
			`*`
			`* @param prefix a prefix string to find.`
			`* @param array an string array to be searched.`
			`* @return the rest part of the string that starts with the prefix.`
			`* Returns null if it couldn't be found.`
			`*/`
			`public static String findPrefixedString(final String prefix, final String[] array) {`
			`for (final String element : array) {`
			`if (element.startsWith(prefix)) {`
			`return element.substring(prefix.length());`
			`}`
			`}`
			`return null;`
			`}`

Split Utils class to StringUtils, SubtypeUtils, and JniUtils Change-Id: I09e91675fe7d573dad8c933ad513b21d7e409144 2012-03-08 08:07:02 +00:00			`/**`
			`* Remove duplicates from an array of strings.`
			`*`
Cleanup unused methods Change-Id: Ic7c1ec0e0606da6eb5b954934547c6e8a561fed5 2012-04-06 02:49:48 +00:00			`* This method will always keep the first occurrence of all strings at their position`
Split Utils class to StringUtils, SubtypeUtils, and JniUtils Change-Id: I09e91675fe7d573dad8c933ad513b21d7e409144 2012-03-08 08:07:02 +00:00			`* in the array, removing the subsequent ones.`
			`*/`
Replace useless CharSequence to String Change-Id: Idc478f901185ee1b4912acc82d0cbc54fee4e991 2012-10-03 06:19:43 +00:00			`public static void removeDupes(final ArrayList<String> suggestions) {`
Split Utils class to StringUtils, SubtypeUtils, and JniUtils Change-Id: I09e91675fe7d573dad8c933ad513b21d7e409144 2012-03-08 08:07:02 +00:00			`if (suggestions.size() < 2) return;`
			`int i = 1;`
			`// Don't cache suggestions.size(), since we may be removing items`
			`while (i < suggestions.size()) {`
Replace useless CharSequence to String Change-Id: Idc478f901185ee1b4912acc82d0cbc54fee4e991 2012-10-03 06:19:43 +00:00			`final String cur = suggestions.get(i);`
Split Utils class to StringUtils, SubtypeUtils, and JniUtils Change-Id: I09e91675fe7d573dad8c933ad513b21d7e409144 2012-03-08 08:07:02 +00:00			`// Compare each suggestion with each previous suggestion`
			`for (int j = 0; j < i; j++) {`
Replace useless CharSequence to String Change-Id: Idc478f901185ee1b4912acc82d0cbc54fee4e991 2012-10-03 06:19:43 +00:00			`final String previous = suggestions.get(j);`
Split Utils class to StringUtils, SubtypeUtils, and JniUtils Change-Id: I09e91675fe7d573dad8c933ad513b21d7e409144 2012-03-08 08:07:02 +00:00			`if (TextUtils.equals(cur, previous)) {`
Kill the StringBuilderPool. The intention may have been nice originally but these end up being copied anyway :/ Let's remove them now, and in a later change, just keep references to the created objects. Change-Id: Ifba8357c20384f9eb40cd916665ed1fc6dc8cab1 2012-03-13 07:52:42 +00:00			`suggestions.remove(i);`
Split Utils class to StringUtils, SubtypeUtils, and JniUtils Change-Id: I09e91675fe7d573dad8c933ad513b21d7e409144 2012-03-08 08:07:02 +00:00			`i--;`
			`break;`
			`}`
			`}`
			`i++;`
			`}`
			`}`
Use keyboardSet extra value of subtype to specify layout type Change-Id: Ice1f345a08a8d760e3b847c885c4072e3e142c97 2012-04-04 05:30:42 +00:00
Replace useless CharSequence to String Change-Id: Idc478f901185ee1b4912acc82d0cbc54fee4e991 2012-10-03 06:19:43 +00:00			`public static String toTitleCase(final String s, final Locale locale) {`
Use keyboardSet extra value of subtype to specify layout type Change-Id: Ice1f345a08a8d760e3b847c885c4072e3e142c97 2012-04-04 05:30:42 +00:00			`if (s.length() <= 1) {`
			`// TODO: is this really correct? Shouldn't this be s.toUpperCase()?`
			`return s;`
			`}`
			`// TODO: fix the bugs below`
			`// - This does not work for Greek, because it returns upper case instead of title case.`
			`// - It does not work for Serbian, because it fails to account for the "lj" character,`
			`// which should be "Lj" in title case and "LJ" in upper case.`
			`// - It does not work for Dutch, because it fails to account for the "ij" digraph, which`
			`// are two different characters but both should be capitalized as "IJ" as if they were`
			`// a single letter.`
			`// - It also does not work with unicode surrogate code points.`
			`return s.toUpperCase(locale).charAt(0) + s.substring(1);`
			`}`
Pass the previous word down to native code in getSuggestions Change-Id: I477b631d81ef58461e44954f3ae5fd895928bb97 2012-04-17 06:55:17 +00:00
Replace useless CharSequence to String Change-Id: Idc478f901185ee1b4912acc82d0cbc54fee4e991 2012-10-03 06:19:43 +00:00			`private static final int[] EMPTY_CODEPOINTS = {};`

Pass the previous word down to native code in getSuggestions Change-Id: I477b631d81ef58461e44954f3ae5fd895928bb97 2012-04-17 06:55:17 +00:00			`public static int[] toCodePointArray(final String string) {`
Replace useless CharSequence to String Change-Id: Idc478f901185ee1b4912acc82d0cbc54fee4e991 2012-10-03 06:19:43 +00:00			`final int length = string.length();`
fix IllegalOutOfBoundsException StringUtils.toCodePointArray() had thrown IllegalOutOfBoundsException if passed an empty string. change to just return an empty int[]. Bug: 6188932 Change-Id: Ic41c628c0d407f49fc98cd48cb7ea13d8d5bdd77 2012-06-25 18:17:48 +00:00			`if (length <= 0) {`
Replace useless CharSequence to String Change-Id: Idc478f901185ee1b4912acc82d0cbc54fee4e991 2012-10-03 06:19:43 +00:00			`return EMPTY_CODEPOINTS;`
fix IllegalOutOfBoundsException StringUtils.toCodePointArray() had thrown IllegalOutOfBoundsException if passed an empty string. change to just return an empty int[]. Bug: 6188932 Change-Id: Ic41c628c0d407f49fc98cd48cb7ea13d8d5bdd77 2012-06-25 18:17:48 +00:00			`}`
Replace useless CharSequence to String Change-Id: Idc478f901185ee1b4912acc82d0cbc54fee4e991 2012-10-03 06:19:43 +00:00			`final int[] codePoints = new int[string.codePointCount(0, length)];`
			`int destIndex = 0;`
			`for (int index = 0; index < length; index = string.offsetByCodePoints(index, 1)) {`
			`codePoints[destIndex] = string.codePointAt(index);`
			`destIndex++;`
Pass the previous word down to native code in getSuggestions Change-Id: I477b631d81ef58461e44954f3ae5fd895928bb97 2012-04-17 06:55:17 +00:00			`}`
			`return codePoints;`
			`}`
Import TextUtils.getCapsMode to fix it internally (A1) This should have on effect at all on behavior, except an increase in performance. Bug: 4967874 Bug: 6950087 Change-Id: Ie2b51efefe84ca767f5dc8e3b80bfef7e1faab3d 2012-09-10 10:27:45 +00:00
Move parseCsvString to StringUtils Change-Id: I602f33991ca57b6057ec2defe01573552b322857 2013-02-12 07:00:28 +00:00			`public static String[] parseCsvString(final String text) {`
			`final int size = text.length();`
			`if (size == 0) {`
			`return null;`
			`}`
			`if (codePointCount(text) == 1) {`
			`return text.codePointAt(0) == Constants.CSV_SEPARATOR ? null : new String[] { text };`
			`}`

			`ArrayList<String> list = null;`
			`int start = 0;`
			`for (int pos = 0; pos < size; pos++) {`
			`final char c = text.charAt(pos);`
			`if (c == Constants.CSV_SEPARATOR) {`
			`// Skip empty entry.`
			`if (pos - start > 0) {`
			`if (list == null) {`
			`list = CollectionUtils.newArrayList();`
			`}`
			`list.add(text.substring(start, pos));`
			`}`
			`// Skip comma`
			`start = pos + 1;`
			`} else if (c == Constants.CSV_ESCAPE) {`
			`// Skip escape character and escaped character.`
			`pos++;`
			`}`
			`}`
			`final String remain = (size - start > 0) ? text.substring(start) : null;`
			`if (list == null) {`
			`return remain != null ? new String[] { remain } : null;`
			`}`
			`if (remain != null) {`
			`list.add(remain);`
			`}`
			`return list.toArray(new String[list.size()]);`
			`}`
Move a generic string utility to StringUtils Change-Id: I9cc2e9a7ac0b3346af40bcb083f939333336cf09 2013-04-04 12:14:37 +00:00
			`// This method assumes the text is not empty or null.`
			`public static int getCapitalizationType(final String text) {`
			`// If the first char is not uppercase, then the word is either all lower case or`
			`// camel case, and in either case we return CAPITALIZE_NONE.`
			`if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE;`
			`final int len = text.length();`
			`int capsCount = 1;`
			`int letterCount = 1;`
			`for (int i = 1; i < len; i = text.offsetByCodePoints(i, 1)) {`
			`if (1 != capsCount && letterCount != capsCount) break;`
			`final int codePoint = text.codePointAt(i);`
			`if (Character.isUpperCase(codePoint)) {`
			`++capsCount;`
			`++letterCount;`
			`} else if (Character.isLetter(codePoint)) {`
			`// We need to discount non-letters since they may not be upper-case, but may`
			`// still be part of a word (e.g. single quote or dash, as in "IT'S" or "FULL-TIME")`
			`++letterCount;`
			`}`
			`}`
			`// We know the first char is upper case. So we want to test if either every letter other`
			`// than the first is lower case, or if they are all upper case. If the string is exactly`
			`// one char long, then we will arrive here with letterCount 1, and this is correct, too.`
			`if (1 == capsCount) return CAPITALIZE_FIRST;`
			`return (letterCount == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE);`
			`}`
Split Utils class to StringUtils, SubtypeUtils, and JniUtils Change-Id: I09e91675fe7d573dad8c933ad513b21d7e409144 2012-03-08 08:07:02 +00:00			`}`