LatinIME/java/src/com/android/inputmethod/latin/BinaryDictionaryFileDumper.java

/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.android.inputmethod.latin;

import android.content.ContentResolver;
import android.content.Context;
import android.content.res.AssetFileDescriptor;
import android.content.res.Resources;
import android.database.Cursor;
import android.net.Uri;
import android.text.TextUtils;
import android.util.Log;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Locale;

/**
 * Group class for static methods to help with creation and getting of the binary dictionary
 * file from the dictionary provider
 */
public class BinaryDictionaryFileDumper {
    private static final String TAG = BinaryDictionaryFileDumper.class.getSimpleName();
    private static final boolean DEBUG = false;

    /**
     * The size of the temporary buffer to copy files.
     */
    private static final int FILE_READ_BUFFER_SIZE = 1024;
    // TODO: make the following data common with the native code
    private static final byte[] MAGIC_NUMBER = new byte[] { 0x78, (byte)0xB1 };

    private static final String DICTIONARY_PROJECTION[] = { "id" };

    // Prevents this class to be accidentally instantiated.
    private BinaryDictionaryFileDumper() {
    }

    /**
     * Return for a given locale or dictionary id the provider URI to get the dictionary.
     */
    private static Uri getProviderUri(String path) {
        return new Uri.Builder().scheme(ContentResolver.SCHEME_CONTENT)
                .authority(BinaryDictionary.DICTIONARY_PACK_AUTHORITY).appendPath(
                        path).build();
    }

    /**
     * Queries a content provider for the list of word lists for a specific locale
     * available to copy into Latin IME.
     */
    private static List<WordListInfo> getWordListWordListInfos(final Locale locale,
            final Context context) {
        final ContentResolver resolver = context.getContentResolver();
        final Uri dictionaryPackUri = getProviderUri(locale.toString());

        final Cursor c = resolver.query(dictionaryPackUri, DICTIONARY_PROJECTION, null, null, null);
        if (null == c) return Collections.<WordListInfo>emptyList();
        if (c.getCount() <= 0 || !c.moveToFirst()) {
            c.close();
            return Collections.<WordListInfo>emptyList();
        }

        try {
            final List<WordListInfo> list = new ArrayList<WordListInfo>();
            do {
                final String wordListId = c.getString(0);
                final String wordListLocale = c.getString(1);
                if (TextUtils.isEmpty(wordListId)) continue;
                list.add(new WordListInfo(wordListId, wordListLocale));
            } while (c.moveToNext());
            c.close();
            return list;
        } catch (Exception e) {
            // Just in case we hit a problem in communication with the dictionary pack.
            // We don't want to die.
            Log.e(TAG, "Exception communicating with the dictionary pack : " + e);
            return Collections.<WordListInfo>emptyList();
        }
    }


    /**
     * Helper method to encapsulate exception handling.
     */
    private static AssetFileDescriptor openAssetFileDescriptor(final ContentResolver resolver,
            final Uri uri) {
        try {
            return resolver.openAssetFileDescriptor(uri, "r");
        } catch (FileNotFoundException e) {
            // I don't want to log the word list URI here for security concerns
            Log.e(TAG, "Could not find a word list from the dictionary provider.");
            return null;
        }
    }

    /**
     * Caches a word list the id of which is passed as an argument. This will write the file
     * to the cache file name designated by its id and locale, overwriting it if already present
     * and creating it (and its containing directory) if necessary.
     */
    private static AssetFileAddress cacheWordList(final String id, final String locale,
            final ContentResolver resolver, final Context context) {

        final int COMPRESSED_CRYPTED_COMPRESSED = 0;
        final int CRYPTED_COMPRESSED = 1;
        final int COMPRESSED_CRYPTED = 2;
        final int COMPRESSED_ONLY = 3;
        final int CRYPTED_ONLY = 4;
        final int NONE = 5;
        final int MODE_MIN = COMPRESSED_CRYPTED_COMPRESSED;
        final int MODE_MAX = NONE;

        final Uri wordListUri = getProviderUri(id);
        final String outputFileName = BinaryDictionaryGetter.getCacheFileName(id, locale, context);

        for (int mode = MODE_MIN; mode <= MODE_MAX; ++mode) {
            InputStream originalSourceStream = null;
            InputStream inputStream = null;
            File outputFile = null;
            FileOutputStream outputStream = null;
            AssetFileDescriptor afd = null;
            try {
                // Open input.
                afd = openAssetFileDescriptor(resolver, wordListUri);
                // If we can't open it at all, don't even try a number of times.
                if (null == afd) return null;
                originalSourceStream = afd.createInputStream();
                // Open output.
                outputFile = new File(outputFileName);
                outputStream = new FileOutputStream(outputFile);
                // Get the appropriate decryption method for this try
                switch (mode) {
                    case COMPRESSED_CRYPTED_COMPRESSED:
                        inputStream = FileTransforms.getUncompressedStream(
                                FileTransforms.getDecryptedStream(
                                        FileTransforms.getUncompressedStream(
                                                originalSourceStream)));
                        break;
                    case CRYPTED_COMPRESSED:
                        inputStream = FileTransforms.getUncompressedStream(
                                FileTransforms.getDecryptedStream(originalSourceStream));
                        break;
                    case COMPRESSED_CRYPTED:
                        inputStream = FileTransforms.getDecryptedStream(
                                FileTransforms.getUncompressedStream(originalSourceStream));
                        break;
                    case COMPRESSED_ONLY:
                        inputStream = FileTransforms.getUncompressedStream(originalSourceStream);
                        break;
                    case CRYPTED_ONLY:
                        inputStream = FileTransforms.getDecryptedStream(originalSourceStream);
                        break;
                    case NONE:
                        inputStream = originalSourceStream;
                        break;
                    }
                checkMagicAndCopyFileTo(new BufferedInputStream(inputStream), outputStream);
                if (0 >= resolver.delete(wordListUri, null, null)) {
                    Log.e(TAG, "Could not have the dictionary pack delete a word list");
                }
                // Success! Close files (through the finally{} clause) and return.
                return AssetFileAddress.makeFromFileName(outputFileName);
            } catch (Exception e) {
                if (DEBUG) {
                    Log.i(TAG, "Can't open word list in mode " + mode + " : " + e);
                }
                if (null != outputFile) {
                    // This may or may not fail. The file may not have been created if the
                    // exception was thrown before it could be. Hence, both failure and
                    // success are expected outcomes, so we don't check the return value.
                    outputFile.delete();
                }
                // Try the next method.
            } finally {
                // Ignore exceptions while closing files.
                try {
                    // afd.close() will close inputStream, we should not call inputStream.close().
                    if (null != afd) afd.close();
                } catch (Exception e) {
                    Log.e(TAG, "Exception while closing a cross-process file descriptor : " + e);
                }
                try {
                    if (null != outputStream) outputStream.close();
                } catch (Exception e) {
                    Log.e(TAG, "Exception while closing a file : " + e);
                }
            }
        }

        // We could not copy the file at all. This is very unexpected.
        // I'd rather not print the word list ID to the log out of security concerns
        Log.e(TAG, "Could not copy a word list. Will not be able to use it.");
        // If we can't copy it we should probably delete it to avoid trying to copy it over
        // and over each time we open LatinIME.
        if (0 >= resolver.delete(wordListUri, null, null)) {
            Log.e(TAG, "In addition, we were unable to delete it.");
        }
        return null;
    }

    /**
     * Queries a content provider for word list data for some locale and cache the returned files
     *
     * This will query a content provider for word list data for a given locale, and copy the
     * files locally so that they can be mmap'ed. This may overwrite previously cached word lists
     * with newer versions if a newer version is made available by the content provider.
     * @returns the addresses of the word list files, or null if no data could be obtained.
     * @throw FileNotFoundException if the provider returns non-existent data.
     * @throw IOException if the provider-returned data could not be read.
     */
    public static List<AssetFileAddress> cacheWordListsFromContentProvider(final Locale locale,
            final Context context) {
        final ContentResolver resolver = context.getContentResolver();
        final List<WordListInfo> idList = getWordListWordListInfos(locale, context);
        final List<AssetFileAddress> fileAddressList = new ArrayList<AssetFileAddress>();
        for (WordListInfo id : idList) {
            final AssetFileAddress afd = cacheWordList(id.mId, id.mLocale, resolver, context);
            if (null != afd) {
                fileAddressList.add(afd);
            }
        }
        return fileAddressList;
    }

    /**
     * Copies the data in an input stream to a target file if the magic number matches.
     *
     * If the magic number does not match the expected value, this method throws an
     * IOException. Other usual conditions for IOException or FileNotFoundException
     * also apply.
     *
     * @param input the stream to be copied.
     * @param outputFile an outputstream to copy the data to.
     */
    private static void checkMagicAndCopyFileTo(final BufferedInputStream input,
            final FileOutputStream output) throws FileNotFoundException, IOException {
        // Check the magic number
        final byte[] magicNumberBuffer = new byte[MAGIC_NUMBER.length];
        final int readMagicNumberSize = input.read(magicNumberBuffer, 0, MAGIC_NUMBER.length);
        if (readMagicNumberSize < MAGIC_NUMBER.length) {
            throw new IOException("Less bytes to read than the magic number length");
        }
        if (!Arrays.equals(MAGIC_NUMBER, magicNumberBuffer)) {
            throw new IOException("Wrong magic number for downloaded file");
        }
        output.write(MAGIC_NUMBER);

        // Actually copy the file
        final byte[] buffer = new byte[FILE_READ_BUFFER_SIZE];
        for (int readBytes = input.read(buffer); readBytes >= 0; readBytes = input.read(buffer))
            output.write(buffer, 0, readBytes);
        input.close();
    }
}