Add affinity model for contact names.

This allows us to:
1. Rank contacts and only add the top N names to the keyboard LM.
2. Avoid adding duplicate names.

Note: The affinity calcualuation is limited by the fact that some apps
currently do not update the TIMES_CONTACTED counter. To better handle
this case, the new measure also takes into account whether or not a
name is in the visible contacts group.

Bug: 20053274
Change-Id: I2741cb8958667d4a294aba8c437a45cec4b42dc7
This commit is contained in:
Tom Ouyang 2015-04-13 17:49:17 -07:00
parent 40f0f61bb3
commit 44a175732d
4 changed files with 152 additions and 21 deletions

View file

@ -84,10 +84,10 @@ public class ContactsContentObserver implements Runnable {
boolean haveContentsChanged() {
final long startTime = SystemClock.uptimeMillis();
final int contactCount = mManager.getContactCount();
if (contactCount > ContactsDictionaryConstants.MAX_CONTACT_COUNT) {
if (contactCount > ContactsDictionaryConstants.MAX_CONTACTS_PROVIDER_QUERY_LIMIT) {
// If there are too many contacts then return false. In this rare case it is impossible
// to include all of them anyways and the cost of rebuilding the dictionary is too high.
// TODO: Sort and check only the MAX_CONTACT_COUNT most recent contacts?
// TODO: Sort and check only the most recent contacts?
return false;
}
if (contactCount != mManager.getContactCountAtLastRebuild()) {

View file

@ -26,7 +26,8 @@ public class ContactsDictionaryConstants {
/**
* Projections for {@link Contacts.CONTENT_URI}
*/
public static final String[] PROJECTION = { BaseColumns._ID, Contacts.DISPLAY_NAME };
public static final String[] PROJECTION = { BaseColumns._ID, Contacts.DISPLAY_NAME,
Contacts.TIMES_CONTACTED, Contacts.LAST_TIME_CONTACTED, Contacts.IN_VISIBLE_GROUP };
public static final String[] PROJECTION_ID_ONLY = { BaseColumns._ID };
/**
@ -36,13 +37,16 @@ public class ContactsDictionaryConstants {
public static final int FREQUENCY_FOR_CONTACTS_BIGRAM = 90;
/**
* The maximum number of contacts that this dictionary supports.
* Do not attempt to query contacts if there are more than this many entries.
*/
public static final int MAX_CONTACT_COUNT = 10000;
public static final int MAX_CONTACTS_PROVIDER_QUERY_LIMIT = 10000;
/**
* Index of the column for 'name' in content providers:
* Contacts & ContactsContract.Profile.
*/
public static final int NAME_INDEX = 1;
public static final int TIMES_CONTACTED_INDEX = 2;
public static final int LAST_TIME_CONTACTED_INDEX = 3;
public static final int IN_VISIBLE_GROUP_INDEX = 4;
}

View file

@ -21,11 +21,17 @@ import android.database.Cursor;
import android.database.sqlite.SQLiteException;
import android.net.Uri;
import android.provider.ContactsContract.Contacts;
import android.text.TextUtils;
import android.util.Log;
import com.android.inputmethod.latin.common.Constants;
import com.android.inputmethod.latin.common.StringUtils;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
/**
@ -37,6 +43,63 @@ import java.util.concurrent.atomic.AtomicInteger;
public class ContactsManager {
private static final String TAG = "ContactsManager";
/**
* Use at most this many of the highest affinity contacts.
*/
public static final int MAX_CONTACT_NAMES = 200;
protected static class RankedContact {
public final String mName;
public final long mLastContactedTime;
public final int mTimesContacted;
public final boolean mInVisibleGroup;
private float mAffinity = 0.0f;
RankedContact(final Cursor cursor) {
mName = cursor.getString(
ContactsDictionaryConstants.NAME_INDEX);
mTimesContacted = cursor.getInt(
ContactsDictionaryConstants.TIMES_CONTACTED_INDEX);
mLastContactedTime = cursor.getLong(
ContactsDictionaryConstants.LAST_TIME_CONTACTED_INDEX);
mInVisibleGroup = cursor.getInt(
ContactsDictionaryConstants.IN_VISIBLE_GROUP_INDEX) == 1;
}
float getAffinity() {
return mAffinity;
}
/**
* Calculates the affinity with the contact based on:
* - How many times it has been contacted
* - How long since the last contact.
* - Whether the contact is in the visible group (i.e., Contacts list).
*
* Note: This affinity is limited by the fact that some apps currently do not update the
* LAST_TIME_CONTACTED or TIMES_CONTACTED counters. As a result, a frequently messaged
* contact may still have 0 affinity.
*/
void computeAffinity(final int maxTimesContacted, final long currentTime) {
final float timesWeight = ((float) mTimesContacted + 1) / (maxTimesContacted + 1);
final long timeSinceLastContact = Math.min(
Math.max(0, currentTime - mLastContactedTime),
TimeUnit.MILLISECONDS.convert(180, TimeUnit.DAYS));
final float lastTimeWeight = (float) Math.pow(0.5,
timeSinceLastContact / (TimeUnit.MILLISECONDS.convert(10, TimeUnit.DAYS)));
final float visibleWeight = mInVisibleGroup ? 1.0f : 0.0f;
mAffinity = (timesWeight + lastTimeWeight + visibleWeight) / 3;
}
}
private static class AffinityComparator implements Comparator<RankedContact> {
@Override
public int compare(RankedContact contact1, RankedContact contact2) {
return Float.compare(contact2.getAffinity(), contact1.getAffinity());
}
}
/**
* Interface to implement for classes interested in getting notified for updates
* to Contacts content provider.
@ -82,14 +145,18 @@ public class ContactsManager {
* Returns all the valid names in the Contacts DB. Callers should also
* call {@link #updateLocalState(ArrayList)} after they are done with result
* so that the manager can cache local state for determining updates.
*
* These names are sorted by their affinity to the user, with favorite
* contacts appearing first.
*/
public ArrayList<String> getValidNames(final Uri uri) {
final ArrayList<String> names = new ArrayList<>();
// Check all contacts since it's not possible to find out which names have changed.
// This is needed because it's possible to receive extraneous onChange events even when no
// name has changed.
final Cursor cursor = mContext.getContentResolver().query(uri,
ContactsDictionaryConstants.PROJECTION, null, null, null);
final ArrayList<RankedContact> contacts = new ArrayList<>();
int maxTimesContacted = 0;
if (cursor != null) {
try {
if (cursor.moveToFirst()) {
@ -97,7 +164,12 @@ public class ContactsManager {
final String name = cursor.getString(
ContactsDictionaryConstants.NAME_INDEX);
if (isValidName(name)) {
names.add(name);
final int timesContacted = cursor.getInt(
ContactsDictionaryConstants.TIMES_CONTACTED_INDEX);
if (timesContacted > maxTimesContacted) {
maxTimesContacted = timesContacted;
}
contacts.add(new RankedContact(cursor));
}
cursor.moveToNext();
}
@ -106,7 +178,16 @@ public class ContactsManager {
cursor.close();
}
}
return names;
final long currentTime = System.currentTimeMillis();
for (RankedContact contact : contacts) {
contact.computeAffinity(maxTimesContacted, currentTime);
}
Collections.sort(contacts, new AffinityComparator());
final HashSet<String> names = new HashSet<>();
for (int i = 0; i < contacts.size() && names.size() < MAX_CONTACT_NAMES; ++i) {
names.add(contacts.get(i).mName);
}
return new ArrayList<>(names);
}
/**
@ -134,10 +215,16 @@ public class ContactsManager {
}
private static boolean isValidName(final String name) {
if (name != null && -1 == name.indexOf(Constants.CODE_COMMERCIAL_AT)) {
return true;
if (TextUtils.isEmpty(name) || name.indexOf(Constants.CODE_COMMERCIAL_AT) != -1) {
return false;
}
return false;
final boolean hasSpace = name.indexOf(Constants.CODE_SPACE) != -1;
if (!hasSpace) {
// Only allow an isolated word if it does not contain a hyphen.
// This helps to filter out mailing lists.
return name.indexOf(Constants.CODE_DASH) == -1;
}
return true;
}
/**

View file

@ -29,11 +29,15 @@ import android.test.mock.MockContentProvider;
import android.test.mock.MockContentResolver;
import android.test.suitebuilder.annotation.SmallTest;
import com.android.inputmethod.latin.ContactsDictionaryConstants;
import com.android.inputmethod.latin.ContactsManager;
import org.junit.Before;
import org.junit.Test;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.concurrent.TimeUnit;
/**
* Tests for {@link ContactsManager}
@ -63,12 +67,13 @@ public class ContactsManagerTest extends AndroidTestCase {
@Test
public void testGetValidNames() {
final String contactName1 = "firstname lastname";
final String contactName1 = "firstname last-name";
final String contactName2 = "larry";
mMatrixCursor.addRow(new Object[] { 1, contactName1 });
mMatrixCursor.addRow(new Object[] { 2, null /* null name */ });
mMatrixCursor.addRow(new Object[] { 3, contactName2 });
mMatrixCursor.addRow(new Object[] { 4, "floopy@example.com" /* invalid name */ });
mMatrixCursor.addRow(new Object[] { 1, contactName1, 0, 0, 0 });
mMatrixCursor.addRow(new Object[] { 2, null /* null name */, 0, 0, 0 });
mMatrixCursor.addRow(new Object[] { 3, contactName2, 0, 0, 0 });
mMatrixCursor.addRow(new Object[] { 4, "floopy@example.com" /* invalid name */, 0, 0, 0 });
mMatrixCursor.addRow(new Object[] { 5, "news-group" /* invalid name */, 0, 0, 0 });
mFakeContactsContentProvider.addQueryResult(Contacts.CONTENT_URI, mMatrixCursor);
final ArrayList<String> validNames = mManager.getValidNames(Contacts.CONTENT_URI);
@ -78,13 +83,48 @@ public class ContactsManagerTest extends AndroidTestCase {
}
@Test
public void testGetCount() {
mMatrixCursor.addRow(new Object[] { 1, "firstname" });
mMatrixCursor.addRow(new Object[] { 2, null /* null name */ });
mMatrixCursor.addRow(new Object[] { 3, "larry" });
mMatrixCursor.addRow(new Object[] { 4, "floopy@example.com" /* invalid name */ });
public void testGetValidNamesAffinity() {
final long now = System.currentTimeMillis();
final long month_ago = now - TimeUnit.MILLISECONDS.convert(31, TimeUnit.DAYS);
for (int i = 0; i < ContactsManager.MAX_CONTACT_NAMES + 10; ++i) {
mMatrixCursor.addRow(new Object[] { i, "name" + i, i, now, 1 });
}
mFakeContactsContentProvider.addQueryResult(Contacts.CONTENT_URI, mMatrixCursor);
final ArrayList<String> validNames = mManager.getValidNames(Contacts.CONTENT_URI);
assertEquals(ContactsManager.MAX_CONTACT_NAMES, validNames.size());
for (int i = 0; i < 10; ++i) {
assertFalse(validNames.contains("name" + i));
}
for (int i = 10; i < ContactsManager.MAX_CONTACT_NAMES + 10; ++i) {
assertTrue(validNames.contains("name" + i));
}
}
@Test
public void testComputeAffinity() {
final long now = System.currentTimeMillis();
final long month_ago = now - TimeUnit.MILLISECONDS.convert(31, TimeUnit.DAYS);
mMatrixCursor.addRow(new Object[] { 1, "name", 1, month_ago, 1 });
mFakeContactsContentProvider.addQueryResult(Contacts.CONTENT_URI, mMatrixCursor);
Cursor cursor = mFakeContactsContentProvider.query(Contacts.CONTENT_URI,
ContactsDictionaryConstants.PROJECTION_ID_ONLY, null, null, null);
cursor.moveToFirst();
ContactsManager.RankedContact contact = new ContactsManager.RankedContact(cursor);
contact.computeAffinity(1, month_ago);
assertEquals(contact.getAffinity(), 1.0f);
contact.computeAffinity(2, now);
assertEquals(contact.getAffinity(), (2.0f/3.0f + (float)Math.pow(0.5, 3) + 1.0f) / 3);
}
@Test
public void testGetCount() {
mMatrixCursor.addRow(new Object[] { 1, "firstname", 0, 0, 0 });
mMatrixCursor.addRow(new Object[] { 2, null /* null name */, 0, 0, 0 });
mMatrixCursor.addRow(new Object[] { 3, "larry", 0, 0, 0 });
mMatrixCursor.addRow(new Object[] { 4, "floopy@example.com" /* invalid name */, 0, 0, 0 });
mFakeContactsContentProvider.addQueryResult(Contacts.CONTENT_URI, mMatrixCursor);
assertEquals(4, mManager.getContactCount());
}