null+****@clear*****
null+****@clear*****
2010年 7月 28日 (水) 12:37:41 JST
Kouhei Sutou 2010-07-28 03:37:41 +0000 (Wed, 28 Jul 2010) New Revision: d74b93551351d6d7a4b6745bfa1524a8b6f94947 Log: improve sort accuracy with geo index. This change complements sort target points detected by near cursor with prefix cursors for arround mesh. Modified files: lib/db.c Modified: lib/db.c (+213 -43) =================================================================== --- lib/db.c 2010-08-05 01:36:13 +0000 (c90a243) +++ lib/db.c 2010-07-28 03:37:41 +0000 (c2153d4) @@ -6516,6 +6516,198 @@ typedef struct { double d; } geo_entry; +static void +grn_table_sort_geo_detect_far_point(grn_ctx *ctx, grn_obj *table, grn_obj *index, + grn_pat *pat, + grn_pat_cursor *pc, int n, int accessorp, + grn_geo_point *base_point, + int *lng_far, int *lat_far, double *d_far) +{ + int i = 0; + grn_id tid; + double lng1, lat1, lng2, lat2, x, y, d; + + *lng_far = base_point->longitude; + *lat_far = base_point->latitude; + lng1 = GEO_INT2RAD(*lng_far); + lat1 = GEO_INT2RAD(*lat_far); + *d_far = 0.0; + while (i < n && (tid = grn_pat_cursor_next(ctx, pc))) { + grn_ii_cursor *ic = grn_ii_cursor_open(ctx, (grn_ii *)index, tid, 0, 0, 1, 0); + if (ic) { + grn_geo_point pos; + grn_ii_posting *posting; + grn_pat_get_key(ctx, pat, tid, &pos, sizeof(grn_geo_point)); + lng2 = GEO_INT2RAD(pos.longitude); + lat2 = GEO_INT2RAD(pos.latitude); + x = (lng2 - lng1) * cos((lat1 + lat2) * 0.5); + y = (lat2 - lat1); + d = ((x * x) + (y * y)); + if (d > *d_far) { + *lng_far = pos.longitude; + *lat_far = pos.latitude; + *d_far = d; + } + while (i < n && (posting = grn_ii_cursor_next(ctx, ic))) { + grn_id rid = accessorp + ? grn_table_get(ctx, table, &posting->rid, sizeof(grn_id)) + : posting->rid; + if (rid) { + i++; + } + } + grn_ii_cursor_close(ctx, ic); + } + } +} + +static void +grn_table_sort_geo_detect_mesh(grn_ctx *ctx, grn_obj *table, + grn_geo_point *base_point, + int lng_far, int lat_far, double d_far, + grn_geo_point *geo_min, grn_geo_point *geo_max, + int *diff_byte, int *diff_bit) +{ + int i; + int diff_bit_mask = 0; + uint8_t geo_key_base[4], geo_key_far[4], geo_key_min[4], geo_key_max[4]; + grn_geo_point point; + + grn_gton(geo_key_base, base_point, 4); + point.latitude = lat_far; + point.longitude = lng_far; + grn_gton(geo_key_far, &point, 4); + for (i = 0; i < 4; i++) { + if (((geo_key_base[i] >> 6) & 3) != ((geo_key_far[i] >> 6) & 3)) { + *diff_bit = 0; + diff_bit_mask = 0xff >> 2; + break; + } else if (((geo_key_base[i] >> 4) & 3) != ((geo_key_far[i] >> 4) & 3)) { + *diff_bit = 2; + diff_bit_mask = 0xff >> 4; + break; + } else if (((geo_key_base[i] >> 2) & 3) != ((geo_key_far[i] >> 2) & 3)) { + *diff_bit = 4; + diff_bit_mask = 0xff >> 6; + break; + } else if (((geo_key_base[i] >> 0) & 3) != ((geo_key_far[i] >> 0) & 3)) { + *diff_bit = 6; + diff_bit_mask = 0xff >> 8; + break; + } + } + *diff_byte = i; + memcpy(geo_key_min, geo_key_base, i + 1); + geo_key_min[i] &= ~diff_bit_mask; + memset(geo_key_min + i + 1, 0, 4 - i - 1); + memcpy(geo_key_max, geo_key_base, i + 1); + geo_key_max[i] |= diff_bit_mask; + memset(geo_key_max + i + 1, 0xff, 4 - i - 1); + grn_ntog((uint8_t *)geo_min, geo_key_min, 4); + grn_ntog((uint8_t *)geo_max, geo_key_max, 4); +} + +static void +grn_table_sort_geo_collect_points(grn_ctx *ctx, grn_obj *table, grn_obj *index, + grn_pat *pat, + int n, int accessorp, + grn_geo_point *base_point, + int lng_far, int lat_far, double d_far, + geo_entry **entries, int *n_entries) +{ + int e; + uint8_t geos[4][4]; + grn_geo_point geo_min, geo_max, geo_base, keys[4]; + int diff_byte = 0, diff_bit = 0, key_size; + int lat_diff, lng_diff; + double lng1, lat1; + geo_entry *ep, *p; + + grn_table_sort_geo_detect_mesh(ctx, table, base_point, lng_far, lat_far, d_far, + &geo_min, &geo_max, &diff_byte, &diff_bit); + key_size = diff_byte * 8 + diff_bit; + lat1 = base_point->latitude; + lng1 = base_point->longitude; + lat_diff = geo_max.latitude - geo_min.latitude; + lng_diff = geo_max.longitude - geo_min.longitude; + if (lat1 >= geo_min.latitude + lat_diff / 2) { + geo_base.latitude = geo_max.latitude; + if (lng1 >= geo_max.longitude + lng_diff / 2) { + geo_base.longitude = geo_max.longitude; + } else { + geo_base.longitude = geo_min.longitude; + } + } else { + geo_base.latitude = geo_min.latitude; + if (lng1 >= geo_max.longitude + lng_diff / 2) { + geo_base.longitude = geo_max.longitude; + } else { + geo_base.longitude = geo_min.longitude; + } + } + keys[0].latitude = geo_base.latitude + lat_diff; + keys[0].longitude = geo_base.longitude + lng_diff; + keys[1].latitude = geo_base.latitude + lat_diff; + keys[1].longitude = geo_base.longitude - lng_diff; + keys[2].latitude = geo_base.latitude - lat_diff; + keys[2].longitude = geo_base.longitude - lng_diff; + keys[3].latitude = geo_base.latitude - lat_diff; + keys[3].longitude = geo_base.longitude + lng_diff; + grn_gton(geos[0], keys + 0, 4); + grn_gton(geos[1], keys + 1, 4); + grn_gton(geos[2], keys + 2, 4); + grn_gton(geos[3], keys + 3, 4); + + e = 0; + if ((ep = *entries = GRN_MALLOC(sizeof(geo_entry) * (n + 1)))) { + int i; + grn_id tid; + double lng2, lat2, x, y, d; + for (i = 0; i < 4; i++) { + grn_pat_cursor *pc = grn_pat_cursor_open(ctx, pat, + keys + i, key_size, + NULL, 0, + 0, -1, + GRN_CURSOR_PREFIX|GRN_CURSOR_SIZE_BY_BIT); + if (pc) { + while ((tid = grn_pat_cursor_next(ctx, pc))) { + grn_ii_cursor *ic = grn_ii_cursor_open(ctx, (grn_ii *)index, tid, 0, 0, 1, 0); + if (ic) { + grn_geo_point pos; + grn_ii_posting *posting; + grn_pat_get_key(ctx, pat, tid, &pos, sizeof(grn_geo_point)); + lng2 = GEO_INT2RAD(pos.longitude); + lat2 = GEO_INT2RAD(pos.latitude); + x = (lng2 - lng1) * cos((lat1 + lat2) * 0.5); + y = (lat2 - lat1); + d = ((x * x) + (y * y)); + while ((posting = grn_ii_cursor_next(ctx, ic))) { + grn_id rid = accessorp + ? grn_table_get(ctx, table, &posting->rid, sizeof(grn_id)) + : posting->rid; + if (rid) { + for (p = ep; *entries < p && p[-1].d > d; p--) { + p->id = p[-1].id; + p->d = p[-1].d; + } + p->id = rid; + p->d = d; + if (e < n) { + ep++; + e++; + } + } + } + grn_ii_cursor_close(ctx, ic); + } + } + grn_pat_cursor_close(ctx, pc); + } + } + } + *n_entries = e; +} + static int grn_table_sort_geo(grn_ctx *ctx, grn_obj *table, int offset, int limit, grn_obj *result, grn_table_sort_key *keys, int n_keys) @@ -6548,50 +6740,28 @@ grn_table_sort_geo(grn_ctx *ctx, grn_obj *table, int offset, int limit, } } } else { - double lng1, lat1, lng2, lat2, x, y, d; - geo_entry *array, *ep, *p; - int n = e << 4; - if ((ep = array = GRN_MALLOC(sizeof(geo_entry) * n))) { - lng1 = GEO_INT2RAD(((grn_geo_point *)GRN_BULK_HEAD(arg))->longitude); - lat1 = GEO_INT2RAD(((grn_geo_point *)GRN_BULK_HEAD(arg))->latitude); - while (i < n && (tid = grn_pat_cursor_next(ctx, pc))) { - grn_ii_cursor *ic = grn_ii_cursor_open(ctx, (grn_ii *)index, tid, 0, 0, 1, 0); - if (ic) { - grn_geo_point pos; - grn_ii_posting *posting; - grn_pat_get_key(ctx, pat, tid, &pos, sizeof(grn_geo_point)); - lng2 = GEO_INT2RAD(pos.longitude); - lat2 = GEO_INT2RAD(pos.latitude); - x = (lng2 - lng1) * cos((lat1 + lat2) * 0.5); - y = (lat2 - lat1); - d = ((x * x) + (y * y)); - while (i < n && (posting = grn_ii_cursor_next(ctx, ic))) { - grn_id rid = accessorp - ? grn_table_get(ctx, table, &posting->rid, sizeof(grn_id)) - : posting->rid; - if (rid) { - for (p = ep; array < p && p[-1].d > d; p--) { - p->id = p[-1].id; - p->d = p[-1].d; - } - p->id = rid; - p->d = d; - ep++; - i++; - } - } - grn_ii_cursor_close(ctx, ic); - } - } - /* sort */ - { - grn_id *v; - for (i = 0, ep = array + offset; i < limit && ep < array + n; i++, ep++) { - if (!grn_array_add(ctx, (grn_array *)result, (void **)&v)) { break; } - *v = ep->id; - } - GRN_FREE(array); + int lng_far, lat_far; + double d_far; + grn_geo_point *base_point; + geo_entry *array = NULL, *ep; + int n = 0; + base_point = (grn_geo_point *)GRN_BULK_HEAD(arg); + grn_table_sort_geo_detect_far_point(ctx, table, index, pat, + pc, e, accessorp, + base_point, + &lat_far, &lng_far, &d_far); + grn_table_sort_geo_collect_points(ctx, table, index, pat, + e, accessorp, + base_point, + lat_far, lng_far, d_far, + &array, &n); + if (array) { + grn_id *v; + for (i = 0, ep = array + offset; i < limit && ep < array + n; i++, ep++) { + if (!grn_array_add(ctx, (grn_array *)result, (void **)&v)) { break; } + *v = ep->id; } + GRN_FREE(array); } } grn_pat_cursor_close(ctx, pc);