Kouhei Sutou
null+****@clear*****
Fri May 6 16:02:00 JST 2016
Kouhei Sutou 2016-05-06 16:02:00 +0900 (Fri, 06 May 2016) New Revision: 8a814ad16f4b05bffd0616df3fc7fb354e3add4e https://github.com/groonga/groonga/commit/8a814ad16f4b05bffd0616df3fc7fb354e3add4e Message: select: improve performance for range search and enough filtered case This may improve performance when a table has many records and two conditions such as --filter 'content @ "keyword" && price < 1000'. If 'content @ "keyword"' filters many records, 'price < 1000' may be evaluated as sequential search instead of index search. Sequential search is faster than index search when the number of remained records is less. It's disabled by default. You can enable it by defining GRN_TABLE_SELECT_ENOUGH_FILTERED_RATIO=0.1 environment variable. 0.1 means that "the number of remained records by 'content @ "keyword"'" / "the number of original records" must be 10% or less to use this optimization. If "the number of remained records by 'content @ "keyword"'" is greater than 1000, this optimization isn't used. You can custom the threshold by defining GRN_TABLE_SELECT_MAX_N_ENOUGH_FILTERED_RECORDS=5000 environment variable. In the case, the optimization may be used when "the number of remained records by 'content @ "keyword"'" is equal to or less than than 5000. This optimization is used when both of the two conditions are satisfied. Added files: test/command/suite/select/filter/index/range/optimization/use_sequetial_search.expected test/command/suite/select/filter/index/range/optimization/use_sequetial_search.test Modified files: lib/ctx.c lib/expr.c lib/grn_expr.h Modified: lib/ctx.c (+2 -0) =================================================================== --- lib/ctx.c 2016-05-06 16:00:24 +0900 (c5550ab) +++ lib/ctx.c 2016-05-06 16:02:00 +0900 (4b24a69) @@ -34,6 +34,7 @@ #include "grn_ctx_impl_mrb.h" #include "grn_logger.h" #include "grn_cache.h" +#include "grn_expr.h" #include <stdio.h> #include <stdarg.h> #include <time.h> @@ -89,6 +90,7 @@ grn_init_from_env(void) grn_io_init_from_env(); grn_ii_init_from_env(); grn_db_init_from_env(); + grn_expr_init_from_env(); grn_index_column_init_from_env(); grn_proc_init_from_env(); grn_plugin_init_from_env(); Modified: lib/expr.c (+100 -0) =================================================================== --- lib/expr.c 2016-05-06 16:00:24 +0900 (c8085a2) +++ lib/expr.c 2016-05-06 16:02:00 +0900 (bb0d9b2) @@ -39,6 +39,35 @@ # include <oniguruma.h> #endif +static double grn_table_select_enough_filtered_ratio = 0.0; +static int grn_table_select_max_n_enough_filtered_records = 1000; + +void +grn_expr_init_from_env(void) +{ + { + char grn_table_select_enough_filtered_ratio_env[GRN_ENV_BUFFER_SIZE]; + grn_getenv("GRN_TABLE_SELECT_ENOUGH_FILTERED_RATIO", + grn_table_select_enough_filtered_ratio_env, + GRN_ENV_BUFFER_SIZE); + if (grn_table_select_enough_filtered_ratio_env[0]) { + grn_table_select_enough_filtered_ratio = + atof(grn_table_select_enough_filtered_ratio_env); + } + } + + { + char grn_table_select_max_n_enough_filtered_records_env[GRN_ENV_BUFFER_SIZE]; + grn_getenv("GRN_TABLE_SELECT_MAX_N_ENOUGH_FILTERED_RECORDS", + grn_table_select_max_n_enough_filtered_records_env, + GRN_ENV_BUFFER_SIZE); + if (grn_table_select_max_n_enough_filtered_records_env[0]) { + grn_table_select_max_n_enough_filtered_records = + atoi(grn_table_select_max_n_enough_filtered_records_env); + } + } +} + grn_obj * grn_expr_alloc(grn_ctx *ctx, grn_obj *expr, grn_id domain, grn_obj_flags flags) { @@ -5765,6 +5794,66 @@ grn_table_select_index_report(grn_ctx *ctx, const char *tag, grn_obj *index) grn_report_index(ctx, "[table][select]", tag, index); } +static inline void +grn_table_select_index_not_used_report(grn_ctx *ctx, + const char *tag, + grn_obj *index, + const char *reason) +{ + grn_report_index_not_used(ctx, "[table][select]", tag, index, reason); +} + +static inline grn_bool +grn_table_select_index_use_sequential_search(grn_ctx *ctx, + grn_obj *table, + grn_obj *res, + grn_operator logical_op, + const char *tag, + grn_obj *index) +{ + int n_records; + int n_filtered_records; + double filtered_ratio; + grn_obj reason; + + if (logical_op != GRN_OP_AND) { + return GRN_FALSE; + } + + n_records = grn_table_size(ctx, table); + n_filtered_records = grn_table_size(ctx, res); + if (n_records == 0) { + filtered_ratio = 1.0; + } else { + filtered_ratio = (double)n_filtered_records / (double)n_records; + } + + if (filtered_ratio >= grn_table_select_enough_filtered_ratio) { + return GRN_FALSE; + } + + if (n_filtered_records > grn_table_select_max_n_enough_filtered_records) { + return GRN_FALSE; + } + + GRN_TEXT_INIT(&reason, 0); + grn_text_printf(ctx, &reason, + "enough filtered: %.2f%%(%d/%d) < %.2f%% && %d <= %d", + filtered_ratio * 100, + n_filtered_records, + n_records, + grn_table_select_enough_filtered_ratio * 100, + n_filtered_records, + grn_table_select_max_n_enough_filtered_records); + GRN_TEXT_PUTC(ctx, &reason, '\0'); + grn_table_select_index_not_used_report(ctx, + tag, + index, + GRN_TEXT_VALUE(&reason)); + GRN_OBJ_FIN(ctx, &reason); + return GRN_TRUE; +} + static inline grn_bool grn_table_select_index_equal(grn_ctx *ctx, grn_obj *table, @@ -6002,6 +6091,7 @@ grn_table_select_index_range_column(grn_ctx *ctx, grn_obj *table, scan_info *si, grn_operator logical_op, grn_obj *res) { + const char *tag = "[range]"; grn_bool processed = GRN_FALSE; grn_obj *index_table; grn_obj range; @@ -6011,6 +6101,16 @@ grn_table_select_index_range_column(grn_ctx *ctx, grn_obj *table, return GRN_FALSE; } + if (grn_table_select_index_use_sequential_search(ctx, + table, + res, + logical_op, + tag, + index_table)) { + grn_obj_unlink(ctx, index_table); + return GRN_FALSE; + } + GRN_OBJ_INIT(&range, GRN_BULK, 0, index_table->header.domain); if (grn_obj_cast(ctx, si->query, &range, GRN_FALSE) == GRN_SUCCESS) { grn_table_cursor *cursor; Modified: lib/grn_expr.h (+2 -0) =================================================================== --- lib/grn_expr.h 2016-05-06 16:00:24 +0900 (76633a9) +++ lib/grn_expr.h 2016-05-06 16:02:00 +0900 (c20821f) @@ -40,6 +40,8 @@ typedef enum { typedef struct _grn_scan_info scan_info; typedef grn_bool (*grn_scan_info_each_arg_callback)(grn_ctx *ctx, grn_obj *obj, void *user_data); +void grn_expr_init_from_env(void); + scan_info **grn_scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n, grn_operator op, grn_bool record_exist); Added: test/command/suite/select/filter/index/range/optimization/use_sequetial_search.expected (+166 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/filter/index/range/optimization/use_sequetial_search.expected 2016-05-06 16:02:00 +0900 (01157b8) @@ -0,0 +1,166 @@ +table_create Users TABLE_HASH_KEY ShortText +[[0,0.0,0.0],true] +column_create Users age COLUMN_SCALAR Int32 +[[0,0.0,0.0],true] +table_create Ages TABLE_PAT_KEY Int32 +[[0,0.0,0.0],true] +column_create Ages users_age COLUMN_INDEX Users age +[[0,0.0,0.0],true] +load --table Users +[ +{"_key": "user00", "age": 0}, +{"_key": "user01", "age": 1}, +{"_key": "user02", "age": 2}, +{"_key": "user03", "age": 3}, +{"_key": "user04", "age": 4}, +{"_key": "user05", "age": 5}, +{"_key": "user06", "age": 6}, +{"_key": "user07", "age": 7}, +{"_key": "user08", "age": 8}, +{"_key": "user09", "age": 9}, +{"_key": "user10", "age": 10}, +{"_key": "user11", "age": 11}, +{"_key": "user12", "age": 12}, +{"_key": "user13", "age": 13}, +{"_key": "user14", "age": 14}, +{"_key": "user15", "age": 15}, +{"_key": "user16", "age": 16}, +{"_key": "user17", "age": 17}, +{"_key": "user18", "age": 18}, +{"_key": "user19", "age": 19}, +{"_key": "user20", "age": 20}, +{"_key": "user21", "age": 21}, +{"_key": "user22", "age": 22}, +{"_key": "user23", "age": 23}, +{"_key": "user24", "age": 24}, +{"_key": "user25", "age": 25}, +{"_key": "user26", "age": 26}, +{"_key": "user27", "age": 27}, +{"_key": "user28", "age": 28}, +{"_key": "user29", "age": 29}, +{"_key": "user30", "age": 30}, +{"_key": "user31", "age": 31}, +{"_key": "user32", "age": 32}, +{"_key": "user33", "age": 33}, +{"_key": "user34", "age": 34}, +{"_key": "user35", "age": 35}, +{"_key": "user36", "age": 36}, +{"_key": "user37", "age": 37}, +{"_key": "user38", "age": 38}, +{"_key": "user39", "age": 39}, +{"_key": "user40", "age": 40}, +{"_key": "user41", "age": 41}, +{"_key": "user42", "age": 42}, +{"_key": "user43", "age": 43}, +{"_key": "user44", "age": 44}, +{"_key": "user45", "age": 45}, +{"_key": "user46", "age": 46}, +{"_key": "user47", "age": 47}, +{"_key": "user48", "age": 48}, +{"_key": "user49", "age": 49}, +{"_key": "user50", "age": 50}, +{"_key": "user51", "age": 51}, +{"_key": "user52", "age": 52}, +{"_key": "user53", "age": 53}, +{"_key": "user54", "age": 54}, +{"_key": "user55", "age": 55}, +{"_key": "user56", "age": 56}, +{"_key": "user57", "age": 57}, +{"_key": "user58", "age": 58}, +{"_key": "user59", "age": 59}, +{"_key": "user60", "age": 60}, +{"_key": "user61", "age": 61}, +{"_key": "user62", "age": 62}, +{"_key": "user63", "age": 63}, +{"_key": "user64", "age": 64}, +{"_key": "user65", "age": 65}, +{"_key": "user66", "age": 66}, +{"_key": "user67", "age": 67}, +{"_key": "user68", "age": 68}, +{"_key": "user69", "age": 69}, +{"_key": "user70", "age": 70}, +{"_key": "user71", "age": 71}, +{"_key": "user72", "age": 72}, +{"_key": "user73", "age": 73}, +{"_key": "user74", "age": 74}, +{"_key": "user75", "age": 75}, +{"_key": "user76", "age": 76}, +{"_key": "user77", "age": 77}, +{"_key": "user78", "age": 78}, +{"_key": "user79", "age": 79}, +{"_key": "user80", "age": 80}, +{"_key": "user81", "age": 81}, +{"_key": "user82", "age": 82}, +{"_key": "user83", "age": 83}, +{"_key": "user84", "age": 84}, +{"_key": "user85", "age": 85}, +{"_key": "user86", "age": 86}, +{"_key": "user87", "age": 87}, +{"_key": "user88", "age": 88}, +{"_key": "user89", "age": 89}, +{"_key": "user90", "age": 90}, +{"_key": "user91", "age": 91}, +{"_key": "user92", "age": 92}, +{"_key": "user93", "age": 93}, +{"_key": "user94", "age": 94}, +{"_key": "user95", "age": 95}, +{"_key": "user96", "age": 96}, +{"_key": "user97", "age": 97}, +{"_key": "user98", "age": 98}, +{"_key": "user99", "age": 99} +] +[[0,0.0,0.0],100] +log_level --level info +[[0,0.0,0.0],true] +select Users --filter '_key @^ "user0" && age > 5' +[ + [ + 0, + 0.0, + 0.0 + ], + [ + [ + [ + 4 + ], + [ + [ + "_id", + "UInt32" + ], + [ + "_key", + "ShortText" + ], + [ + "age", + "Int32" + ] + ], + [ + 7, + "user06", + 6 + ], + [ + 8, + "user07", + 7 + ], + [ + 9, + "user08", + 8 + ], + [ + 10, + "user09", + 9 + ] + ] + ] +] +#|i| [table][select][index-not-used][range] <Ages>: enough filtered: 10.00%(10/100) < 11.00% && 10 <= 1000 +log_level --level notice +[[0,0.0,0.0],true] Added: test/command/suite/select/filter/index/range/optimization/use_sequetial_search.test (+117 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/filter/index/range/optimization/use_sequetial_search.test 2016-05-06 16:02:00 +0900 (5eb1d50) @@ -0,0 +1,117 @@ +#$GRN_TABLE_SELECT_ENOUGH_FILTERED_RATIO=0.11 + +table_create Users TABLE_HASH_KEY ShortText +column_create Users age COLUMN_SCALAR Int32 + +table_create Ages TABLE_PAT_KEY Int32 +column_create Ages users_age COLUMN_INDEX Users age + +load --table Users +[ +{"_key": "user00", "age": 0}, +{"_key": "user01", "age": 1}, +{"_key": "user02", "age": 2}, +{"_key": "user03", "age": 3}, +{"_key": "user04", "age": 4}, +{"_key": "user05", "age": 5}, +{"_key": "user06", "age": 6}, +{"_key": "user07", "age": 7}, +{"_key": "user08", "age": 8}, +{"_key": "user09", "age": 9}, +{"_key": "user10", "age": 10}, +{"_key": "user11", "age": 11}, +{"_key": "user12", "age": 12}, +{"_key": "user13", "age": 13}, +{"_key": "user14", "age": 14}, +{"_key": "user15", "age": 15}, +{"_key": "user16", "age": 16}, +{"_key": "user17", "age": 17}, +{"_key": "user18", "age": 18}, +{"_key": "user19", "age": 19}, +{"_key": "user20", "age": 20}, +{"_key": "user21", "age": 21}, +{"_key": "user22", "age": 22}, +{"_key": "user23", "age": 23}, +{"_key": "user24", "age": 24}, +{"_key": "user25", "age": 25}, +{"_key": "user26", "age": 26}, +{"_key": "user27", "age": 27}, +{"_key": "user28", "age": 28}, +{"_key": "user29", "age": 29}, +{"_key": "user30", "age": 30}, +{"_key": "user31", "age": 31}, +{"_key": "user32", "age": 32}, +{"_key": "user33", "age": 33}, +{"_key": "user34", "age": 34}, +{"_key": "user35", "age": 35}, +{"_key": "user36", "age": 36}, +{"_key": "user37", "age": 37}, +{"_key": "user38", "age": 38}, +{"_key": "user39", "age": 39}, +{"_key": "user40", "age": 40}, +{"_key": "user41", "age": 41}, +{"_key": "user42", "age": 42}, +{"_key": "user43", "age": 43}, +{"_key": "user44", "age": 44}, +{"_key": "user45", "age": 45}, +{"_key": "user46", "age": 46}, +{"_key": "user47", "age": 47}, +{"_key": "user48", "age": 48}, +{"_key": "user49", "age": 49}, +{"_key": "user50", "age": 50}, +{"_key": "user51", "age": 51}, +{"_key": "user52", "age": 52}, +{"_key": "user53", "age": 53}, +{"_key": "user54", "age": 54}, +{"_key": "user55", "age": 55}, +{"_key": "user56", "age": 56}, +{"_key": "user57", "age": 57}, +{"_key": "user58", "age": 58}, +{"_key": "user59", "age": 59}, +{"_key": "user60", "age": 60}, +{"_key": "user61", "age": 61}, +{"_key": "user62", "age": 62}, +{"_key": "user63", "age": 63}, +{"_key": "user64", "age": 64}, +{"_key": "user65", "age": 65}, +{"_key": "user66", "age": 66}, +{"_key": "user67", "age": 67}, +{"_key": "user68", "age": 68}, +{"_key": "user69", "age": 69}, +{"_key": "user70", "age": 70}, +{"_key": "user71", "age": 71}, +{"_key": "user72", "age": 72}, +{"_key": "user73", "age": 73}, +{"_key": "user74", "age": 74}, +{"_key": "user75", "age": 75}, +{"_key": "user76", "age": 76}, +{"_key": "user77", "age": 77}, +{"_key": "user78", "age": 78}, +{"_key": "user79", "age": 79}, +{"_key": "user80", "age": 80}, +{"_key": "user81", "age": 81}, +{"_key": "user82", "age": 82}, +{"_key": "user83", "age": 83}, +{"_key": "user84", "age": 84}, +{"_key": "user85", "age": 85}, +{"_key": "user86", "age": 86}, +{"_key": "user87", "age": 87}, +{"_key": "user88", "age": 88}, +{"_key": "user89", "age": 89}, +{"_key": "user90", "age": 90}, +{"_key": "user91", "age": 91}, +{"_key": "user92", "age": 92}, +{"_key": "user93", "age": 93}, +{"_key": "user94", "age": 94}, +{"_key": "user95", "age": 95}, +{"_key": "user96", "age": 96}, +{"_key": "user97", "age": 97}, +{"_key": "user98", "age": 98}, +{"_key": "user99", "age": 99} +] + +log_level --level info +#@add-important-log-levels info +select Users --filter '_key @^ "user0" && age > 5' +#@remove-important-log-levels info +log_level --level notice -------------- next part -------------- HTML����������������������������...Download