Kouhei Sutou 2018-11-22 18:42:17 +0900 (Thu, 22 Nov 2018) Revision: a61d91a2e7b0137a72d1c22775f5e1296f07e53e https://github.com/groonga/groonga/commit/a61d91a2e7b0137a72d1c22775f5e1296f07e53e Message: Extract Onigmo related code Added files: lib/grn_onigmo.h lib/onigmo.c Modified files: lib/c_sources.am lib/expr_executor.c lib/ii.c lib/operator.c Modified: lib/c_sources.am (+2 -0) =================================================================== --- lib/c_sources.am 2018-11-22 17:50:15 +0900 (a5fdc0365) +++ lib/c_sources.am 2018-11-22 18:42:17 +0900 (d1f818079) @@ -64,6 +64,8 @@ libgroonga_c_sources = \ grn_normalizer.h \ obj.c \ grn_obj.h \ + onigmo.c \ + grn_onigmo.h \ operator.c \ options.c \ grn_options.h \ Modified: lib/expr_executor.c (+13 -48) =================================================================== --- lib/expr_executor.c 2018-11-22 17:50:15 +0900 (c0e84135d) +++ lib/expr_executor.c 2018-11-22 18:42:17 +0900 (7595574e7) @@ -25,7 +25,7 @@ #ifdef GRN_SUPPORT_REGEXP # include "grn_normalizer.h" -# include <onigmo.h> +# include "grn_onigmo.h" #endif static void @@ -2555,61 +2555,22 @@ grn_expr_executor_init_simple_regexp(grn_ctx *ctx, { grn_expr *e = (grn_expr *)(executor->expr); grn_obj *result_buffer = &(executor->data.simple_regexp.result_buffer); - OnigEncoding onig_encoding; - OnigRegex regex; - int onig_result; - OnigErrorInfo onig_error_info; grn_obj *pattern; GRN_BOOL_INIT(result_buffer, 0); GRN_BOOL_SET(ctx, result_buffer, GRN_FALSE); - if (ctx->encoding == GRN_ENC_NONE) { - executor->data.simple_regexp.regex = NULL; - return; - } - - switch (ctx->encoding) { - case GRN_ENC_EUC_JP : - onig_encoding = ONIG_ENCODING_EUC_JP; - break; - case GRN_ENC_UTF8 : - onig_encoding = ONIG_ENCODING_UTF8; - break; - case GRN_ENC_SJIS : - onig_encoding = ONIG_ENCODING_CP932; - break; - case GRN_ENC_LATIN1 : - onig_encoding = ONIG_ENCODING_ISO_8859_1; - break; - case GRN_ENC_KOI8R : - onig_encoding = ONIG_ENCODING_KOI8_R; - break; - default : - 
executor->data.simple_regexp.regex = NULL; - return; - } - pattern = e->codes[1].value; - onig_result = onig_new(&regex, - GRN_TEXT_VALUE(pattern), - GRN_TEXT_VALUE(pattern) + GRN_TEXT_LEN(pattern), - ONIG_OPTION_ASCII_RANGE | - ONIG_OPTION_MULTILINE, - onig_encoding, - ONIG_SYNTAX_RUBY, - &onig_error_info); - if (onig_result != ONIG_NORMAL) { - char message[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str(message, onig_result, onig_error_info); - ERR(GRN_INVALID_ARGUMENT, - "[expr-executor][regexp] " - "failed to create regular expression object: <%.*s>: %s", - (int)GRN_TEXT_LEN(pattern), GRN_TEXT_VALUE(pattern), - message); + executor->data.simple_regexp.regex = + grn_onigmo_new(ctx, + GRN_TEXT_VALUE(pattern), + GRN_TEXT_LEN(pattern), + GRN_ONIGMO_OPTION_DEFAULT, + GRN_ONIGMO_SYNTAX_DEFAULT, + "[expr-executor]"); + if (!executor->data.simple_regexp.regex) { return; } - executor->data.simple_regexp.regex = regex; GRN_VOID_INIT(&(executor->data.simple_regexp.value_buffer)); @@ -2687,6 +2648,10 @@ grn_expr_executor_is_simple_regexp(grn_ctx *ctx, return GRN_FALSE; } + if (!grn_onigmo_is_valid_encoding(ctx)) { + return GRN_FALSE; + } + grn_expr_executor_init_simple_regexp(ctx, executor); return GRN_TRUE; Added: lib/grn_onigmo.h (+54 -0) 100644 =================================================================== --- /dev/null +++ lib/grn_onigmo.h 2018-11-22 18:42:17 +0900 (4df3824cd) @@ -0,0 +1,54 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2018 Kouhei Sutou <kou****@clear*****> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#pragma once + +#include "grn.h" + +#ifdef GRN_SUPPORT_REGEXP +# include <onigmo.h> +#endif /* GRN_SUPPORT_REGEXP */ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef GRN_SUPPORT_REGEXP + +#define GRN_ONIGMO_OPTION_DEFAULT \ + (ONIG_OPTION_ASCII_RANGE | \ + ONIG_OPTION_MULTILINE) + +#define GRN_ONIGMO_SYNTAX_DEFAULT ONIG_SYNTAX_RUBY + +grn_bool +grn_onigmo_is_valid_encoding(grn_ctx *ctx); + +OnigRegex +grn_onigmo_new(grn_ctx *ctx, + const char *pattern, + size_t pattern_length, + OnigOptionType option, + const OnigSyntaxType *syntax, + const char *context); + +#endif /* GRN_SUPPORT_REGEXP */ + +#ifdef __cplusplus +} +#endif Modified: lib/ii.c (+8 -17) =================================================================== --- lib/ii.c 2018-11-22 17:50:15 +0900 (c1c1d3080) +++ lib/ii.c 2018-11-22 18:42:17 +0900 (c7fae6898) @@ -37,14 +37,14 @@ #include "grn_scorer.h" #include "grn_util.h" -#ifdef GRN_WITH_ONIGMO +#ifdef GRN_SUPPORT_REGEXP # define GRN_II_SELECT_ENABLE_SEQUENTIAL_SEARCH -#endif +#endif /* GRN_SUPPORT_REGEXP */ #ifdef GRN_II_SELECT_ENABLE_SEQUENTIAL_SEARCH # include "grn_string.h" -# include <onigmo.h> -#endif +# include "grn_onigmo.h" +#endif /* GRN_II_SELECT_ENABLE_SEQUENTIAL_SEARCH */ #define MAX_PSEG 0x20000 #define MAX_PSEG_SMALL 0x00200 @@ -9073,26 +9073,17 @@ grn_ii_select_sequential_search(grn_ctx *ctx, NULL); { OnigRegex regex; - int onig_result; - OnigErrorInfo error_info; - onig_result = onig_new(&regex, + regex = grn_onigmo_new(ctx, normalized_query, - normalized_query + normalized_query_length, + normalized_query_length, ONIG_OPTION_NONE, - ONIG_ENCODING_UTF8, ONIG_SYNTAX_ASIS, - &error_info); - if (onig_result == ONIG_NORMAL) { + "[ii][select][sequential]"); + if (regex) { grn_ii_select_sequential_search_body(ctx, ii, 
encoding, regex, result, op, wvm, optarg); onig_free(regex); } else { - char message[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str(message, onig_result, error_info); - GRN_LOG(ctx, GRN_LOG_WARNING, - "[ii][select][sequential] " - "failed to create regular expression object: %s", - message); processed = GRN_FALSE; } } Added: lib/onigmo.c (+100 -0) 100644 =================================================================== --- /dev/null +++ lib/onigmo.c 2018-11-22 18:42:17 +0900 (651a49b65) @@ -0,0 +1,100 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2018 Kouhei Sutou <kou****@clear*****> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include "grn_ctx.h" +#include "grn_onigmo.h" + +#ifdef GRN_SUPPORT_REGEXP +grn_bool +grn_onigmo_is_valid_encoding(grn_ctx *ctx) +{ + switch (ctx->encoding) { + case GRN_ENC_EUC_JP : + case GRN_ENC_UTF8 : + case GRN_ENC_SJIS : + case GRN_ENC_LATIN1 : + case GRN_ENC_KOI8R : + return GRN_TRUE; + default : + return GRN_FALSE; + } +} + +OnigRegex +grn_onigmo_new(grn_ctx *ctx, + const char *pattern, + size_t pattern_length, + OnigOptionType option, + const OnigSyntaxType *syntax, + const char *context) +{ + OnigEncoding onig_encoding; + OnigRegex onig_regex; + int onig_result; + OnigErrorInfo onig_error_info; + + switch (ctx->encoding) { + case GRN_ENC_EUC_JP : + onig_encoding = ONIG_ENCODING_EUC_JP; + break; + case GRN_ENC_UTF8 : + onig_encoding = ONIG_ENCODING_UTF8; + break; + case GRN_ENC_SJIS : + onig_encoding = ONIG_ENCODING_CP932; + break; + case GRN_ENC_LATIN1 : + onig_encoding = ONIG_ENCODING_ISO_8859_1; + break; + case GRN_ENC_KOI8R : + onig_encoding = ONIG_ENCODING_KOI8_R; + break; + default : + ERR(GRN_INVALID_ARGUMENT, + "%s[regexp][new] invalid encoding: <%.*s>: <%s>", + context, + (int)pattern_length, + pattern, + grn_encoding_to_string(ctx->encoding)); + return NULL; + } + + onig_result = onig_new(&onig_regex, + pattern, + pattern + pattern_length, + option, + onig_encoding, + syntax, + &onig_error_info); + if (onig_result != ONIG_NORMAL) { + char message[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str(message, onig_result, onig_error_info); + ERR(GRN_INVALID_ARGUMENT, + "%s[regexp][new] " + "failed to create regular expression object: <%.*s>: %s", + context, + (int)pattern_length, + pattern, + message); + return NULL; + } + + return onig_regex; +} + +#endif /* GRN_SUPPORT_REGEXP */ Modified: lib/operator.c (+22 -68) 
=================================================================== --- lib/operator.c 2018-11-22 17:50:15 +0900 (e11eb0840) +++ lib/operator.c 2018-11-22 18:42:17 +0900 (ac2c14496) @@ -1,6 +1,7 @@ /* -*- c-basic-offset: 2 -*- */ /* Copyright(C) 2014-2017 Brazil + Copyright(C) 2018 Kouhei Sutou <kou****@clear*****> This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -20,15 +21,12 @@ #include "grn_db.h" #include "grn_str.h" #include "grn_normalizer.h" +#include "grn_onigmo.h" #include <string.h> #include <math.h> #include <float.h> -#ifdef GRN_SUPPORT_REGEXP -# include <onigmo.h> -#endif - static const char *operator_names[] = { "push", "pop", @@ -879,63 +877,6 @@ exec_match_vector_bulk(grn_ctx *ctx, grn_obj *vector, grn_obj *query) } #ifdef GRN_SUPPORT_REGEXP -static OnigRegex -regexp_compile(grn_ctx *ctx, - const char *pattern, - unsigned int pattern_len, - const OnigSyntaxType *syntax) -{ - OnigRegex regex; - OnigEncoding onig_encoding; - int onig_result; - OnigErrorInfo onig_error_info; - - if (ctx->encoding == GRN_ENC_NONE) { - return NULL; - } - - switch (ctx->encoding) { - case GRN_ENC_EUC_JP : - onig_encoding = ONIG_ENCODING_EUC_JP; - break; - case GRN_ENC_UTF8 : - onig_encoding = ONIG_ENCODING_UTF8; - break; - case GRN_ENC_SJIS : - onig_encoding = ONIG_ENCODING_CP932; - break; - case GRN_ENC_LATIN1 : - onig_encoding = ONIG_ENCODING_ISO_8859_1; - break; - case GRN_ENC_KOI8R : - onig_encoding = ONIG_ENCODING_KOI8_R; - break; - default : - return NULL; - } - - onig_result = onig_new(&regex, - pattern, - pattern + pattern_len, - ONIG_OPTION_ASCII_RANGE | - ONIG_OPTION_MULTILINE, - onig_encoding, - syntax, - &onig_error_info); - if (onig_result != ONIG_NORMAL) { - char message[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str(message, onig_result, onig_error_info); - ERR(GRN_INVALID_ARGUMENT, - "[operator][regexp] " - "failed to create regular expression object: <%.*s>: %s", - pattern_len, 
pattern, - message); - return NULL; - } - - return regex; -} - static grn_bool regexp_is_match(grn_ctx *ctx, OnigRegex regex, const char *target, unsigned int target_len) @@ -967,11 +908,16 @@ string_have_sub_text(grn_ctx *ctx, } #ifdef GRN_SUPPORT_REGEXP - { + if (grn_onigmo_is_valid_encoding(ctx)) { OnigRegex regex; grn_bool matched; - regex = regexp_compile(ctx, sub_text, sub_text_len, ONIG_SYNTAX_ASIS); + regex = grn_onigmo_new(ctx, + sub_text, + sub_text_len, + GRN_ONIGMO_OPTION_DEFAULT, + ONIG_SYNTAX_ASIS, + "[operator]"); if (!regex) { return GRN_FALSE; } @@ -1048,7 +994,11 @@ string_match_regexp(grn_ctx *ctx, OnigRegex regex; grn_bool matched; - regex = regexp_compile(ctx, pattern, pattern_len, ONIG_SYNTAX_RUBY); + regex = grn_onigmo_new(ctx, + pattern, pattern_len, + GRN_ONIGMO_OPTION_DEFAULT, + GRN_ONIGMO_SYNTAX_DEFAULT, + "[operator]"); if (!regex) { return GRN_FALSE; } @@ -1307,10 +1257,12 @@ exec_regexp_uvector_bulk(grn_ctx *ctx, grn_obj *uvector, grn_obj *pattern) return GRN_FALSE; } - regex = regexp_compile(ctx, + regex = grn_onigmo_new(ctx, GRN_TEXT_VALUE(pattern), GRN_TEXT_LEN(pattern), - ONIG_SYNTAX_RUBY); + GRN_ONIGMO_OPTION_DEFAULT, + GRN_ONIGMO_SYNTAX_DEFAULT, + "[operator]"); if (!regex) { return GRN_FALSE; } @@ -1389,10 +1341,12 @@ exec_regexp_vector_bulk(grn_ctx *ctx, grn_obj *vector, grn_obj *pattern) return GRN_FALSE; } - regex = regexp_compile(ctx, + regex = grn_onigmo_new(ctx, GRN_TEXT_VALUE(pattern), GRN_TEXT_LEN(pattern), - ONIG_SYNTAX_RUBY); + GRN_ONIGMO_OPTION_DEFAULT, + GRN_ONIGMO_SYNTAX_DEFAULT, + "[operator]"); if (!regex) { return GRN_FALSE; } -------------- next part -------------- An HTML attachment was scrubbed... URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20181122/3ab17dcc/attachment-0001.html>