[Groonga-commit] groonga/groonga at a61d91a [master] Extract Onigmo related code

Back to archive index
Kouhei Sutou null+****@clear*****
Thu Nov 22 18:42:17 JST 2018


Kouhei Sutou	2018-11-22 18:42:17 +0900 (Thu, 22 Nov 2018)

  Revision: a61d91a2e7b0137a72d1c22775f5e1296f07e53e
  https://github.com/groonga/groonga/commit/a61d91a2e7b0137a72d1c22775f5e1296f07e53e

  Message:
    Extract Onigmo related code

  Added files:
    lib/grn_onigmo.h
    lib/onigmo.c
  Modified files:
    lib/c_sources.am
    lib/expr_executor.c
    lib/ii.c
    lib/operator.c

  Modified: lib/c_sources.am (+2 -0)
===================================================================
--- lib/c_sources.am    2018-11-22 17:50:15 +0900 (a5fdc0365)
+++ lib/c_sources.am    2018-11-22 18:42:17 +0900 (d1f818079)
@@ -64,6 +64,8 @@ libgroonga_c_sources =				\
 	grn_normalizer.h			\
 	obj.c					\
 	grn_obj.h				\
+	onigmo.c				\
+	grn_onigmo.h				\
 	operator.c				\
 	options.c				\
 	grn_options.h				\

  Modified: lib/expr_executor.c (+13 -48)
===================================================================
--- lib/expr_executor.c    2018-11-22 17:50:15 +0900 (c0e84135d)
+++ lib/expr_executor.c    2018-11-22 18:42:17 +0900 (7595574e7)
@@ -25,7 +25,7 @@
 
 #ifdef GRN_SUPPORT_REGEXP
 # include "grn_normalizer.h"
-# include <onigmo.h>
+# include "grn_onigmo.h"
 #endif
 
 static void
@@ -2555,61 +2555,22 @@ grn_expr_executor_init_simple_regexp(grn_ctx *ctx,
 {
   grn_expr *e = (grn_expr *)(executor->expr);
   grn_obj *result_buffer = &(executor->data.simple_regexp.result_buffer);
-  OnigEncoding onig_encoding;
-  OnigRegex regex;
-  int onig_result;
-  OnigErrorInfo onig_error_info;
   grn_obj *pattern;
 
   GRN_BOOL_INIT(result_buffer, 0);
   GRN_BOOL_SET(ctx, result_buffer, GRN_FALSE);
 
-  if (ctx->encoding == GRN_ENC_NONE) {
-    executor->data.simple_regexp.regex = NULL;
-    return;
-  }
-
-  switch (ctx->encoding) {
-  case GRN_ENC_EUC_JP :
-    onig_encoding = ONIG_ENCODING_EUC_JP;
-    break;
-  case GRN_ENC_UTF8 :
-    onig_encoding = ONIG_ENCODING_UTF8;
-    break;
-  case GRN_ENC_SJIS :
-    onig_encoding = ONIG_ENCODING_CP932;
-    break;
-  case GRN_ENC_LATIN1 :
-    onig_encoding = ONIG_ENCODING_ISO_8859_1;
-    break;
-  case GRN_ENC_KOI8R :
-    onig_encoding = ONIG_ENCODING_KOI8_R;
-    break;
-  default :
-    executor->data.simple_regexp.regex = NULL;
-    return;
-  }
-
   pattern = e->codes[1].value;
-  onig_result = onig_new(&regex,
-                         GRN_TEXT_VALUE(pattern),
-                         GRN_TEXT_VALUE(pattern) + GRN_TEXT_LEN(pattern),
-                         ONIG_OPTION_ASCII_RANGE |
-                         ONIG_OPTION_MULTILINE,
-                         onig_encoding,
-                         ONIG_SYNTAX_RUBY,
-                         &onig_error_info);
-  if (onig_result != ONIG_NORMAL) {
-    char message[ONIG_MAX_ERROR_MESSAGE_LEN];
-    onig_error_code_to_str(message, onig_result, onig_error_info);
-    ERR(GRN_INVALID_ARGUMENT,
-        "[expr-executor][regexp] "
-        "failed to create regular expression object: <%.*s>: %s",
-        (int)GRN_TEXT_LEN(pattern), GRN_TEXT_VALUE(pattern),
-        message);
+  executor->data.simple_regexp.regex =
+    grn_onigmo_new(ctx,
+                   GRN_TEXT_VALUE(pattern),
+                   GRN_TEXT_LEN(pattern),
+                   GRN_ONIGMO_OPTION_DEFAULT,
+                   GRN_ONIGMO_SYNTAX_DEFAULT,
+                   "[expr-executor]");
+  if (!executor->data.simple_regexp.regex) {
     return;
   }
-  executor->data.simple_regexp.regex = regex;
 
   GRN_VOID_INIT(&(executor->data.simple_regexp.value_buffer));
 
@@ -2687,6 +2648,10 @@ grn_expr_executor_is_simple_regexp(grn_ctx *ctx,
     return GRN_FALSE;
   }
 
+  if (!grn_onigmo_is_valid_encoding(ctx)) {
+    return GRN_FALSE;
+  }
+
   grn_expr_executor_init_simple_regexp(ctx, executor);
 
   return GRN_TRUE;

  Added: lib/grn_onigmo.h (+54 -0) 100644
===================================================================
--- /dev/null
+++ lib/grn_onigmo.h    2018-11-22 18:42:17 +0900 (4df3824cd)
@@ -0,0 +1,54 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+  Copyright(C) 2018 Kouhei Sutou <kou****@clear*****>
+
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License version 2.1 as published by the Free Software Foundation.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#pragma once
+
+#include "grn.h"
+
+#ifdef GRN_SUPPORT_REGEXP
+# include <onigmo.h>
+#endif /* GRN_SUPPORT_REGEXP */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef GRN_SUPPORT_REGEXP
+
+#define GRN_ONIGMO_OPTION_DEFAULT               \
+  (ONIG_OPTION_ASCII_RANGE |                    \
+   ONIG_OPTION_MULTILINE)
+
+#define GRN_ONIGMO_SYNTAX_DEFAULT ONIG_SYNTAX_RUBY
+
+grn_bool
+grn_onigmo_is_valid_encoding(grn_ctx *ctx);
+
+OnigRegex
+grn_onigmo_new(grn_ctx *ctx,
+               const char *pattern,
+               size_t pattern_length,
+               OnigOptionType option,
+               const OnigSyntaxType *syntax,
+               const char *context);
+
+#endif /* GRN_SUPPORT_REGEXP */
+
+#ifdef __cplusplus
+}
+#endif

  Modified: lib/ii.c (+8 -17)
===================================================================
--- lib/ii.c    2018-11-22 17:50:15 +0900 (c1c1d3080)
+++ lib/ii.c    2018-11-22 18:42:17 +0900 (c7fae6898)
@@ -37,14 +37,14 @@
 #include "grn_scorer.h"
 #include "grn_util.h"
 
-#ifdef GRN_WITH_ONIGMO
+#ifdef GRN_SUPPORT_REGEXP
 # define GRN_II_SELECT_ENABLE_SEQUENTIAL_SEARCH
-#endif
+#endif /* GRN_SUPPORT_REGEXP */
 
 #ifdef GRN_II_SELECT_ENABLE_SEQUENTIAL_SEARCH
 # include "grn_string.h"
-# include <onigmo.h>
-#endif
+# include "grn_onigmo.h"
+#endif /* GRN_II_SELECT_ENABLE_SEQUENTIAL_SEARCH */
 
 #define MAX_PSEG                 0x20000
 #define MAX_PSEG_SMALL           0x00200
@@ -9073,26 +9073,17 @@ grn_ii_select_sequential_search(grn_ctx *ctx,
                               NULL);
     {
       OnigRegex regex;
-      int onig_result;
-      OnigErrorInfo error_info;
-      onig_result = onig_new(&regex,
+      regex = grn_onigmo_new(ctx,
                              normalized_query,
-                             normalized_query + normalized_query_length,
+                             normalized_query_length,
                              ONIG_OPTION_NONE,
-                             ONIG_ENCODING_UTF8,
                              ONIG_SYNTAX_ASIS,
-                             &error_info);
-      if (onig_result == ONIG_NORMAL) {
+                             "[ii][select][sequential]");
+      if (regex) {
         grn_ii_select_sequential_search_body(ctx, ii, encoding,
                                              regex, result, op, wvm, optarg);
         onig_free(regex);
       } else {
-        char message[ONIG_MAX_ERROR_MESSAGE_LEN];
-        onig_error_code_to_str(message, onig_result, error_info);
-        GRN_LOG(ctx, GRN_LOG_WARNING,
-                "[ii][select][sequential] "
-                "failed to create regular expression object: %s",
-                message);
         processed = GRN_FALSE;
       }
     }

  Added: lib/onigmo.c (+100 -0) 100644
===================================================================
--- /dev/null
+++ lib/onigmo.c    2018-11-22 18:42:17 +0900 (651a49b65)
@@ -0,0 +1,100 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+  Copyright(C) 2018 Kouhei Sutou <kou****@clear*****>
+
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License version 2.1 as published by the Free Software Foundation.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#include "grn_ctx.h"
+#include "grn_onigmo.h"
+
+#ifdef GRN_SUPPORT_REGEXP
+grn_bool
+grn_onigmo_is_valid_encoding(grn_ctx *ctx)
+{
+  switch (ctx->encoding) {
+  case GRN_ENC_EUC_JP :
+  case GRN_ENC_UTF8 :
+  case GRN_ENC_SJIS :
+  case GRN_ENC_LATIN1 :
+  case GRN_ENC_KOI8R :
+    return GRN_TRUE;
+  default :
+    return GRN_FALSE;
+  }
+}
+
+OnigRegex
+grn_onigmo_new(grn_ctx *ctx,
+               const char *pattern,
+               size_t pattern_length,
+               OnigOptionType option,
+               const OnigSyntaxType *syntax,
+               const char *context)
+{
+  OnigEncoding onig_encoding;
+  OnigRegex onig_regex;
+  int onig_result;
+  OnigErrorInfo onig_error_info;
+
+  switch (ctx->encoding) {
+  case GRN_ENC_EUC_JP :
+    onig_encoding = ONIG_ENCODING_EUC_JP;
+    break;
+  case GRN_ENC_UTF8 :
+    onig_encoding = ONIG_ENCODING_UTF8;
+    break;
+  case GRN_ENC_SJIS :
+    onig_encoding = ONIG_ENCODING_CP932;
+    break;
+  case GRN_ENC_LATIN1 :
+    onig_encoding = ONIG_ENCODING_ISO_8859_1;
+    break;
+  case GRN_ENC_KOI8R :
+    onig_encoding = ONIG_ENCODING_KOI8_R;
+    break;
+  default :
+    ERR(GRN_INVALID_ARGUMENT,
+        "%s[regexp][new] invalid encoding: <%.*s>: <%s>",
+        context,
+        (int)pattern_length,
+        pattern,
+        grn_encoding_to_string(ctx->encoding));
+    return NULL;
+  }
+
+  onig_result = onig_new(&onig_regex,
+                         pattern,
+                         pattern + pattern_length,
+                         option,
+                         onig_encoding,
+                         syntax,
+                         &onig_error_info);
+  if (onig_result != ONIG_NORMAL) {
+    char message[ONIG_MAX_ERROR_MESSAGE_LEN];
+    onig_error_code_to_str(message, onig_result, onig_error_info);
+    ERR(GRN_INVALID_ARGUMENT,
+        "%s[regexp][new] "
+        "failed to create regular expression object: <%.*s>: %s",
+        context,
+        (int)pattern_length,
+        pattern,
+        message);
+    return NULL;
+  }
+
+  return onig_regex;
+}
+
+#endif /* GRN_SUPPORT_REGEXP */

  Modified: lib/operator.c (+22 -68)
===================================================================
--- lib/operator.c    2018-11-22 17:50:15 +0900 (e11eb0840)
+++ lib/operator.c    2018-11-22 18:42:17 +0900 (ac2c14496)
@@ -1,6 +1,7 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
   Copyright(C) 2014-2017 Brazil
+  Copyright(C) 2018 Kouhei Sutou <kou****@clear*****>
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -20,15 +21,12 @@
 #include "grn_db.h"
 #include "grn_str.h"
 #include "grn_normalizer.h"
+#include "grn_onigmo.h"
 
 #include <string.h>
 #include <math.h>
 #include <float.h>
 
-#ifdef GRN_SUPPORT_REGEXP
-# include <onigmo.h>
-#endif
-
 static const char *operator_names[] = {
   "push",
   "pop",
@@ -879,63 +877,6 @@ exec_match_vector_bulk(grn_ctx *ctx, grn_obj *vector, grn_obj *query)
 }
 
 #ifdef GRN_SUPPORT_REGEXP
-static OnigRegex
-regexp_compile(grn_ctx *ctx,
-               const char *pattern,
-               unsigned int pattern_len,
-               const OnigSyntaxType *syntax)
-{
-  OnigRegex regex;
-  OnigEncoding onig_encoding;
-  int onig_result;
-  OnigErrorInfo onig_error_info;
-
-  if (ctx->encoding == GRN_ENC_NONE) {
-    return NULL;
-  }
-
-  switch (ctx->encoding) {
-  case GRN_ENC_EUC_JP :
-    onig_encoding = ONIG_ENCODING_EUC_JP;
-    break;
-  case GRN_ENC_UTF8 :
-    onig_encoding = ONIG_ENCODING_UTF8;
-    break;
-  case GRN_ENC_SJIS :
-    onig_encoding = ONIG_ENCODING_CP932;
-    break;
-  case GRN_ENC_LATIN1 :
-    onig_encoding = ONIG_ENCODING_ISO_8859_1;
-    break;
-  case GRN_ENC_KOI8R :
-    onig_encoding = ONIG_ENCODING_KOI8_R;
-    break;
-  default :
-    return NULL;
-  }
-
-  onig_result = onig_new(&regex,
-                         pattern,
-                         pattern + pattern_len,
-                         ONIG_OPTION_ASCII_RANGE |
-                         ONIG_OPTION_MULTILINE,
-                         onig_encoding,
-                         syntax,
-                         &onig_error_info);
-  if (onig_result != ONIG_NORMAL) {
-    char message[ONIG_MAX_ERROR_MESSAGE_LEN];
-    onig_error_code_to_str(message, onig_result, onig_error_info);
-    ERR(GRN_INVALID_ARGUMENT,
-        "[operator][regexp] "
-        "failed to create regular expression object: <%.*s>: %s",
-        pattern_len, pattern,
-        message);
-    return NULL;
-  }
-
-  return regex;
-}
-
 static grn_bool
 regexp_is_match(grn_ctx *ctx, OnigRegex regex,
                 const char *target, unsigned int target_len)
@@ -967,11 +908,16 @@ string_have_sub_text(grn_ctx *ctx,
   }
 
 #ifdef GRN_SUPPORT_REGEXP
-  {
+  if (grn_onigmo_is_valid_encoding(ctx)) {
     OnigRegex regex;
     grn_bool matched;
 
-    regex = regexp_compile(ctx, sub_text, sub_text_len, ONIG_SYNTAX_ASIS);
+    regex = grn_onigmo_new(ctx,
+                           sub_text,
+                           sub_text_len,
+                           GRN_ONIGMO_OPTION_DEFAULT,
+                           ONIG_SYNTAX_ASIS,
+                           "[operator]");
     if (!regex) {
       return GRN_FALSE;
     }
@@ -1048,7 +994,11 @@ string_match_regexp(grn_ctx *ctx,
   OnigRegex regex;
   grn_bool matched;
 
-  regex = regexp_compile(ctx, pattern, pattern_len, ONIG_SYNTAX_RUBY);
+  regex = grn_onigmo_new(ctx,
+                         pattern, pattern_len,
+                         GRN_ONIGMO_OPTION_DEFAULT,
+                         GRN_ONIGMO_SYNTAX_DEFAULT,
+                         "[operator]");
   if (!regex) {
     return GRN_FALSE;
   }
@@ -1307,10 +1257,12 @@ exec_regexp_uvector_bulk(grn_ctx *ctx, grn_obj *uvector, grn_obj *pattern)
     return GRN_FALSE;
   }
 
-  regex = regexp_compile(ctx,
+  regex = grn_onigmo_new(ctx,
                          GRN_TEXT_VALUE(pattern),
                          GRN_TEXT_LEN(pattern),
-                         ONIG_SYNTAX_RUBY);
+                         GRN_ONIGMO_OPTION_DEFAULT,
+                         GRN_ONIGMO_SYNTAX_DEFAULT,
+                         "[operator]");
   if (!regex) {
     return GRN_FALSE;
   }
@@ -1389,10 +1341,12 @@ exec_regexp_vector_bulk(grn_ctx *ctx, grn_obj *vector, grn_obj *pattern)
     return GRN_FALSE;
   }
 
-  regex = regexp_compile(ctx,
+  regex = grn_onigmo_new(ctx,
                          GRN_TEXT_VALUE(pattern),
                          GRN_TEXT_LEN(pattern),
-                         ONIG_SYNTAX_RUBY);
+                         GRN_ONIGMO_OPTION_DEFAULT,
+                         GRN_ONIGMO_SYNTAX_DEFAULT,
+                         "[operator]");
   if (!regex) {
     return GRN_FALSE;
   }
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20181122/3ab17dcc/attachment-0001.html>


More information about the Groonga-commit mailing list
Back to archive index