[Groonga-commit] groonga/groonga at 4a8995a [master] NormalizerNFKC100: add "remove_blank" option

Back to the archive index
Kouhei Sutou null+****@clear*****
Fri Feb 1 13:03:19 JST 2019


Kouhei Sutou	2019-02-01 13:03:19 +0900 (Fri, 01 Feb 2019)

  Revision: 4a8995a782032e338cbf171f0730c366ed51475d
  https://github.com/groonga/groonga/commit/4a8995a782032e338cbf171f0730c366ed51475d

  Message:
    NormalizerNFKC100: add "remove_blank" option

  Added files:
    test/command/suite/normalizers/nfkc100/remove_blank.expected
    test/command/suite/normalizers/nfkc100/remove_blank.test
  Modified files:
    lib/grn_nfkc.h
    lib/nfkc.c
    lib/normalizer.c

  Modified: lib/grn_nfkc.h (+2 -1)
===================================================================
--- lib/grn_nfkc.h    2019-02-01 13:00:00 +0900 (b4001a82a)
+++ lib/grn_nfkc.h    2019-02-01 13:03:19 +0900 (41b6e198d)
@@ -1,7 +1,7 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
   Copyright(C) 2009-2016 Brazil
-  Copyright(C) 2018 Kouhei Sutou <kou****@clear*****>
+  Copyright(C) 2018-2019 Kouhei Sutou <kou****@clear*****>
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -46,6 +46,7 @@ typedef struct {
   grn_bool unify_katakana_v_sounds;
   grn_bool unify_katakana_bu_sound;
   grn_bool unify_to_romaji;
+  grn_bool remove_blank;
 } grn_nfkc_normalize_options;
 
 const char *grn_nfkc_decompose(const unsigned char *utf8);

  Modified: lib/nfkc.c (+8 -1)
===================================================================
--- lib/nfkc.c    2019-02-01 13:00:00 +0900 (fff5a3bb8)
+++ lib/nfkc.c    2019-02-01 13:03:19 +0900 (9ac8ffcd7)
@@ -1,7 +1,7 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
   Copyright(C) 2010-2016 Brazil
-  Copyright(C) 2018 Kouhei Sutou <kou****@clear*****>
+  Copyright(C) 2018-2019 Kouhei Sutou <kou****@clear*****>
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -64,6 +64,7 @@ grn_nfkc_normalize_options_init(grn_ctx *ctx,
   options->unify_katakana_v_sounds = GRN_FALSE;
   options->unify_katakana_bu_sound = GRN_FALSE;
   options->unify_to_romaji = GRN_FALSE;
+  options->remove_blank = GRN_FALSE;
 }
 
 void
@@ -161,6 +162,12 @@ grn_nfkc_normalize_options_apply(grn_ctx *ctx,
                                     raw_options,
                                     i,
                                     options->unify_to_romaji);
+    } else if (GRN_RAW_STRING_EQUAL_CSTRING(name_raw, "remove_blank")) {
+      options->remove_blank =
+        grn_vector_get_element_bool(ctx,
+                                    raw_options,
+                                    i,
+                                    options->remove_blank);
     }
   } GRN_OPTION_VALUES_EACH_END();
 

  Modified: lib/normalizer.c (+3 -1)
===================================================================
--- lib/normalizer.c    2019-02-01 13:00:00 +0900 (024766a8d)
+++ lib/normalizer.c    2019-02-01 13:03:19 +0900 (2d8b2473f)
@@ -685,7 +685,9 @@ grn_nfkc_normalize_data_init(grn_ctx *ctx,
   memset(data, 0, sizeof(grn_nfkc_normalize_data));
   data->string = (grn_string *)string;
   data->options = options;
-  data->remove_blank_p = (data->string->flags & GRN_STRING_REMOVE_BLANK);
+  data->remove_blank_p =
+    (data->string->flags & GRN_STRING_REMOVE_BLANK) ||
+    data->options->remove_blank;
   data->remove_tokenized_delimiter_p =
     (data->string->flags & GRN_STRING_REMOVE_TOKENIZED_DELIMITER);
 

  Added: test/command/suite/normalizers/nfkc100/remove_blank.expected (+50 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/remove_blank.expected    2019-02-01 13:03:19 +0900 (db6c134da)
@@ -0,0 +1,50 @@
+normalize   'NormalizerNFKC100("remove_blank", true,                      "report_source_offset", true)'   "This is a pen."   WITH_CHECKS|WITH_TYPES
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  {
+    "normalized": "thisisapen.",
+    "types": [
+      "alpha",
+      "alpha",
+      "alpha",
+      "alpha|blank",
+      "alpha",
+      "alpha|blank",
+      "alpha|blank",
+      "alpha",
+      "alpha",
+      "alpha",
+      "symbol"
+    ],
+    "checks": [
+      1,
+      1,
+      1,
+      1,
+      2,
+      1,
+      2,
+      2,
+      1,
+      1,
+      1
+    ],
+    "offsets": [
+      0,
+      1,
+      2,
+      3,
+      5,
+      6,
+      8,
+      10,
+      11,
+      12,
+      13
+    ]
+  }
+]

  Added: test/command/suite/normalizers/nfkc100/remove_blank.test (+5 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/remove_blank.test    2019-02-01 13:03:19 +0900 (0c65d41d1)
@@ -0,0 +1,5 @@
+normalize \
+  'NormalizerNFKC100("remove_blank", true, \
+                     "report_source_offset", true)' \
+  "This is a pen." \
+  WITH_CHECKS|WITH_TYPES
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190201/2e494136/attachment-0001.html>


More information about the Groonga-commit mailing list
Back to the archive index