Kouhei Sutou 2019-02-01 13:03:19 +0900 (Fri, 01 Feb 2019) Revision: 4a8995a782032e338cbf171f0730c366ed51475d https://github.com/groonga/groonga/commit/4a8995a782032e338cbf171f0730c366ed51475d Message: NormalizerNFKC100: add "remove_blank" option Added files: test/command/suite/normalizers/nfkc100/remove_blank.expected test/command/suite/normalizers/nfkc100/remove_blank.test Modified files: lib/grn_nfkc.h lib/nfkc.c lib/normalizer.c Modified: lib/grn_nfkc.h (+2 -1) =================================================================== --- lib/grn_nfkc.h 2019-02-01 13:00:00 +0900 (b4001a82a) +++ lib/grn_nfkc.h 2019-02-01 13:03:19 +0900 (41b6e198d) @@ -1,7 +1,7 @@ /* -*- c-basic-offset: 2 -*- */ /* Copyright(C) 2009-2016 Brazil - Copyright(C) 2018 Kouhei Sutou <kou****@clear*****> + Copyright(C) 2018-2019 Kouhei Sutou <kou****@clear*****> This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -46,6 +46,7 @@ typedef struct { grn_bool unify_katakana_v_sounds; grn_bool unify_katakana_bu_sound; grn_bool unify_to_romaji; + grn_bool remove_blank; } grn_nfkc_normalize_options; const char *grn_nfkc_decompose(const unsigned char *utf8); Modified: lib/nfkc.c (+8 -1) =================================================================== --- lib/nfkc.c 2019-02-01 13:00:00 +0900 (fff5a3bb8) +++ lib/nfkc.c 2019-02-01 13:03:19 +0900 (9ac8ffcd7) @@ -1,7 +1,7 @@ /* -*- c-basic-offset: 2 -*- */ /* Copyright(C) 2010-2016 Brazil - Copyright(C) 2018 Kouhei Sutou <kou****@clear*****> + Copyright(C) 2018-2019 Kouhei Sutou <kou****@clear*****> This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -64,6 +64,7 @@ grn_nfkc_normalize_options_init(grn_ctx *ctx, options->unify_katakana_v_sounds = GRN_FALSE; options->unify_katakana_bu_sound = GRN_FALSE; options->unify_to_romaji = GRN_FALSE; + options->remove_blank = GRN_FALSE; } void @@ -161,6 +162,12 @@ grn_nfkc_normalize_options_apply(grn_ctx *ctx, raw_options, i, options->unify_to_romaji); + } else if (GRN_RAW_STRING_EQUAL_CSTRING(name_raw, "remove_blank")) { + options->remove_blank = + grn_vector_get_element_bool(ctx, + raw_options, + i, + options->remove_blank); } } GRN_OPTION_VALUES_EACH_END(); Modified: lib/normalizer.c (+3 -1) =================================================================== --- lib/normalizer.c 2019-02-01 13:00:00 +0900 (024766a8d) +++ lib/normalizer.c 2019-02-01 13:03:19 +0900 (2d8b2473f) @@ -685,7 +685,9 @@ grn_nfkc_normalize_data_init(grn_ctx *ctx, memset(data, 0, sizeof(grn_nfkc_normalize_data)); data->string = (grn_string *)string; data->options = options; - data->remove_blank_p = (data->string->flags & GRN_STRING_REMOVE_BLANK); + data->remove_blank_p = + (data->string->flags & GRN_STRING_REMOVE_BLANK) || + data->options->remove_blank; data->remove_tokenized_delimiter_p = (data->string->flags & GRN_STRING_REMOVE_TOKENIZED_DELIMITER); Added: test/command/suite/normalizers/nfkc100/remove_blank.expected (+50 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/remove_blank.expected 2019-02-01 13:03:19 +0900 (db6c134da) @@ -0,0 +1,50 @@ +normalize 'NormalizerNFKC100("remove_blank", true, "report_source_offset", true)' "This is a pen." WITH_CHECKS|WITH_TYPES +[ + [ + 0, + 0.0, + 0.0 + ], + { + "normalized": "thisisapen.", + "types": [ + "alpha", + "alpha", + "alpha", + "alpha|blank", + "alpha", + "alpha|blank", + "alpha|blank", + "alpha", + "alpha", + "alpha", + "symbol" + ], + "checks": [ + 1, + 1, + 1, + 1, + 2, + 1, + 2, + 2, + 1, + 1, + 1 + ], + "offsets": [ + 0, + 1, + 2, + 3, + 5, + 6, + 8, + 10, + 11, + 12, + 13 + ] + } +] Added: test/command/suite/normalizers/nfkc100/remove_blank.test (+5 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/remove_blank.test 2019-02-01 13:03:19 +0900 (0c65d41d1) @@ -0,0 +1,5 @@ +normalize \ + 'NormalizerNFKC100("remove_blank", true, \ + "report_source_offset", true)' \ + "This is a pen." \ + WITH_CHECKS|WITH_TYPES -------------- next part -------------- An HTML attachment was scrubbed... URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190201/2e494136/attachment-0001.html>