[Groonga-commit] groonga/groonga [master] Revert "[normalizer] implemented normalizer as grn_proc. refs #1164"

Zurück zum Archiv-Index

null+****@clear***** null+****@clear*****
2012年 2月 14日 (火) 14:00:52 JST


Kouhei Sutou	2012-02-14 14:00:52 +0900 (Tue, 14 Feb 2012)

  New Revision: f1d46f851ec86981ff53d2a127cb9d8ccd8b5b7a

  Log:
    Revert "[normalizer] implemented normalizer as grn_proc. refs #1164"
    
    This reverts commit 67def859ac4bdcfef7345b5d654e0741d34a8710.

  Removed files:
    lib/normalizer.c
    lib/normalizer.h
  Modified files:
    include/groonga.h
    lib/Makefile.am
    lib/dat.cpp
    lib/dat.h
    lib/db.c
    lib/db.h
    lib/expr.c
    lib/hash.c
    lib/hash.h
    lib/pat.c
    lib/pat.h
    lib/snip.c
    lib/str.c
    lib/str.h
    lib/util.c
    test/unit/core/dat/test-dat.cpp
    test/unit/util/test-snip.c
    test/unit/util/test-string.c

  Modified: include/groonga.h (+2 -29)
===================================================================
--- include/groonga.h    2012-02-14 14:00:31 +0900 (728df55)
+++ include/groonga.h    2012-02-14 14:00:52 +0900 (8a078c2)
@@ -414,7 +414,6 @@ typedef unsigned short int grn_obj_flags;
 #define GRN_ACCESSOR_VIEW              (0x0a)
 #define GRN_SNIP                       (0x0b)
 #define GRN_PATSNIP                    (0x0c)
-#define GRN_NORMALIZED_TEXT            (0x0d)
 #define GRN_CURSOR_TABLE_HASH_KEY      (0x10)
 #define GRN_CURSOR_TABLE_PAT_KEY       (0x11)
 #define GRN_CURSOR_TABLE_DAT_KEY       (0x12)
@@ -591,16 +590,6 @@ typedef enum {
   GRN_DB_TRIGRAM,
 } grn_builtin_tokenizer;
 
-typedef enum {
-  GRN_DB_NORMALIZER_ASCII = 96,
-  GRN_DB_NORMALIZER_UTF8_NFKC,       /* Normalization Form KC */
-  GRN_DB_NORMALIZER_EUC_JP,
-  GRN_DB_NORMALIZER_SJIS,
-  GRN_DB_NORMALIZER_LATIN1,
-  GRN_DB_NORMALIZER_KOI8R,
-  GRN_DB_NORMALIZER_UTF8_UCA         /* Unicode Collation Algorithm */
-} grn_builtin_normalizer;
-
 GRN_API grn_obj *grn_ctx_at(grn_ctx *ctx, grn_id id);
 
 /**
@@ -647,8 +636,7 @@ typedef enum {
   GRN_PROC_TOKENIZER = 1,
   GRN_PROC_COMMAND,
   GRN_PROC_FUNCTION,
-  GRN_PROC_HOOK,
-  GRN_PROC_NORMALIZER
+  GRN_PROC_HOOK
 } grn_proc_type;
 
 GRN_API grn_obj *grn_proc_create(grn_ctx *ctx,
@@ -2434,7 +2422,7 @@ GRN_API void grn_time_now(grn_ctx *ctx, grn_obj *obj);
   grn_bulk_write((ctx), (obj), (char *)&_val, sizeof(grn_obj *));\
 } while (0)
 
-/* grn_str: deprecated */
+/* grn_str */
 
 typedef struct {
   const char *orig;
@@ -2457,21 +2445,6 @@ GRN_API grn_str *grn_str_open(grn_ctx *ctx, const char *str, unsigned int str_le
                               int flags);
 GRN_API grn_rc grn_str_close(grn_ctx *ctx, grn_str *nstr);
 
-/* grn_normalized_text */
-
-#define GRN_NORMALIZED_TEXT_REMOVE_BLANK (0x01<<0)
-#define GRN_NORMALIZED_TEXT_WITH_CTYPES  (0x01<<1)
-#define GRN_NORMALIZED_TEXT_WITH_CHECKS  (0x01<<2)
-
-GRN_API grn_obj *grn_normalized_text_open(grn_ctx *ctx, grn_obj *normalizer,
-                                          const char *str, unsigned int str_len,
-                                          grn_encoding encoding, int flags);
-GRN_API grn_rc grn_normalized_text_get_value(grn_ctx *ctx,
-                                             grn_obj *normalized_text,
-                                             const char **value,
-                                             unsigned int *length,
-                                             unsigned int *binary_length);
-
 GRN_API int grn_charlen(grn_ctx *ctx, const char *str, const char *end);
 
 /* expr */

  Modified: lib/Makefile.am (+0 -2)
===================================================================
--- lib/Makefile.am    2012-02-14 14:00:31 +0900 (c9bea94)
+++ lib/Makefile.am    2012-02-14 14:00:52 +0900 (7f8cfcd)
@@ -15,7 +15,6 @@ libgroonga_la_SOURCES =				\
 	io.c					\
 	str.c					\
 	nfkc.c					\
-	normalizer.c				\
 	snip.c					\
 	store.c					\
 	com.c					\
@@ -55,7 +54,6 @@ noinst_HEADERS =				\
 	ii.h					\
 	io.h					\
 	nfkc.h					\
-	normalizer.c				\
 	output.h				\
 	pat.h					\
 	plugin_in.h				\

  Modified: lib/dat.cpp (+0 -15)
===================================================================
--- lib/dat.cpp    2012-02-14 14:00:31 +0900 (eddb5be)
+++ lib/dat.cpp    2012-02-14 14:00:52 +0900 (897186a)
@@ -22,7 +22,6 @@
 #include "str.h"
 #include "io.h"
 #include "dat.h"
-#include "normalizer.h"
 #include "util.h"
 
 /*
@@ -313,14 +312,6 @@ grn_dat_create(grn_ctx *ctx, const char *path, uint32_t,
   dat->header->encoding = encoding;
   dat->header->tokenizer = GRN_ID_NIL;
   dat->header->file_id = 0;
-  if (dat->header->flags & GRN_OBJ_KEY_NORMALIZE) {
-    dat->header->flags &= ~GRN_OBJ_KEY_NORMALIZE;
-    dat->header->normalizer = grn_normalizer_find(ctx, ctx->encoding);
-    dat->normalizer = grn_ctx_at(ctx, dat->header->normalizer);
-  } else {
-    dat->header->normalizer = GRN_ID_NIL;
-    dat->normalizer = NULL;
-  }
   dat->encoding = encoding;
   dat->tokenizer = NULL;
   return dat;
@@ -356,12 +347,6 @@ grn_dat_open(grn_ctx *ctx, const char *path)
   dat->encoding = dat->header->encoding;
   dat->obj.header.flags = dat->header->flags;
   dat->tokenizer = grn_ctx_at(ctx, dat->header->tokenizer);
-  if (dat->header->flags & GRN_OBJ_KEY_NORMALIZE) {
-    dat->header->flags &= ~GRN_OBJ_KEY_NORMALIZE;
-    dat->header->normalizer = grn_normalizer_find(ctx, ctx->encoding);
-
-  }
-  dat->normalizer = grn_ctx_at(ctx, dat->header->normalizer);
   return dat;
 }
 

  Modified: lib/dat.h (+0 -3)
===================================================================
--- lib/dat.h    2012-02-14 14:00:31 +0900 (4409467)
+++ lib/dat.h    2012-02-14 14:00:52 +0900 (a92b0f7)
@@ -36,7 +36,6 @@ struct _grn_dat {
   void *trie;
   void *old_trie;
   grn_obj *tokenizer;
-  grn_obj *normalizer;
   grn_critical_section lock;
 };
 
@@ -45,8 +44,6 @@ struct grn_dat_header {
   grn_encoding encoding;
   grn_id tokenizer;
   uint32_t file_id;
-  grn_id normalizer;
-  uint32_t reserved[235];
 };
 
 struct _grn_dat_cursor {

  Modified: lib/db.c (+6 -21)
===================================================================
--- lib/db.c    2012-02-14 14:00:31 +0900 (99099a7)
+++ lib/db.c    2012-02-14 14:00:52 +0900 (9b64a10)
@@ -22,7 +22,6 @@
 #include "ii.h"
 #include "ctx_impl.h"
 #include "token.h"
-#include "normalizer.h"
 #include "proc.h"
 #include "plugin_in.h"
 #include "geo.h"
@@ -33,16 +32,13 @@
 #define NEXT_ADDR(p) (((byte *)(p)) + sizeof *(p))
 
 #define WITH_NORMALIZE(table,key,key_size,block) {\
-  if ((table)->normalizer) {\
-    grn_obj *nstr;\
-    if ((nstr = grn_normalized_text_open(ctx, (table)->normalizer,\
-                                         key, key_size,\
-                                         (table)->encoding, 0))) {\
-      const char *key;\
-      unsigned int key_size;\
-      grn_normalized_text_get_value(ctx, nstr, &key, NULL, &key_size);\
+  if ((table)->obj.header.flags & GRN_OBJ_KEY_NORMALIZE) {\
+    grn_str *nstr;\
+    if ((nstr = grn_str_open(ctx, key, key_size, GRN_STR_NORMALIZE))) { \
+      char *key = nstr->norm;\
+      unsigned int key_size = nstr->norm_blen;\
       block\
-      grn_obj_close(ctx, nstr);\
+      grn_str_close(ctx, nstr);\
     }\
   } else {\
     block\
@@ -143,7 +139,6 @@ grn_db_create(grn_ctx *ctx, const char *path, grn_db_create_optarg *optarg)
           if ((s->specs = grn_ja_create(ctx, buffer, 65536, 0))) {
             grn_ctx_use(ctx, (grn_obj *)s);
             grn_db_init_builtin_types(ctx);
-            grn_db_init_builtin_normalizers(ctx);
             GRN_API_RETURN((grn_obj *)s);
           } else {
             ERR(GRN_NO_MEMORY_AVAILABLE, "ja create failed");
@@ -152,7 +147,6 @@ grn_db_create(grn_ctx *ctx, const char *path, grn_db_create_optarg *optarg)
           s->specs = NULL;
           grn_ctx_use(ctx, (grn_obj *)s);
           grn_db_init_builtin_types(ctx);
-          grn_db_init_builtin_normalizers(ctx);
           GRN_API_RETURN((grn_obj *)s);
         }
         if (use_pat_as_db_keys) {
@@ -214,7 +208,6 @@ grn_db_open(grn_ctx *ctx, const char *path)
           }
 #endif
           grn_db_init_builtin_tokenizers(ctx);
-          grn_db_init_builtin_normalizers(ctx);
           grn_db_init_builtin_query(ctx);
           GRN_API_RETURN((grn_obj *)s);
         }
@@ -6887,9 +6880,6 @@ grn_obj_close(grn_ctx *ctx, grn_obj *obj)
     case GRN_ACCESSOR_VIEW :
       rc = grn_accessor_view_close(ctx, obj);
       break;
-    case GRN_NORMALIZED_TEXT :
-      rc = grn_normalized_text_close(ctx, obj);
-      break;
     case GRN_CURSOR_TABLE_PAT_KEY :
       grn_pat_cursor_close(ctx, (grn_pat_cursor *)obj);
       break;
@@ -8002,11 +7992,6 @@ grn_db_init_builtin_types(grn_ctx *ctx)
   }
 #endif
   grn_db_init_builtin_tokenizers(ctx);
-  for (id = grn_db_curr_id(ctx, db) + 1; id < GRN_DB_NORMALIZER_ASCII; id++) {
-    grn_itoh(id, buf + 3, 2);
-    grn_obj_register(ctx, db, buf, 5);
-  }
-  grn_db_init_builtin_normalizers(ctx);
   for (id = grn_db_curr_id(ctx, db) + 1; id < 128; id++) {
     grn_itoh(id, buf + 3, 2);
     grn_obj_register(ctx, db, buf, 5);

  Modified: lib/db.h (+1 -1)
===================================================================
--- lib/db.h    2012-02-14 14:00:31 +0900 (4f76d43)
+++ lib/db.h    2012-02-14 14:00:52 +0900 (2f7271b)
@@ -92,7 +92,7 @@ grn_id grn_table_get_v(grn_ctx *ctx, grn_obj *table, const void *key, int key_si
 grn_id grn_table_add_v(grn_ctx *ctx, grn_obj *table, const void *key, int key_size,
                        void **value, int *added);
 GRN_API grn_rc grn_table_get_info(grn_ctx *ctx, grn_obj *table, grn_obj_flags *flags,
-                                  grn_encoding *encoding, grn_obj **tokenizer);
+                          grn_encoding *encoding, grn_obj **tokenizer);
 const char *_grn_table_key(grn_ctx *ctx, grn_obj *table, grn_id id, uint32_t *key_size);
 
 grn_rc grn_table_search(grn_ctx *ctx, grn_obj *table,

  Modified: lib/expr.c (+7 -24)
===================================================================
--- lib/expr.c    2012-02-14 14:00:31 +0900 (795192d)
+++ lib/expr.c    2012-02-14 14:00:52 +0900 (3c79202)
@@ -22,7 +22,6 @@
 #include <float.h>
 #include "ii.h"
 #include "geo.h"
-#include "normalizer.h"
 #include "util.h"
 
 static inline int
@@ -2242,19 +2241,13 @@ grn_proc_call(grn_ctx *ctx, grn_obj *proc, int nargs, grn_obj *caller)
 void
 pseudo_query_scan(grn_ctx *ctx, grn_obj *x, grn_obj *y, grn_obj *res)
 {
-  grn_id normalizer_id;
-  grn_obj *normalizer;
-  grn_obj *a = NULL, *b = NULL;
+  grn_str *a = NULL, *b = NULL;
 
-  normalizer_id = grn_normalizer_find(ctx, ctx->encoding);
-  normalizer = grn_ctx_at(ctx, normalizer_id);
   switch (x->header.domain) {
   case GRN_DB_SHORT_TEXT:
   case GRN_DB_TEXT:
   case GRN_DB_LONG_TEXT:
-    a = grn_normalized_text_open(ctx, normalizer,
-                                 GRN_TEXT_VALUE(x), GRN_TEXT_LEN(x),
-                                 ctx->encoding, 0);
+    a = grn_str_open(ctx, GRN_TEXT_VALUE(x), GRN_TEXT_LEN(x), GRN_STR_NORMALIZE);
     break;
   default:
     break;
@@ -2264,33 +2257,23 @@ pseudo_query_scan(grn_ctx *ctx, grn_obj *x, grn_obj *y, grn_obj *res)
   case GRN_DB_SHORT_TEXT:
   case GRN_DB_TEXT:
   case GRN_DB_LONG_TEXT:
-    b = grn_normalized_text_open(ctx, normalizer,
-                                 GRN_TEXT_VALUE(y), GRN_TEXT_LEN(y),
-                                 ctx->encoding, 0);
+    b = grn_str_open(ctx, GRN_TEXT_VALUE(y), GRN_TEXT_LEN(y), GRN_STR_NORMALIZE);
     break;
   default:
     break;
   }
 
   /* normalized str doesn't contain '\0'. */
-  if (a && b) {
-    const char *normalized_a, *normalized_b;
-    grn_normalized_text_get_value(ctx, a, &normalized_a, NULL, NULL);
-    grn_normalized_text_get_value(ctx, b, &normalized_b, NULL, NULL);
-    if (strstr(normalized_a, normalized_b)) {
-      GRN_INT32_SET(ctx, res, 1);
-    } else {
-      GRN_INT32_SET(ctx, res, 0);
-    }
+  if (a && b && strstr(a->norm, b->norm)) {
+    GRN_INT32_SET(ctx, res, 1);
   } else {
     GRN_INT32_SET(ctx, res, 0);
   }
   res->header.type = GRN_BULK;
   res->header.domain = GRN_DB_INT32;
 
-  if (a) { grn_obj_close(ctx, a); }
-  if (b) { grn_obj_close(ctx, b); }
-  if (normalizer) { grn_obj_unlink(ctx, normalizer); }
+  if (a) { grn_str_close(ctx, a); }
+  if (b) { grn_str_close(ctx, b); }
 }
 
 grn_obj *

  Modified: lib/hash.c (+1 -18)
===================================================================
--- lib/hash.c    2012-02-14 14:00:31 +0900 (9a5455a)
+++ lib/hash.c    2012-02-14 14:00:52 +0900 (e30f1f0)
@@ -18,7 +18,6 @@
 #include "hash.h"
 #include "pat.h"
 #include "output.h"
-#include "normalizer.h"
 #include <string.h>
 #include <limits.h>
 
@@ -869,14 +868,6 @@ io_hash_init(grn_hash *ih, grn_ctx *ctx, const char *path, uint32_t key_size,
   header->n_entries = 0;
   header->n_garbages = 0;
   header->tokenizer = GRN_ID_NIL;
-  if (header->flags & GRN_OBJ_KEY_NORMALIZE) {
-    header->flags &= ~GRN_OBJ_KEY_NORMALIZE;
-    header->normalizer = grn_normalizer_find(ctx, ctx->encoding);
-    ih->normalizer = grn_ctx_at(ctx, header->normalizer);
-  } else {
-    header->normalizer = GRN_ID_NIL;
-    ih->normalizer = NULL;
-  }
   ih->obj.header.flags = flags;
   ih->ctx = ctx;
   ih->key_size = key_size;
@@ -931,7 +922,6 @@ tiny_hash_init(grn_hash *ah, grn_ctx *ctx, const char *path, uint32_t key_size,
   ah->n_entries_ = 0;
   ah->garbages = GRN_ID_NIL;
   ah->tokenizer = NULL;
-  ah->normalizer = NULL;
   grn_tiny_array_init(ctx, &ah->a, entry_size, GRN_TINY_ARRAY_CLEAR);
   grn_tiny_array_init(ctx, &ah->bitmap, 1, GRN_TINY_ARRAY_CLEAR);
   return GRN_SUCCESS;
@@ -991,11 +981,6 @@ grn_hash_open(grn_ctx *ctx, const char *path)
           hash->header = header;
           hash->lock = &header->lock;
           hash->tokenizer = grn_ctx_at(ctx, header->tokenizer);
-          if (header->flags & GRN_OBJ_KEY_NORMALIZE) {
-            header->flags &= ~GRN_OBJ_KEY_NORMALIZE;
-            header->normalizer = grn_normalizer_find(ctx, ctx->encoding);
-          }
-          hash->normalizer = grn_ctx_at(ctx, header->normalizer);
           return (grn_hash *)hash;
         } else {
           GRN_LOG(ctx, GRN_LOG_NOTICE, "invalid hash flag. (%x)", header->flags);
@@ -2159,7 +2144,7 @@ grn_hash_check(grn_ctx *ctx, grn_hash *hash)
   char buf[8];
   struct grn_hash_header *h = hash->header;
   GRN_OUTPUT_ARRAY_OPEN("RESULT", 1);
-  GRN_OUTPUT_MAP_OPEN("SUMMARY", 25);
+  GRN_OUTPUT_MAP_OPEN("SUMMARY", 24);
   GRN_OUTPUT_CSTR("flags");
   grn_itoh(h->flags, buf, 8);
   GRN_OUTPUT_STR(buf, 8);
@@ -2169,8 +2154,6 @@ grn_hash_check(grn_ctx *ctx, grn_hash *hash)
   GRN_OUTPUT_INT64(hash->value_size);
   GRN_OUTPUT_CSTR("tokenizer");
   GRN_OUTPUT_INT64(h->tokenizer);
-  GRN_OUTPUT_CSTR("normalizer");
-  GRN_OUTPUT_INT64(h->normalizer);
   GRN_OUTPUT_CSTR("curr_rec");
   GRN_OUTPUT_INT64(h->curr_rec);
   GRN_OUTPUT_CSTR("curr_key");

  Modified: lib/hash.h (+1 -3)
===================================================================
--- lib/hash.h    2012-02-14 14:00:31 +0900 (541835e)
+++ lib/hash.h    2012-02-14 14:00:52 +0900 (efe364f)
@@ -185,7 +185,6 @@ struct _grn_hash {
   uint32_t *n_entries;
   uint32_t *max_offset;
   grn_obj *tokenizer;
-  grn_obj *normalizer;
   /* portions for io_hash */
   grn_io *io;
   struct grn_hash_header *header;
@@ -226,8 +225,7 @@ struct grn_hash_header {
   uint32_t n_entries;
   uint32_t n_garbages;
   uint32_t lock;
-  grn_id normalizer;
-  uint32_t reserved[15];
+  uint32_t reserved[16];
   grn_id garbages[GRN_HASH_MAX_KEY_SIZE];
 };
 

  Deleted: lib/normalizer.c (+0 -1183) 100644
===================================================================
--- lib/normalizer.c    2012-02-14 14:00:31 +0900 (a21de48)
+++ /dev/null
@@ -1,1183 +0,0 @@
-/* -*- c-basic-offset: 2 -*- */
-/*
-  Copyright(C) 2012 Brazil
-
-  This library is free software; you can redistribute it and/or
-  modify it under the terms of the GNU Lesser General Public
-  License version 2.1 as published by the Free Software Foundation.
-
-  This library is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  Lesser General Public License for more details.
-
-  You should have received a copy of the GNU Lesser General Public
-  License along with this library; if not, write to the Free Software
-  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
-*/
-
-#include "groonga_in.h"
-#include <string.h>
-#include "normalizer.h"
-#include "str.h"
-
-grn_id
-grn_normalizer_find(grn_ctx *ctx, grn_encoding encoding)
-{
-  grn_id normalizer_id = GRN_ID_NIL;
-
-  switch (encoding) {
-  case GRN_ENC_EUC_JP :
-    normalizer_id = GRN_DB_NORMALIZER_EUC_JP;
-    break;
-  case GRN_ENC_UTF8 :
-#ifdef NO_NFKC
-    normalizer_id = GRN_DB_NORMALIZER_ASCII;
-#else /* NO_NFKC */
-    normalizer_id = GRN_DB_NORMALIZER_UTF8_NFKC;
-#endif /* NO_NFKC */
-    break;
-  case GRN_ENC_SJIS :
-    normalizer_id = GRN_DB_NORMALIZER_SJIS;
-    break;
-  case GRN_ENC_LATIN1 :
-    normalizer_id = GRN_DB_NORMALIZER_LATIN1;
-    break;
-  case GRN_ENC_KOI8R :
-    normalizer_id = GRN_DB_NORMALIZER_KOI8R;
-    break;
-  default :
-    normalizer_id = GRN_DB_NORMALIZER_ASCII;
-    break;
-  }
-
-  return normalizer_id;
-}
-
-grn_rc
-grn_normalizer_init(void)
-{
-  return GRN_SUCCESS;
-}
-
-grn_rc
-grn_normalizer_fin(void)
-{
-  return GRN_SUCCESS;
-}
-
-grn_obj *
-grn_normalized_text_open(grn_ctx *ctx, grn_obj *normalizer,
-                         const char *str, unsigned int str_len,
-                         grn_encoding encoding, int flags)
-{
-  grn_normalized_text *normalized_text;
-  grn_obj *obj;
-
-  if (!normalizer) {
-    return NULL;
-  }
-
-  normalized_text = GRN_MALLOCN(grn_normalized_text, 1);
-  if (!normalized_text) {
-    return NULL;
-  }
-
-  GRN_API_ENTER;
-  obj = (grn_obj *)normalized_text;
-  GRN_OBJ_INIT(obj, GRN_NORMALIZED_TEXT, GRN_OBJ_ALLOCATED, GRN_ID_NIL);
-  normalized_text->orig = str;
-  normalized_text->orig_blen = str_len;
-  normalized_text->norm = NULL;
-  normalized_text->norm_blen = 0;
-  normalized_text->length = 0;
-  normalized_text->checks = NULL;
-  normalized_text->ctypes = NULL;
-  normalized_text->encoding = encoding;
-  normalized_text->flags = flags;
-
-  ((grn_proc *)normalizer)->funcs[PROC_NEXT](ctx, 1, &obj, NULL);
-
-  GRN_API_RETURN(obj);
-}
-
-grn_rc
-grn_normalized_text_get_value(grn_ctx *ctx, grn_obj *normalized_text,
-                              const char **value, unsigned int *length,
-                              unsigned int *binary_length)
-{
-  grn_rc rc;
-  grn_normalized_text *text = (grn_normalized_text *)normalized_text;
-  GRN_API_ENTER;
-  if (text) {
-    if (value) { *value = text->norm; }
-    if (length) { *length = text->length; }
-    if (binary_length) { *binary_length = text->norm_blen; }
-    rc = GRN_SUCCESS;
-  } else {
-    rc = GRN_INVALID_ARGUMENT;
-  }
-  GRN_API_RETURN(rc);
-}
-
-grn_rc
-grn_normalized_text_close(grn_ctx *ctx, grn_obj *normalized_text)
-{
-  grn_rc rc;
-  grn_normalized_text *text = (grn_normalized_text *)normalized_text;
-  if (text) {
-    if (text->norm) { GRN_FREE(text->norm); }
-    if (text->ctypes) { GRN_FREE(text->ctypes); }
-    if (text->checks) { GRN_FREE(text->checks); }
-    GRN_FREE(text);
-    rc = GRN_SUCCESS;
-  } else {
-    rc = GRN_INVALID_ARGUMENT;
-  }
-  return rc;
-}
-
-static unsigned char symbol[] = {
-  ',', '.', 0, ':', ';', '?', '!', 0, 0, 0, '`', 0, '^', '~', '_', 0, 0, 0,
-  0, 0, 0, 0, 0, 0, 0, '-', '-', '/', '\\', 0, 0, '|', 0, 0, 0, '\'', 0,
-  '"', '(', ')', 0, 0, '[', ']', '{', '}', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-  '+', '-', 0, 0, 0, '=', 0, '<', '>', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-  '$', 0, 0, '%', '#', '&', '*', '@', 0, 0, 0, 0, 0, 0, 0, 0
-};
-
-inline static grn_obj *
-eucjp_normalize(grn_ctx *ctx, int nargs, grn_obj **args,
-                grn_user_data *user_data)
-{
-  static uint16_t hankana[] = {
-    0xa1a1, 0xa1a3, 0xa1d6, 0xa1d7, 0xa1a2, 0xa1a6, 0xa5f2, 0xa5a1, 0xa5a3,
-    0xa5a5, 0xa5a7, 0xa5a9, 0xa5e3, 0xa5e5, 0xa5e7, 0xa5c3, 0xa1bc, 0xa5a2,
-    0xa5a4, 0xa5a6, 0xa5a8, 0xa5aa, 0xa5ab, 0xa5ad, 0xa5af, 0xa5b1, 0xa5b3,
-    0xa5b5, 0xa5b7, 0xa5b9, 0xa5bb, 0xa5bd, 0xa5bf, 0xa5c1, 0xa5c4, 0xa5c6,
-    0xa5c8, 0xa5ca, 0xa5cb, 0xa5cc, 0xa5cd, 0xa5ce, 0xa5cf, 0xa5d2, 0xa5d5,
-    0xa5d8, 0xa5db, 0xa5de, 0xa5df, 0xa5e0, 0xa5e1, 0xa5e2, 0xa5e4, 0xa5e6,
-    0xa5e8, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5ef, 0xa5f3, 0xa1ab,
-    0xa1eb
-  };
-  static unsigned char dakuten[] = {
-    0xf4, 0, 0, 0, 0, 0xac, 0, 0xae, 0, 0xb0, 0, 0xb2, 0, 0xb4, 0, 0xb6, 0,
-    0xb8, 0, 0xba, 0, 0xbc, 0, 0xbe, 0, 0xc0, 0, 0xc2, 0, 0, 0xc5, 0, 0xc7,
-    0, 0xc9, 0, 0, 0, 0, 0, 0, 0xd0, 0, 0, 0xd3, 0, 0, 0xd6, 0, 0, 0xd9, 0,
-    0, 0xdc
-  };
-  static unsigned char handaku[] = {
-    0xd1, 0, 0, 0xd4, 0, 0, 0xd7, 0, 0, 0xda, 0, 0, 0xdd
-  };
-  grn_normalized_text *nstr = (grn_normalized_text *)args[0];
-  int16_t *ch;
-  const unsigned char *s, *s_, *e;
-  unsigned char *d, *d0, *d_, b;
-  uint_least8_t *cp, *ctypes, ctype;
-  size_t size = nstr->orig_blen, length = 0;
-  int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
-  if (!(nstr->norm = GRN_MALLOC(size * 2 + 1))) {
-    ERR(GRN_NO_MEMORY_AVAILABLE,
-        "[normalizer][eucjp] failed to allocate normalized text space");
-    return NULL;
-  }
-  d0 = (unsigned char *) nstr->norm;
-  if (nstr->flags & GRN_STR_WITH_CHECKS) {
-    if (!(nstr->checks = GRN_MALLOC(size * 2 * sizeof(int16_t) + 1))) {
-      GRN_FREE(nstr->norm);
-      nstr->norm = NULL;
-      ERR(GRN_NO_MEMORY_AVAILABLE,
-          "[normalizer][eucjp] failed to allocate checks space");
-      return NULL;
-    }
-  }
-  ch = nstr->checks;
-  if (nstr->flags & GRN_STR_WITH_CTYPES) {
-    if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
-      GRN_FREE(nstr->checks);
-      GRN_FREE(nstr->norm);
-      nstr->checks = NULL;
-      nstr->norm = NULL;
-      ERR(GRN_NO_MEMORY_AVAILABLE,
-          "[normalizer][eucjp] failed to allocate character types space");
-      return NULL;
-    }
-  }
-  cp = ctypes = nstr->ctypes;
-  e = (unsigned char *)nstr->orig + size;
-  for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
-    if ((*s & 0x80)) {
-      if (((s + 1) < e) && (*(s + 1) & 0x80)) {
-        unsigned char c1 = *s++, c2 = *s, c3 = 0;
-        switch (c1 >> 4) {
-        case 0x08 :
-          if (c1 == 0x8e && 0xa0 <= c2 && c2 <= 0xdf) {
-            uint16_t c = hankana[c2 - 0xa0];
-            switch (c) {
-            case 0xa1ab :
-              if (d > d0 + 1 && d[-2] == 0xa5
-                  && 0xa6 <= d[-1] && d[-1] <= 0xdb && (b = dakuten[d[-1] - 0xa6])) {
-                *(d - 1) = b;
-                if (ch) { ch[-1] += 2; s_ += 2; }
-                continue;
-              } else {
-                *d++ = c >> 8; *d = c & 0xff;
-              }
-              break;
-            case 0xa1eb :
-              if (d > d0 + 1 && d[-2] == 0xa5
-                  && 0xcf <= d[-1] && d[-1] <= 0xdb && (b = handaku[d[-1] - 0xcf])) {
-                *(d - 1) = b;
-                if (ch) { ch[-1] += 2; s_ += 2; }
-                continue;
-              } else {
-                *d++ = c >> 8; *d = c & 0xff;
-              }
-              break;
-            default :
-              *d++ = c >> 8; *d = c & 0xff;
-              break;
-            }
-            ctype = grn_str_katakana;
-          } else {
-            *d++ = c1; *d = c2;
-            ctype = grn_str_others;
-          }
-          break;
-        case 0x09 :
-          *d++ = c1; *d = c2;
-          ctype = grn_str_others;
-          break;
-        case 0x0a :
-          switch (c1 & 0x0f) {
-          case 1 :
-            switch (c2) {
-            case 0xbc :
-              *d++ = c1; *d = c2;
-              ctype = grn_str_katakana;
-              break;
-            case 0xb9 :
-              *d++ = c1; *d = c2;
-              ctype = grn_str_kanji;
-              break;
-            case 0xa1 :
-              if (removeblankp) {
-                if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
-                continue;
-              } else {
-                *d = ' ';
-                ctype = GRN_STR_BLANK|grn_str_symbol;
-              }
-              break;
-            default :
-              if (c2 >= 0xa4 && (c3 = symbol[c2 - 0xa4])) {
-                *d = c3;
-                ctype = grn_str_symbol;
-              } else {
-                *d++ = c1; *d = c2;
-                ctype = grn_str_others;
-              }
-              break;
-            }
-            break;
-          case 2 :
-            *d++ = c1; *d = c2;
-            ctype = grn_str_symbol;
-            break;
-          case 3 :
-            c3 = c2 - 0x80;
-            if ('a' <= c3 && c3 <= 'z') {
-              ctype = grn_str_alpha;
-              *d = c3;
-            } else if ('A' <= c3 && c3 <= 'Z') {
-              ctype = grn_str_alpha;
-              *d = c3 + 0x20;
-            } else if ('0' <= c3 && c3 <= '9') {
-              ctype = grn_str_digit;
-              *d = c3;
-            } else {
-              ctype = grn_str_others;
-              *d++ = c1; *d = c2;
-            }
-            break;
-          case 4 :
-            *d++ = c1; *d = c2;
-            ctype = grn_str_hiragana;
-            break;
-          case 5 :
-            *d++ = c1; *d = c2;
-            ctype = grn_str_katakana;
-            break;
-          case 6 :
-          case 7 :
-          case 8 :
-            *d++ = c1; *d = c2;
-            ctype = grn_str_symbol;
-            break;
-          default :
-            *d++ = c1; *d = c2;
-            ctype = grn_str_others;
-            break;
-          }
-          break;
-        default :
-          *d++ = c1; *d = c2;
-          ctype = grn_str_kanji;
-          break;
-        }
-      } else {
-        /* skip invalid character */
-        continue;
-      }
-    } else {
-      unsigned char c = *s;
-      switch (c >> 4) {
-      case 0 :
-      case 1 :
-        /* skip unprintable ascii */
-        if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
-        continue;
-      case 2 :
-        if (c == 0x20) {
-          if (removeblankp) {
-            if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
-            continue;
-          } else {
-            *d = ' ';
-            ctype = GRN_STR_BLANK|grn_str_symbol;
-          }
-        } else {
-          *d = c;
-          ctype = grn_str_symbol;
-        }
-        break;
-      case 3 :
-        *d = c;
-        ctype = (c <= 0x39) ? grn_str_digit : grn_str_symbol;
-        break;
-      case 4 :
-        *d = ('A' <= c) ? c + 0x20 : c;
-        ctype = (c == 0x40) ? grn_str_symbol : grn_str_alpha;
-        break;
-      case 5 :
-        *d = (c <= 'Z') ? c + 0x20 : c;
-        ctype = (c <= 0x5a) ? grn_str_alpha : grn_str_symbol;
-        break;
-      case 6 :
-        *d = c;
-        ctype = (c == 0x60) ? grn_str_symbol : grn_str_alpha;
-        break;
-      case 7 :
-        *d = c;
-        ctype = (c <= 0x7a) ? grn_str_alpha : (c == 0x7f ? grn_str_others : grn_str_symbol);
-        break;
-      default :
-        *d = c;
-        ctype = grn_str_others;
-        break;
-      }
-    }
-    d++;
-    length++;
-    if (cp) { *cp++ = ctype; }
-    if (ch) {
-      *ch++ = (int16_t)(s + 1 - s_);
-      s_ = s + 1;
-      while (++d_ < d) { *ch++ = 0; }
-    }
-  }
-  if (cp) { *cp = grn_str_null; }
-  *d = '\0';
-  nstr->length = length;
-  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
-  return NULL;
-}
-
-#ifndef NO_NFKC
-uint_least8_t grn_nfkc_ctype(const unsigned char *str);
-const char *grn_nfkc_map1(const unsigned char *str);
-const char *grn_nfkc_map2(const unsigned char *prefix, const unsigned char *suffix);
-
-inline static grn_obj *
-utf8_nfkc_normalize(grn_ctx *ctx, int nargs, grn_obj **args,
-                    grn_user_data *user_data)
-{
-  grn_normalized_text *nstr = (grn_normalized_text *)args[0];
-  int16_t *ch;
-  const unsigned char *s, *s_, *s__ = NULL, *p, *p2, *pe, *e;
-  unsigned char *d, *d_, *de;
-  uint_least8_t *cp;
-  size_t length = 0, ls, lp, size = nstr->orig_blen, ds = size * 3;
-  int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
-  if (!(nstr->norm = GRN_MALLOC(ds + 1))) {
-    ERR(GRN_NO_MEMORY_AVAILABLE,
-        "[normalizer][utf8][nfkc] failed to allocate normalized text space");
-    return NULL;
-  }
-  if (nstr->flags & GRN_STR_WITH_CHECKS) {
-    if (!(nstr->checks = GRN_MALLOC(ds * sizeof(int16_t) + 1))) {
-      GRN_FREE(nstr->norm);
-      nstr->norm = NULL;
-      ERR(GRN_NO_MEMORY_AVAILABLE,
-          "[normalizer][utf8][nfkc] failed to allocate checks space");
-      return NULL;
-    }
-  }
-  ch = nstr->checks;
-  if (nstr->flags & GRN_STR_WITH_CTYPES) {
-    if (!(nstr->ctypes = GRN_MALLOC(ds + 1))) {
-      if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
-      GRN_FREE(nstr->norm);
-      nstr->norm = NULL;
-      ERR(GRN_NO_MEMORY_AVAILABLE,
-          "[normalizer][utf8][nfkc] failed to allocate character types space");
-      return NULL;
-    }
-  }
-  cp = nstr->ctypes;
-  d = (unsigned char *)nstr->norm;
-  de = d + ds;
-  d_ = NULL;
-  e = (unsigned char *)nstr->orig + size;
-  for (s = s_ = (unsigned char *)nstr->orig; ; s += ls) {
-    if (!(ls = grn_str_charlen_utf8(ctx, s, e))) {
-      break;
-    }
-    if ((p = (unsigned char *)grn_nfkc_map1(s))) {
-      pe = p + strlen((char *)p);
-    } else {
-      p = s;
-      pe = p + ls;
-    }
-    if (d_ && (p2 = (unsigned char *)grn_nfkc_map2(d_, p))) {
-      p = p2;
-      pe = p + strlen((char *)p);
-      if (cp) { cp--; }
-      if (ch) {
-        ch -= (d - d_);
-        s_ = s__;
-      }
-      d = d_;
-      length--;
-    }
-    for (; ; p += lp) {
-      if (!(lp = grn_str_charlen_utf8(ctx, p, pe))) {
-        break;
-      }
-      if ((*p == ' ' && removeblankp) || *p < 0x20  /* skip unprintable ascii */ ) {
-        if (cp > nstr->ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
-      } else {
-        if (de <= d + lp) {
-          unsigned char *norm;
-          ds += (ds >> 1) + lp;
-          if (!(norm = GRN_REALLOC(nstr->norm, ds + 1))) {
-            if (nstr->ctypes) { GRN_FREE(nstr->ctypes); nstr->ctypes = NULL; }
-            if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
-            GRN_FREE(nstr->norm); nstr->norm = NULL;
-            ERR(GRN_NO_MEMORY_AVAILABLE,
-                "[normalizer][utf8][nfkc] "
-                "failed to reallocate normalized text space");
-            return NULL;
-          }
-          de = norm + ds;
-          d = norm + (d - (unsigned char *)nstr->norm);
-          nstr->norm = norm;
-          if (ch) {
-            int16_t *checks;
-            if (!(checks = GRN_REALLOC(nstr->checks, ds * sizeof(int16_t)+ 1))) {
-              if (nstr->ctypes) { GRN_FREE(nstr->ctypes); nstr->ctypes = NULL; }
-              GRN_FREE(nstr->checks); nstr->checks = NULL;
-              GRN_FREE(nstr->norm); nstr->norm = NULL;
-              ERR(GRN_NO_MEMORY_AVAILABLE,
-                  "[normalizer][utf8][nfkc] "
-                  "failed to reallocate checks space");
-              return NULL;
-            }
-            ch = checks + (ch - nstr->checks);
-            nstr->checks = checks;
-          }
-          if (cp) {
-            uint_least8_t *ctypes;
-            if (!(ctypes = GRN_REALLOC(nstr->ctypes, ds + 1))) {
-              GRN_FREE(nstr->ctypes); nstr->ctypes = NULL;
-              if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
-              GRN_FREE(nstr->norm); nstr->norm = NULL;
-              ERR(GRN_NO_MEMORY_AVAILABLE,
-                  "[normalizer][utf8][nfkc] "
-                  "failed to reallocate character types space");
-              return NULL;
-            }
-            cp = ctypes + (cp - nstr->ctypes);
-            nstr->ctypes = ctypes;
-          }
-        }
-        memcpy(d, p, lp);
-        d_ = d;
-        d += lp;
-        length++;
-        if (cp) { *cp++ = grn_nfkc_ctype(p); }
-        if (ch) {
-          size_t i;
-          if (s_ == s + ls) {
-            *ch++ = -1;
-          } else {
-            *ch++ = (int16_t)(s + ls - s_);
-            s__ = s_;
-            s_ = s + ls;
-          }
-          for (i = lp; i > 1; i--) { *ch++ = 0; }
-        }
-      }
-    }
-  }
-  if (cp) { *cp = grn_str_null; }
-  *d = '\0';
-  nstr->length = length;
-  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
-  return NULL;
-}
-#endif /* NO_NFKC */
-
-inline static grn_obj *
-sjis_normalize(grn_ctx *ctx, int nargs, grn_obj **args,
-               grn_user_data *user_data)
-{
-  static uint16_t hankana[] = {
-    0x8140, 0x8142, 0x8175, 0x8176, 0x8141, 0x8145, 0x8392, 0x8340, 0x8342,
-    0x8344, 0x8346, 0x8348, 0x8383, 0x8385, 0x8387, 0x8362, 0x815b, 0x8341,
-    0x8343, 0x8345, 0x8347, 0x8349, 0x834a, 0x834c, 0x834e, 0x8350, 0x8352,
-    0x8354, 0x8356, 0x8358, 0x835a, 0x835c, 0x835e, 0x8360, 0x8363, 0x8365,
-    0x8367, 0x8369, 0x836a, 0x836b, 0x836c, 0x836d, 0x836e, 0x8371, 0x8374,
-    0x8377, 0x837a, 0x837d, 0x837e, 0x8380, 0x8381, 0x8382, 0x8384, 0x8386,
-    0x8388, 0x8389, 0x838a, 0x838b, 0x838c, 0x838d, 0x838f, 0x8393, 0x814a,
-    0x814b
-  };
-  static unsigned char dakuten[] = {
-    0x94, 0, 0, 0, 0, 0x4b, 0, 0x4d, 0, 0x4f, 0, 0x51, 0, 0x53, 0, 0x55, 0,
-    0x57, 0, 0x59, 0, 0x5b, 0, 0x5d, 0, 0x5f, 0, 0x61, 0, 0, 0x64, 0, 0x66,
-    0, 0x68, 0, 0, 0, 0, 0, 0, 0x6f, 0, 0, 0x72, 0, 0, 0x75, 0, 0, 0x78, 0,
-    0, 0x7b
-  };
-  static unsigned char handaku[] = {
-    0x70, 0, 0, 0x73, 0, 0, 0x76, 0, 0, 0x79, 0, 0, 0x7c
-  };
-  grn_normalized_text *nstr = (grn_normalized_text *)args[0];
-  int16_t *ch;
-  const unsigned char *s, *s_;
-  unsigned char *d, *d0, *d_, b, *e;
-  uint_least8_t *cp, *ctypes, ctype;
-  size_t size = nstr->orig_blen, length = 0;
-  int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
-  if (!(nstr->norm = GRN_MALLOC(size * 2 + 1))) {
-    ERR(GRN_NO_MEMORY_AVAILABLE,
-        "[normalizer][sjis] failed to allocate normalized text space");
-    return NULL;
-  }
-  d0 = (unsigned char *) nstr->norm;
-  if (nstr->flags & GRN_STR_WITH_CHECKS) {
-    if (!(nstr->checks = GRN_MALLOC(size * 2 * sizeof(int16_t) + 1))) {
-      GRN_FREE(nstr->norm);
-      nstr->norm = NULL;
-      ERR(GRN_NO_MEMORY_AVAILABLE,
-          "[normalizer][sjis] failed to allocate checks space");
-      return NULL;
-    }
-  }
-  ch = nstr->checks;
-  if (nstr->flags & GRN_STR_WITH_CTYPES) {
-    if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
-      GRN_FREE(nstr->checks);
-      GRN_FREE(nstr->norm);
-      nstr->checks = NULL;
-      nstr->norm = NULL;
-      ERR(GRN_NO_MEMORY_AVAILABLE,
-          "[normalizer][sjis] failed to allocate character types space");
-      return NULL;
-    }
-  }
-  cp = ctypes = nstr->ctypes;
-  e = (unsigned char *)nstr->orig + size;
-  for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
-    if ((*s & 0x80)) {
-      if (0xa0 <= *s && *s <= 0xdf) {
-        uint16_t c = hankana[*s - 0xa0];
-        switch (c) {
-        case 0x814a :
-          if (d > d0 + 1 && d[-2] == 0x83
-              && 0x45 <= d[-1] && d[-1] <= 0x7a && (b = dakuten[d[-1] - 0x45])) {
-            *(d - 1) = b;
-            if (ch) { ch[-1]++; s_++; }
-            continue;
-          } else {
-            *d++ = c >> 8; *d = c & 0xff;
-          }
-          break;
-        case 0x814b :
-          if (d > d0 + 1 && d[-2] == 0x83
-              && 0x6e <= d[-1] && d[-1] <= 0x7a && (b = handaku[d[-1] - 0x6e])) {
-            *(d - 1) = b;
-            if (ch) { ch[-1]++; s_++; }
-            continue;
-          } else {
-            *d++ = c >> 8; *d = c & 0xff;
-          }
-          break;
-        default :
-          *d++ = c >> 8; *d = c & 0xff;
-          break;
-        }
-        ctype = grn_str_katakana;
-      } else {
-        if ((s + 1) < e && 0x40 <= *(s + 1) && *(s + 1) <= 0xfc) {
-          unsigned char c1 = *s++, c2 = *s, c3 = 0;
-          if (0x81 <= c1 && c1 <= 0x87) {
-            switch (c1 & 0x0f) {
-            case 1 :
-              switch (c2) {
-              case 0x5b :
-                *d++ = c1; *d = c2;
-                ctype = grn_str_katakana;
-                break;
-              case 0x58 :
-                *d++ = c1; *d = c2;
-                ctype = grn_str_kanji;
-                break;
-              case 0x40 :
-                if (removeblankp) {
-                  if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
-                  continue;
-                } else {
-                  *d = ' ';
-                  ctype = GRN_STR_BLANK|grn_str_symbol;
-                }
-                break;
-              default :
-                if (0x43 <= c2 && c2 <= 0x7e && (c3 = symbol[c2 - 0x43])) {
-                  *d = c3;
-                  ctype = grn_str_symbol;
-                } else if (0x7f <= c2 && c2 <= 0x97 && (c3 = symbol[c2 - 0x44])) {
-                  *d = c3;
-                  ctype = grn_str_symbol;
-                } else {
-                  *d++ = c1; *d = c2;
-                  ctype = grn_str_others;
-                }
-                break;
-              }
-              break;
-            case 2 :
-              c3 = c2 - 0x1f;
-              if (0x4f <= c2 && c2 <= 0x58) {
-                ctype = grn_str_digit;
-                *d = c2 - 0x1f;
-              } else if (0x60 <= c2 && c2 <= 0x79) {
-                ctype = grn_str_alpha;
-                *d = c2 + 0x01;
-              } else if (0x81 <= c2 && c2 <= 0x9a) {
-                ctype = grn_str_alpha;
-                *d = c2 - 0x20;
-              } else if (0x9f <= c2 && c2 <= 0xf1) {
-                *d++ = c1; *d = c2;
-                ctype = grn_str_hiragana;
-              } else {
-                *d++ = c1; *d = c2;
-                ctype = grn_str_others;
-              }
-              break;
-            case 3 :
-              if (0x40 <= c2 && c2 <= 0x96) {
-                *d++ = c1; *d = c2;
-                ctype = grn_str_katakana;
-              } else {
-                *d++ = c1; *d = c2;
-                ctype = grn_str_symbol;
-              }
-              break;
-            case 4 :
-            case 7 :
-              *d++ = c1; *d = c2;
-              ctype = grn_str_symbol;
-              break;
-            default :
-              *d++ = c1; *d = c2;
-              ctype = grn_str_others;
-              break;
-            }
-          } else {
-            *d++ = c1; *d = c2;
-            ctype = grn_str_kanji;
-          }
-        } else {
-          /* skip invalid character */
-          continue;
-        }
-      }
-    } else {
-      unsigned char c = *s;
-      switch (c >> 4) {
-      case 0 :
-      case 1 :
-        /* skip unprintable ascii */
-        if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
-        continue;
-      case 2 :
-        if (c == 0x20) {
-          if (removeblankp) {
-            if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
-            continue;
-          } else {
-            *d = ' ';
-            ctype = GRN_STR_BLANK|grn_str_symbol;
-          }
-        } else {
-          *d = c;
-          ctype = grn_str_symbol;
-        }
-        break;
-      case 3 :
-        *d = c;
-        ctype = (c <= 0x39) ? grn_str_digit : grn_str_symbol;
-        break;
-      case 4 :
-        *d = ('A' <= c) ? c + 0x20 : c;
-        ctype = (c == 0x40) ? grn_str_symbol : grn_str_alpha;
-        break;
-      case 5 :
-        *d = (c <= 'Z') ? c + 0x20 : c;
-        ctype = (c <= 0x5a) ? grn_str_alpha : grn_str_symbol;
-        break;
-      case 6 :
-        *d = c;
-        ctype = (c == 0x60) ? grn_str_symbol : grn_str_alpha;
-        break;
-      case 7 :
-        *d = c;
-        ctype = (c <= 0x7a) ? grn_str_alpha : (c == 0x7f ? grn_str_others : grn_str_symbol);
-        break;
-      default :
-        *d = c;
-        ctype = grn_str_others;
-        break;
-      }
-    }
-    d++;
-    length++;
-    if (cp) { *cp++ = ctype; }
-    if (ch) {
-      *ch++ = (int16_t)(s + 1 - s_);
-      s_ = s + 1;
-      while (++d_ < d) { *ch++ = 0; }
-    }
-  }
-  if (cp) { *cp = grn_str_null; }
-  *d = '\0';
-  nstr->length = length;
-  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
-  return NULL;
-}
-
-inline static grn_obj *
-ascii_normalize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
-{
-  grn_normalized_text *nstr = (grn_normalized_text *)args[0];
-  int16_t *ch;
-  const unsigned char *s, *s_, *e;
-  unsigned char *d, *d0, *d_;
-  uint_least8_t *cp, *ctypes, ctype;
-  size_t size = nstr->orig_blen, length = 0;
-  int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
-  if (!(nstr->norm = GRN_MALLOC(size + 1))) {
-    ERR(GRN_NO_MEMORY_AVAILABLE,
-        "[normalizer][ascii] failed to allocate normalized text space");
-    return NULL;
-  }
-  d0 = (unsigned char *) nstr->norm;
-  if (nstr->flags & GRN_STR_WITH_CHECKS) {
-    if (!(nstr->checks = GRN_MALLOC(size * sizeof(int16_t) + 1))) {
-      GRN_FREE(nstr->norm);
-      nstr->norm = NULL;
-      ERR(GRN_NO_MEMORY_AVAILABLE,
-          "[normalizer][ascii] failed to allocate checks space");
-      return NULL;
-    }
-  }
-  ch = nstr->checks;
-  if (nstr->flags & GRN_STR_WITH_CTYPES) {
-    if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
-      GRN_FREE(nstr->checks);
-      GRN_FREE(nstr->norm);
-      nstr->checks = NULL;
-      nstr->norm = NULL;
-      ERR(GRN_NO_MEMORY_AVAILABLE,
-          "[normalizer][ascii] failed to allocate character types space");
-      return NULL;
-    }
-  }
-  cp = ctypes = nstr->ctypes;
-  e = (unsigned char *)nstr->orig + size;
-  for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
-    unsigned char c = *s;
-    switch (c >> 4) {
-    case 0 :
-    case 1 :
-      /* skip unprintable ascii */
-      if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
-      continue;
-    case 2 :
-      if (c == 0x20) {
-        if (removeblankp) {
-          if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
-          continue;
-        } else {
-          *d = ' ';
-          ctype = GRN_STR_BLANK|grn_str_symbol;
-        }
-      } else {
-        *d = c;
-        ctype = grn_str_symbol;
-      }
-      break;
-    case 3 :
-      *d = c;
-      ctype = (c <= 0x39) ? grn_str_digit : grn_str_symbol;
-      break;
-    case 4 :
-      *d = ('A' <= c) ? c + 0x20 : c;
-      ctype = (c == 0x40) ? grn_str_symbol : grn_str_alpha;
-      break;
-    case 5 :
-      *d = (c <= 'Z') ? c + 0x20 : c;
-      ctype = (c <= 0x5a) ? grn_str_alpha : grn_str_symbol;
-      break;
-    case 6 :
-      *d = c;
-      ctype = (c == 0x60) ? grn_str_symbol : grn_str_alpha;
-      break;
-    case 7 :
-      *d = c;
-      ctype = (c <= 0x7a) ? grn_str_alpha : (c == 0x7f ? grn_str_others : grn_str_symbol);
-      break;
-    default :
-      *d = c;
-      ctype = grn_str_others;
-      break;
-    }
-    d++;
-    length++;
-    if (cp) { *cp++ = ctype; }
-    if (ch) {
-      *ch++ = (int16_t)(s + 1 - s_);
-      s_ = s + 1;
-      while (++d_ < d) { *ch++ = 0; }
-    }
-  }
-  if (cp) { *cp = grn_str_null; }
-  *d = '\0';
-  nstr->length = length;
-  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
-  return NULL;
-}
-
-/* use cp1252 as latin1 */
-inline static grn_obj *
-latin1_normalize(grn_ctx *ctx, int nargs, grn_obj **args,
-                 grn_user_data *user_data)
-{
-  grn_normalized_text *nstr = (grn_normalized_text *)args[0];
-  int16_t *ch;
-  const unsigned char *s, *s_, *e;
-  unsigned char *d, *d0, *d_;
-  uint_least8_t *cp, *ctypes, ctype;
-  size_t size = strlen(nstr->orig), length = 0;
-  int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
-  if (!(nstr->norm = GRN_MALLOC(size + 1))) {
-    ERR(GRN_NO_MEMORY_AVAILABLE,
-        "[normalizer][latin1] failed to allocate normalized text space");
-    return NULL;
-  }
-  d0 = (unsigned char *) nstr->norm;
-  if (nstr->flags & GRN_STR_WITH_CHECKS) {
-    if (!(nstr->checks = GRN_MALLOC(size * sizeof(int16_t) + 1))) {
-      GRN_FREE(nstr->norm);
-      nstr->norm = NULL;
-      ERR(GRN_NO_MEMORY_AVAILABLE,
-          "[normalizer][latin1] failed to allocate checks space");
-      return NULL;
-    }
-  }
-  ch = nstr->checks;
-  if (nstr->flags & GRN_STR_WITH_CTYPES) {
-    if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
-      GRN_FREE(nstr->checks);
-      GRN_FREE(nstr->norm);
-      nstr->checks = NULL;
-      nstr->norm = NULL;
-      ERR(GRN_NO_MEMORY_AVAILABLE,
-          "[normalizer][latin1] failed to allocate character types space");
-      return NULL;
-    }
-  }
-  cp = ctypes = nstr->ctypes;
-  e = (unsigned char *)nstr->orig + size;
-  for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
-    unsigned char c = *s;
-    switch (c >> 4) {
-    case 0 :
-    case 1 :
-      /* skip unprintable ascii */
-      if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
-      continue;
-    case 2 :
-      if (c == 0x20) {
-        if (removeblankp) {
-          if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
-          continue;
-        } else {
-          *d = ' ';
-          ctype = GRN_STR_BLANK|grn_str_symbol;
-        }
-      } else {
-        *d = c;
-        ctype = grn_str_symbol;
-      }
-      break;
-    case 3 :
-      *d = c;
-      ctype = (c <= 0x39) ? grn_str_digit : grn_str_symbol;
-      break;
-    case 4 :
-      *d = ('A' <= c) ? c + 0x20 : c;
-      ctype = (c == 0x40) ? grn_str_symbol : grn_str_alpha;
-      break;
-    case 5 :
-      *d = (c <= 'Z') ? c + 0x20 : c;
-      ctype = (c <= 0x5a) ? grn_str_alpha : grn_str_symbol;
-      break;
-    case 6 :
-      *d = c;
-      ctype = (c == 0x60) ? grn_str_symbol : grn_str_alpha;
-      break;
-    case 7 :
-      *d = c;
-      ctype = (c <= 0x7a) ? grn_str_alpha : (c == 0x7f ? grn_str_others : grn_str_symbol);
-      break;
-    case 8 :
-      if (c == 0x8a || c == 0x8c || c == 0x8e) {
-        *d = c + 0x10;
-        ctype = grn_str_alpha;
-      } else {
-        *d = c;
-        ctype = grn_str_symbol;
-      }
-      break;
-    case 9 :
-      if (c == 0x9a || c == 0x9c || c == 0x9e || c == 0x9f) {
-        *d = (c == 0x9f) ? c + 0x60 : c;
-        ctype = grn_str_alpha;
-      } else {
-        *d = c;
-        ctype = grn_str_symbol;
-      }
-      break;
-    case 0x0c :
-      *d = c + 0x20;
-      ctype = grn_str_alpha;
-      break;
-    case 0x0d :
-      *d = (c == 0xd7 || c == 0xdf) ? c : c + 0x20;
-      ctype = (c == 0xd7) ? grn_str_symbol : grn_str_alpha;
-      break;
-    case 0x0e :
-      *d = c;
-      ctype = grn_str_alpha;
-      break;
-    case 0x0f :
-      *d = c;
-      ctype = (c == 0xf7) ? grn_str_symbol : grn_str_alpha;
-      break;
-    default :
-      *d = c;
-      ctype = grn_str_others;
-      break;
-    }
-    d++;
-    length++;
-    if (cp) { *cp++ = ctype; }
-    if (ch) {
-      *ch++ = (int16_t)(s + 1 - s_);
-      s_ = s + 1;
-      while (++d_ < d) { *ch++ = 0; }
-    }
-  }
-  if (cp) { *cp = grn_str_null; }
-  *d = '\0';
-  nstr->length = length;
-  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
-  return NULL;
-}
-
-inline static grn_obj *
-koi8r_normalize(grn_ctx *ctx, int nargs, grn_obj **args,
-                grn_user_data *user_data)
-{
-  grn_normalized_text *nstr = (grn_normalized_text *)args[0];
-  int16_t *ch;
-  const unsigned char *s, *s_, *e;
-  unsigned char *d, *d0, *d_;
-  uint_least8_t *cp, *ctypes, ctype;
-  size_t size = strlen(nstr->orig), length = 0;
-  int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
-  if (!(nstr->norm = GRN_MALLOC(size + 1))) {
-    ERR(GRN_NO_MEMORY_AVAILABLE,
-        "[normalizer][koi8r] failed to allocate normalized text space");
-    return NULL;
-  }
-  d0 = (unsigned char *) nstr->norm;
-  if (nstr->flags & GRN_STR_WITH_CHECKS) {
-    if (!(nstr->checks = GRN_MALLOC(size * sizeof(int16_t) + 1))) {
-      GRN_FREE(nstr->norm);
-      nstr->norm = NULL;
-      ERR(GRN_NO_MEMORY_AVAILABLE,
-          "[normalizer][koi8r] failed to allocate checks space");
-      return NULL;
-    }
-  }
-  ch = nstr->checks;
-  if (nstr->flags & GRN_STR_WITH_CTYPES) {
-    if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
-      GRN_FREE(nstr->checks);
-      GRN_FREE(nstr->norm);
-      nstr->checks = NULL;
-      nstr->norm = NULL;
-      ERR(GRN_NO_MEMORY_AVAILABLE,
-          "[normalizer][koi8r] failed to allocate character types space");
-      return NULL;
-    }
-  }
-  cp = ctypes = nstr->ctypes;
-  e = (unsigned char *)nstr->orig + size;
-  for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
-    unsigned char c = *s;
-    switch (c >> 4) {
-    case 0 :
-    case 1 :
-      /* skip unprintable ascii */
-      if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
-      continue;
-    case 2 :
-      if (c == 0x20) {
-        if (removeblankp) {
-          if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
-          continue;
-        } else {
-          *d = ' ';
-          ctype = GRN_STR_BLANK|grn_str_symbol;
-        }
-      } else {
-        *d = c;
-        ctype = grn_str_symbol;
-      }
-      break;
-    case 3 :
-      *d = c;
-      ctype = (c <= 0x39) ? grn_str_digit : grn_str_symbol;
-      break;
-    case 4 :
-      *d = ('A' <= c) ? c + 0x20 : c;
-      ctype = (c == 0x40) ? grn_str_symbol : grn_str_alpha;
-      break;
-    case 5 :
-      *d = (c <= 'Z') ? c + 0x20 : c;
-      ctype = (c <= 0x5a) ? grn_str_alpha : grn_str_symbol;
-      break;
-    case 6 :
-      *d = c;
-      ctype = (c == 0x60) ? grn_str_symbol : grn_str_alpha;
-      break;
-    case 7 :
-      *d = c;
-      ctype = (c <= 0x7a) ? grn_str_alpha : (c == 0x7f ? grn_str_others : grn_str_symbol);
-      break;
-    case 0x0a :
-      *d = c;
-      ctype = (c == 0xa3) ? grn_str_alpha : grn_str_others;
-      break;
-    case 0x0b :
-      if (c == 0xb3) {
-        *d = c - 0x10;
-        ctype = grn_str_alpha;
-      } else {
-        *d = c;
-        ctype = grn_str_others;
-      }
-      break;
-    case 0x0c :
-    case 0x0d :
-      *d = c;
-      ctype = grn_str_alpha;
-      break;
-    case 0x0e :
-    case 0x0f :
-      *d = c - 0x20;
-      ctype = grn_str_alpha;
-      break;
-    default :
-      *d = c;
-      ctype = grn_str_others;
-      break;
-    }
-    d++;
-    length++;
-    if (cp) { *cp++ = ctype; }
-    if (ch) {
-      *ch++ = (int16_t)(s + 1 - s_);
-      s_ = s + 1;
-      while (++d_ < d) { *ch++ = 0; }
-    }
-  }
-  if (cp) { *cp = grn_str_null; }
-  *d = '\0';
-  nstr->length = length;
-  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
-  return NULL;
-}
-
-#define DEF_NORMALIZERIZER(name, normalize)\
-  (grn_proc_create(ctx, (name), (sizeof(name) - 1),\
-                   GRN_PROC_NORMALIZER, NULL, (normalize), NULL, 0, NULL))
-
-grn_rc
-grn_db_init_builtin_normalizers(grn_ctx *ctx)
-{
-  grn_obj *obj;
-
-  obj = DEF_NORMALIZERIZER("NormalizerASCII", ascii_normalize);
-  if (!obj || ((grn_db_obj *)obj)->id != GRN_DB_NORMALIZER_ASCII) {
-    return GRN_FILE_CORRUPT;
-  }
-  obj = DEF_NORMALIZERIZER("NormalizerUTF8NFKC", utf8_nfkc_normalize);
-  if (!obj || ((grn_db_obj *)obj)->id != GRN_DB_NORMALIZER_UTF8_NFKC) {
-    return GRN_FILE_CORRUPT;
-  }
-  obj = DEF_NORMALIZERIZER("NormalizerEUCJP", eucjp_normalize);
-  if (!obj || ((grn_db_obj *)obj)->id != GRN_DB_NORMALIZER_EUC_JP) {
-    return GRN_FILE_CORRUPT;
-  }
-  obj = DEF_NORMALIZERIZER("NormalizerSJIS", sjis_normalize);
-  if (!obj || ((grn_db_obj *)obj)->id != GRN_DB_NORMALIZER_SJIS) {
-    return GRN_FILE_CORRUPT;
-  }
-  obj = DEF_NORMALIZERIZER("NormalizerLATIN1", latin1_normalize);
-  if (!obj || ((grn_db_obj *)obj)->id != GRN_DB_NORMALIZER_LATIN1) {
-    return GRN_FILE_CORRUPT;
-  }
-  obj = DEF_NORMALIZERIZER("NormalizerKOI8R", koi8r_normalize);
-  if (!obj || ((grn_db_obj *)obj)->id != GRN_DB_NORMALIZER_KOI8R) {
-    return GRN_FILE_CORRUPT;
-  }
-  /* obj = DEF_NORMALIZERIZER("NormalizerUTF8UCA", utf8_uca_normalize); */
-  /* if (!obj || ((grn_db_obj *)obj)->id != GRN_DB_NORMALIZER_UTF8_UCA) { */
-  /*   return GRN_FILE_CORRUPT; */
-  /* } */
-
-  return GRN_SUCCESS;
-}

  Deleted: lib/normalizer.h (+0 -67) 100644
===================================================================
--- lib/normalizer.h    2012-02-14 14:00:31 +0900 (39c1e36)
+++ /dev/null
@@ -1,67 +0,0 @@
-/* -*- c-basic-offset: 2 -*- */
-/*
-  Copyright(C) 2012 Brazil
-
-  This library is free software; you can redistribute it and/or
-  modify it under the terms of the GNU Lesser General Public
-  License version 2.1 as published by the Free Software Foundation.
-
-  This library is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  Lesser General Public License for more details.
-
-  You should have received a copy of the GNU Lesser General Public
-  License along with this library; if not, write to the Free Software
-  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
-*/
-#ifndef GRN_NORMALIZER_H
-#define GRN_NORMALIZER_H
-
-#ifndef GROONGA_IN_H
-#include "groonga_in.h"
-#endif /* GROONGA_IN_H */
-
-#ifndef GRN_CTX_H
-#include "ctx.h"
-#endif /* GRN_CTX_H */
-
-#ifndef GRN_DB_H
-#include "db.h"
-#endif /* GRN_DB_H */
-
-#ifndef GRN_STR_H
-#include "str.h"
-#endif /* GRN_STR_H */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct {
-  grn_obj_header header;
-  const char *orig;
-  unsigned int orig_blen;
-  char *norm;
-  unsigned int norm_blen;
-  unsigned int length;
-  short *checks;
-  unsigned char *ctypes;
-  grn_encoding encoding;
-  int flags;
-} grn_normalized_text;
-
-grn_rc grn_normalizer_init(void);
-grn_rc grn_normalizer_fin(void);
-
-grn_rc grn_normalized_text_close(grn_ctx *ctx, grn_obj *normalized_text);
-
-grn_id grn_normalizer_find(grn_ctx *ctx, grn_encoding encoding);
-
-grn_rc grn_db_init_builtin_normalizers(grn_ctx *ctx);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* GRN_NORMALIZER_H */

  Modified: lib/pat.c (+1 -17)
===================================================================
--- lib/pat.c    2012-02-14 14:00:31 +0900 (e575a3e)
+++ lib/pat.c    2012-02-14 14:00:52 +0900 (525eba0)
@@ -19,7 +19,6 @@
 #include <limits.h>
 #include "pat.h"
 #include "output.h"
-#include "normalizer.h"
 #include "util.h"
 
 #define GRN_PAT_DELETED (GRN_ID_MAX + 1)
@@ -423,14 +422,6 @@ _grn_pat_create(grn_ctx *ctx, grn_pat *pat,
   header->curr_del3 = 0;
   header->n_garbages = 0;
   header->tokenizer = GRN_ID_NIL;
-  if (header->flags & GRN_OBJ_KEY_NORMALIZE) {
-    header->flags &= ~GRN_OBJ_KEY_NORMALIZE;
-    header->normalizer = grn_normalizer_find(ctx, ctx->encoding);
-    pat->normalizer = grn_ctx_at(ctx, header->normalizer);
-  } else {
-    header->normalizer = GRN_ID_NIL;
-    pat->normalizer = NULL;
-  }
   pat->io = io;
   pat->header = header;
   pat->key_size = key_size;
@@ -527,11 +518,6 @@ grn_pat_open(grn_ctx *ctx, const char *path)
   pat->encoding = header->encoding;
   pat->obj.header.flags = header->flags;
   pat->tokenizer = grn_ctx_at(ctx, header->tokenizer);
-  if (header->flags & GRN_OBJ_KEY_NORMALIZE) {
-    header->flags &= ~GRN_OBJ_KEY_NORMALIZE;
-    header->normalizer = grn_normalizer_find(ctx, ctx->encoding);
-  }
-  pat->normalizer = grn_ctx_at(ctx, header->normalizer);
   PAT_AT(pat, 0, node0);
   if (!node0) {
     grn_io_close(ctx, io);
@@ -2285,7 +2271,7 @@ grn_pat_check(grn_ctx *ctx, grn_pat *pat)
   char buf[8];
   struct grn_pat_header *h = pat->header;
   GRN_OUTPUT_ARRAY_OPEN("RESULT", 1);
-  GRN_OUTPUT_MAP_OPEN("SUMMARY", 23);
+  GRN_OUTPUT_MAP_OPEN("SUMMARY", 22);
   GRN_OUTPUT_CSTR("flags");
   grn_itoh(h->flags, buf, 8);
   GRN_OUTPUT_STR(buf, 8);
@@ -2295,8 +2281,6 @@ grn_pat_check(grn_ctx *ctx, grn_pat *pat)
   GRN_OUTPUT_INT64(h->value_size);
   GRN_OUTPUT_CSTR("tokenizer");
   GRN_OUTPUT_INT64(h->tokenizer);
-  GRN_OUTPUT_CSTR("normalizer");
-  GRN_OUTPUT_INT64(h->normalizer);
   GRN_OUTPUT_CSTR("n_entries");
   GRN_OUTPUT_INT64(h->n_entries);
   GRN_OUTPUT_CSTR("curr_rec");

  Modified: lib/pat.h (+1 -3)
===================================================================
--- lib/pat.h    2012-02-14 14:00:31 +0900 (32acdea)
+++ lib/pat.h    2012-02-14 14:00:52 +0900 (30e484a)
@@ -38,7 +38,6 @@ struct _grn_pat {
   uint32_t key_size;
   uint32_t value_size;
   grn_obj *tokenizer;
-  grn_obj *normalizer;
   grn_id *cache;
   uint32_t cache_size;
 };
@@ -65,8 +64,7 @@ struct grn_pat_header {
   int32_t curr_del2;
   int32_t curr_del3;
   uint32_t n_garbages;
-  grn_id normalizer;
-  uint32_t reserved[1004];
+  uint32_t reserved[1005];
   grn_pat_delinfo delinfos[GRN_PAT_NDELINFOS];
   grn_id garbages[GRN_PAT_MAX_KEY_SIZE + 1];
 };

  Modified: lib/snip.c (+1 -1)
===================================================================
--- lib/snip.c    2012-02-14 14:00:31 +0900 (0f0f58d)
+++ lib/snip.c    2012-02-14 14:00:52 +0900 (cfe958a)
@@ -247,7 +247,7 @@ grn_snip_cond_close(grn_ctx *ctx, snip_cond *cond)
 
 grn_rc
 grn_snip_cond_init(grn_ctx *ctx, snip_cond *sc, const char *keyword, unsigned int keyword_len,
-                   grn_encoding enc, int flags)
+                grn_encoding enc, int flags)
 {
   size_t norm_blen;
   int f = GRN_STR_REMOVEBLANK;

  Modified: lib/str.c (+988 -32)
===================================================================
--- lib/str.c    2012-02-14 14:00:31 +0900 (2865a7b)
+++ lib/str.c    2012-02-14 14:00:52 +0900 (f6f518c)
@@ -20,14 +20,13 @@
 #include <string.h>
 #include "db.h"
 #include "str.h"
-#include "normalizer.h"
 
 #ifndef _ISOC99_SOURCE
 #define _ISOC99_SOURCE
 #endif /* _ISOC99_SOURCE */
 #include <math.h>
 
-int
+inline static int
 grn_str_charlen_utf8(grn_ctx *ctx, const unsigned char *str, const unsigned char *end)
 {
   /* MEMO: This function allows non-null-terminated string as str. */
@@ -171,6 +170,952 @@ grn_charlen(grn_ctx *ctx, const char *str, const char *end)
   return grn_charlen_(ctx, str, end, ctx->encoding);
 }
 
+static unsigned char symbol[] = {
+  ',', '.', 0, ':', ';', '?', '!', 0, 0, 0, '`', 0, '^', '~', '_', 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, '-', '-', '/', '\\', 0, 0, '|', 0, 0, 0, '\'', 0,
+  '"', '(', ')', 0, 0, '[', ']', '{', '}', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  '+', '-', 0, 0, 0, '=', 0, '<', '>', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  '$', 0, 0, '%', '#', '&', '*', '@', 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+inline static grn_rc
+normalize_euc(grn_ctx *ctx, grn_str *nstr)
+{
+  static uint16_t hankana[] = {
+    0xa1a1, 0xa1a3, 0xa1d6, 0xa1d7, 0xa1a2, 0xa1a6, 0xa5f2, 0xa5a1, 0xa5a3,
+    0xa5a5, 0xa5a7, 0xa5a9, 0xa5e3, 0xa5e5, 0xa5e7, 0xa5c3, 0xa1bc, 0xa5a2,
+    0xa5a4, 0xa5a6, 0xa5a8, 0xa5aa, 0xa5ab, 0xa5ad, 0xa5af, 0xa5b1, 0xa5b3,
+    0xa5b5, 0xa5b7, 0xa5b9, 0xa5bb, 0xa5bd, 0xa5bf, 0xa5c1, 0xa5c4, 0xa5c6,
+    0xa5c8, 0xa5ca, 0xa5cb, 0xa5cc, 0xa5cd, 0xa5ce, 0xa5cf, 0xa5d2, 0xa5d5,
+    0xa5d8, 0xa5db, 0xa5de, 0xa5df, 0xa5e0, 0xa5e1, 0xa5e2, 0xa5e4, 0xa5e6,
+    0xa5e8, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5ef, 0xa5f3, 0xa1ab,
+    0xa1eb
+  };
+  static unsigned char dakuten[] = {
+    0xf4, 0, 0, 0, 0, 0xac, 0, 0xae, 0, 0xb0, 0, 0xb2, 0, 0xb4, 0, 0xb6, 0,
+    0xb8, 0, 0xba, 0, 0xbc, 0, 0xbe, 0, 0xc0, 0, 0xc2, 0, 0, 0xc5, 0, 0xc7,
+    0, 0xc9, 0, 0, 0, 0, 0, 0, 0xd0, 0, 0, 0xd3, 0, 0, 0xd6, 0, 0, 0xd9, 0,
+    0, 0xdc
+  };
+  static unsigned char handaku[] = {
+    0xd1, 0, 0, 0xd4, 0, 0, 0xd7, 0, 0, 0xda, 0, 0, 0xdd
+  };
+  int16_t *ch;
+  const unsigned char *s, *s_, *e;
+  unsigned char *d, *d0, *d_, b;
+  uint_least8_t *cp, *ctypes, ctype;
+  size_t size = nstr->orig_blen, length = 0;
+  int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
+  if (!(nstr->norm = GRN_MALLOC(size * 2 + 1))) {
+    return GRN_NO_MEMORY_AVAILABLE;
+  }
+  d0 = (unsigned char *) nstr->norm;
+  if (nstr->flags & GRN_STR_WITH_CHECKS) {
+    if (!(nstr->checks = GRN_MALLOC(size * 2 * sizeof(int16_t) + 1))) {
+      GRN_FREE(nstr->norm);
+      nstr->norm = NULL;
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+  }
+  ch = nstr->checks;
+  if (nstr->flags & GRN_STR_WITH_CTYPES) {
+    if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
+      GRN_FREE(nstr->checks);
+      GRN_FREE(nstr->norm);
+      nstr->checks = NULL;
+      nstr->norm = NULL;
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+  }
+  cp = ctypes = nstr->ctypes;
+  e = (unsigned char *)nstr->orig + size;
+  for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
+    if ((*s & 0x80)) {
+      if (((s + 1) < e) && (*(s + 1) & 0x80)) {
+        unsigned char c1 = *s++, c2 = *s, c3 = 0;
+        switch (c1 >> 4) {
+        case 0x08 :
+          if (c1 == 0x8e && 0xa0 <= c2 && c2 <= 0xdf) {
+            uint16_t c = hankana[c2 - 0xa0];
+            switch (c) {
+            case 0xa1ab :
+              if (d > d0 + 1 && d[-2] == 0xa5
+                  && 0xa6 <= d[-1] && d[-1] <= 0xdb && (b = dakuten[d[-1] - 0xa6])) {
+                *(d - 1) = b;
+                if (ch) { ch[-1] += 2; s_ += 2; }
+                continue;
+              } else {
+                *d++ = c >> 8; *d = c & 0xff;
+              }
+              break;
+            case 0xa1eb :
+              if (d > d0 + 1 && d[-2] == 0xa5
+                  && 0xcf <= d[-1] && d[-1] <= 0xdb && (b = handaku[d[-1] - 0xcf])) {
+                *(d - 1) = b;
+                if (ch) { ch[-1] += 2; s_ += 2; }
+                continue;
+              } else {
+                *d++ = c >> 8; *d = c & 0xff;
+              }
+              break;
+            default :
+              *d++ = c >> 8; *d = c & 0xff;
+              break;
+            }
+            ctype = grn_str_katakana;
+          } else {
+            *d++ = c1; *d = c2;
+            ctype = grn_str_others;
+          }
+          break;
+        case 0x09 :
+          *d++ = c1; *d = c2;
+          ctype = grn_str_others;
+          break;
+        case 0x0a :
+          switch (c1 & 0x0f) {
+          case 1 :
+            switch (c2) {
+            case 0xbc :
+              *d++ = c1; *d = c2;
+              ctype = grn_str_katakana;
+              break;
+            case 0xb9 :
+              *d++ = c1; *d = c2;
+              ctype = grn_str_kanji;
+              break;
+            case 0xa1 :
+              if (removeblankp) {
+                if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+                continue;
+              } else {
+                *d = ' ';
+                ctype = GRN_STR_BLANK|grn_str_symbol;
+              }
+              break;
+            default :
+              if (c2 >= 0xa4 && (c3 = symbol[c2 - 0xa4])) {
+                *d = c3;
+                ctype = grn_str_symbol;
+              } else {
+                *d++ = c1; *d = c2;
+                ctype = grn_str_others;
+              }
+              break;
+            }
+            break;
+          case 2 :
+            *d++ = c1; *d = c2;
+            ctype = grn_str_symbol;
+            break;
+          case 3 :
+            c3 = c2 - 0x80;
+            if ('a' <= c3 && c3 <= 'z') {
+              ctype = grn_str_alpha;
+              *d = c3;
+            } else if ('A' <= c3 && c3 <= 'Z') {
+              ctype = grn_str_alpha;
+              *d = c3 + 0x20;
+            } else if ('0' <= c3 && c3 <= '9') {
+              ctype = grn_str_digit;
+              *d = c3;
+            } else {
+              ctype = grn_str_others;
+              *d++ = c1; *d = c2;
+            }
+            break;
+          case 4 :
+            *d++ = c1; *d = c2;
+            ctype = grn_str_hiragana;
+            break;
+          case 5 :
+            *d++ = c1; *d = c2;
+            ctype = grn_str_katakana;
+            break;
+          case 6 :
+          case 7 :
+          case 8 :
+            *d++ = c1; *d = c2;
+            ctype = grn_str_symbol;
+            break;
+          default :
+            *d++ = c1; *d = c2;
+            ctype = grn_str_others;
+            break;
+          }
+          break;
+        default :
+          *d++ = c1; *d = c2;
+          ctype = grn_str_kanji;
+          break;
+        }
+      } else {
+        /* skip invalid character */
+        continue;
+      }
+    } else {
+      unsigned char c = *s;
+      switch (c >> 4) {
+      case 0 :
+      case 1 :
+        /* skip unprintable ascii */
+        if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+        continue;
+      case 2 :
+        if (c == 0x20) {
+          if (removeblankp) {
+            if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+            continue;
+          } else {
+            *d = ' ';
+            ctype = GRN_STR_BLANK|grn_str_symbol;
+          }
+        } else {
+          *d = c;
+          ctype = grn_str_symbol;
+        }
+        break;
+      case 3 :
+        *d = c;
+        ctype = (c <= 0x39) ? grn_str_digit : grn_str_symbol;
+        break;
+      case 4 :
+        *d = ('A' <= c) ? c + 0x20 : c;
+        ctype = (c == 0x40) ? grn_str_symbol : grn_str_alpha;
+        break;
+      case 5 :
+        *d = (c <= 'Z') ? c + 0x20 : c;
+        ctype = (c <= 0x5a) ? grn_str_alpha : grn_str_symbol;
+        break;
+      case 6 :
+        *d = c;
+        ctype = (c == 0x60) ? grn_str_symbol : grn_str_alpha;
+        break;
+      case 7 :
+        *d = c;
+        ctype = (c <= 0x7a) ? grn_str_alpha : (c == 0x7f ? grn_str_others : grn_str_symbol);
+        break;
+      default :
+        *d = c;
+        ctype = grn_str_others;
+        break;
+      }
+    }
+    d++;
+    length++;
+    if (cp) { *cp++ = ctype; }
+    if (ch) {
+      *ch++ = (int16_t)(s + 1 - s_);
+      s_ = s + 1;
+      while (++d_ < d) { *ch++ = 0; }
+    }
+  }
+  if (cp) { *cp = grn_str_null; }
+  *d = '\0';
+  nstr->length = length;
+  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
+  return GRN_SUCCESS;
+}
+
+#ifndef NO_NFKC
+uint_least8_t grn_nfkc_ctype(const unsigned char *str);
+const char *grn_nfkc_map1(const unsigned char *str);
+const char *grn_nfkc_map2(const unsigned char *prefix, const unsigned char *suffix);
+
+inline static grn_rc
+normalize_utf8(grn_ctx *ctx, grn_str *nstr)
+{
+  int16_t *ch;
+  const unsigned char *s, *s_, *s__ = NULL, *p, *p2, *pe, *e;
+  unsigned char *d, *d_, *de;
+  uint_least8_t *cp;
+  size_t length = 0, ls, lp, size = nstr->orig_blen, ds = size * 3;
+  int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
+  if (!(nstr->norm = GRN_MALLOC(ds + 1))) {
+    return GRN_NO_MEMORY_AVAILABLE;
+  }
+  if (nstr->flags & GRN_STR_WITH_CHECKS) {
+    if (!(nstr->checks = GRN_MALLOC(ds * sizeof(int16_t) + 1))) {
+      GRN_FREE(nstr->norm); nstr->norm = NULL;
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+  }
+  ch = nstr->checks;
+  if (nstr->flags & GRN_STR_WITH_CTYPES) {
+    if (!(nstr->ctypes = GRN_MALLOC(ds + 1))) {
+      if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
+      GRN_FREE(nstr->norm); nstr->norm = NULL;
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+  }
+  cp = nstr->ctypes;
+  d = (unsigned char *)nstr->norm;
+  de = d + ds;
+  d_ = NULL;
+  e = (unsigned char *)nstr->orig + size;
+  for (s = s_ = (unsigned char *)nstr->orig; ; s += ls) {
+    if (!(ls = grn_str_charlen_utf8(ctx, s, e))) {
+      break;
+    }
+    if ((p = (unsigned char *)grn_nfkc_map1(s))) {
+      pe = p + strlen((char *)p);
+    } else {
+      p = s;
+      pe = p + ls;
+    }
+    if (d_ && (p2 = (unsigned char *)grn_nfkc_map2(d_, p))) {
+      p = p2;
+      pe = p + strlen((char *)p);
+      if (cp) { cp--; }
+      if (ch) {
+        ch -= (d - d_);
+        s_ = s__;
+      }
+      d = d_;
+      length--;
+    }
+    for (; ; p += lp) {
+      if (!(lp = grn_str_charlen_utf8(ctx, p, pe))) {
+        break;
+      }
+      if ((*p == ' ' && removeblankp) || *p < 0x20  /* skip unprintable ascii */ ) {
+        if (cp > nstr->ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+      } else {
+        if (de <= d + lp) {
+          unsigned char *norm;
+          ds += (ds >> 1) + lp;
+          if (!(norm = GRN_REALLOC(nstr->norm, ds + 1))) {
+            if (nstr->ctypes) { GRN_FREE(nstr->ctypes); nstr->ctypes = NULL; }
+            if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
+            GRN_FREE(nstr->norm); nstr->norm = NULL;
+            return GRN_NO_MEMORY_AVAILABLE;
+          }
+          de = norm + ds;
+          d = norm + (d - (unsigned char *)nstr->norm);
+          nstr->norm = norm;
+          if (ch) {
+            int16_t *checks;
+            if (!(checks = GRN_REALLOC(nstr->checks, ds * sizeof(int16_t)+ 1))) {
+              if (nstr->ctypes) { GRN_FREE(nstr->ctypes); nstr->ctypes = NULL; }
+              GRN_FREE(nstr->checks); nstr->checks = NULL;
+              GRN_FREE(nstr->norm); nstr->norm = NULL;
+              return GRN_NO_MEMORY_AVAILABLE;
+            }
+            ch = checks + (ch - nstr->checks);
+            nstr->checks = checks;
+          }
+          if (cp) {
+            uint_least8_t *ctypes;
+            if (!(ctypes = GRN_REALLOC(nstr->ctypes, ds + 1))) {
+              GRN_FREE(nstr->ctypes); nstr->ctypes = NULL;
+              if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
+              GRN_FREE(nstr->norm); nstr->norm = NULL;
+              return GRN_NO_MEMORY_AVAILABLE;
+            }
+            cp = ctypes + (cp - nstr->ctypes);
+            nstr->ctypes = ctypes;
+          }
+        }
+        memcpy(d, p, lp);
+        d_ = d;
+        d += lp;
+        length++;
+        if (cp) { *cp++ = grn_nfkc_ctype(p); }
+        if (ch) {
+          size_t i;
+          if (s_ == s + ls) {
+            *ch++ = -1;
+          } else {
+            *ch++ = (int16_t)(s + ls - s_);
+            s__ = s_;
+            s_ = s + ls;
+          }
+          for (i = lp; i > 1; i--) { *ch++ = 0; }
+        }
+      }
+    }
+  }
+  if (cp) { *cp = grn_str_null; }
+  *d = '\0';
+  nstr->length = length;
+  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
+  return GRN_SUCCESS;
+}
+#endif /* NO_NFKC */
+
+inline static grn_rc
+normalize_sjis(grn_ctx *ctx, grn_str *nstr)
+{
+  static uint16_t hankana[] = {
+    0x8140, 0x8142, 0x8175, 0x8176, 0x8141, 0x8145, 0x8392, 0x8340, 0x8342,
+    0x8344, 0x8346, 0x8348, 0x8383, 0x8385, 0x8387, 0x8362, 0x815b, 0x8341,
+    0x8343, 0x8345, 0x8347, 0x8349, 0x834a, 0x834c, 0x834e, 0x8350, 0x8352,
+    0x8354, 0x8356, 0x8358, 0x835a, 0x835c, 0x835e, 0x8360, 0x8363, 0x8365,
+    0x8367, 0x8369, 0x836a, 0x836b, 0x836c, 0x836d, 0x836e, 0x8371, 0x8374,
+    0x8377, 0x837a, 0x837d, 0x837e, 0x8380, 0x8381, 0x8382, 0x8384, 0x8386,
+    0x8388, 0x8389, 0x838a, 0x838b, 0x838c, 0x838d, 0x838f, 0x8393, 0x814a,
+    0x814b
+  };
+  static unsigned char dakuten[] = {
+    0x94, 0, 0, 0, 0, 0x4b, 0, 0x4d, 0, 0x4f, 0, 0x51, 0, 0x53, 0, 0x55, 0,
+    0x57, 0, 0x59, 0, 0x5b, 0, 0x5d, 0, 0x5f, 0, 0x61, 0, 0, 0x64, 0, 0x66,
+    0, 0x68, 0, 0, 0, 0, 0, 0, 0x6f, 0, 0, 0x72, 0, 0, 0x75, 0, 0, 0x78, 0,
+    0, 0x7b
+  };
+  static unsigned char handaku[] = {
+    0x70, 0, 0, 0x73, 0, 0, 0x76, 0, 0, 0x79, 0, 0, 0x7c
+  };
+  int16_t *ch;
+  const unsigned char *s, *s_;
+  unsigned char *d, *d0, *d_, b, *e;
+  uint_least8_t *cp, *ctypes, ctype;
+  size_t size = nstr->orig_blen, length = 0;
+  int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
+  if (!(nstr->norm = GRN_MALLOC(size * 2 + 1))) {
+    return GRN_NO_MEMORY_AVAILABLE;
+  }
+  d0 = (unsigned char *) nstr->norm;
+  if (nstr->flags & GRN_STR_WITH_CHECKS) {
+    if (!(nstr->checks = GRN_MALLOC(size * 2 * sizeof(int16_t) + 1))) {
+      GRN_FREE(nstr->norm);
+      nstr->norm = NULL;
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+  }
+  ch = nstr->checks;
+  if (nstr->flags & GRN_STR_WITH_CTYPES) {
+    if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
+      GRN_FREE(nstr->checks);
+      GRN_FREE(nstr->norm);
+      nstr->checks = NULL;
+      nstr->norm = NULL;
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+  }
+  cp = ctypes = nstr->ctypes;
+  e = (unsigned char *)nstr->orig + size;
+  for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
+    if ((*s & 0x80)) {
+      if (0xa0 <= *s && *s <= 0xdf) {
+        uint16_t c = hankana[*s - 0xa0];
+        switch (c) {
+        case 0x814a :
+          if (d > d0 + 1 && d[-2] == 0x83
+              && 0x45 <= d[-1] && d[-1] <= 0x7a && (b = dakuten[d[-1] - 0x45])) {
+            *(d - 1) = b;
+            if (ch) { ch[-1]++; s_++; }
+            continue;
+          } else {
+            *d++ = c >> 8; *d = c & 0xff;
+          }
+          break;
+        case 0x814b :
+          if (d > d0 + 1 && d[-2] == 0x83
+              && 0x6e <= d[-1] && d[-1] <= 0x7a && (b = handaku[d[-1] - 0x6e])) {
+            *(d - 1) = b;
+            if (ch) { ch[-1]++; s_++; }
+            continue;
+          } else {
+            *d++ = c >> 8; *d = c & 0xff;
+          }
+          break;
+        default :
+          *d++ = c >> 8; *d = c & 0xff;
+          break;
+        }
+        ctype = grn_str_katakana;
+      } else {
+        if ((s + 1) < e && 0x40 <= *(s + 1) && *(s + 1) <= 0xfc) {
+          unsigned char c1 = *s++, c2 = *s, c3 = 0;
+          if (0x81 <= c1 && c1 <= 0x87) {
+            switch (c1 & 0x0f) {
+            case 1 :
+              switch (c2) {
+              case 0x5b :
+                *d++ = c1; *d = c2;
+                ctype = grn_str_katakana;
+                break;
+              case 0x58 :
+                *d++ = c1; *d = c2;
+                ctype = grn_str_kanji;
+                break;
+              case 0x40 :
+                if (removeblankp) {
+                  if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+                  continue;
+                } else {
+                  *d = ' ';
+                  ctype = GRN_STR_BLANK|grn_str_symbol;
+                }
+                break;
+              default :
+                if (0x43 <= c2 && c2 <= 0x7e && (c3 = symbol[c2 - 0x43])) {
+                  *d = c3;
+                  ctype = grn_str_symbol;
+                } else if (0x7f <= c2 && c2 <= 0x97 && (c3 = symbol[c2 - 0x44])) {
+                  *d = c3;
+                  ctype = grn_str_symbol;
+                } else {
+                  *d++ = c1; *d = c2;
+                  ctype = grn_str_others;
+                }
+                break;
+              }
+              break;
+            case 2 :
+              c3 = c2 - 0x1f;
+              if (0x4f <= c2 && c2 <= 0x58) {
+                ctype = grn_str_digit;
+                *d = c2 - 0x1f;
+              } else if (0x60 <= c2 && c2 <= 0x79) {
+                ctype = grn_str_alpha;
+                *d = c2 + 0x01;
+              } else if (0x81 <= c2 && c2 <= 0x9a) {
+                ctype = grn_str_alpha;
+                *d = c2 - 0x20;
+              } else if (0x9f <= c2 && c2 <= 0xf1) {
+                *d++ = c1; *d = c2;
+                ctype = grn_str_hiragana;
+              } else {
+                *d++ = c1; *d = c2;
+                ctype = grn_str_others;
+              }
+              break;
+            case 3 :
+              if (0x40 <= c2 && c2 <= 0x96) {
+                *d++ = c1; *d = c2;
+                ctype = grn_str_katakana;
+              } else {
+                *d++ = c1; *d = c2;
+                ctype = grn_str_symbol;
+              }
+              break;
+            case 4 :
+            case 7 :
+              *d++ = c1; *d = c2;
+              ctype = grn_str_symbol;
+              break;
+            default :
+              *d++ = c1; *d = c2;
+              ctype = grn_str_others;
+              break;
+            }
+          } else {
+            *d++ = c1; *d = c2;
+            ctype = grn_str_kanji;
+          }
+        } else {
+          /* skip invalid character */
+          continue;
+        }
+      }
+    } else {
+      unsigned char c = *s;
+      switch (c >> 4) {
+      case 0 :
+      case 1 :
+        /* skip unprintable ascii */
+        if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+        continue;
+      case 2 :
+        if (c == 0x20) {
+          if (removeblankp) {
+            if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+            continue;
+          } else {
+            *d = ' ';
+            ctype = GRN_STR_BLANK|grn_str_symbol;
+          }
+        } else {
+          *d = c;
+          ctype = grn_str_symbol;
+        }
+        break;
+      case 3 :
+        *d = c;
+        ctype = (c <= 0x39) ? grn_str_digit : grn_str_symbol;
+        break;
+      case 4 :
+        *d = ('A' <= c) ? c + 0x20 : c;
+        ctype = (c == 0x40) ? grn_str_symbol : grn_str_alpha;
+        break;
+      case 5 :
+        *d = (c <= 'Z') ? c + 0x20 : c;
+        ctype = (c <= 0x5a) ? grn_str_alpha : grn_str_symbol;
+        break;
+      case 6 :
+        *d = c;
+        ctype = (c == 0x60) ? grn_str_symbol : grn_str_alpha;
+        break;
+      case 7 :
+        *d = c;
+        ctype = (c <= 0x7a) ? grn_str_alpha : (c == 0x7f ? grn_str_others : grn_str_symbol);
+        break;
+      default :
+        *d = c;
+        ctype = grn_str_others;
+        break;
+      }
+    }
+    d++;
+    length++;
+    if (cp) { *cp++ = ctype; }
+    if (ch) {
+      *ch++ = (int16_t)(s + 1 - s_);
+      s_ = s + 1;
+      while (++d_ < d) { *ch++ = 0; }
+    }
+  }
+  if (cp) { *cp = grn_str_null; }
+  *d = '\0';
+  nstr->length = length;
+  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
+  return GRN_SUCCESS;
+}
+
+inline static grn_rc
+normalize_none(grn_ctx *ctx, grn_str *nstr)
+{
+  int16_t *ch;
+  const unsigned char *s, *s_, *e;
+  unsigned char *d, *d0, *d_;
+  uint_least8_t *cp, *ctypes, ctype;
+  size_t size = nstr->orig_blen, length = 0;
+  int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
+  if (!(nstr->norm = GRN_MALLOC(size + 1))) {
+    return GRN_NO_MEMORY_AVAILABLE;
+  }
+  d0 = (unsigned char *) nstr->norm;
+  if (nstr->flags & GRN_STR_WITH_CHECKS) {
+    if (!(nstr->checks = GRN_MALLOC(size * sizeof(int16_t) + 1))) {
+      GRN_FREE(nstr->norm);
+      nstr->norm = NULL;
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+  }
+  ch = nstr->checks;
+  if (nstr->flags & GRN_STR_WITH_CTYPES) {
+    if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
+      GRN_FREE(nstr->checks);
+      GRN_FREE(nstr->norm);
+      nstr->checks = NULL;
+      nstr->norm = NULL;
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+  }
+  cp = ctypes = nstr->ctypes;
+  e = (unsigned char *)nstr->orig + size;
+  for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
+    unsigned char c = *s;
+    switch (c >> 4) {
+    case 0 :
+    case 1 :
+      /* skip unprintable ascii */
+      if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+      continue;
+    case 2 :
+      if (c == 0x20) {
+        if (removeblankp) {
+          if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+          continue;
+        } else {
+          *d = ' ';
+          ctype = GRN_STR_BLANK|grn_str_symbol;
+        }
+      } else {
+        *d = c;
+        ctype = grn_str_symbol;
+      }
+      break;
+    case 3 :
+      *d = c;
+      ctype = (c <= 0x39) ? grn_str_digit : grn_str_symbol;
+      break;
+    case 4 :
+      *d = ('A' <= c) ? c + 0x20 : c;
+      ctype = (c == 0x40) ? grn_str_symbol : grn_str_alpha;
+      break;
+    case 5 :
+      *d = (c <= 'Z') ? c + 0x20 : c;
+      ctype = (c <= 0x5a) ? grn_str_alpha : grn_str_symbol;
+      break;
+    case 6 :
+      *d = c;
+      ctype = (c == 0x60) ? grn_str_symbol : grn_str_alpha;
+      break;
+    case 7 :
+      *d = c;
+      ctype = (c <= 0x7a) ? grn_str_alpha : (c == 0x7f ? grn_str_others : grn_str_symbol);
+      break;
+    default :
+      *d = c;
+      ctype = grn_str_others;
+      break;
+    }
+    d++;
+    length++;
+    if (cp) { *cp++ = ctype; }
+    if (ch) {
+      *ch++ = (int16_t)(s + 1 - s_);
+      s_ = s + 1;
+      while (++d_ < d) { *ch++ = 0; }
+    }
+  }
+  if (cp) { *cp = grn_str_null; }
+  *d = '\0';
+  nstr->length = length;
+  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
+  return GRN_SUCCESS;
+}
+
+/* use cp1252 as latin1 */
+inline static grn_rc
+normalize_latin1(grn_ctx *ctx, grn_str *nstr)
+{
+  int16_t *ch;
+  const unsigned char *s, *s_, *e;
+  unsigned char *d, *d0, *d_;
+  uint_least8_t *cp, *ctypes, ctype;
+  size_t size = strlen(nstr->orig), length = 0;
+  int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
+  if (!(nstr->norm = GRN_MALLOC(size + 1))) {
+    return GRN_NO_MEMORY_AVAILABLE;
+  }
+  d0 = (unsigned char *) nstr->norm;
+  if (nstr->flags & GRN_STR_WITH_CHECKS) {
+    if (!(nstr->checks = GRN_MALLOC(size * sizeof(int16_t) + 1))) {
+      GRN_FREE(nstr->norm);
+      nstr->norm = NULL;
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+  }
+  ch = nstr->checks;
+  if (nstr->flags & GRN_STR_WITH_CTYPES) {
+    if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
+      GRN_FREE(nstr->checks);
+      GRN_FREE(nstr->norm);
+      nstr->checks = NULL;
+      nstr->norm = NULL;
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+  }
+  cp = ctypes = nstr->ctypes;
+  e = (unsigned char *)nstr->orig + size;
+  for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
+    unsigned char c = *s;
+    switch (c >> 4) {
+    case 0 :
+    case 1 :
+      /* skip unprintable ascii */
+      if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+      continue;
+    case 2 :
+      if (c == 0x20) {
+        if (removeblankp) {
+          if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+          continue;
+        } else {
+          *d = ' ';
+          ctype = GRN_STR_BLANK|grn_str_symbol;
+        }
+      } else {
+        *d = c;
+        ctype = grn_str_symbol;
+      }
+      break;
+    case 3 :
+      *d = c;
+      ctype = (c <= 0x39) ? grn_str_digit : grn_str_symbol;
+      break;
+    case 4 :
+      *d = ('A' <= c) ? c + 0x20 : c;
+      ctype = (c == 0x40) ? grn_str_symbol : grn_str_alpha;
+      break;
+    case 5 :
+      *d = (c <= 'Z') ? c + 0x20 : c;
+      ctype = (c <= 0x5a) ? grn_str_alpha : grn_str_symbol;
+      break;
+    case 6 :
+      *d = c;
+      ctype = (c == 0x60) ? grn_str_symbol : grn_str_alpha;
+      break;
+    case 7 :
+      *d = c;
+      ctype = (c <= 0x7a) ? grn_str_alpha : (c == 0x7f ? grn_str_others : grn_str_symbol);
+      break;
+    case 8 :
+      if (c == 0x8a || c == 0x8c || c == 0x8e) {
+        *d = c + 0x10;
+        ctype = grn_str_alpha;
+      } else {
+        *d = c;
+        ctype = grn_str_symbol;
+      }
+      break;
+    case 9 :
+      if (c == 0x9a || c == 0x9c || c == 0x9e || c == 0x9f) {
+        *d = (c == 0x9f) ? c + 0x60 : c;
+        ctype = grn_str_alpha;
+      } else {
+        *d = c;
+        ctype = grn_str_symbol;
+      }
+      break;
+    case 0x0c :
+      *d = c + 0x20;
+      ctype = grn_str_alpha;
+      break;
+    case 0x0d :
+      *d = (c == 0xd7 || c == 0xdf) ? c : c + 0x20;
+      ctype = (c == 0xd7) ? grn_str_symbol : grn_str_alpha;
+      break;
+    case 0x0e :
+      *d = c;
+      ctype = grn_str_alpha;
+      break;
+    case 0x0f :
+      *d = c;
+      ctype = (c == 0xf7) ? grn_str_symbol : grn_str_alpha;
+      break;
+    default :
+      *d = c;
+      ctype = grn_str_others;
+      break;
+    }
+    d++;
+    length++;
+    if (cp) { *cp++ = ctype; }
+    if (ch) {
+      *ch++ = (int16_t)(s + 1 - s_);
+      s_ = s + 1;
+      while (++d_ < d) { *ch++ = 0; }
+    }
+  }
+  if (cp) { *cp = grn_str_null; }
+  *d = '\0';
+  nstr->length = length;
+  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
+  return GRN_SUCCESS;
+}
+
+inline static grn_rc
+normalize_koi8r(grn_ctx *ctx, grn_str *nstr)
+{
+  int16_t *ch;
+  const unsigned char *s, *s_, *e;
+  unsigned char *d, *d0, *d_;
+  uint_least8_t *cp, *ctypes, ctype;
+  size_t size = strlen(nstr->orig), length = 0;
+  int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
+  if (!(nstr->norm = GRN_MALLOC(size + 1))) {
+    return GRN_NO_MEMORY_AVAILABLE;
+  }
+  d0 = (unsigned char *) nstr->norm;
+  if (nstr->flags & GRN_STR_WITH_CHECKS) {
+    if (!(nstr->checks = GRN_MALLOC(size * sizeof(int16_t) + 1))) {
+      GRN_FREE(nstr->norm);
+      nstr->norm = NULL;
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+  }
+  ch = nstr->checks;
+  if (nstr->flags & GRN_STR_WITH_CTYPES) {
+    if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
+      GRN_FREE(nstr->checks);
+      GRN_FREE(nstr->norm);
+      nstr->checks = NULL;
+      nstr->norm = NULL;
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+  }
+  cp = ctypes = nstr->ctypes;
+  e = (unsigned char *)nstr->orig + size;
+  for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
+    unsigned char c = *s;
+    switch (c >> 4) {
+    case 0 :
+    case 1 :
+      /* skip unprintable ascii */
+      if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+      continue;
+    case 2 :
+      if (c == 0x20) {
+        if (removeblankp) {
+          if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+          continue;
+        } else {
+          *d = ' ';
+          ctype = GRN_STR_BLANK|grn_str_symbol;
+        }
+      } else {
+        *d = c;
+        ctype = grn_str_symbol;
+      }
+      break;
+    case 3 :
+      *d = c;
+      ctype = (c <= 0x39) ? grn_str_digit : grn_str_symbol;
+      break;
+    case 4 :
+      *d = ('A' <= c) ? c + 0x20 : c;
+      ctype = (c == 0x40) ? grn_str_symbol : grn_str_alpha;
+      break;
+    case 5 :
+      *d = (c <= 'Z') ? c + 0x20 : c;
+      ctype = (c <= 0x5a) ? grn_str_alpha : grn_str_symbol;
+      break;
+    case 6 :
+      *d = c;
+      ctype = (c == 0x60) ? grn_str_symbol : grn_str_alpha;
+      break;
+    case 7 :
+      *d = c;
+      ctype = (c <= 0x7a) ? grn_str_alpha : (c == 0x7f ? grn_str_others : grn_str_symbol);
+      break;
+    case 0x0a :
+      *d = c;
+      ctype = (c == 0xa3) ? grn_str_alpha : grn_str_others;
+      break;
+    case 0x0b :
+      if (c == 0xb3) {
+        *d = c - 0x10;
+        ctype = grn_str_alpha;
+      } else {
+        *d = c;
+        ctype = grn_str_others;
+      }
+      break;
+    case 0x0c :
+    case 0x0d :
+      *d = c;
+      ctype = grn_str_alpha;
+      break;
+    case 0x0e :
+    case 0x0f :
+      *d = c - 0x20;
+      ctype = grn_str_alpha;
+      break;
+    default :
+      *d = c;
+      ctype = grn_str_others;
+      break;
+    }
+    d++;
+    length++;
+    if (cp) { *cp++ = ctype; }
+    if (ch) {
+      *ch++ = (int16_t)(s + 1 - s_);
+      s_ = s + 1;
+      while (++d_ < d) { *ch++ = 0; }
+    }
+  }
+  if (cp) { *cp = grn_str_null; }
+  *d = '\0';
+  nstr->length = length;
+  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
+  return GRN_SUCCESS;
+}
+
 static grn_str *
 grn_fakenstr_open(grn_ctx *ctx, const char *str, size_t str_len, grn_encoding encoding, int flags)
 {
@@ -257,42 +1202,53 @@ grn_fakenstr_open(grn_ctx *ctx, const char *str, size_t str_len, grn_encoding en
 grn_str *
 grn_str_open_(grn_ctx *ctx, const char *str, unsigned int str_len, int flags, grn_encoding encoding)
 {
-  grn_str *nstr = NULL;
-  grn_id normalizer_id;
-  grn_obj *normalizer;
-  grn_obj *normalized_text_obj;
+  grn_rc rc;
+  grn_str *nstr;
   if (!str || !str_len) { return NULL; }
 
   if (!(flags & GRN_STR_NORMALIZE)) {
     return grn_fakenstr_open(ctx, str, str_len, encoding, flags);
   }
 
-  normalizer_id = grn_normalizer_find(ctx, encoding);
-  normalizer = grn_ctx_at(ctx, normalizer_id);
-  normalized_text_obj = grn_normalized_text_open(ctx, normalizer, str, str_len,
-                                                 encoding, flags);
-  if (normalized_text_obj) {
-    grn_normalized_text *normalized_text;
-    if (!(nstr = GRN_MALLOC(sizeof(grn_str)))) {
-      GRN_LOG(ctx, GRN_LOG_ALERT, "memory allocation on grn_str_open failed !");
-      grn_obj_close(ctx, normalized_text_obj);
-      return NULL;
-    }
-    normalized_text = (grn_normalized_text *)normalized_text_obj;
-    nstr->orig = normalized_text->orig;
-    nstr->orig_blen = normalized_text->orig_blen;
-    nstr->norm = normalized_text->norm;
-    normalized_text->norm = NULL;
-    nstr->norm_blen = normalized_text->norm_blen;
-    normalized_text->norm_blen = 0;
-    nstr->length = normalized_text->length;
-    nstr->checks = normalized_text->checks;
-    normalized_text->checks = NULL;
-    nstr->ctypes = normalized_text->ctypes;
-    normalized_text->ctypes = NULL;
-    nstr->encoding = encoding;
-    nstr->flags = flags;
-    grn_obj_close(ctx, normalized_text_obj);
+  if (!(nstr = GRN_MALLOC(sizeof(grn_str)))) {
+    GRN_LOG(ctx, GRN_LOG_ALERT, "memory allocation on grn_str_open failed !");
+    return NULL;
+  }
+  nstr->orig = str;
+  nstr->orig_blen = str_len;
+  nstr->norm = NULL;
+  nstr->norm_blen = 0;
+  nstr->checks = NULL;
+  nstr->ctypes = NULL;
+  nstr->encoding = encoding;
+  nstr->flags = flags;
+  switch (encoding) {
+  case GRN_ENC_EUC_JP :
+    rc = normalize_euc(ctx, nstr);
+    break;
+  case GRN_ENC_UTF8 :
+#ifdef NO_NFKC
+    rc = normalize_none(ctx, nstr);
+#else /* NO_NFKC */
+    rc = normalize_utf8(ctx, nstr);
+#endif /* NO_NFKC */
+    break;
+  case GRN_ENC_SJIS :
+    rc = normalize_sjis(ctx, nstr);
+    break;
+  case GRN_ENC_LATIN1 :
+    rc = normalize_latin1(ctx, nstr);
+    break;
+  case GRN_ENC_KOI8R :
+    rc = normalize_koi8r(ctx, nstr);
+    break;
+  default :
+    rc = normalize_none(ctx, nstr);
+    break;
+  }
+  if (rc) {
+    grn_str_close(ctx, nstr);
+    return NULL;
   }
   return nstr;
 }

  Modified: lib/str.h (+0 -1)
===================================================================
--- lib/str.h    2012-02-14 14:00:31 +0900 (bf98e59)
+++ lib/str.h    2012-02-14 14:00:52 +0900 (6bf0ce0)
@@ -80,7 +80,6 @@ grn_rc grn_substring(grn_ctx *ctx, char **str, char **str_end, int start, int en
 void grn_logger_fin(void);
 
 GRN_API int grn_charlen_(grn_ctx *ctx, const char *str, const char *end, grn_encoding encoding);
-GRN_API int grn_str_charlen_utf8(grn_ctx *ctx, const unsigned char *str, const unsigned char *end);
 GRN_API grn_str *grn_str_open_(grn_ctx *ctx, const char *str, unsigned int str_len, int flags, grn_encoding encoding);
 
 #define GRN_BULK_INCR_LEN(buf,len) {\

  Modified: lib/util.c (+0 -3)
===================================================================
--- lib/util.c    2012-02-14 14:00:31 +0900 (4913edd)
+++ lib/util.c    2012-02-14 14:00:52 +0900 (fad7124)
@@ -214,9 +214,6 @@ grn_proc_inspect(grn_ctx *ctx, grn_obj *buf, grn_obj *obj)
   case GRN_PROC_HOOK :
     GRN_TEXT_PUTS(ctx, buf, "hook");
     break;
-  case GRN_PROC_NORMALIZER :
-    GRN_TEXT_PUTS(ctx, buf, "normalizer");
-    break;
   }
   GRN_TEXT_PUTS(ctx, buf, " ");
 

  Modified: test/unit/core/dat/test-dat.cpp (+0 -5)
===================================================================
--- test/unit/core/dat/test-dat.cpp    2012-02-14 14:00:31 +0900 (9841566)
+++ test/unit/core/dat/test-dat.cpp    2012-02-14 14:00:52 +0900 (1dce81d)
@@ -72,7 +72,6 @@ namespace test_dat
 {
   const char *base_dir;
   grn_ctx ctx;
-  grn_obj *db;
 
   void cut_setup(void)
   {
@@ -83,16 +82,12 @@ namespace test_dat
     g_mkdir_with_parents(base_dir, 0755);
 
     grn_ctx_init(&ctx, 0);
-    db = grn_db_create(&ctx, NULL, NULL);
     enter_api(&ctx);
   }
 
   void cut_teardown(void)
   {
     leave_api(&ctx);
-    if (db) {
-      grn_obj_unlink(&ctx, db);
-    }
     grn_ctx_fin(&ctx);
 
     if (base_dir) {

  Modified: test/unit/util/test-snip.c (+1 -7)
===================================================================
--- test/unit/util/test-snip.c    2012-02-14 14:00:31 +0900 (8573e1f)
+++ test/unit/util/test-snip.c    2012-02-14 14:00:52 +0900 (925431e)
@@ -1,6 +1,6 @@
 /* -*- c-basic-offset: 2; coding: utf-8 -*- */
 /*
-  Copyright (C) 2008-2012  Kouhei Sutou <kou****@clear*****>
+  Copyright (C) 2008-2009  Kouhei Sutou <kou****@cozmi*****>
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -45,7 +45,6 @@ void test_add_cond_with_too_large_keyword(void);
 void test_add_cond_with_copy_tag_flag(void);
 
 static grn_ctx context;
-static grn_obj *db;
 static grn_snip *snip;
 static gchar *keyword;
 static gchar *result;
@@ -198,7 +197,6 @@ void
 cut_setup(void)
 {
   grn_ctx_init(&context, GRN_CTX_USE_QL);
-  db = grn_db_create(&context, NULL, NULL);
 
   snip = NULL;
   keyword = NULL;
@@ -235,10 +233,6 @@ cut_teardown(void)
     g_free(default_close_tag);
   }
 
-  if (db) {
-    grn_obj_close(&context, db);
-  }
-
   grn_ctx_fin(&context);
 }
 

  Modified: test/unit/util/test-string.c (+1 -4)
===================================================================
--- test/unit/util/test-string.c    2012-02-14 14:00:31 +0900 (3e97014)
+++ test/unit/util/test-string.c    2012-02-14 14:00:52 +0900 (2417060)
@@ -1,6 +1,6 @@
 /* -*- c-basic-offset: 2; coding: utf-8 -*- */
 /*
-  Copyright (C) 2008-2012  Kouhei Sutou <kou****@clear*****>
+  Copyright (C) 2008-2011  Kouhei Sutou <kou****@clear*****>
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -53,7 +53,6 @@ void data_itoh(void);
 void test_itoh(gconstpointer data);
 
 static grn_ctx context;
-static grn_obj *db;
 static grn_obj buffer;
 
 static const gchar text_ja_utf8[] =
@@ -76,7 +75,6 @@ void
 setup (void)
 {
   grn_ctx_init(&context, GRN_CTX_USE_QL);
-  db = grn_db_create(&context, NULL, NULL);
   GRN_VOID_INIT(&buffer);
 }
 
@@ -84,7 +82,6 @@ void
 teardown (void)
 {
   GRN_OBJ_FIN(&context, &buffer);
-  grn_obj_unlink(&context, db);
   grn_ctx_fin(&context);
 }
 




Groonga-commit メーリングリストの案内
Zurück zum Archiv-Index