[Groonga-commit] groonga/groonga at 3328100 [master] Add space to cache tokenizer options to tables

Zurück zum Archiv-Index

Kouhei Sutou null+****@clear*****
Thu Apr 5 15:27:05 JST 2018


Kouhei Sutou	2018-04-05 15:27:05 +0900 (Thu, 05 Apr 2018)

  New Revision: 332810087215ab3c696bcd4138a6d69d69f9bcfe
  https://github.com/groonga/groonga/commit/332810087215ab3c696bcd4138a6d69d69f9bcfe

  Message:
    Add space to cache tokenizer options to tables

  Modified files:
    include/groonga/groonga.h
    include/groonga/table.h
    lib/c_sources.am
    lib/dat.cpp
    lib/db.c
    lib/grn_dat.h
    lib/grn_hash.h
    lib/grn_pat.h
    lib/hash.c
    lib/pat.c
    lib/table.c

  Modified: include/groonga/groonga.h (+2 -0)
===================================================================
--- include/groonga/groonga.h    2018-04-05 13:11:28 +0900 (26b443088)
+++ include/groonga/groonga.h    2018-04-05 15:27:05 +0900 (ec6b09829)
@@ -204,6 +204,8 @@ typedef struct _grn_ctx grn_ctx;
 #define GRN_CTX_MSGSIZE                (0x80)
 #define GRN_CTX_FIN                    (0xff)
 
+/* Callback type that receives the context and an opaque `data` pointer and
+ * returns nothing. In this commit it is used as the "close" hook for cached
+ * tokenizer options. */
+typedef void (*grn_close_func)(grn_ctx *ctx, void *data);
+
 typedef union {
   int int_value;
   grn_id id;

  Modified: include/groonga/table.h (+12 -0)
===================================================================
--- include/groonga/table.h    2018-04-05 13:11:28 +0900 (f9de43f46)
+++ include/groonga/table.h    2018-04-05 15:27:05 +0900 (83d69d8a7)
@@ -241,6 +241,18 @@ GRN_API grn_rc grn_table_apply_expr(grn_ctx *ctx,
 
 GRN_API grn_id grn_table_find_reference_object(grn_ctx *ctx, grn_obj *table);
 
+/*
+ * Callback that builds a tokenizer options object from raw option values.
+ *
+ * `values` holds the raw option values and `user_data` is the pointer that
+ * was handed to grn_table_get_tokenizer_options(). The returned pointer is
+ * what grn_table_get_tokenizer_options() caches and returns.
+ *
+ * NOTE(review): the implementation added to lib/table.c in this commit
+ * passes the table object as the `tokenizer` argument -- confirm whether
+ * the parameter name or the call is the intended one.
+ */
+typedef void *(*grn_tokenizer_open_options_func)(grn_ctx *ctx,
+                                                 grn_obj *tokenizer,
+                                                 grn_obj *values,
+                                                 void *user_data);
+
+/*
+ * Returns the tokenizer options object cached on `table`.
+ *
+ * `table` must be a hash, patricia trie or double array key table; NULL or
+ * any other object type is reported as GRN_INVALID_ARGUMENT and NULL is
+ * returned. When no up-to-date options object is cached,
+ * `open_options_func` is called to create one and `close_options_func` is
+ * remembered so the object can be released later.
+ */
+GRN_API void *
+grn_table_get_tokenizer_options(grn_ctx *ctx,
+                                grn_obj *table,
+                                grn_tokenizer_open_options_func open_options_func,
+                                grn_close_func close_options_func,
+                                void *user_data);
+
 #ifdef __cplusplus
 }
 #endif

  Modified: lib/c_sources.am (+1 -0)
===================================================================
--- lib/c_sources.am    2018-04-05 13:11:28 +0900 (4b0c67447)
+++ lib/c_sources.am    2018-04-05 15:27:05 +0900 (bf92ef613)
@@ -95,6 +95,7 @@ libgroonga_c_sources =				\
 	string.c				\
 	grn_string.h				\
 	table.c					\
+	grn_table.h				\
 	thread.c				\
 	time.c					\
 	grn_time.h				\

  Modified: lib/dat.cpp (+4 -4)
===================================================================
--- lib/dat.cpp    2018-04-05 13:11:28 +0900 (51f625f0b)
+++ lib/dat.cpp    2018-04-05 15:27:05 +0900 (cce66a586)
@@ -1,6 +1,6 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
-  Copyright(C) 2011-2017 Brazil
+  Copyright(C) 2011-2018 Brazil
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -127,7 +127,6 @@ grn_dat_init(grn_ctx *, grn_dat *dat)
   dat->encoding = GRN_ENC_DEFAULT;
   dat->trie = NULL;
   dat->old_trie = NULL;
-  dat->tokenizer = NULL;
   dat->normalizer = NULL;
   GRN_PTR_INIT(&(dat->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL);
   CRITICAL_SECTION_INIT(dat->lock);
@@ -150,6 +149,7 @@ grn_dat_fin(grn_ctx *ctx, grn_dat *dat)
     grn_io_close(ctx, dat->io);
     dat->io = NULL;
   }
+  grn_table_tokenizer_fin(ctx, &(dat->tokenizer));
   GRN_OBJ_FIN(ctx, &(dat->token_filters));
 }
 
@@ -355,7 +355,7 @@ grn_dat_create(grn_ctx *ctx, const char *path, uint32_t,
     dat->header->normalizer = GRN_ID_NIL;
   }
   dat->encoding = encoding;
-  dat->tokenizer = NULL;
+  grn_table_tokenizer_init(ctx, &(dat->tokenizer), GRN_ID_NIL);
   GRN_PTR_INIT(&(dat->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL);
 
   dat->obj.header.flags = dat->header->flags;
@@ -391,7 +391,7 @@ grn_dat_open(grn_ctx *ctx, const char *path)
   }
   dat->file_id = dat->header->file_id;
   dat->encoding = dat->header->encoding;
-  dat->tokenizer = grn_ctx_at(ctx, dat->header->tokenizer);
+  grn_table_tokenizer_init(ctx, &(dat->tokenizer), dat->header->tokenizer);
   if (dat->header->flags & GRN_OBJ_KEY_NORMALIZE) {
     dat->header->flags &= ~GRN_OBJ_KEY_NORMALIZE;
     dat->normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1);

  Modified: lib/db.c (+16 -10)
===================================================================
--- lib/db.c    2018-04-05 13:11:28 +0900 (c8c458cac)
+++ lib/db.c    2018-04-05 15:27:05 +0900 (818de600c)
@@ -2418,7 +2418,7 @@ grn_table_get_info(grn_ctx *ctx, grn_obj *table, grn_table_flags *flags,
     case GRN_TABLE_PAT_KEY :
       if (flags) { *flags = ((grn_pat *)table)->header->flags; }
       if (encoding) { *encoding = ((grn_pat *)table)->encoding; }
-      if (tokenizer) { *tokenizer = ((grn_pat *)table)->tokenizer; }
+      if (tokenizer) { *tokenizer = ((grn_pat *)table)->tokenizer.proc; }
       if (normalizer) { *normalizer = ((grn_pat *)table)->normalizer; }
       if (token_filters) { *token_filters = &(((grn_pat *)table)->token_filters); }
       rc = GRN_SUCCESS;
@@ -2426,7 +2426,7 @@ grn_table_get_info(grn_ctx *ctx, grn_obj *table, grn_table_flags *flags,
     case GRN_TABLE_DAT_KEY :
       if (flags) { *flags = ((grn_dat *)table)->header->flags; }
       if (encoding) { *encoding = ((grn_dat *)table)->encoding; }
-      if (tokenizer) { *tokenizer = ((grn_dat *)table)->tokenizer; }
+      if (tokenizer) { *tokenizer = ((grn_dat *)table)->tokenizer.proc; }
       if (normalizer) { *normalizer = ((grn_dat *)table)->normalizer; }
       if (token_filters) { *token_filters = &(((grn_dat *)table)->token_filters); }
       rc = GRN_SUCCESS;
@@ -2434,7 +2434,7 @@ grn_table_get_info(grn_ctx *ctx, grn_obj *table, grn_table_flags *flags,
     case GRN_TABLE_HASH_KEY :
       if (flags) { *flags = ((grn_hash *)table)->header.common->flags; }
       if (encoding) { *encoding = ((grn_hash *)table)->encoding; }
-      if (tokenizer) { *tokenizer = ((grn_hash *)table)->tokenizer; }
+      if (tokenizer) { *tokenizer = ((grn_hash *)table)->tokenizer.proc; }
       if (normalizer) { *normalizer = ((grn_hash *)table)->normalizer; }
       if (token_filters) { *token_filters = &(((grn_hash *)table)->token_filters); }
       rc = GRN_SUCCESS;
@@ -8318,13 +8318,13 @@ grn_obj_get_info(grn_ctx *ctx, grn_obj *obj, grn_info_type type, grn_obj *valueb
     case GRN_INFO_DEFAULT_TOKENIZER :
       switch (DB_OBJ(obj)->header.type) {
       case GRN_TABLE_HASH_KEY :
-        valuebuf = ((grn_hash *)obj)->tokenizer;
+        valuebuf = ((grn_hash *)obj)->tokenizer.proc;
         break;
       case GRN_TABLE_PAT_KEY :
-        valuebuf = ((grn_pat *)obj)->tokenizer;
+        valuebuf = ((grn_pat *)obj)->tokenizer.proc;
         break;
       case GRN_TABLE_DAT_KEY :
-        valuebuf = ((grn_dat *)obj)->tokenizer;
+        valuebuf = ((grn_dat *)obj)->tokenizer.proc;
         break;
       }
       break;
@@ -9039,12 +9039,16 @@ grn_obj_set_info(grn_ctx *ctx, grn_obj *obj, grn_info_type type, grn_obj *value)
     if (!value || DB_OBJ(value)->header.type == GRN_PROC) {
       switch (DB_OBJ(obj)->header.type) {
       case GRN_TABLE_HASH_KEY :
-        ((grn_hash *)obj)->tokenizer = value;
+        grn_table_tokenizer_set_proc(ctx,
+                                     &(((grn_hash *)obj)->tokenizer),
+                                     value);
         ((grn_hash *)obj)->header.common->tokenizer = grn_obj_id(ctx, value);
         rc = GRN_SUCCESS;
         break;
       case GRN_TABLE_PAT_KEY :
-        ((grn_pat *)obj)->tokenizer = value;
+        grn_table_tokenizer_set_proc(ctx,
+                                     &(((grn_pat *)obj)->tokenizer),
+                                     value);
         ((grn_pat *)obj)->header->tokenizer = grn_obj_id(ctx, value);
         grn_pat_cache_enable(ctx,
                              ((grn_pat *)obj),
@@ -9052,7 +9056,9 @@ grn_obj_set_info(grn_ctx *ctx, grn_obj *obj, grn_info_type type, grn_obj *value)
         rc = GRN_SUCCESS;
         break;
       case GRN_TABLE_DAT_KEY :
-        ((grn_dat *)obj)->tokenizer = value;
+        grn_table_tokenizer_set_proc(ctx,
+                                     &(((grn_dat *)obj)->tokenizer),
+                                     value);
         ((grn_dat *)obj)->header->tokenizer = grn_obj_id(ctx, value);
         rc = GRN_SUCCESS;
         break;
@@ -10686,7 +10692,7 @@ grn_ctx_at(grn_ctx *ctx, grn_id id)
                   grn_token_filters_unpack(ctx,
                                            &(pat->token_filters),
                                            &decoded_spec);
-                  if (pat->tokenizer) {
+                  if (pat->tokenizer.proc) {
                     grn_pat_cache_enable(ctx,
                                          pat,
                                          GRN_TABLE_PAT_KEY_CACHE_SIZE);

  Modified: lib/grn_dat.h (+3 -2)
===================================================================
--- lib/grn_dat.h    2018-04-05 13:11:28 +0900 (774c02694)
+++ lib/grn_dat.h    2018-04-05 15:27:05 +0900 (925c35c30)
@@ -1,6 +1,6 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
-  Copyright(C) 2011-2017 Brazil
+  Copyright(C) 2011-2018 Brazil
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -20,6 +20,7 @@
 
 #include "grn.h"
 #include "grn_db.h"
+#include "grn_table.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -33,7 +34,7 @@ struct _grn_dat {
   grn_encoding encoding;
   void *trie;
   void *old_trie;
-  grn_obj *tokenizer;
+  grn_table_tokenizer tokenizer;
   grn_obj *normalizer;
   grn_obj token_filters;
   grn_critical_section lock;

  Modified: lib/grn_hash.h (+3 -2)
===================================================================
--- lib/grn_hash.h    2018-04-05 13:11:28 +0900 (6547394b5)
+++ lib/grn_hash.h    2018-04-05 15:27:05 +0900 (7c90ae356)
@@ -1,6 +1,6 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
-  Copyright(C) 2009-2016 Brazil
+  Copyright(C) 2009-2018 Brazil
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -20,6 +20,7 @@
 
 #include "grn.h"
 #include "grn_ctx.h"
+#include "grn_table.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -192,7 +193,7 @@ struct _grn_hash {
   uint32_t *n_garbages;
   uint32_t *n_entries;
   uint32_t *max_offset;
-  grn_obj *tokenizer;
+  grn_table_tokenizer tokenizer;
   grn_obj *normalizer;
   grn_obj token_filters;
 

  Modified: lib/grn_pat.h (+2 -1)
===================================================================
--- lib/grn_pat.h    2018-04-05 13:11:28 +0900 (69a5d0c10)
+++ lib/grn_pat.h    2018-04-05 15:27:05 +0900 (e07644f4f)
@@ -21,6 +21,7 @@
 #include "grn.h"
 #include "grn_db.h"
 #include "grn_hash.h"
+#include "grn_table.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -36,7 +37,7 @@ struct _grn_pat {
   grn_encoding encoding;
   uint32_t key_size;
   uint32_t value_size;
-  grn_obj *tokenizer;
+  grn_table_tokenizer tokenizer;
   grn_obj *normalizer;
   grn_obj token_filters;
   grn_id *cache;

  Modified: lib/hash.c (+5 -3)
===================================================================
--- lib/hash.c    2018-04-05 13:11:28 +0900 (74fab5b42)
+++ lib/hash.c    2018-04-05 15:27:05 +0900 (05990fc31)
@@ -1723,7 +1723,7 @@ grn_io_hash_init(grn_ctx *ctx, grn_hash *hash, const char *path,
   hash->io = io;
   hash->header.common = header;
   hash->lock = &header->lock;
-  hash->tokenizer = NULL;
+  grn_table_tokenizer_init(ctx, &(hash->tokenizer), GRN_ID_NIL);
   return GRN_SUCCESS;
 }
 
@@ -1782,7 +1782,7 @@ grn_tiny_hash_init(grn_ctx *ctx, grn_hash *hash, const char *path,
   hash->n_garbages_ = 0;
   hash->n_entries_ = 0;
   hash->garbages = GRN_ID_NIL;
-  hash->tokenizer = NULL;
+  grn_table_tokenizer_init(ctx, &(hash->tokenizer), GRN_ID_NIL);
   hash->normalizer = NULL;
   GRN_PTR_INIT(&(hash->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL);
   grn_tiny_array_init(ctx, &hash->a, entry_size, GRN_TINY_ARRAY_CLEAR);
@@ -1850,7 +1850,7 @@ grn_hash_open(grn_ctx *ctx, const char *path)
             hash->io = io;
             hash->header.common = header;
             hash->lock = &header->lock;
-            hash->tokenizer = grn_ctx_at(ctx, header->tokenizer);
+            grn_table_tokenizer_init(ctx, &(hash->tokenizer), header->tokenizer);
             if (header->flags & GRN_OBJ_KEY_NORMALIZE) {
               header->flags &= ~GRN_OBJ_KEY_NORMALIZE;
               hash->normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1);
@@ -1904,6 +1904,7 @@ grn_tiny_hash_fin(grn_ctx *ctx, grn_hash *hash)
     return GRN_INVALID_ARGUMENT;
   }
 
+  grn_table_tokenizer_fin(ctx, &(hash->tokenizer));
   GRN_OBJ_FIN(ctx, &(hash->token_filters));
 
   if (hash->obj.header.flags & GRN_OBJ_KEY_VAR_SIZE) {
@@ -1935,6 +1936,7 @@ grn_hash_close(grn_ctx *ctx, grn_hash *hash)
   if (!ctx || !hash) { return GRN_INVALID_ARGUMENT; }
   if (grn_hash_is_io_hash(hash)) {
     rc = grn_io_close(ctx, hash->io);
+    grn_table_tokenizer_fin(ctx, &(hash->tokenizer));
     GRN_OBJ_FIN(ctx, &(hash->token_filters));
   } else {
     GRN_ASSERT(ctx == hash->ctx);

  Modified: lib/pat.c (+9 -8)
===================================================================
--- lib/pat.c    2018-04-05 13:11:28 +0900 (ca955af61)
+++ lib/pat.c    2018-04-05 15:27:05 +0900 (42c1656de)
@@ -514,7 +514,7 @@ _grn_pat_create(grn_ctx *ctx, grn_pat *pat,
   pat->header = header;
   pat->key_size = key_size;
   pat->value_size = value_size;
-  pat->tokenizer = NULL;
+  grn_table_tokenizer_init(ctx, &(pat->tokenizer), GRN_ID_NIL);
   pat->encoding = encoding;
   pat->obj.header.flags = header->flags;
   if (!(node0 = pat_get(ctx, pat, 0))) {
@@ -616,7 +616,7 @@ grn_pat_open(grn_ctx *ctx, const char *path)
   pat->key_size = header->key_size;
   pat->value_size = header->value_size;
   pat->encoding = header->encoding;
-  pat->tokenizer = grn_ctx_at(ctx, header->tokenizer);
+  grn_table_tokenizer_init(ctx, &(pat->tokenizer), header->tokenizer);
   if (header->flags & GRN_OBJ_KEY_NORMALIZE) {
     header->flags &= ~GRN_OBJ_KEY_NORMALIZE;
     pat->normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1);
@@ -670,13 +670,14 @@ grn_pat_close(grn_ctx *ctx, grn_pat *pat)
     GRN_ATOMIC_ADD_EX(&(pat->header->n_dirty_opens), -1, n_dirty_opens);
   }
 
-  if ((rc = grn_io_close(ctx, pat->io))) {
-    ERR(rc, "grn_io_close failed");
-  } else {
-    grn_pvector_fin(ctx, &pat->token_filters);
-    if (pat->cache) { grn_pat_cache_disable(ctx, pat); }
-    GRN_FREE(pat);
+  rc = grn_io_close(ctx, pat->io);
+  if (rc != GRN_SUCCESS) {
+    ERR(rc, "[pat][close] failed to close IO");
   }
+  grn_table_tokenizer_fin(ctx, &(pat->tokenizer));
+  grn_pvector_fin(ctx, &pat->token_filters);
+  if (pat->cache) { grn_pat_cache_disable(ctx, pat); }
+  GRN_FREE(pat);
 
   return rc;
 }

  Modified: lib/table.c (+128 -1)
===================================================================
--- lib/table.c    2018-04-05 13:11:28 +0900 (72a2f280b)
+++ lib/table.c    2018-04-05 15:27:05 +0900 (2a3267ddf)
@@ -1,6 +1,6 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
-  Copyright(C) 2017 Brazil
+  Copyright(C) 2017-2018 Brazil
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -18,7 +18,10 @@
 
 #include "grn.h"
 #include "grn_ctx.h"
+#include "grn_dat.h"
 #include "grn_expr_executor.h"
+#include "grn_hash.h"
+#include "grn_pat.h"
 
 grn_rc
 grn_table_apply_expr(grn_ctx *ctx,
@@ -120,3 +123,127 @@ grn_table_find_reference_object(grn_ctx *ctx, grn_obj *table)
 
   GRN_API_RETURN(reference_object_id);
 }
+
+/*
+ * Prepares `tokenizer` for use by a table.
+ *
+ * `tokenizer_id` is resolved to a proc object with grn_ctx_at();
+ * GRN_ID_NIL means "no tokenizer" and leaves `proc` as NULL without a
+ * lookup. The options cache starts out empty: no options object, no known
+ * revision and no close function.
+ */
+void
+grn_table_tokenizer_init(grn_ctx *ctx,
+                         grn_table_tokenizer *tokenizer,
+                         grn_id tokenizer_id)
+{
+  tokenizer->proc =
+    (tokenizer_id == GRN_ID_NIL) ? NULL : grn_ctx_at(ctx, tokenizer_id);
+
+  tokenizer->options_close_func = NULL;
+  tokenizer->options_revision = GRN_OPTION_REVISION_NONE;
+  tokenizer->options = NULL;
+}
+
+/*
+ * Drops the cached tokenizer options of `tokenizer`.
+ *
+ * The close function runs only when both an options object and a close
+ * function are registered. The cache fields are cleared unconditionally:
+ * options that were registered without a close function must not stay
+ * attached after the tokenizer proc is replaced, and a leftover revision
+ * must not make a later option lookup report "unchanged".
+ */
+static void
+grn_table_tokenizer_fin_options(grn_ctx *ctx,
+                                grn_table_tokenizer *tokenizer)
+{
+  if (tokenizer->options && tokenizer->options_close_func) {
+    tokenizer->options_close_func(ctx, tokenizer->options);
+  }
+
+  tokenizer->options = NULL;
+  tokenizer->options_revision = GRN_OPTION_REVISION_NONE;
+  tokenizer->options_close_func = NULL;
+}
+
+/*
+ * Finalizes `tokenizer` by delegating to
+ * grn_table_tokenizer_fin_options(). `tokenizer->proc` is not touched.
+ */
+void
+grn_table_tokenizer_fin(grn_ctx *ctx, grn_table_tokenizer *tokenizer)
+{
+  grn_table_tokenizer_fin_options(ctx, tokenizer);
+}
+
+/*
+ * Replaces the tokenizer proc stored in `tokenizer` with `proc`.
+ *
+ * grn_table_tokenizer_fin_options() is run first, before the proc is
+ * swapped. `proc` may be NULL; it is stored as is.
+ */
+void
+grn_table_tokenizer_set_proc(grn_ctx *ctx,
+                             grn_table_tokenizer *tokenizer,
+                             grn_obj *proc)
+{
+  /* Must happen before the assignment below. */
+  grn_table_tokenizer_fin_options(ctx, tokenizer);
+  tokenizer->proc = proc;
+}
+
+/*
+ * Stores a freshly opened options object in `tokenizer`.
+ *
+ * grn_table_tokenizer_fin_options() is run on the previous state first.
+ * `revision` is always recorded, even when `options` is NULL.
+ * `close_func` is recorded only together with a non-NULL `options`.
+ */
+void
+grn_table_tokenizer_set_options(grn_ctx *ctx,
+                                grn_table_tokenizer *tokenizer,
+                                void *options,
+                                grn_option_revision revision,
+                                grn_close_func close_func)
+{
+  grn_table_tokenizer_fin_options(ctx, tokenizer);
+
+  if (options) {
+    tokenizer->options_close_func = close_func;
+  }
+  tokenizer->options_revision = revision;
+  tokenizer->options = options;
+}
+
+/*
+ * Returns the tokenizer options object cached on `table`, opening it
+ * again when the cached one is missing or out of date.
+ *
+ * `table` must be a hash, patricia trie or double array key table. NULL
+ * or any other object type sets GRN_INVALID_ARGUMENT and returns NULL.
+ *
+ * The raw "tokenizer" option values are fetched together with their
+ * revision. The cached object is returned as is when the revision is
+ * unchanged, or when there is no revision but an object is already
+ * cached. Otherwise `open_options_func` is called with the raw values and
+ * `user_data`, and its result is cached together with the revision and
+ * `close_options_func`.
+ *
+ * `open_options_func` is only required when the options actually have to
+ * be opened; passing NULL in that situation sets GRN_INVALID_ARGUMENT and
+ * returns NULL instead of calling through a NULL pointer.
+ */
+void *
+grn_table_get_tokenizer_options(grn_ctx *ctx,
+                                grn_obj *table,
+                                grn_tokenizer_open_options_func open_options_func,
+                                grn_close_func close_options_func,
+                                void *user_data)
+{
+  grn_table_tokenizer *tokenizer;
+  grn_option_revision revision;
+  grn_obj raw_options;
+  void *options;
+
+  GRN_API_ENTER;
+
+  if (!table) {
+    ERR(GRN_INVALID_ARGUMENT,
+        "[table][tokenizer-options][get] table is NULL");
+    GRN_API_RETURN(NULL);
+  }
+
+  /* Only key tables carry a tokenizer slot. */
+  switch (table->header.type) {
+  case GRN_TABLE_HASH_KEY :
+    tokenizer = &(((grn_hash *)table)->tokenizer);
+    break;
+  case GRN_TABLE_PAT_KEY :
+    tokenizer = &(((grn_pat *)table)->tokenizer);
+    break;
+  case GRN_TABLE_DAT_KEY :
+    tokenizer = &(((grn_dat *)table)->tokenizer);
+    break;
+  default :
+    ERR(GRN_INVALID_ARGUMENT,
+        "[table][tokenizer-options][get] table must be key table: %s",
+        grn_obj_type_to_string(table->header.type));
+    GRN_API_RETURN(NULL);
+    break;
+  }
+
+  GRN_VOID_INIT(&raw_options);
+  revision = grn_obj_get_option_values(ctx,
+                                       table,
+                                       "tokenizer",
+                                       -1,
+                                       tokenizer->options_revision,
+                                       &raw_options);
+  /* Cache hit: nothing changed, or no stored options but an object is
+   * already cached. */
+  if ((revision == GRN_OPTION_REVISION_UNCHANGED) ||
+      (revision == GRN_OPTION_REVISION_NONE && tokenizer->options)) {
+    goto exit;
+  }
+
+  if (!open_options_func) {
+    ERR(GRN_INVALID_ARGUMENT,
+        "[table][tokenizer-options][get] open_options_func is NULL");
+    GRN_OBJ_FIN(ctx, &raw_options);
+    GRN_API_RETURN(NULL);
+  }
+
+  options = open_options_func(ctx, table, &raw_options, user_data);
+  grn_table_tokenizer_set_options(ctx,
+                                  tokenizer,
+                                  options,
+                                  revision,
+                                  close_options_func);
+
+exit :
+  GRN_OBJ_FIN(ctx, &raw_options);
+
+  GRN_API_RETURN(tokenizer->options);
+}
+
-------------- next part --------------
An HTML attachment was scrubbed...
URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180405/862807dc/attachment-0001.htm 



More information about the Groonga-commit mailing list
Zurück zum Archiv-Index