Kouhei Sutou
null+****@clear*****
Thu Apr 5 15:27:05 JST 2018
Kouhei Sutou 2018-04-05 15:27:05 +0900 (Thu, 05 Apr 2018) New Revision: 332810087215ab3c696bcd4138a6d69d69f9bcfe https://github.com/groonga/groonga/commit/332810087215ab3c696bcd4138a6d69d69f9bcfe Message: Add space to cache tokenizer options to tables Modified files: include/groonga/groonga.h include/groonga/table.h lib/c_sources.am lib/dat.cpp lib/db.c lib/grn_dat.h lib/grn_hash.h lib/grn_pat.h lib/hash.c lib/pat.c lib/table.c Modified: include/groonga/groonga.h (+2 -0) =================================================================== --- include/groonga/groonga.h 2018-04-05 13:11:28 +0900 (26b443088) +++ include/groonga/groonga.h 2018-04-05 15:27:05 +0900 (ec6b09829) @@ -204,6 +204,8 @@ typedef struct _grn_ctx grn_ctx; #define GRN_CTX_MSGSIZE (0x80) #define GRN_CTX_FIN (0xff) +typedef void (*grn_close_func)(grn_ctx *ctx, void *data); + typedef union { int int_value; grn_id id; Modified: include/groonga/table.h (+12 -0) =================================================================== --- include/groonga/table.h 2018-04-05 13:11:28 +0900 (f9de43f46) +++ include/groonga/table.h 2018-04-05 15:27:05 +0900 (83d69d8a7) @@ -241,6 +241,18 @@ GRN_API grn_rc grn_table_apply_expr(grn_ctx *ctx, GRN_API grn_id grn_table_find_reference_object(grn_ctx *ctx, grn_obj *table); +typedef void *(*grn_tokenizer_open_options_func)(grn_ctx *ctx, + grn_obj *tokenizer, + grn_obj *values, + void *user_data); + +GRN_API void * +grn_table_get_tokenizer_options(grn_ctx *ctx, + grn_obj *table, + grn_tokenizer_open_options_func open_options_func, + grn_close_func close_options_func, + void *user_data); + #ifdef __cplusplus } #endif Modified: lib/c_sources.am (+1 -0) =================================================================== --- lib/c_sources.am 2018-04-05 13:11:28 +0900 (4b0c67447) +++ lib/c_sources.am 2018-04-05 15:27:05 +0900 (bf92ef613) @@ -95,6 +95,7 @@ libgroonga_c_sources = \ string.c \ grn_string.h \ table.c \ + grn_table.h \ thread.c \ time.c \ grn_time.h \ Modified: 
lib/dat.cpp (+4 -4) =================================================================== --- lib/dat.cpp 2018-04-05 13:11:28 +0900 (51f625f0b) +++ lib/dat.cpp 2018-04-05 15:27:05 +0900 (cce66a586) @@ -1,6 +1,6 @@ /* -*- c-basic-offset: 2 -*- */ /* - Copyright(C) 2011-2017 Brazil + Copyright(C) 2011-2018 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -127,7 +127,6 @@ grn_dat_init(grn_ctx *, grn_dat *dat) dat->encoding = GRN_ENC_DEFAULT; dat->trie = NULL; dat->old_trie = NULL; - dat->tokenizer = NULL; dat->normalizer = NULL; GRN_PTR_INIT(&(dat->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL); CRITICAL_SECTION_INIT(dat->lock); @@ -150,6 +149,7 @@ grn_dat_fin(grn_ctx *ctx, grn_dat *dat) grn_io_close(ctx, dat->io); dat->io = NULL; } + grn_table_tokenizer_fin(ctx, &(dat->tokenizer)); GRN_OBJ_FIN(ctx, &(dat->token_filters)); } @@ -355,7 +355,7 @@ grn_dat_create(grn_ctx *ctx, const char *path, uint32_t, dat->header->normalizer = GRN_ID_NIL; } dat->encoding = encoding; - dat->tokenizer = NULL; + grn_table_tokenizer_init(ctx, &(dat->tokenizer), GRN_ID_NIL); GRN_PTR_INIT(&(dat->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL); dat->obj.header.flags = dat->header->flags; @@ -391,7 +391,7 @@ grn_dat_open(grn_ctx *ctx, const char *path) } dat->file_id = dat->header->file_id; dat->encoding = dat->header->encoding; - dat->tokenizer = grn_ctx_at(ctx, dat->header->tokenizer); + grn_table_tokenizer_init(ctx, &(dat->tokenizer), dat->header->tokenizer); if (dat->header->flags & GRN_OBJ_KEY_NORMALIZE) { dat->header->flags &= ~GRN_OBJ_KEY_NORMALIZE; dat->normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1); Modified: lib/db.c (+16 -10) =================================================================== --- lib/db.c 2018-04-05 13:11:28 +0900 (c8c458cac) +++ lib/db.c 2018-04-05 15:27:05 +0900 (818de600c) @@ -2418,7 +2418,7 @@ grn_table_get_info(grn_ctx *ctx, grn_obj *table, grn_table_flags *flags, case 
GRN_TABLE_PAT_KEY : if (flags) { *flags = ((grn_pat *)table)->header->flags; } if (encoding) { *encoding = ((grn_pat *)table)->encoding; } - if (tokenizer) { *tokenizer = ((grn_pat *)table)->tokenizer; } + if (tokenizer) { *tokenizer = ((grn_pat *)table)->tokenizer.proc; } if (normalizer) { *normalizer = ((grn_pat *)table)->normalizer; } if (token_filters) { *token_filters = &(((grn_pat *)table)->token_filters); } rc = GRN_SUCCESS; @@ -2426,7 +2426,7 @@ grn_table_get_info(grn_ctx *ctx, grn_obj *table, grn_table_flags *flags, case GRN_TABLE_DAT_KEY : if (flags) { *flags = ((grn_dat *)table)->header->flags; } if (encoding) { *encoding = ((grn_dat *)table)->encoding; } - if (tokenizer) { *tokenizer = ((grn_dat *)table)->tokenizer; } + if (tokenizer) { *tokenizer = ((grn_dat *)table)->tokenizer.proc; } if (normalizer) { *normalizer = ((grn_dat *)table)->normalizer; } if (token_filters) { *token_filters = &(((grn_dat *)table)->token_filters); } rc = GRN_SUCCESS; @@ -2434,7 +2434,7 @@ grn_table_get_info(grn_ctx *ctx, grn_obj *table, grn_table_flags *flags, case GRN_TABLE_HASH_KEY : if (flags) { *flags = ((grn_hash *)table)->header.common->flags; } if (encoding) { *encoding = ((grn_hash *)table)->encoding; } - if (tokenizer) { *tokenizer = ((grn_hash *)table)->tokenizer; } + if (tokenizer) { *tokenizer = ((grn_hash *)table)->tokenizer.proc; } if (normalizer) { *normalizer = ((grn_hash *)table)->normalizer; } if (token_filters) { *token_filters = &(((grn_hash *)table)->token_filters); } rc = GRN_SUCCESS; @@ -8318,13 +8318,13 @@ grn_obj_get_info(grn_ctx *ctx, grn_obj *obj, grn_info_type type, grn_obj *valueb case GRN_INFO_DEFAULT_TOKENIZER : switch (DB_OBJ(obj)->header.type) { case GRN_TABLE_HASH_KEY : - valuebuf = ((grn_hash *)obj)->tokenizer; + valuebuf = ((grn_hash *)obj)->tokenizer.proc; break; case GRN_TABLE_PAT_KEY : - valuebuf = ((grn_pat *)obj)->tokenizer; + valuebuf = ((grn_pat *)obj)->tokenizer.proc; break; case GRN_TABLE_DAT_KEY : - valuebuf = ((grn_dat 
*)obj)->tokenizer; + valuebuf = ((grn_dat *)obj)->tokenizer.proc; break; } break; @@ -9039,12 +9039,16 @@ grn_obj_set_info(grn_ctx *ctx, grn_obj *obj, grn_info_type type, grn_obj *value) if (!value || DB_OBJ(value)->header.type == GRN_PROC) { switch (DB_OBJ(obj)->header.type) { case GRN_TABLE_HASH_KEY : - ((grn_hash *)obj)->tokenizer = value; + grn_table_tokenizer_set_proc(ctx, + &(((grn_hash *)obj)->tokenizer), + value); ((grn_hash *)obj)->header.common->tokenizer = grn_obj_id(ctx, value); rc = GRN_SUCCESS; break; case GRN_TABLE_PAT_KEY : - ((grn_pat *)obj)->tokenizer = value; + grn_table_tokenizer_set_proc(ctx, + &(((grn_pat *)obj)->tokenizer), + value); ((grn_pat *)obj)->header->tokenizer = grn_obj_id(ctx, value); grn_pat_cache_enable(ctx, ((grn_pat *)obj), @@ -9052,7 +9056,9 @@ grn_obj_set_info(grn_ctx *ctx, grn_obj *obj, grn_info_type type, grn_obj *value) rc = GRN_SUCCESS; break; case GRN_TABLE_DAT_KEY : - ((grn_dat *)obj)->tokenizer = value; + grn_table_tokenizer_set_proc(ctx, + &(((grn_dat *)obj)->tokenizer), + value); ((grn_dat *)obj)->header->tokenizer = grn_obj_id(ctx, value); rc = GRN_SUCCESS; break; @@ -10686,7 +10692,7 @@ grn_ctx_at(grn_ctx *ctx, grn_id id) grn_token_filters_unpack(ctx, &(pat->token_filters), &decoded_spec); - if (pat->tokenizer) { + if (pat->tokenizer.proc) { grn_pat_cache_enable(ctx, pat, GRN_TABLE_PAT_KEY_CACHE_SIZE); Modified: lib/grn_dat.h (+3 -2) =================================================================== --- lib/grn_dat.h 2018-04-05 13:11:28 +0900 (774c02694) +++ lib/grn_dat.h 2018-04-05 15:27:05 +0900 (925c35c30) @@ -1,6 +1,6 @@ /* -*- c-basic-offset: 2 -*- */ /* - Copyright(C) 2011-2017 Brazil + Copyright(C) 2011-2018 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -20,6 +20,7 @@ #include "grn.h" #include "grn_db.h" +#include "grn_table.h" #ifdef __cplusplus extern "C" { @@ -33,7 +34,7 @@ struct _grn_dat { grn_encoding encoding; void 
*trie; void *old_trie; - grn_obj *tokenizer; + grn_table_tokenizer tokenizer; grn_obj *normalizer; grn_obj token_filters; grn_critical_section lock; Modified: lib/grn_hash.h (+3 -2) =================================================================== --- lib/grn_hash.h 2018-04-05 13:11:28 +0900 (6547394b5) +++ lib/grn_hash.h 2018-04-05 15:27:05 +0900 (7c90ae356) @@ -1,6 +1,6 @@ /* -*- c-basic-offset: 2 -*- */ /* - Copyright(C) 2009-2016 Brazil + Copyright(C) 2009-2018 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -20,6 +20,7 @@ #include "grn.h" #include "grn_ctx.h" +#include "grn_table.h" #ifdef __cplusplus extern "C" { @@ -192,7 +193,7 @@ struct _grn_hash { uint32_t *n_garbages; uint32_t *n_entries; uint32_t *max_offset; - grn_obj *tokenizer; + grn_table_tokenizer tokenizer; grn_obj *normalizer; grn_obj token_filters; Modified: lib/grn_pat.h (+2 -1) =================================================================== --- lib/grn_pat.h 2018-04-05 13:11:28 +0900 (69a5d0c10) +++ lib/grn_pat.h 2018-04-05 15:27:05 +0900 (e07644f4f) @@ -21,6 +21,7 @@ #include "grn.h" #include "grn_db.h" #include "grn_hash.h" +#include "grn_table.h" #ifdef __cplusplus extern "C" { @@ -36,7 +37,7 @@ struct _grn_pat { grn_encoding encoding; uint32_t key_size; uint32_t value_size; - grn_obj *tokenizer; + grn_table_tokenizer tokenizer; grn_obj *normalizer; grn_obj token_filters; grn_id *cache; Modified: lib/hash.c (+5 -3) =================================================================== --- lib/hash.c 2018-04-05 13:11:28 +0900 (74fab5b42) +++ lib/hash.c 2018-04-05 15:27:05 +0900 (05990fc31) @@ -1723,7 +1723,7 @@ grn_io_hash_init(grn_ctx *ctx, grn_hash *hash, const char *path, hash->io = io; hash->header.common = header; hash->lock = &header->lock; - hash->tokenizer = NULL; + grn_table_tokenizer_init(ctx, &(hash->tokenizer), GRN_ID_NIL); return GRN_SUCCESS; } @@ -1782,7 +1782,7 @@ grn_tiny_hash_init(grn_ctx 
*ctx, grn_hash *hash, const char *path, hash->n_garbages_ = 0; hash->n_entries_ = 0; hash->garbages = GRN_ID_NIL; - hash->tokenizer = NULL; + grn_table_tokenizer_init(ctx, &(hash->tokenizer), GRN_ID_NIL); hash->normalizer = NULL; GRN_PTR_INIT(&(hash->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL); grn_tiny_array_init(ctx, &hash->a, entry_size, GRN_TINY_ARRAY_CLEAR); @@ -1850,7 +1850,7 @@ grn_hash_open(grn_ctx *ctx, const char *path) hash->io = io; hash->header.common = header; hash->lock = &header->lock; - hash->tokenizer = grn_ctx_at(ctx, header->tokenizer); + grn_table_tokenizer_init(ctx, &(hash->tokenizer), header->tokenizer); if (header->flags & GRN_OBJ_KEY_NORMALIZE) { header->flags &= ~GRN_OBJ_KEY_NORMALIZE; hash->normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1); @@ -1904,6 +1904,7 @@ grn_tiny_hash_fin(grn_ctx *ctx, grn_hash *hash) return GRN_INVALID_ARGUMENT; } + grn_table_tokenizer_fin(ctx, &(hash->tokenizer)); GRN_OBJ_FIN(ctx, &(hash->token_filters)); if (hash->obj.header.flags & GRN_OBJ_KEY_VAR_SIZE) { @@ -1935,6 +1936,7 @@ grn_hash_close(grn_ctx *ctx, grn_hash *hash) if (!ctx || !hash) { return GRN_INVALID_ARGUMENT; } if (grn_hash_is_io_hash(hash)) { rc = grn_io_close(ctx, hash->io); + grn_table_tokenizer_fin(ctx, &(hash->tokenizer)); GRN_OBJ_FIN(ctx, &(hash->token_filters)); } else { GRN_ASSERT(ctx == hash->ctx); Modified: lib/pat.c (+9 -8) =================================================================== --- lib/pat.c 2018-04-05 13:11:28 +0900 (ca955af61) +++ lib/pat.c 2018-04-05 15:27:05 +0900 (42c1656de) @@ -514,7 +514,7 @@ _grn_pat_create(grn_ctx *ctx, grn_pat *pat, pat->header = header; pat->key_size = key_size; pat->value_size = value_size; - pat->tokenizer = NULL; + grn_table_tokenizer_init(ctx, &(pat->tokenizer), GRN_ID_NIL); pat->encoding = encoding; pat->obj.header.flags = header->flags; if (!(node0 = pat_get(ctx, pat, 0))) { @@ -616,7 +616,7 @@ grn_pat_open(grn_ctx *ctx, const char *path) pat->key_size = header->key_size; 
pat->value_size = header->value_size; pat->encoding = header->encoding; - pat->tokenizer = grn_ctx_at(ctx, header->tokenizer); + grn_table_tokenizer_init(ctx, &(pat->tokenizer), header->tokenizer); if (header->flags & GRN_OBJ_KEY_NORMALIZE) { header->flags &= ~GRN_OBJ_KEY_NORMALIZE; pat->normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1); @@ -670,13 +670,14 @@ grn_pat_close(grn_ctx *ctx, grn_pat *pat) GRN_ATOMIC_ADD_EX(&(pat->header->n_dirty_opens), -1, n_dirty_opens); } - if ((rc = grn_io_close(ctx, pat->io))) { - ERR(rc, "grn_io_close failed"); - } else { - grn_pvector_fin(ctx, &pat->token_filters); - if (pat->cache) { grn_pat_cache_disable(ctx, pat); } - GRN_FREE(pat); + rc = grn_io_close(ctx, pat->io); + if (rc != GRN_SUCCESS) { + ERR(rc, "[pat][close] failed to close IO"); } + grn_table_tokenizer_fin(ctx, &(pat->tokenizer)); + grn_pvector_fin(ctx, &pat->token_filters); + if (pat->cache) { grn_pat_cache_disable(ctx, pat); } + GRN_FREE(pat); return rc; } Modified: lib/table.c (+128 -1) =================================================================== --- lib/table.c 2018-04-05 13:11:28 +0900 (72a2f280b) +++ lib/table.c 2018-04-05 15:27:05 +0900 (2a3267ddf) @@ -1,6 +1,6 @@ /* -*- c-basic-offset: 2 -*- */ /* - Copyright(C) 2017 Brazil + Copyright(C) 2017-2018 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -18,7 +18,10 @@ #include "grn.h" #include "grn_ctx.h" +#include "grn_dat.h" #include "grn_expr_executor.h" +#include "grn_hash.h" +#include "grn_pat.h" grn_rc grn_table_apply_expr(grn_ctx *ctx, @@ -120,3 +123,127 @@ grn_table_find_reference_object(grn_ctx *ctx, grn_obj *table) GRN_API_RETURN(reference_object_id); } + +void +grn_table_tokenizer_init(grn_ctx *ctx, + grn_table_tokenizer *tokenizer, + grn_id tokenizer_id) +{ + if (tokenizer_id == GRN_ID_NIL) { + tokenizer->proc = NULL; + } else { + tokenizer->proc = grn_ctx_at(ctx, tokenizer_id); + } + tokenizer->options = 
NULL; + tokenizer->options_revision = GRN_OPTION_REVISION_NONE; + tokenizer->options_close_func = NULL; +} + +static void +grn_table_tokenizer_fin_options(grn_ctx *ctx, + grn_table_tokenizer *tokenizer) +{ + if (tokenizer->options && tokenizer->options_close_func) { + tokenizer->options_close_func(ctx, tokenizer->options); + tokenizer->options = NULL; + tokenizer->options_revision = GRN_OPTION_REVISION_NONE; + tokenizer->options_close_func = NULL; + } +} + +void +grn_table_tokenizer_fin(grn_ctx *ctx, + grn_table_tokenizer *tokenizer) +{ + grn_table_tokenizer_fin_options(ctx, tokenizer); +} + +void +grn_table_tokenizer_set_proc(grn_ctx *ctx, + grn_table_tokenizer *tokenizer, + grn_obj *proc) +{ + grn_table_tokenizer_fin_options(ctx, tokenizer); + + tokenizer->proc = proc; +} + +void +grn_table_tokenizer_set_options(grn_ctx *ctx, + grn_table_tokenizer *tokenizer, + void *options, + grn_option_revision revision, + grn_close_func close_func) +{ + grn_table_tokenizer_fin_options(ctx, tokenizer); + + tokenizer->options = options; + tokenizer->options_revision = revision; + if (options) { + tokenizer->options_close_func = close_func; + } +} + +void * +grn_table_get_tokenizer_options(grn_ctx *ctx, + grn_obj *table, + grn_tokenizer_open_options_func open_options_func, + grn_close_func close_options_func, + void *user_data) +{ + grn_table_tokenizer *tokenizer; + grn_option_revision revision; + grn_obj raw_options; + void *options; + + GRN_API_ENTER; + + if (!table) { + ERR(GRN_INVALID_ARGUMENT, + "[table][tokenizer-options][set] table is NULL"); + GRN_API_RETURN(NULL); + } + + switch (table->header.type) { + case GRN_TABLE_HASH_KEY : + tokenizer = &(((grn_hash *)table)->tokenizer); + break; + case GRN_TABLE_PAT_KEY : + tokenizer = &(((grn_pat *)table)->tokenizer); + break; + case GRN_TABLE_DAT_KEY : + tokenizer = &(((grn_dat *)table)->tokenizer); + break; + default : + ERR(GRN_INVALID_ARGUMENT, + "[table][tokenizer-options][set] table must key table: %s", + 
grn_obj_type_to_string(table->header.type)); + GRN_API_RETURN(NULL); + break; + } + + GRN_VOID_INIT(&raw_options); + revision = grn_obj_get_option_values(ctx, + table, + "tokenizer", + -1, + tokenizer->options_revision, + &raw_options); + if ((revision == GRN_OPTION_REVISION_UNCHANGED) || + (revision == GRN_OPTION_REVISION_NONE && tokenizer->options)) { + goto exit; + } + + options = open_options_func(ctx, table, &raw_options, user_data); + grn_table_tokenizer_set_options(ctx, + tokenizer, + options, + revision, + close_options_func); + +exit : + GRN_OBJ_FIN(ctx, &raw_options); + + GRN_API_RETURN(tokenizer->options); +} + -------------- next part -------------- HTMLの添付ファイルを保管しました... URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180405/862807dc/attachment-0001.htm