[Groonga-commit] groonga/groonga at 8c6ac59 [master] Support applying token filters

Zurück zum Archiv-Index

Kouhei Sutou null+****@clear*****
Thu Oct 2 22:48:17 JST 2014


Kouhei Sutou	2014-10-02 22:48:17 +0900 (Thu, 02 Oct 2014)

  New Revision: 8c6ac595c97d6c2be928be917861f1308f2142f0
  https://github.com/groonga/groonga/commit/8c6ac595c97d6c2be928be917861f1308f2142f0

  Merged fec9865: Merge pull request #209 from groonga/support-token-filters

  Message:
    Support applying token filters

  Modified files:
    lib/token.c
    lib/token.h

  Modified: lib/token.c (+125 -3)
===================================================================
--- lib/token.c    2014-10-02 22:40:17 +0900 (3056980)
+++ lib/token.c    2014-10-02 22:48:17 +0900 (618f852)
@@ -495,6 +495,56 @@ grn_token_fin(void)
   return GRN_SUCCESS;
 }
 
+static void
+grn_token_open_initialize_token_filters(grn_ctx *ctx, grn_token *token)
+{
+  grn_obj *token_filters = token->token_filters;
+  unsigned int i, n_token_filters;
+  grn_obj mode;
+
+  if (token_filters) {
+    n_token_filters = GRN_BULK_VSIZE(token_filters) / sizeof(grn_obj *);
+  } else {
+    n_token_filters = 0;
+  }
+
+  if (n_token_filters == 0) {
+    token->token_filter_ctxs = NULL;
+    return;
+  }
+
+  token->token_filter_ctxs = GRN_MALLOC(sizeof(grn_proc_ctx) * n_token_filters);
+  if (!token->token_filter_ctxs) {
+    ERR(GRN_NO_MEMORY_AVAILABLE,
+        "[token][open] failed to allocate token filter contexts");
+    return;
+  }
+
+  GRN_UINT32_INIT(&mode, 0);
+  GRN_UINT32_SET(ctx, &mode, token->mode);
+  for (i = 0; i < n_token_filters; i++) {
+    grn_obj *token_filter = GRN_PTR_VALUE_AT(token_filters, i);
+    grn_proc_ctx *token_filter_ctx = &token->token_filter_ctxs[i];
+    int n_args = 0;
+    grn_obj *args[2];
+
+    token_filter_ctx->caller = NULL;
+    token_filter_ctx->user_data.ptr = NULL;
+    token_filter_ctx->proc = (grn_proc *)token_filter;
+    token_filter_ctx->hooks = NULL;
+    token_filter_ctx->currh = NULL;
+    token_filter_ctx->phase = PROC_INIT;
+
+    args[n_args++] = token->table;
+    args[n_args++] = &mode;
+    ((grn_proc *)token_filter)->funcs[PROC_INIT](ctx,
+                                                 n_args, args,
+                                                 &token_filter_ctx->user_data);
+  }
+  GRN_OBJ_FIN(ctx, &mode);
+}
+
+
 grn_token *
 grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, size_t str_len,
                grn_token_mode mode, unsigned int flags)
@@ -557,6 +607,9 @@ grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, size_t str_len,
       ERR(GRN_TOKENIZER_ERROR, "grn_string_open failed at grn_token_open");
     }
   }
+
+  grn_token_open_initialize_token_filters(ctx, token);
+
   if (ctx->rc) {
     grn_token_close(ctx, token);
     token = NULL;
@@ -564,6 +617,50 @@ grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, size_t str_len,
   return token;
 }
 
+static int
+grn_token_next_apply_token_filters(grn_ctx *ctx,
+                                   grn_token *token,
+                                   grn_obj *current_token,
+                                   grn_obj *status)
+{
+  grn_obj *token_filters = token->token_filters;
+  unsigned int i, n_token_filters;
+
+  if (token_filters) {
+    n_token_filters = GRN_BULK_VSIZE(token_filters) / sizeof(grn_obj *);
+  } else {
+    n_token_filters = 0;
+  }
+  for (i = 0; i < n_token_filters; i++) {
+    grn_obj *token_filter = GRN_PTR_VALUE_AT(token_filters, i);
+    grn_proc_ctx *token_filter_ctx = &token->token_filter_ctxs[i];
+    int n_args = 0;
+    grn_obj *args[2];
+
+#define SKIP_FLAGS\
+    (GRN_TOKENIZER_TOKEN_SKIP |\
+     GRN_TOKENIZER_TOKEN_SKIP_WITH_POSITION)
+    if (GRN_INT32_VALUE(status) & SKIP_FLAGS) {
+      break;
+    }
+#undef SKIP_FLAGS
+
+    args[n_args++] = current_token;
+    args[n_args++] = status;
+    ((grn_proc *)token_filter)->funcs[PROC_NEXT](ctx,
+                                                 n_args,
+                                                 args,
+                                                 &token_filter_ctx->user_data);
+    status = grn_ctx_pop(ctx);
+    current_token = grn_ctx_pop(ctx);
+  }
+
+  token->curr = (const unsigned char *)GRN_TEXT_VALUE(current_token);
+  token->curr_size = GRN_TEXT_LEN(current_token);
+
+  return GRN_INT32_VALUE(status);
+}
+
 grn_id
 grn_token_next(grn_ctx *ctx, grn_token *token)
 {
@@ -577,9 +674,7 @@ grn_token_next(grn_ctx *ctx, grn_token *token)
       ((grn_proc *)tokenizer)->funcs[PROC_NEXT](ctx, 1, &table, &token->pctx.user_data);
       stat_ = grn_ctx_pop(ctx);
       curr_ = grn_ctx_pop(ctx);
-      token->curr = (const unsigned char *)GRN_TEXT_VALUE(curr_);
-      token->curr_size = GRN_TEXT_LEN(curr_);
-      status = GRN_UINT32_VALUE(stat_);
+      status = grn_token_next_apply_token_filters(ctx, token, curr_, stat_);
       token->status = ((status & GRN_TOKENIZER_TOKEN_LAST) ||
                        (token->mode == GRN_TOKEN_GET &&
                         (status & GRN_TOKENIZER_TOKEN_REACH_END)))
@@ -688,6 +783,32 @@ grn_token_next(grn_ctx *ctx, grn_token *token)
   return tid;
 }
 
+static void
+grn_token_close_token_filters(grn_ctx *ctx, grn_token *token)
+{
+  grn_obj *token_filters = token->token_filters;
+  unsigned int i, n_token_filters;
+
+  if (token_filters) {
+    n_token_filters = GRN_BULK_VSIZE(token_filters) / sizeof(grn_obj *);
+  } else {
+    n_token_filters = 0;
+  }
+  for (i = 0; i < n_token_filters; i++) {
+    grn_obj *token_filter = GRN_PTR_VALUE_AT(token_filters, i);
+    grn_proc_ctx *token_filter_ctx = &token->token_filter_ctxs[i];
+
+    ((grn_proc *)token_filter)->funcs[PROC_FIN](ctx,
+                                                1,
+                                                &token->table,
+                                                &token_filter_ctx->user_data);
+  }
+
+  if (token->token_filter_ctxs) {
+    GRN_FREE(token->token_filter_ctxs);
+  }
+}
+
 grn_rc
 grn_token_close(grn_ctx *ctx, grn_token *token)
 {
@@ -696,6 +817,7 @@ grn_token_close(grn_ctx *ctx, grn_token *token)
       ((grn_proc *)token->tokenizer)->funcs[PROC_FIN](ctx, 1, &token->table,
                                                       &token->pctx.user_data);
     }
+    grn_token_close_token_filters(ctx, token);
     if (token->nstr) {
       grn_obj_close(ctx, token->nstr);
     }

  Modified: lib/token.h (+2 -0)
===================================================================
--- lib/token.h    2014-10-02 22:40:17 +0900 (f69e30d)
+++ lib/token.h    2014-10-02 22:48:17 +0900 (fee8d0e)
@@ -63,6 +63,8 @@ typedef struct {
   grn_encoding encoding;
   grn_obj *tokenizer;
   grn_proc_ctx pctx;
+  grn_obj *token_filters;
+  grn_proc_ctx *token_filter_ctxs;
   uint32_t variant;
   grn_obj *nstr;
 } grn_token;
-------------- next part --------------
HTML����������������������������...
Download 



More information about the Groonga-commit mailing list
Zurück zum Archiv-Index