Kouhei Sutou
null+****@clear*****
Wed Jun 22 10:28:56 JST 2016
Kouhei Sutou 2016-06-22 10:28:56 +0900 (Wed, 22 Jun 2016) New Revision: 1b70bd64154ed21955c01b935d6c3843263ce3d5 https://github.com/groonga/groonga/commit/1b70bd64154ed21955c01b935d6c3843263ce3d5 Message: Extract dump implementation Added files: lib/proc/proc_dump.c Modified files: lib/grn_proc.h lib/proc.c lib/proc/sources.am Modified: lib/grn_proc.h (+1 -0) =================================================================== --- lib/grn_proc.h 2016-06-22 10:28:28 +0900 (3f3bd37) +++ lib/grn_proc.h 2016-06-22 10:28:56 +0900 (2056439) @@ -44,6 +44,7 @@ void grn_proc_init_config_get(grn_ctx *ctx); void grn_proc_init_config_set(grn_ctx *ctx); void grn_proc_init_config_delete(grn_ctx *ctx); void grn_proc_init_define_selector(grn_ctx *ctx); +void grn_proc_init_dump(grn_ctx *ctx); void grn_proc_init_edit_distance(grn_ctx *ctx); void grn_proc_init_fuzzy_search(grn_ctx *ctx); void grn_proc_init_highlight(grn_ctx *ctx); Modified: lib/proc.c (+1 -767) =================================================================== --- lib/proc.c 2016-06-22 10:28:28 +0900 (7f8548e) +++ lib/proc.c 2016-06-22 10:28:56 +0900 (2fdf0ab) @@ -129,8 +129,6 @@ exit : /**** procs ****/ -#define DUMP_COLUMNS "_id, _key, _value, *" - static grn_obj * proc_load(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) { @@ -567,708 +565,6 @@ exit : return NULL; } -static const size_t DUMP_FLUSH_THRESHOLD_SIZE = 256 * 1024; - -static void -dump_value(grn_ctx *ctx, grn_obj *outbuf, const char *value, int value_len) -{ - grn_obj escaped_value; - GRN_TEXT_INIT(&escaped_value, 0); - grn_text_esc(ctx, &escaped_value, value, value_len); - /* is no character escaped? */ - /* TODO false positive with spaces inside values */ - if (GRN_TEXT_LEN(&escaped_value) == value_len + 2) { - GRN_TEXT_PUT(ctx, outbuf, value, value_len); - } else { - GRN_TEXT_PUT(ctx, outbuf, - GRN_TEXT_VALUE(&escaped_value), GRN_TEXT_LEN(&escaped_value)); - } - grn_obj_close(ctx, &escaped_value); -} - -static void -dump_configs(grn_ctx *ctx, grn_obj *outbuf) -{ - grn_obj *config_cursor; - - config_cursor = grn_config_cursor_open(ctx); - if (!config_cursor) - return; - - while (grn_config_cursor_next(ctx, config_cursor)) { - const char *key; - uint32_t key_size; - const char *value; - uint32_t value_size; - - key_size = grn_config_cursor_get_key(ctx, config_cursor, &key); - value_size = grn_config_cursor_get_value(ctx, config_cursor, &value); - - GRN_TEXT_PUTS(ctx, outbuf, "config_set "); - dump_value(ctx, outbuf, key, key_size); - GRN_TEXT_PUTS(ctx, outbuf, " "); - dump_value(ctx, outbuf, value, value_size); - GRN_TEXT_PUTC(ctx, outbuf, '\n'); - } - grn_obj_close(ctx, config_cursor); -} - -static void -dump_plugins(grn_ctx *ctx, grn_obj *outbuf) -{ - grn_obj plugin_names; - unsigned int i, n; - - GRN_TEXT_INIT(&plugin_names, GRN_OBJ_VECTOR); - - grn_plugin_get_names(ctx, &plugin_names); - - n = grn_vector_size(ctx, &plugin_names); - if (n == 0) { - GRN_OBJ_FIN(ctx, &plugin_names); - return; - } - - if (GRN_TEXT_LEN(outbuf) > 0) { - GRN_TEXT_PUTC(ctx, outbuf, '\n'); - grn_ctx_output_flush(ctx, 0); - } - for (i = 0; i < n; i++) { - const char *name; - unsigned int name_size; - - name_size = grn_vector_get_element(ctx, &plugin_names, i, &name, NULL, NULL); - grn_text_printf(ctx, outbuf, "plugin_register %.*s\n", - (int)name_size, name); - } - - GRN_OBJ_FIN(ctx, &plugin_names); -} - -static void -dump_obj_name(grn_ctx *ctx, grn_obj *outbuf, grn_obj *obj) -{ - char name[GRN_TABLE_MAX_KEY_SIZE]; - int name_len; - name_len = grn_obj_name(ctx, obj, name, GRN_TABLE_MAX_KEY_SIZE); - dump_value(ctx, outbuf, name, name_len); -} - -static void -dump_column_name(grn_ctx *ctx, grn_obj *outbuf, grn_obj *column) -{ - char name[GRN_TABLE_MAX_KEY_SIZE]; - int name_len; - name_len = grn_column_name(ctx, column, name, GRN_TABLE_MAX_KEY_SIZE); - dump_value(ctx, outbuf, name, name_len); -} - -static void -dump_index_column_sources(grn_ctx *ctx, grn_obj *outbuf, grn_obj *column) -{ - grn_obj sources; - grn_id *source_ids; - int i, n; - - GRN_OBJ_INIT(&sources, GRN_BULK, 0, GRN_ID_NIL); - grn_obj_get_info(ctx, column, GRN_INFO_SOURCE, &sources); - - n = GRN_BULK_VSIZE(&sources) / sizeof(grn_id); - source_ids = (grn_id *)GRN_BULK_HEAD(&sources); - if (n > 0) { - GRN_TEXT_PUTC(ctx, outbuf, ' '); - } - for (i = 0; i < n; i++) { - grn_obj *source; - if ((source = grn_ctx_at(ctx, *source_ids))) { - if (i) { GRN_TEXT_PUTC(ctx, outbuf, ','); } - switch (source->header.type) { - case GRN_TABLE_PAT_KEY: - case GRN_TABLE_DAT_KEY: - case GRN_TABLE_HASH_KEY: - GRN_TEXT_PUT(ctx, outbuf, GRN_COLUMN_NAME_KEY, GRN_COLUMN_NAME_KEY_LEN); - break; - default: - dump_column_name(ctx, outbuf, source); - break; - } - } - source_ids++; - } - grn_obj_close(ctx, &sources); -} - -static void -dump_column(grn_ctx *ctx, grn_obj *outbuf , grn_obj *table, grn_obj *column) -{ - grn_obj *type; - grn_obj_flags default_flags = GRN_OBJ_PERSISTENT; - - type = grn_ctx_at(ctx, ((grn_db_obj *)column)->range); - if (!type) { - // ERR(GRN_RANGE_ERROR, "couldn't get column's type object"); - return; - } - - GRN_TEXT_PUTS(ctx, outbuf, "column_create "); - dump_obj_name(ctx, outbuf, table); - GRN_TEXT_PUTC(ctx, outbuf, ' '); - dump_column_name(ctx, outbuf, column); - GRN_TEXT_PUTC(ctx, outbuf, ' '); - if (type->header.type == GRN_TYPE) { - default_flags |= type->header.flags; - } - grn_dump_column_create_flags(ctx, - column->header.flags & ~default_flags, - outbuf); - GRN_TEXT_PUTC(ctx, outbuf, ' '); - dump_obj_name(ctx, outbuf, type); - if (column->header.flags & GRN_OBJ_COLUMN_INDEX) { - dump_index_column_sources(ctx, outbuf, column); - } - GRN_TEXT_PUTC(ctx, outbuf, '\n'); - - grn_obj_unlink(ctx, type); -} - -static void -dump_columns(grn_ctx *ctx, grn_obj *outbuf, grn_obj *table, - grn_obj *pending_reference_columns) -{ - grn_hash *columns; - columns = grn_hash_create(ctx, NULL, sizeof(grn_id), 0, - GRN_OBJ_TABLE_HASH_KEY|GRN_HASH_TINY); - if (!columns) { - ERR(GRN_NO_MEMORY_AVAILABLE, "couldn't create a hash to hold columns"); - return; - } - - if (grn_table_columns(ctx, table, NULL, 0, (grn_obj *)columns) >= 0) { - grn_id *key; - - GRN_HASH_EACH(ctx, columns, id, &key, NULL, NULL, { - grn_obj *column; - if ((column = grn_ctx_at(ctx, *key))) { - if (GRN_OBJ_INDEX_COLUMNP(column)) { - /* do nothing */ - } else if (grn_obj_is_reference_column(ctx, column)) { - GRN_PTR_PUT(ctx, pending_reference_columns, column); - } else { - dump_column(ctx, outbuf, table, column); - grn_obj_unlink(ctx, column); - } - } - }); - } - grn_hash_close(ctx, columns); -} - -static void -dump_record_column_vector(grn_ctx *ctx, grn_obj *outbuf, grn_id id, - grn_obj *column, grn_id range_id, grn_obj *buf) -{ - grn_obj *range; - - range = grn_ctx_at(ctx, range_id); - if (GRN_OBJ_TABLEP(range) || - (range->header.flags & GRN_OBJ_KEY_VAR_SIZE) == 0) { - GRN_OBJ_INIT(buf, GRN_UVECTOR, 0, range_id); - grn_obj_get_value(ctx, column, id, buf); - grn_text_otoj(ctx, outbuf, buf, NULL); - } else { - grn_obj_format *format_argument = NULL; - grn_obj_format format; - if (column->header.flags & GRN_OBJ_WITH_WEIGHT) { - format.flags = GRN_OBJ_FORMAT_WITH_WEIGHT; - format_argument = &format; - } - GRN_OBJ_INIT(buf, GRN_VECTOR, 0, range_id); - grn_obj_get_value(ctx, column, id, buf); - grn_text_otoj(ctx, outbuf, buf, format_argument); - } - grn_obj_unlink(ctx, range); - grn_obj_unlink(ctx, buf); -} - -static void -dump_records(grn_ctx *ctx, grn_obj *outbuf, grn_obj *table) -{ - grn_obj **columns; - grn_id old_id = 0, id; - grn_table_cursor *cursor; - int i, ncolumns, n_use_columns; - grn_obj columnbuf, delete_commands, use_columns, column_name; - grn_bool have_index_column = GRN_FALSE; - grn_bool have_data_column = GRN_FALSE; - - switch (table->header.type) { - case GRN_TABLE_HASH_KEY: - case GRN_TABLE_PAT_KEY: - case GRN_TABLE_DAT_KEY: - case GRN_TABLE_NO_KEY: - break; - default: - return; - } - - if (grn_table_size(ctx, table) == 0) { - return; - } - - GRN_PTR_INIT(&columnbuf, GRN_OBJ_VECTOR, GRN_ID_NIL); - grn_obj_columns(ctx, table, DUMP_COLUMNS, strlen(DUMP_COLUMNS), &columnbuf); - columns = (grn_obj **)GRN_BULK_HEAD(&columnbuf); - ncolumns = GRN_BULK_VSIZE(&columnbuf)/sizeof(grn_obj *); - - GRN_PTR_INIT(&use_columns, GRN_OBJ_VECTOR, GRN_ID_NIL); - GRN_TEXT_INIT(&column_name, 0); - for (i = 0; i < ncolumns; i++) { - if (GRN_OBJ_INDEX_COLUMNP(columns[i])) { - have_index_column = GRN_TRUE; - continue; - } - - if (columns[i]->header.type != GRN_ACCESSOR) { - have_data_column = GRN_TRUE; - } - - GRN_BULK_REWIND(&column_name); - grn_column_name_(ctx, columns[i], &column_name); - if (table->header.type != GRN_TABLE_NO_KEY && - GRN_TEXT_LEN(&column_name) == GRN_COLUMN_NAME_ID_LEN && - memcmp(GRN_TEXT_VALUE(&column_name), - GRN_COLUMN_NAME_ID, - GRN_COLUMN_NAME_ID_LEN) == 0) { - continue; - } - - if (table->header.type == GRN_TABLE_NO_KEY && - GRN_TEXT_LEN(&column_name) == GRN_COLUMN_NAME_KEY_LEN && - memcmp(GRN_TEXT_VALUE(&column_name), - GRN_COLUMN_NAME_KEY, - GRN_COLUMN_NAME_KEY_LEN) == 0) { - continue; - } - - GRN_PTR_PUT(ctx, &use_columns, columns[i]); - } - - if (have_index_column && !have_data_column) { - goto exit; - } - - if (GRN_TEXT_LEN(outbuf) > 0) { - GRN_TEXT_PUTC(ctx, outbuf, '\n'); - } - - GRN_TEXT_PUTS(ctx, outbuf, "load --table "); - dump_obj_name(ctx, outbuf, table); - GRN_TEXT_PUTS(ctx, outbuf, "\n[\n"); - - n_use_columns = GRN_BULK_VSIZE(&use_columns) / sizeof(grn_obj *); - GRN_TEXT_PUTC(ctx, outbuf, '['); - for (i = 0; i < n_use_columns; i++) { - grn_obj *column; - column = *((grn_obj **)GRN_BULK_HEAD(&use_columns) + i); - if (i) { GRN_TEXT_PUTC(ctx, outbuf, ','); } - GRN_BULK_REWIND(&column_name); - grn_column_name_(ctx, column, &column_name); - grn_text_otoj(ctx, outbuf, &column_name, NULL); - } - GRN_TEXT_PUTS(ctx, outbuf, "],\n"); - - GRN_TEXT_INIT(&delete_commands, 0); - cursor = grn_table_cursor_open(ctx, table, NULL, 0, NULL, 0, 0, -1, - GRN_CURSOR_BY_KEY); - for (i = 0; (id = grn_table_cursor_next(ctx, cursor)) != GRN_ID_NIL; - ++i, old_id = id) { - int is_value_column; - int j; - grn_obj buf; - if (i) { GRN_TEXT_PUTS(ctx, outbuf, ",\n"); } - if (table->header.type == GRN_TABLE_NO_KEY && old_id + 1 < id) { - grn_id current_id; - for (current_id = old_id + 1; current_id < id; current_id++) { - GRN_TEXT_PUTS(ctx, outbuf, "[],\n"); - GRN_TEXT_PUTS(ctx, &delete_commands, "delete --table "); - dump_obj_name(ctx, &delete_commands, table); - GRN_TEXT_PUTS(ctx, &delete_commands, " --id "); - grn_text_lltoa(ctx, &delete_commands, current_id); - GRN_TEXT_PUTC(ctx, &delete_commands, '\n'); - } - } - GRN_TEXT_PUTC(ctx, outbuf, '['); - for (j = 0; j < n_use_columns; j++) { - grn_id range; - grn_obj *column; - column = *((grn_obj **)GRN_BULK_HEAD(&use_columns) + j); - GRN_BULK_REWIND(&column_name); - grn_column_name_(ctx, column, &column_name); - if (GRN_TEXT_LEN(&column_name) == GRN_COLUMN_NAME_VALUE_LEN && - !memcmp(GRN_TEXT_VALUE(&column_name), - GRN_COLUMN_NAME_VALUE, - GRN_COLUMN_NAME_VALUE_LEN)) { - is_value_column = 1; - } else { - is_value_column = 0; - } - range = grn_obj_get_range(ctx, column); - - if (j) { GRN_TEXT_PUTC(ctx, outbuf, ','); } - switch (column->header.type) { - case GRN_COLUMN_VAR_SIZE: - case GRN_COLUMN_FIX_SIZE: - switch (column->header.flags & GRN_OBJ_COLUMN_TYPE_MASK) { - case GRN_OBJ_COLUMN_VECTOR: - dump_record_column_vector(ctx, outbuf, id, column, range, &buf); - break; - case GRN_OBJ_COLUMN_SCALAR: - { - GRN_OBJ_INIT(&buf, GRN_BULK, 0, range); - grn_obj_get_value(ctx, column, id, &buf); - grn_text_otoj(ctx, outbuf, &buf, NULL); - grn_obj_unlink(ctx, &buf); - } - break; - default: - ERR(GRN_OPERATION_NOT_SUPPORTED, - "unsupported column type: %#x", - column->header.type); - break; - } - break; - case GRN_COLUMN_INDEX: - break; - case GRN_ACCESSOR: - { - GRN_OBJ_INIT(&buf, GRN_BULK, 0, range); - grn_obj_get_value(ctx, column, id, &buf); - /* XXX maybe, grn_obj_get_range() should not unconditionally return - GRN_DB_INT32 when column is GRN_ACCESSOR and - GRN_ACCESSOR_GET_VALUE */ - if (is_value_column) { - buf.header.domain = ((grn_db_obj *)table)->range; - } - grn_text_otoj(ctx, outbuf, &buf, NULL); - grn_obj_unlink(ctx, &buf); - } - break; - default: - ERR(GRN_OPERATION_NOT_SUPPORTED, - "unsupported header type %#x", - column->header.type); - break; - } - } - GRN_TEXT_PUTC(ctx, outbuf, ']'); - if (GRN_TEXT_LEN(outbuf) >= DUMP_FLUSH_THRESHOLD_SIZE) { - grn_ctx_output_flush(ctx, 0); - } - } - grn_table_cursor_close(ctx, cursor); - GRN_TEXT_PUTS(ctx, outbuf, "\n]\n"); - GRN_TEXT_PUT(ctx, outbuf, GRN_TEXT_VALUE(&delete_commands), - GRN_TEXT_LEN(&delete_commands)); - grn_obj_unlink(ctx, &delete_commands); - -exit : - grn_obj_unlink(ctx, &column_name); - grn_obj_unlink(ctx, &use_columns); - - for (i = 0; i < ncolumns; i++) { - grn_obj_unlink(ctx, columns[i]); - } - grn_obj_unlink(ctx, &columnbuf); -} - -static void -dump_table(grn_ctx *ctx, grn_obj *outbuf, grn_obj *table, - grn_obj *pending_reference_columns) -{ - grn_obj *domain = NULL, *range = NULL; - grn_table_flags flags; - grn_table_flags default_flags = GRN_OBJ_PERSISTENT; - grn_obj *default_tokenizer; - grn_obj *normalizer; - grn_obj *token_filters; - - switch (table->header.type) { - case GRN_TABLE_HASH_KEY: - case GRN_TABLE_PAT_KEY: - case GRN_TABLE_DAT_KEY: - domain = grn_ctx_at(ctx, table->header.domain); - break; - default: - break; - } - - if (GRN_TEXT_LEN(outbuf) > 0) { - GRN_TEXT_PUTC(ctx, outbuf, '\n'); - grn_ctx_output_flush(ctx, 0); - } - - grn_table_get_info(ctx, table, - &flags, - NULL, - &default_tokenizer, - &normalizer, - &token_filters); - - GRN_TEXT_PUTS(ctx, outbuf, "table_create "); - dump_obj_name(ctx, outbuf, table); - GRN_TEXT_PUTC(ctx, outbuf, ' '); - grn_dump_table_create_flags(ctx, - flags & ~default_flags, - outbuf); - if (domain) { - GRN_TEXT_PUTC(ctx, outbuf, ' '); - dump_obj_name(ctx, outbuf, domain); - } - if (((grn_db_obj *)table)->range != GRN_ID_NIL) { - range = grn_ctx_at(ctx, ((grn_db_obj *)table)->range); - if (!range) { - // ERR(GRN_RANGE_ERROR, "couldn't get table's value_type object"); - return; - } - if (table->header.type != GRN_TABLE_NO_KEY) { - GRN_TEXT_PUTC(ctx, outbuf, ' '); - } else { - GRN_TEXT_PUTS(ctx, outbuf, " --value_type "); - } - dump_obj_name(ctx, outbuf, range); - grn_obj_unlink(ctx, range); - } - if (default_tokenizer) { - GRN_TEXT_PUTS(ctx, outbuf, " --default_tokenizer "); - dump_obj_name(ctx, outbuf, default_tokenizer); - } - if (normalizer) { - GRN_TEXT_PUTS(ctx, outbuf, " --normalizer "); - dump_obj_name(ctx, outbuf, normalizer); - } - if (table->header.type != GRN_TABLE_NO_KEY) { - int n_token_filters; - - n_token_filters = GRN_BULK_VSIZE(token_filters) / sizeof(grn_obj *); - if (n_token_filters > 0) { - int i; - GRN_TEXT_PUTS(ctx, outbuf, " --token_filters "); - for (i = 0; i < n_token_filters; i++) { - grn_obj *token_filter = GRN_PTR_VALUE_AT(token_filters, i); - if (i > 0) { - GRN_TEXT_PUTC(ctx, outbuf, ','); - } - dump_obj_name(ctx, outbuf, token_filter); - } - } - } - - GRN_TEXT_PUTC(ctx, outbuf, '\n'); - - if (domain) { - grn_obj_unlink(ctx, domain); - } - - dump_columns(ctx, outbuf, table, pending_reference_columns); -} - -static void -dump_pending_columns(grn_ctx *ctx, grn_obj *outbuf, grn_obj *pending_columns) -{ - size_t i, n_columns; - - n_columns = GRN_BULK_VSIZE(pending_columns) / sizeof(grn_obj *); - if (n_columns == 0) { - return; - } - - if (GRN_TEXT_LEN(outbuf) > 0) { - GRN_TEXT_PUTC(ctx, outbuf, '\n'); - grn_ctx_output_flush(ctx, 0); - } - - for (i = 0; i < n_columns; i++) { - grn_obj *table, *column; - - column = GRN_PTR_VALUE_AT(pending_columns, i); - table = grn_ctx_at(ctx, column->header.domain); - dump_column(ctx, outbuf, table, column); - grn_obj_unlink(ctx, column); - grn_obj_unlink(ctx, table); - } -} - -static void -dump_schema(grn_ctx *ctx, grn_obj *outbuf) -{ - grn_obj *db = ctx->impl->db; - grn_table_cursor *cur; - grn_id id; - grn_obj pending_reference_columns; - - cur = grn_table_cursor_open(ctx, db, NULL, 0, NULL, 0, 0, -1, - GRN_CURSOR_BY_ID); - if (!cur) { - return; - } - - GRN_PTR_INIT(&pending_reference_columns, GRN_OBJ_VECTOR, GRN_ID_NIL); - while ((id = grn_table_cursor_next(ctx, cur)) != GRN_ID_NIL) { - grn_obj *object; - - if ((object = grn_ctx_at(ctx, id))) { - switch (object->header.type) { - case GRN_TABLE_HASH_KEY: - case GRN_TABLE_PAT_KEY: - case GRN_TABLE_DAT_KEY: - case GRN_TABLE_NO_KEY: - dump_table(ctx, outbuf, object, &pending_reference_columns); - break; - default: - break; - } - grn_obj_unlink(ctx, object); - } else { - /* XXX: this clause is executed when MeCab tokenizer is enabled in - database but the groonga isn't supported MeCab. - We should return error mesage about it and error exit status - but it's too difficult for this architecture. :< */ - ERRCLR(ctx); - } - } - grn_table_cursor_close(ctx, cur); - - dump_pending_columns(ctx, outbuf, &pending_reference_columns); - grn_obj_close(ctx, &pending_reference_columns); -} - -static void -dump_selected_tables_records(grn_ctx *ctx, grn_obj *outbuf, grn_obj *tables) -{ - const char *p, *e; - - p = GRN_TEXT_VALUE(tables); - e = p + GRN_TEXT_LEN(tables); - while (p < e) { - int len; - grn_obj *table; - const char *token, *token_e; - - if ((len = grn_isspace(p, ctx->encoding))) { - p += len; - continue; - } - - token = p; - if (!(('a' <= *p && *p <= 'z') || - ('A' <= *p && *p <= 'Z') || - (*p == '_'))) { - while (p < e && !grn_isspace(p, ctx->encoding)) { - p++; - } - GRN_LOG(ctx, GRN_LOG_WARNING, "invalid table name is ignored: <%.*s>\n", - (int)(p - token), token); - continue; - } - while (p < e && - (('a' <= *p && *p <= 'z') || - ('A' <= *p && *p <= 'Z') || - ('0' <= *p && *p <= '9') || - (*p == '_'))) { - p++; - } - token_e = p; - while (p < e && (len = grn_isspace(p, ctx->encoding))) { - p += len; - continue; - } - if (p < e && *p == ',') { - p++; - } - - if ((table = grn_ctx_get(ctx, token, token_e - token))) { - dump_records(ctx, outbuf, table); - grn_obj_unlink(ctx, table); - } else { - GRN_LOG(ctx, GRN_LOG_WARNING, - "nonexistent table name is ignored: <%.*s>\n", - (int)(token_e - token), token); - } - } -} - -static void -dump_all_records(grn_ctx *ctx, grn_obj *outbuf) -{ - grn_obj *db = ctx->impl->db; - grn_table_cursor *cur; - if ((cur = grn_table_cursor_open(ctx, db, NULL, 0, NULL, 0, 0, -1, - GRN_CURSOR_BY_ID))) { - grn_id id; - - while ((id = grn_table_cursor_next(ctx, cur)) != GRN_ID_NIL) { - grn_obj *table; - - if ((table = grn_ctx_at(ctx, id))) { - dump_records(ctx, outbuf, table); - grn_obj_unlink(ctx, table); - } else { - /* XXX: this clause is executed when MeCab tokenizer is enabled in - database but the groonga isn't supported MeCab. - We should return error mesage about it and error exit status - but it's too difficult for this architecture. :< */ - ERRCLR(ctx); - } - } - grn_table_cursor_close(ctx, cur); - } -} - -static void -dump_indexes(grn_ctx *ctx, grn_obj *outbuf) -{ - grn_obj *db = ctx->impl->db; - grn_table_cursor *cursor; - grn_id id; - grn_bool is_first_index_column = GRN_TRUE; - - cursor = grn_table_cursor_open(ctx, db, NULL, 0, NULL, 0, 0, -1, - GRN_CURSOR_BY_ID); - if (!cursor) { - return; - } - - while ((id = grn_table_cursor_next(ctx, cursor)) != GRN_ID_NIL) { - grn_obj *object; - - object = grn_ctx_at(ctx, id); - if (!object) { - /* XXX: this clause is executed when MeCab tokenizer is enabled in - database but the groonga isn't supported MeCab. - We should return error mesage about it and error exit status - but it's too difficult for this architecture. :< */ - ERRCLR(ctx); - continue; - } - - if (object->header.type == GRN_COLUMN_INDEX) { - grn_obj *table; - grn_obj *column = object; - - if (is_first_index_column && GRN_TEXT_LEN(outbuf) > 0) { - GRN_TEXT_PUTC(ctx, outbuf, '\n'); - } - is_first_index_column = GRN_FALSE; - - table = grn_ctx_at(ctx, column->header.domain); - dump_column(ctx, outbuf, table, column); - grn_obj_unlink(ctx, table); - } - grn_obj_unlink(ctx, object); - } - grn_table_cursor_close(ctx, cursor); -} - grn_bool grn_proc_option_value_bool(grn_ctx *ctx, grn_obj *option, @@ -1358,62 +654,6 @@ grn_proc_option_value_string(grn_ctx *ctx, } static grn_obj * -proc_dump(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) -{ - grn_obj *outbuf = ctx->impl->output.buf; - grn_obj *tables = VAR(0); - grn_obj *dump_plugins_raw = VAR(1); - grn_obj *dump_schema_raw = VAR(2); - grn_obj *dump_records_raw = VAR(3); - grn_obj *dump_indexes_raw = VAR(4); - grn_obj *dump_configs_raw = VAR(5); - grn_bool is_dump_plugins; - grn_bool is_dump_schema; - grn_bool is_dump_records; - grn_bool is_dump_indexes; - grn_bool is_dump_configs; - - grn_ctx_set_output_type(ctx, GRN_CONTENT_GROONGA_COMMAND_LIST); - - is_dump_plugins = grn_proc_option_value_bool(ctx, dump_plugins_raw, GRN_TRUE); - is_dump_schema = grn_proc_option_value_bool(ctx, dump_schema_raw, GRN_TRUE); - is_dump_records = grn_proc_option_value_bool(ctx, dump_records_raw, GRN_TRUE); - is_dump_indexes = grn_proc_option_value_bool(ctx, dump_indexes_raw, GRN_TRUE); - is_dump_configs = grn_proc_option_value_bool(ctx, dump_configs_raw, GRN_TRUE); - - if (is_dump_configs) { - dump_configs(ctx, outbuf); - } - if (is_dump_plugins) { - dump_plugins(ctx, outbuf); - } - if (is_dump_schema) { - dump_schema(ctx, outbuf); - } - if (is_dump_records) { - /* To update index columns correctly, we first create the whole schema, then - load non-derivative records, while skipping records of index columns. That - way, groonga will silently do the job of updating index columns for us. */ - if (GRN_TEXT_LEN(tables) > 0) { - dump_selected_tables_records(ctx, outbuf, tables); - } else { - dump_all_records(ctx, outbuf); - } - } - if (is_dump_indexes) { - dump_indexes(ctx, outbuf); - } - - /* remove the last newline because another one will be added by the caller. - maybe, the caller of proc functions currently doesn't consider the - possibility of multiple-line output from proc functions. */ - if (GRN_BULK_VSIZE(outbuf) > 0) { - grn_bulk_truncate(ctx, outbuf, GRN_BULK_VSIZE(outbuf) - 1); - } - return NULL; -} - -static grn_obj * proc_cache_limit(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) { grn_cache *cache; @@ -4154,13 +3394,7 @@ grn_db_init_builtin_commands(grn_ctx *ctx) DEF_VAR(vars[0], "max"); DEF_COMMAND("cache_limit", proc_cache_limit, 1, vars); - DEF_VAR(vars[0], "tables"); - DEF_VAR(vars[1], "dump_plugins"); - DEF_VAR(vars[2], "dump_schema"); - DEF_VAR(vars[3], "dump_records"); - DEF_VAR(vars[4], "dump_indexes"); - DEF_VAR(vars[5], "dump_configs"); - DEF_COMMAND("dump", proc_dump, 6, vars); + grn_proc_init_dump(ctx); /* Deprecated. Use "plugin_register" instead. */ DEF_VAR(vars[0], "path"); Added: lib/proc/proc_dump.c (+819 -0) 100644 =================================================================== --- /dev/null +++ lib/proc/proc_dump.c 2016-06-22 10:28:56 +0900 (8af0fb2) @@ -0,0 +1,819 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2009-2016 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include "../grn_proc.h" +#include "../grn_ctx_impl.h" +#include "../grn_db.h" +#include "../grn_str.h" + +#include <groonga/plugin.h> + +#define DUMP_COLUMNS "_id, _key, _value, *" + +static const size_t DUMP_FLUSH_THRESHOLD_SIZE = 256 * 1024; + +static void +dump_value(grn_ctx *ctx, grn_obj *outbuf, const char *value, int value_len) +{ + grn_obj escaped_value; + GRN_TEXT_INIT(&escaped_value, 0); + grn_text_esc(ctx, &escaped_value, value, value_len); + /* is no character escaped? */ + /* TODO false positive with spaces inside values */ + if (GRN_TEXT_LEN(&escaped_value) == value_len + 2) { + GRN_TEXT_PUT(ctx, outbuf, value, value_len); + } else { + GRN_TEXT_PUT(ctx, outbuf, + GRN_TEXT_VALUE(&escaped_value), GRN_TEXT_LEN(&escaped_value)); + } + grn_obj_close(ctx, &escaped_value); +} + +static void +dump_configs(grn_ctx *ctx, grn_obj *outbuf) +{ + grn_obj *config_cursor; + + config_cursor = grn_config_cursor_open(ctx); + if (!config_cursor) + return; + + while (grn_config_cursor_next(ctx, config_cursor)) { + const char *key; + uint32_t key_size; + const char *value; + uint32_t value_size; + + key_size = grn_config_cursor_get_key(ctx, config_cursor, &key); + value_size = grn_config_cursor_get_value(ctx, config_cursor, &value); + + GRN_TEXT_PUTS(ctx, outbuf, "config_set "); + dump_value(ctx, outbuf, key, key_size); + GRN_TEXT_PUTS(ctx, outbuf, " "); + dump_value(ctx, outbuf, value, value_size); + GRN_TEXT_PUTC(ctx, outbuf, '\n'); + } + grn_obj_close(ctx, config_cursor); +} + +static void +dump_plugins(grn_ctx *ctx, grn_obj *outbuf) +{ + grn_obj plugin_names; + unsigned int i, n; + + GRN_TEXT_INIT(&plugin_names, GRN_OBJ_VECTOR); + + grn_plugin_get_names(ctx, &plugin_names); + + n = grn_vector_size(ctx, &plugin_names); + if (n == 0) { + GRN_OBJ_FIN(ctx, &plugin_names); + return; + } + + if (GRN_TEXT_LEN(outbuf) > 0) { + GRN_TEXT_PUTC(ctx, outbuf, '\n'); + grn_ctx_output_flush(ctx, 0); + } + for (i = 0; i < n; i++) { + const char *name; + unsigned int name_size; + + name_size = grn_vector_get_element(ctx, &plugin_names, i, &name, NULL, NULL); + grn_text_printf(ctx, outbuf, "plugin_register %.*s\n", + (int)name_size, name); + } + + GRN_OBJ_FIN(ctx, &plugin_names); +} + +static void +dump_obj_name(grn_ctx *ctx, grn_obj *outbuf, grn_obj *obj) +{ + char name[GRN_TABLE_MAX_KEY_SIZE]; + int name_len; + name_len = grn_obj_name(ctx, obj, name, GRN_TABLE_MAX_KEY_SIZE); + dump_value(ctx, outbuf, name, name_len); +} + +static void +dump_column_name(grn_ctx *ctx, grn_obj *outbuf, grn_obj *column) +{ + char name[GRN_TABLE_MAX_KEY_SIZE]; + int name_len; + name_len = grn_column_name(ctx, column, name, GRN_TABLE_MAX_KEY_SIZE); + dump_value(ctx, outbuf, name, name_len); +} + +static void +dump_index_column_sources(grn_ctx *ctx, grn_obj *outbuf, grn_obj *column) +{ + grn_obj sources; + grn_id *source_ids; + int i, n; + + GRN_OBJ_INIT(&sources, GRN_BULK, 0, GRN_ID_NIL); + grn_obj_get_info(ctx, column, GRN_INFO_SOURCE, &sources); + + n = GRN_BULK_VSIZE(&sources) / sizeof(grn_id); + source_ids = (grn_id *)GRN_BULK_HEAD(&sources); + if (n > 0) { + GRN_TEXT_PUTC(ctx, outbuf, ' '); + } + for (i = 0; i < n; i++) { + grn_obj *source; + if ((source = grn_ctx_at(ctx, *source_ids))) { + if (i) { GRN_TEXT_PUTC(ctx, outbuf, ','); } + switch (source->header.type) { + case GRN_TABLE_PAT_KEY: + case GRN_TABLE_DAT_KEY: + case GRN_TABLE_HASH_KEY: + GRN_TEXT_PUT(ctx, outbuf, GRN_COLUMN_NAME_KEY, GRN_COLUMN_NAME_KEY_LEN); + break; + default: + dump_column_name(ctx, outbuf, source); + break; + } + } + source_ids++; + } + grn_obj_close(ctx, &sources); +} + +static void +dump_column(grn_ctx *ctx, grn_obj *outbuf , grn_obj *table, grn_obj *column) +{ + grn_obj *type; + grn_obj_flags default_flags = GRN_OBJ_PERSISTENT; + + type = grn_ctx_at(ctx, grn_obj_get_range(ctx, column)); + if (!type) { + // ERR(GRN_RANGE_ERROR, "couldn't get column's type object"); + return; + } + + GRN_TEXT_PUTS(ctx, outbuf, "column_create "); + dump_obj_name(ctx, outbuf, table); + GRN_TEXT_PUTC(ctx, outbuf, ' '); + dump_column_name(ctx, outbuf, column); + GRN_TEXT_PUTC(ctx, outbuf, ' '); + if (type->header.type == GRN_TYPE) { + default_flags |= type->header.flags; + } + grn_dump_column_create_flags(ctx, + column->header.flags & ~default_flags, + outbuf); + GRN_TEXT_PUTC(ctx, outbuf, ' '); + dump_obj_name(ctx, outbuf, type); + if (column->header.flags & GRN_OBJ_COLUMN_INDEX) { + dump_index_column_sources(ctx, outbuf, column); + } + GRN_TEXT_PUTC(ctx, outbuf, '\n'); + + grn_obj_unlink(ctx, type); +} + +static void +dump_columns(grn_ctx *ctx, grn_obj *outbuf, grn_obj *table, + grn_obj *pending_reference_columns) +{ + grn_hash *columns; + columns = grn_hash_create(ctx, NULL, sizeof(grn_id), 0, + GRN_OBJ_TABLE_HASH_KEY|GRN_HASH_TINY); + if (!columns) { + GRN_PLUGIN_ERROR(ctx, + GRN_NO_MEMORY_AVAILABLE, + "couldn't create a hash to hold columns"); + return; + } + + if (grn_table_columns(ctx, table, NULL, 0, (grn_obj *)columns) >= 0) { + grn_id *key; + + GRN_HASH_EACH(ctx, columns, id, &key, NULL, NULL, { + grn_obj *column; + if ((column = grn_ctx_at(ctx, *key))) { + if (grn_obj_is_index_column(ctx, column)) { + /* do nothing */ + } else if (grn_obj_is_reference_column(ctx, column)) { + GRN_PTR_PUT(ctx, pending_reference_columns, column); + } else { + dump_column(ctx, outbuf, table, column); + grn_obj_unlink(ctx, column); + } + } + }); + } + grn_hash_close(ctx, columns); +} + +static void +dump_record_column_vector(grn_ctx *ctx, grn_obj *outbuf, grn_id id, + grn_obj *column, grn_id range_id, grn_obj *buf) +{ + grn_obj *range; + + range = grn_ctx_at(ctx, range_id); + if (grn_obj_is_table(ctx, range) || + (range->header.flags & GRN_OBJ_KEY_VAR_SIZE) == 0) { + GRN_OBJ_INIT(buf, GRN_UVECTOR, 0, range_id); + grn_obj_get_value(ctx, column, id, buf); + grn_text_otoj(ctx, outbuf, buf, NULL); + } else { + grn_obj_format *format_argument = NULL; + grn_obj_format format; + if (column->header.flags & GRN_OBJ_WITH_WEIGHT) { + format.flags = GRN_OBJ_FORMAT_WITH_WEIGHT; + format_argument = &format; + } + GRN_OBJ_INIT(buf, GRN_VECTOR, 0, range_id); + grn_obj_get_value(ctx, column, id, buf); + grn_text_otoj(ctx, outbuf, buf, format_argument); + } + grn_obj_unlink(ctx, range); + grn_obj_unlink(ctx, buf); +} + +static void +dump_records(grn_ctx *ctx, grn_obj *outbuf, grn_obj *table) +{ + grn_obj **columns; + grn_id old_id = 0, id; + grn_table_cursor *cursor; + int i, ncolumns, n_use_columns; + grn_obj columnbuf, delete_commands, use_columns, column_name; + grn_bool have_index_column = GRN_FALSE; + grn_bool have_data_column = GRN_FALSE; + + switch (table->header.type) { + case GRN_TABLE_HASH_KEY: + case GRN_TABLE_PAT_KEY: + case GRN_TABLE_DAT_KEY: + case GRN_TABLE_NO_KEY: + break; + default: + return; + } + + if (grn_table_size(ctx, table) == 0) { + return; + } + + GRN_PTR_INIT(&columnbuf, GRN_OBJ_VECTOR, GRN_ID_NIL); + grn_obj_columns(ctx, table, DUMP_COLUMNS, strlen(DUMP_COLUMNS), &columnbuf); + columns = (grn_obj **)GRN_BULK_HEAD(&columnbuf); + ncolumns = GRN_BULK_VSIZE(&columnbuf)/sizeof(grn_obj *); + + GRN_PTR_INIT(&use_columns, GRN_OBJ_VECTOR, GRN_ID_NIL); + GRN_TEXT_INIT(&column_name, 0); + for (i = 0; i < ncolumns; i++) { + if (grn_obj_is_index_column(ctx, columns[i])) { + have_index_column = GRN_TRUE; + continue; + } + + if (columns[i]->header.type != GRN_ACCESSOR) { + have_data_column = GRN_TRUE; + } + + GRN_BULK_REWIND(&column_name); + grn_column_name_(ctx, columns[i], &column_name); + if (table->header.type != GRN_TABLE_NO_KEY && + GRN_TEXT_LEN(&column_name) == GRN_COLUMN_NAME_ID_LEN && + memcmp(GRN_TEXT_VALUE(&column_name), + GRN_COLUMN_NAME_ID, + GRN_COLUMN_NAME_ID_LEN) == 0) { + continue; + } + + if (table->header.type == GRN_TABLE_NO_KEY && + GRN_TEXT_LEN(&column_name) == GRN_COLUMN_NAME_KEY_LEN && + memcmp(GRN_TEXT_VALUE(&column_name), + GRN_COLUMN_NAME_KEY, + GRN_COLUMN_NAME_KEY_LEN) == 0) { + continue; + } + + GRN_PTR_PUT(ctx, &use_columns, columns[i]); + } + + if (have_index_column && !have_data_column) { + goto exit; + } + + if (GRN_TEXT_LEN(outbuf) > 0) { + GRN_TEXT_PUTC(ctx, outbuf, '\n'); + } + + GRN_TEXT_PUTS(ctx, outbuf, "load --table "); + dump_obj_name(ctx, outbuf, table); + GRN_TEXT_PUTS(ctx, outbuf, "\n[\n"); + + n_use_columns = GRN_BULK_VSIZE(&use_columns) / sizeof(grn_obj *); + GRN_TEXT_PUTC(ctx, outbuf, '['); + for (i = 0; i < n_use_columns; i++) { + grn_obj *column; + column = *((grn_obj **)GRN_BULK_HEAD(&use_columns) + i); + if (i) { GRN_TEXT_PUTC(ctx, outbuf, ','); } + GRN_BULK_REWIND(&column_name); + grn_column_name_(ctx, column, &column_name); + grn_text_otoj(ctx, outbuf, &column_name, NULL); + } + GRN_TEXT_PUTS(ctx, outbuf, "],\n"); + + GRN_TEXT_INIT(&delete_commands, 0); + cursor = grn_table_cursor_open(ctx, table, NULL, 0, NULL, 0, 0, -1, + GRN_CURSOR_BY_KEY); + for (i = 0; (id = grn_table_cursor_next(ctx, cursor)) != GRN_ID_NIL; + ++i, old_id = id) { + int is_value_column; + int j; + grn_obj buf; + if (i) { GRN_TEXT_PUTS(ctx, outbuf, ",\n"); } + if (table->header.type == GRN_TABLE_NO_KEY && old_id + 1 < id) { + grn_id current_id; + for (current_id = old_id + 1; current_id < id; current_id++) { + GRN_TEXT_PUTS(ctx, outbuf, "[],\n"); + GRN_TEXT_PUTS(ctx, &delete_commands, "delete --table "); + dump_obj_name(ctx, &delete_commands, table); + GRN_TEXT_PUTS(ctx, &delete_commands, " --id "); + grn_text_lltoa(ctx, &delete_commands, current_id); + GRN_TEXT_PUTC(ctx, &delete_commands, '\n'); + } + } + GRN_TEXT_PUTC(ctx, outbuf, '['); + for (j = 0; j < n_use_columns; j++) { + grn_id range; + grn_obj *column; + column = *((grn_obj **)GRN_BULK_HEAD(&use_columns) + j); + GRN_BULK_REWIND(&column_name); + grn_column_name_(ctx, column, &column_name); + if (GRN_TEXT_LEN(&column_name) == GRN_COLUMN_NAME_VALUE_LEN && + !memcmp(GRN_TEXT_VALUE(&column_name), + GRN_COLUMN_NAME_VALUE, + GRN_COLUMN_NAME_VALUE_LEN)) { + is_value_column = 1; + } else { + is_value_column = 0; + } + range = grn_obj_get_range(ctx, column); + + if (j) { GRN_TEXT_PUTC(ctx, outbuf, ','); } + switch (column->header.type) { + case GRN_COLUMN_VAR_SIZE: + case GRN_COLUMN_FIX_SIZE: + switch (column->header.flags & GRN_OBJ_COLUMN_TYPE_MASK) { + case GRN_OBJ_COLUMN_VECTOR: + dump_record_column_vector(ctx, outbuf, id, column, range, &buf); + break; + case GRN_OBJ_COLUMN_SCALAR: + { + GRN_OBJ_INIT(&buf, GRN_BULK, 0, range); + grn_obj_get_value(ctx, column, id, &buf); + grn_text_otoj(ctx, outbuf, &buf, NULL); + grn_obj_unlink(ctx, &buf); + } + break; + default: + GRN_PLUGIN_ERROR(ctx, + GRN_OPERATION_NOT_SUPPORTED, + "unsupported column type: %#x", + column->header.type); + break; + } + break; + case GRN_COLUMN_INDEX: + break; + case GRN_ACCESSOR: + { + GRN_OBJ_INIT(&buf, GRN_BULK, 0, range); + grn_obj_get_value(ctx, column, id, &buf); + /* XXX maybe, grn_obj_get_range() should not unconditionally return + GRN_DB_INT32 when column is GRN_ACCESSOR and + GRN_ACCESSOR_GET_VALUE */ + if (is_value_column) { + buf.header.domain = grn_obj_get_range(ctx, table); + } + grn_text_otoj(ctx, outbuf, &buf, NULL); + grn_obj_unlink(ctx, &buf); + } + break; + default: + GRN_PLUGIN_ERROR(ctx, + GRN_OPERATION_NOT_SUPPORTED, + "unsupported header type %#x", + column->header.type); + break; + } + } + GRN_TEXT_PUTC(ctx, outbuf, ']'); + if (GRN_TEXT_LEN(outbuf) >= DUMP_FLUSH_THRESHOLD_SIZE) { + grn_ctx_output_flush(ctx, 0); + } + } + grn_table_cursor_close(ctx, cursor); + GRN_TEXT_PUTS(ctx, outbuf, "\n]\n"); + GRN_TEXT_PUT(ctx, outbuf, GRN_TEXT_VALUE(&delete_commands), + GRN_TEXT_LEN(&delete_commands)); + grn_obj_unlink(ctx, &delete_commands); + +exit : + grn_obj_unlink(ctx, &column_name); + grn_obj_unlink(ctx, &use_columns); + + for (i = 0; i < ncolumns; i++) { + grn_obj_unlink(ctx, columns[i]); + } + grn_obj_unlink(ctx, &columnbuf); +} + +static void +dump_table(grn_ctx *ctx, grn_obj *outbuf, grn_obj *table, + grn_obj *pending_reference_columns) +{ + grn_obj *domain = NULL; + grn_id range_id; + grn_obj *range = NULL; + grn_table_flags flags; + grn_table_flags default_flags = GRN_OBJ_PERSISTENT; + grn_obj *default_tokenizer; + grn_obj *normalizer; + grn_obj *token_filters; + + switch (table->header.type) { + case GRN_TABLE_HASH_KEY: + case GRN_TABLE_PAT_KEY: + case GRN_TABLE_DAT_KEY: + domain = grn_ctx_at(ctx, table->header.domain); + break; + default: + break; + } + + if (GRN_TEXT_LEN(outbuf) > 0) { + GRN_TEXT_PUTC(ctx, outbuf, '\n'); + grn_ctx_output_flush(ctx, 0); + } + + grn_table_get_info(ctx, table, + &flags, + NULL, + &default_tokenizer, + &normalizer, + &token_filters); + + GRN_TEXT_PUTS(ctx, outbuf, "table_create "); + dump_obj_name(ctx, outbuf, table); + GRN_TEXT_PUTC(ctx, outbuf, ' '); + grn_dump_table_create_flags(ctx, + flags & ~default_flags, + outbuf); + if (domain) { + GRN_TEXT_PUTC(ctx, outbuf, ' '); + dump_obj_name(ctx, outbuf, domain); + } + range_id = grn_obj_get_range(ctx, table); + if (range_id != GRN_ID_NIL) { + range = grn_ctx_at(ctx, range_id); + if (!range) { + // ERR(GRN_RANGE_ERROR, "couldn't get table's value_type object"); + return; + } + if (table->header.type != GRN_TABLE_NO_KEY) { + GRN_TEXT_PUTC(ctx, outbuf, ' '); + } else { + GRN_TEXT_PUTS(ctx, outbuf, " --value_type "); + } + dump_obj_name(ctx, outbuf, range); + grn_obj_unlink(ctx, range); + } + if (default_tokenizer) { + GRN_TEXT_PUTS(ctx, outbuf, " --default_tokenizer "); + dump_obj_name(ctx, outbuf, default_tokenizer); + } + if (normalizer) { + GRN_TEXT_PUTS(ctx, outbuf, " --normalizer "); + dump_obj_name(ctx, outbuf, normalizer); + } + if (table->header.type != GRN_TABLE_NO_KEY) { + int n_token_filters; + + n_token_filters = GRN_BULK_VSIZE(token_filters) / sizeof(grn_obj *); + if (n_token_filters > 0) { + int i; + GRN_TEXT_PUTS(ctx, outbuf, " --token_filters "); + for (i = 0; i < n_token_filters; i++) { + grn_obj *token_filter = GRN_PTR_VALUE_AT(token_filters, i); + if (i > 0) { + GRN_TEXT_PUTC(ctx, outbuf, ','); + } + dump_obj_name(ctx, outbuf, token_filter); + } + } + } + + GRN_TEXT_PUTC(ctx, outbuf, '\n'); + + if (domain) { + grn_obj_unlink(ctx, domain); + } + + dump_columns(ctx, outbuf, table, pending_reference_columns); +} + +static void +dump_pending_columns(grn_ctx *ctx, grn_obj *outbuf, grn_obj *pending_columns) +{ + size_t i, n_columns; + + n_columns = GRN_BULK_VSIZE(pending_columns) / sizeof(grn_obj *); + if (n_columns == 0) { + return; + } + + if (GRN_TEXT_LEN(outbuf) > 0) { + GRN_TEXT_PUTC(ctx, outbuf, '\n'); + grn_ctx_output_flush(ctx, 0); + } + + for (i = 0; i < n_columns; i++) { + grn_obj *table, *column; + + column = GRN_PTR_VALUE_AT(pending_columns, i); + table = grn_ctx_at(ctx, column->header.domain); + dump_column(ctx, outbuf, table, column); + grn_obj_unlink(ctx, column); + grn_obj_unlink(ctx, table); + } +} + +static void +dump_schema(grn_ctx *ctx, grn_obj *outbuf) +{ + grn_obj *db; + grn_table_cursor *cur; + grn_id id; + grn_obj pending_reference_columns; + + db = grn_ctx_db(ctx); + cur = grn_table_cursor_open(ctx, db, NULL, 0, NULL, 0, 0, -1, + GRN_CURSOR_BY_ID); + if (!cur) { + return; + } + + GRN_PTR_INIT(&pending_reference_columns, GRN_OBJ_VECTOR, GRN_ID_NIL); + while ((id = grn_table_cursor_next(ctx, cur)) != GRN_ID_NIL) { + grn_obj *object; + + if ((object = grn_ctx_at(ctx, id))) { + switch (object->header.type) { + case GRN_TABLE_HASH_KEY: + case GRN_TABLE_PAT_KEY: + case GRN_TABLE_DAT_KEY: + case GRN_TABLE_NO_KEY: + dump_table(ctx, outbuf, object, &pending_reference_columns); + break; + default: + break; + } + grn_obj_unlink(ctx, object); + } else { + /* XXX: this clause is executed when MeCab tokenizer is enabled in + database but the groonga isn't supported MeCab. + We should return error mesage about it and error exit status + but it's too difficult for this architecture. :< */ + GRN_PLUGIN_CLEAR_ERROR(ctx); + } + } + grn_table_cursor_close(ctx, cur); + + dump_pending_columns(ctx, outbuf, &pending_reference_columns); + grn_obj_close(ctx, &pending_reference_columns); +} + +static void +dump_selected_tables_records(grn_ctx *ctx, grn_obj *outbuf, grn_obj *tables) +{ + const char *p, *e; + + p = GRN_TEXT_VALUE(tables); + e = p + GRN_TEXT_LEN(tables); + while (p < e) { + int len; + grn_obj *table; + const char *token, *token_e; + + if ((len = grn_isspace(p, ctx->encoding))) { + p += len; + continue; + } + + token = p; + if (!(('a' <= *p && *p <= 'z') || + ('A' <= *p && *p <= 'Z') || + (*p == '_'))) { + while (p < e && !grn_isspace(p, ctx->encoding)) { + p++; + } + GRN_LOG(ctx, GRN_LOG_WARNING, "invalid table name is ignored: <%.*s>\n", + (int)(p - token), token); + continue; + } + while (p < e && + (('a' <= *p && *p <= 'z') || + ('A' <= *p && *p <= 'Z') || + ('0' <= *p && *p <= '9') || + (*p == '_'))) { + p++; + } + token_e = p; + while (p < e && (len = grn_isspace(p, ctx->encoding))) { + p += len; + continue; + } + if (p < e && *p == ',') { + p++; + } + + if ((table = grn_ctx_get(ctx, token, token_e - token))) { + dump_records(ctx, outbuf, table); + grn_obj_unlink(ctx, table); + } else { + GRN_LOG(ctx, GRN_LOG_WARNING, + "nonexistent table name is ignored: <%.*s>\n", + (int)(token_e - token), token); + } + } +} + +static void +dump_all_records(grn_ctx *ctx, grn_obj *outbuf) +{ + grn_obj *db; + grn_table_cursor *cur; + + db = grn_ctx_db(ctx); + if ((cur = grn_table_cursor_open(ctx, db, NULL, 0, NULL, 0, 0, -1, + GRN_CURSOR_BY_ID))) { + grn_id id; + + while ((id = grn_table_cursor_next(ctx, cur)) != GRN_ID_NIL) { + grn_obj *table; + + if ((table = grn_ctx_at(ctx, id))) { + dump_records(ctx, outbuf, table); + grn_obj_unlink(ctx, table); + } else { + /* XXX: this clause is executed when MeCab tokenizer is enabled in + database but the groonga isn't supported MeCab. + We should return error mesage about it and error exit status + but it's too difficult for this architecture. :< */ + GRN_PLUGIN_CLEAR_ERROR(ctx); + } + } + grn_table_cursor_close(ctx, cur); + } +} + +static void +dump_indexes(grn_ctx *ctx, grn_obj *outbuf) +{ + grn_obj *db; + grn_table_cursor *cursor; + grn_id id; + grn_bool is_first_index_column = GRN_TRUE; + + db = grn_ctx_db(ctx); + cursor = grn_table_cursor_open(ctx, db, NULL, 0, NULL, 0, 0, -1, + GRN_CURSOR_BY_ID); + if (!cursor) { + return; + } + + while ((id = grn_table_cursor_next(ctx, cursor)) != GRN_ID_NIL) { + grn_obj *object; + + object = grn_ctx_at(ctx, id); + if (!object) { + /* XXX: this clause is executed when MeCab tokenizer is enabled in + database but the groonga isn't supported MeCab. + We should return error mesage about it and error exit status + but it's too difficult for this architecture. :< */ + GRN_PLUGIN_CLEAR_ERROR(ctx); + continue; + } + + if (object->header.type == GRN_COLUMN_INDEX) { + grn_obj *table; + grn_obj *column = object; + + if (is_first_index_column && GRN_TEXT_LEN(outbuf) > 0) { + GRN_TEXT_PUTC(ctx, outbuf, '\n'); + } + is_first_index_column = GRN_FALSE; + + table = grn_ctx_at(ctx, column->header.domain); + dump_column(ctx, outbuf, table, column); + grn_obj_unlink(ctx, table); + } + grn_obj_unlink(ctx, object); + } + grn_table_cursor_close(ctx, cursor); +} + +static grn_obj * +command_dump(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) +{ + grn_obj *outbuf = ctx->impl->output.buf; + grn_obj *tables; + grn_bool is_dump_plugins; + grn_bool is_dump_schema; + grn_bool is_dump_records; + grn_bool is_dump_indexes; + grn_bool is_dump_configs; + + tables = grn_plugin_proc_get_var(ctx, user_data, "tables", -1); + is_dump_plugins = grn_plugin_proc_get_var_bool(ctx, user_data, + "dump_plugins", -1, + GRN_TRUE); + is_dump_schema = grn_plugin_proc_get_var_bool(ctx, user_data, + "dump_schema", -1, + GRN_TRUE); + is_dump_records = grn_plugin_proc_get_var_bool(ctx, user_data, + "dump_records", -1, + GRN_TRUE); + is_dump_indexes = grn_plugin_proc_get_var_bool(ctx, user_data, + "dump_indexes", -1, + GRN_TRUE); + is_dump_configs = grn_plugin_proc_get_var_bool(ctx, user_data, + "dump_configs", -1, + GRN_TRUE); + + grn_ctx_set_output_type(ctx, GRN_CONTENT_GROONGA_COMMAND_LIST); + + if (is_dump_configs) { + dump_configs(ctx, outbuf); + } + if (is_dump_plugins) { + dump_plugins(ctx, outbuf); + } + if (is_dump_schema) { + dump_schema(ctx, outbuf); + } + if (is_dump_records) { + /* To update index columns correctly, we first create the whole schema, then + load non-derivative records, while skipping records of index columns. That + way, groonga will silently do the job of updating index columns for us. */ + if (GRN_TEXT_LEN(tables) > 0) { + dump_selected_tables_records(ctx, outbuf, tables); + } else { + dump_all_records(ctx, outbuf); + } + } + if (is_dump_indexes) { + dump_indexes(ctx, outbuf); + } + + /* remove the last newline because another one will be added by the caller. + maybe, the caller of proc functions currently doesn't consider the + possibility of multiple-line output from proc functions. */ + if (GRN_BULK_VSIZE(outbuf) > 0) { + grn_bulk_truncate(ctx, outbuf, GRN_BULK_VSIZE(outbuf) - 1); + } + return NULL; +} + +void +grn_proc_init_dump(grn_ctx *ctx) +{ + grn_expr_var vars[6]; + + grn_plugin_expr_var_init(ctx, &(vars[0]), "tables", -1); + grn_plugin_expr_var_init(ctx, &(vars[1]), "dump_plugins", -1); + grn_plugin_expr_var_init(ctx, &(vars[2]), "dump_schema", -1); + grn_plugin_expr_var_init(ctx, &(vars[3]), "dump_records", -1); + grn_plugin_expr_var_init(ctx, &(vars[4]), "dump_indexes", -1); + grn_plugin_expr_var_init(ctx, &(vars[5]), "dump_configs", -1); + grn_plugin_command_create(ctx, + "dump", -1, + command_dump, + 6, + vars); +} Modified: lib/proc/sources.am (+1 -0) =================================================================== --- lib/proc/sources.am 2016-06-22 10:28:28 +0900 (83045e9) +++ lib/proc/sources.am 2016-06-22 10:28:56 +0900 (f3d0cc0) @@ -1,6 +1,7 @@ libgrnproc_la_SOURCES = \ proc_column.c \ proc_config.c \ + proc_dump.c \ proc_fuzzy_search.c \ proc_lock.c \ proc_object.c \ -------------- next part -------------- HTML����������������������������...Download