Kouhei Sutou
null+****@clear*****
Sat Apr 23 22:13:39 JST 2016
Kouhei Sutou 2016-04-23 22:13:39 +0900 (Sat, 23 Apr 2016) New Revision: 6f3b998d6f112aecaddf04960e757d800bafff54 https://github.com/pgroonga/pgroonga/commit/6f3b998d6f112aecaddf04960e757d800bafff54 Message: Add pgroonga.highlight_html Added files: expected/function/command/after-truncate.out expected/function/command/select.out expected/function/highlight-html/different-size-keyword.out expected/function/highlight-html/escape.out expected/function/highlight-html/multibyte.out expected/function/highlight-html/multiple-keywords.out expected/function/highlight-html/one-keyword.out expected/function/snippet-html/keywords/multiple.out expected/function/snippet-html/keywords/one.out sql/function/command/after-truncate.sql sql/function/command/select.sql sql/function/highlight-html/different-size-keyword.sql sql/function/highlight-html/escape.sql sql/function/highlight-html/multibyte.sql sql/function/highlight-html/multiple-keywords.sql sql/function/highlight-html/one-keyword.sql sql/function/snippet-html/keywords/multiple.sql sql/function/snippet-html/keywords/one.sql src/pgrn_highlight_html.c src/pgrn_highlight_html.h Modified files: CMakeLists.txt Makefile pgroonga--1.0.6--1.0.7.sql pgroonga.sql src/pgroonga.c src/pgroonga.h Modified: CMakeLists.txt (+1 -0) =================================================================== --- CMakeLists.txt 2016-04-23 21:35:46 +0900 (c3ef069) +++ CMakeLists.txt 2016-04-23 22:13:39 +0900 (520aa6c) @@ -62,6 +62,7 @@ set(PGRN_SOURCES "src/pgrn_create.c" "src/pgrn_global.c" "src/pgrn_groonga.c" + "src/pgrn_highlight_html.c" "src/pgrn_jsonb.c" "src/pgrn_options.c" "src/pgrn_snippet_html.c" Modified: Makefile (+1 -0) =================================================================== --- Makefile 2016-04-23 21:35:46 +0900 (f161ea8) +++ Makefile 2016-04-23 22:13:39 +0900 (b2e5e1d) @@ -9,6 +9,7 @@ SRCS = \ src/pgrn_create.c \ src/pgrn_global.c \ src/pgrn_groonga.c \ + src/pgrn_highlight_html.c \ src/pgrn_jsonb.c \ src/pgrn_options.c \ 
src/pgrn_snippet_html.c \ Added: expected/function/command/after-truncate.out (+18 -0) 100644 =================================================================== --- /dev/null +++ expected/function/command/after-truncate.out 2016-04-23 22:13:39 +0900 (a7166f4) @@ -0,0 +1,18 @@ +CREATE TABLE memos ( + content text +); +CREATE INDEX pgroonga_index ON memos USING pgroonga (content); +TRUNCATE memos; +INSERT INTO memos VALUES ('PostgreSQL is a RDBMS.'); +INSERT INTO memos VALUES ('Groonga is fast full text search engine.'); +INSERT INTO memos VALUES ('PGroonga is a PostgreSQL extension that uses Groonga.'); +SELECT pgroonga.command('select ' || + pgroonga.table_name('pgroonga_index') || + ' --output_columns content')::json->>1 + AS body; + body +-------------------------------------------------------------------------------------------------------------------------------------------------------------------- + [[[3],[["content","LongText"]],["PostgreSQL is a RDBMS."],["Groonga is fast full text search engine."],["PGroonga is a PostgreSQL extension that uses Groonga."]]] +(1 row) + +DROP TABLE memos; Added: expected/function/command/select.out (+17 -0) 100644 =================================================================== --- /dev/null +++ expected/function/command/select.out 2016-04-23 22:13:39 +0900 (cbffc30) @@ -0,0 +1,17 @@ +CREATE TABLE memos ( + content text +); +INSERT INTO memos VALUES ('PostgreSQL is a RDBMS.'); +INSERT INTO memos VALUES ('Groonga is fast full text search engine.'); +INSERT INTO memos VALUES ('PGroonga is a PostgreSQL extension that uses Groonga.'); +CREATE INDEX pgroonga_index ON memos USING pgroonga (content); +SELECT pgroonga.command('select ' || + pgroonga.table_name('pgroonga_index') || + ' --output_columns content')::json->>1 + AS body; + body +-------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 
[[[3],[["content","LongText"]],["PostgreSQL is a RDBMS."],["Groonga is fast full text search engine."],["PGroonga is a PostgreSQL extension that uses Groonga."]]] +(1 row) + +DROP TABLE memos; Added: expected/function/highlight-html/different-size-keyword.out (+8 -0) 100644 =================================================================== --- /dev/null +++ expected/function/highlight-html/different-size-keyword.out 2016-04-23 22:13:39 +0900 (f375f55) @@ -0,0 +1,8 @@ +SELECT pgroonga.highlight_html( + '100㍉メートル', + ARRAY['ミリ']); + highlight_html +-------------------------------------------- + 100<span class="keyword">㍉</span>メートル +(1 row) + Added: expected/function/highlight-html/escape.out (+9 -0) 100644 =================================================================== --- /dev/null +++ expected/function/highlight-html/escape.out 2016-04-23 22:13:39 +0900 (53578fc) @@ -0,0 +1,9 @@ +SELECT pgroonga.highlight_html( + '<p>Groonga is a fast and accurate full text search engine based on ' || + 'inverted index.</p>', + ARRAY['Groonga']); + highlight_html +--------------------------------------------------------------------------------------------------------------------------------- + <p><span class="keyword">Groonga</span> is a fast and accurate full text search engine based on inverted index.</p> +(1 row) + Added: expected/function/highlight-html/multibyte.out (+9 -0) 100644 =================================================================== --- /dev/null +++ expected/function/highlight-html/multibyte.out 2016-04-23 22:13:39 +0900 (b0bbe00) @@ -0,0 +1,9 @@ +SELECT pgroonga.highlight_html( + 'Groongaは転置索引を用いた高速・高精度な全文検索エンジンであり、' || + '登録された文書をすぐに検索結果に反映できます。', + ARRAY['検索']); + highlight_html +------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Groongaは転置索引を用いた高速・高精度な全文<span class="keyword">検索</span>エンジンであり、登録された文書をすぐに<span 
class="keyword">検索</span>結果に反映できます。 +(1 row) + Added: expected/function/highlight-html/multiple-keywords.out (+19 -0) 100644 =================================================================== --- /dev/null +++ expected/function/highlight-html/multiple-keywords.out 2016-04-23 22:13:39 +0900 (61776c2) @@ -0,0 +1,19 @@ +SELECT pgroonga.highlight_html( + 'Groonga is a fast and accurate full text search engine based on ' || + 'inverted index. One of the characteristics of Groonga is that a ' || + 'newly registered document instantly appears in search results. ' || + 'Also, Groonga allows updates without read locks. These characteristics ' || + 'result in superior performance on real-time applications.' || + '\n' || + '\n' || + 'Groonga is also a column-oriented database management system (DBMS). ' || + 'Compared with well-known row-oriented systems, such as MySQL and ' || + 'PostgreSQL, column-oriented systems are more suited for aggregate ' || + 'queries. Due to this advantage, Groonga can cover weakness of ' || + 'row-oriented systems.', + ARRAY['fast', 'PostgreSQL']); + highlight_html +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Groonga is a <span class="keyword">fast</span> and accurate full text search engine based on inverted index. One of the characteristics of Groonga is that a newly registered document instantly appears in search results. 
Also, Groonga allows updates without read locks. These characteristics result in superior performance on real-time applications.\n\nGroonga is also a column-oriented database management system (DBMS). Compared with well-known row-oriented systems, such as MySQL and <span class="keyword">PostgreSQL</span>, column-oriented systems are more suited for aggregate queries. Due to this advantage, Groonga can cover weakness of row-oriented systems. +(1 row) + Added: expected/function/highlight-html/one-keyword.out (+19 -0) 100644 =================================================================== --- /dev/null +++ expected/function/highlight-html/one-keyword.out 2016-04-23 22:13:39 +0900 (c771ce7) @@ -0,0 +1,19 @@ +SELECT pgroonga.highlight_html( + 'Groonga is a fast and accurate full text search engine based on ' || + 'inverted index. One of the characteristics of Groonga is that a ' || + 'newly registered document instantly appears in search results. ' || + 'Also, Groonga allows updates without read locks. These characteristics ' || + 'result in superior performance on real-time applications.' || + '\n' || + '\n' || + 'Groonga is also a column-oriented database management system (DBMS). ' || + 'Compared with well-known row-oriented systems, such as MySQL and ' || + 'PostgreSQL, column-oriented systems are more suited for aggregate ' || + 'queries. 
Due to this advantage, Groonga can cover weakness of ' || + 'row-oriented systems.', + ARRAY['Groonga']); + highlight_html +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + <span class="keyword">Groonga</span> is a fast and accurate full text search engine based on inverted index. One of the characteristics of <span class="keyword">Groonga</span> is that a newly registered document instantly appears in search results. Also, <span class="keyword">Groonga</span> allows updates without read locks. These characteristics result in superior performance on real-time applications.\n\n<span class="keyword">Groonga</span> is also a column-oriented database management system (DBMS). Compared with well-known row-oriented systems, such as MySQL and PostgreSQL, column-oriented systems are more suited for aggregate queries. Due to this advantage, <span class="keyword">Groonga</span> can cover weakness of row-oriented systems. 
+(1 row) + Added: expected/function/snippet-html/keywords/multiple.out (+20 -0) 100644 =================================================================== --- /dev/null +++ expected/function/snippet-html/keywords/multiple.out 2016-04-23 22:13:39 +0900 (7e36d5f) @@ -0,0 +1,20 @@ +SELECT unnest(pgroonga.snippet_html( + 'Groonga is a fast and accurate full text search engine based on ' || + 'inverted index. One of the characteristics of Groonga is that a ' || + 'newly registered document instantly appears in search results. ' || + 'Also, Groonga allows updates without read locks. These characteristics ' || + 'result in superior performance on real-time applications.' || + '\n' || + '\n' || + 'Groonga is also a column-oriented database management system (DBMS). ' || + 'Compared with well-known row-oriented systems, such as MySQL and ' || + 'PostgreSQL, column-oriented systems are more suited for aggregate ' || + 'queries. Due to this advantage, Groonga can cover weakness of ' || + 'row-oriented systems.', + ARRAY['fast', 'PostgreSQL'])); + unnest +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Groonga is a <span class="keyword">fast</span> and accurate full text search engine based on inverted index. One of the characteristics of Groonga is that a newly registered document instantly appears in search results. Also, Gro + ase management system (DBMS). Compared with well-known row-oriented systems, such as MySQL and <span class="keyword">PostgreSQL</span>, column-oriented systems are more suited for aggregate queries. 
Due to this advantage, Groonga +(2 rows) + Added: expected/function/snippet-html/keywords/one.out (+21 -0) 100644 =================================================================== --- /dev/null +++ expected/function/snippet-html/keywords/one.out 2016-04-23 22:13:39 +0900 (63e3320) @@ -0,0 +1,21 @@ +SELECT unnest(pgroonga.snippet_html( + 'Groonga is a fast and accurate full text search engine based on ' || + 'inverted index. One of the characteristics of Groonga is that a ' || + 'newly registered document instantly appears in search results. ' || + 'Also, Groonga allows updates without read locks. These characteristics ' || + 'result in superior performance on real-time applications.' || + '\n' || + '\n' || + 'Groonga is also a column-oriented database management system (DBMS). ' || + 'Compared with well-known row-oriented systems, such as MySQL and ' || + 'PostgreSQL, column-oriented systems are more suited for aggregate ' || + 'queries. Due to this advantage, Groonga can cover weakness of ' || + 'row-oriented systems.', + ARRAY['Groonga'])); + unnest +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + <span class="keyword">Groonga</span> is a fast and accurate full text search engine based on inverted index. One of the characteristics of <span class="keyword">Groonga</span> is that a newly registered document instantly appears in search results. Also, Gro + t read locks. These characteristics result in superior performance on real-time applications.\n\n<span class="keyword">Groonga</span> is also a column-oriented database management system (DBMS). Compared with well-known row-orien + ted systems, such as MySQL and PostgreSQL, column-oriented systems are more suited for aggregate queries. 
Due to this advantage, <span class="keyword">Groonga</span> can cover weakness of row-oriented systems. +(3 rows) + Modified: pgroonga--1.0.6--1.0.7.sql (+6 -0) =================================================================== --- pgroonga--1.0.6--1.0.7.sql 2016-04-23 21:35:46 +0900 (e69de29) +++ pgroonga--1.0.6--1.0.7.sql 2016-04-23 22:13:39 +0900 (87416d7) @@ -0,0 +1,6 @@ +CREATE FUNCTION pgroonga.highlight_html(target text, keywords text[]) + RETURNS text + AS 'MODULE_PATHNAME', 'pgroonga_highlight_html' + LANGUAGE C + VOLATILE + STRICT; Modified: pgroonga.sql (+7 -0) =================================================================== --- pgroonga.sql 2016-04-23 21:35:46 +0900 (4cfc3c1) +++ pgroonga.sql 2016-04-23 22:13:39 +0900 (9c30f6d) @@ -30,6 +30,13 @@ CREATE FUNCTION pgroonga.snippet_html(target text, keywords text[]) VOLATILE STRICT; +CREATE FUNCTION pgroonga.highlight_html(target text, keywords text[]) + RETURNS text + AS 'MODULE_PATHNAME', 'pgroonga_highlight_html' + LANGUAGE C + VOLATILE + STRICT; + CREATE FUNCTION pgroonga.match_term(target text, term text) RETURNS bool AS 'MODULE_PATHNAME', 'pgroonga_match_term_text' Added: sql/function/command/after-truncate.sql (+18 -0) 100644 =================================================================== --- /dev/null +++ sql/function/command/after-truncate.sql 2016-04-23 22:13:39 +0900 (eba0d6f) @@ -0,0 +1,18 @@ +CREATE TABLE memos ( + content text +); + +CREATE INDEX pgroonga_index ON memos USING pgroonga (content); + +TRUNCATE memos; + +INSERT INTO memos VALUES ('PostgreSQL is a RDBMS.'); +INSERT INTO memos VALUES ('Groonga is fast full text search engine.'); +INSERT INTO memos VALUES ('PGroonga is a PostgreSQL extension that uses Groonga.'); + +SELECT pgroonga.command('select ' || + pgroonga.table_name('pgroonga_index') || + ' --output_columns content')::json->>1 + AS body; + +DROP TABLE memos; Added: sql/function/command/select.sql (+16 -0) 100644 
=================================================================== --- /dev/null +++ sql/function/command/select.sql 2016-04-23 22:13:39 +0900 (742202a) @@ -0,0 +1,16 @@ +CREATE TABLE memos ( + content text +); + +INSERT INTO memos VALUES ('PostgreSQL is a RDBMS.'); +INSERT INTO memos VALUES ('Groonga is fast full text search engine.'); +INSERT INTO memos VALUES ('PGroonga is a PostgreSQL extension that uses Groonga.'); + +CREATE INDEX pgroonga_index ON memos USING pgroonga (content); + +SELECT pgroonga.command('select ' || + pgroonga.table_name('pgroonga_index') || + ' --output_columns content')::json->>1 + AS body; + +DROP TABLE memos; Added: sql/function/highlight-html/different-size-keyword.sql (+3 -0) 100644 =================================================================== --- /dev/null +++ sql/function/highlight-html/different-size-keyword.sql 2016-04-23 22:13:39 +0900 (66032ce) @@ -0,0 +1,3 @@ +SELECT pgroonga.highlight_html( + '100㍉メートル', + ARRAY['ミリ']); Added: sql/function/highlight-html/escape.sql (+4 -0) 100644 =================================================================== --- /dev/null +++ sql/function/highlight-html/escape.sql 2016-04-23 22:13:39 +0900 (e4c8172) @@ -0,0 +1,4 @@ +SELECT pgroonga.highlight_html( + '<p>Groonga is a fast and accurate full text search engine based on ' || + 'inverted index.</p>', + ARRAY['Groonga']); Added: sql/function/highlight-html/multibyte.sql (+4 -0) 100644 =================================================================== --- /dev/null +++ sql/function/highlight-html/multibyte.sql 2016-04-23 22:13:39 +0900 (fee281c) @@ -0,0 +1,4 @@ +SELECT pgroonga.highlight_html( + 'Groongaは転置索引を用いた高速・高精度な全文検索エンジンであり、' || + '登録された文書をすぐに検索結果に反映できます。', + ARRAY['検索']); Added: sql/function/highlight-html/multiple-keywords.sql (+14 -0) 100644 =================================================================== --- /dev/null +++ sql/function/highlight-html/multiple-keywords.sql 2016-04-23 22:13:39 +0900 (709aaba) @@ -0,0 +1,14 @@ 
+SELECT pgroonga.highlight_html( + 'Groonga is a fast and accurate full text search engine based on ' || + 'inverted index. One of the characteristics of Groonga is that a ' || + 'newly registered document instantly appears in search results. ' || + 'Also, Groonga allows updates without read locks. These characteristics ' || + 'result in superior performance on real-time applications.' || + '\n' || + '\n' || + 'Groonga is also a column-oriented database management system (DBMS). ' || + 'Compared with well-known row-oriented systems, such as MySQL and ' || + 'PostgreSQL, column-oriented systems are more suited for aggregate ' || + 'queries. Due to this advantage, Groonga can cover weakness of ' || + 'row-oriented systems.', + ARRAY['fast', 'PostgreSQL']); Added: sql/function/highlight-html/one-keyword.sql (+14 -0) 100644 =================================================================== --- /dev/null +++ sql/function/highlight-html/one-keyword.sql 2016-04-23 22:13:39 +0900 (9eb45b1) @@ -0,0 +1,14 @@ +SELECT pgroonga.highlight_html( + 'Groonga is a fast and accurate full text search engine based on ' || + 'inverted index. One of the characteristics of Groonga is that a ' || + 'newly registered document instantly appears in search results. ' || + 'Also, Groonga allows updates without read locks. These characteristics ' || + 'result in superior performance on real-time applications.' || + '\n' || + '\n' || + 'Groonga is also a column-oriented database management system (DBMS). ' || + 'Compared with well-known row-oriented systems, such as MySQL and ' || + 'PostgreSQL, column-oriented systems are more suited for aggregate ' || + 'queries. 
Due to this advantage, Groonga can cover weakness of ' || + 'row-oriented systems.', + ARRAY['Groonga']); Added: sql/function/snippet-html/keywords/multiple.sql (+14 -0) 100644 =================================================================== --- /dev/null +++ sql/function/snippet-html/keywords/multiple.sql 2016-04-23 22:13:39 +0900 (e6a7c57) @@ -0,0 +1,14 @@ +SELECT unnest(pgroonga.snippet_html( + 'Groonga is a fast and accurate full text search engine based on ' || + 'inverted index. One of the characteristics of Groonga is that a ' || + 'newly registered document instantly appears in search results. ' || + 'Also, Groonga allows updates without read locks. These characteristics ' || + 'result in superior performance on real-time applications.' || + '\n' || + '\n' || + 'Groonga is also a column-oriented database management system (DBMS). ' || + 'Compared with well-known row-oriented systems, such as MySQL and ' || + 'PostgreSQL, column-oriented systems are more suited for aggregate ' || + 'queries. Due to this advantage, Groonga can cover weakness of ' || + 'row-oriented systems.', + ARRAY['fast', 'PostgreSQL'])); Added: sql/function/snippet-html/keywords/one.sql (+14 -0) 100644 =================================================================== --- /dev/null +++ sql/function/snippet-html/keywords/one.sql 2016-04-23 22:13:39 +0900 (e3a30df) @@ -0,0 +1,14 @@ +SELECT unnest(pgroonga.snippet_html( + 'Groonga is a fast and accurate full text search engine based on ' || + 'inverted index. One of the characteristics of Groonga is that a ' || + 'newly registered document instantly appears in search results. ' || + 'Also, Groonga allows updates without read locks. These characteristics ' || + 'result in superior performance on real-time applications.' || + '\n' || + '\n' || + 'Groonga is also a column-oriented database management system (DBMS). 
' || + 'Compared with well-known row-oriented systems, such as MySQL and ' || + 'PostgreSQL, column-oriented systems are more suited for aggregate ' || + 'queries. Due to this advantage, Groonga can cover weakness of ' || + 'row-oriented systems.', + ARRAY['Groonga'])); Added: src/pgrn_highlight_html.c (+198 -0) 100644 =================================================================== --- /dev/null +++ src/pgrn_highlight_html.c 2016-04-23 22:13:39 +0900 (e4e7f95) @@ -0,0 +1,198 @@ +#include "pgroonga.h" + +#include "pgrn_global.h" +#include "pgrn_groonga.h" +#include "pgrn_highlight_html.h" + +#include <catalog/pg_type.h> +#include <utils/array.h> +#include <utils/builtins.h> + +static grn_ctx *ctx = &PGrnContext; +static grn_obj *PGrnKeywordsTable = NULL; +static grn_obj PGrnKeywordIDs; + +void +PGrnInitializeHighlightHTML(void) +{ + PGrnKeywordsTable = grn_table_create(ctx, NULL, 0, NULL, + GRN_OBJ_TABLE_PAT_KEY, + grn_ctx_at(ctx, GRN_DB_SHORT_TEXT), + 0); + grn_obj_set_info(ctx, + PGrnKeywordsTable, + GRN_INFO_NORMALIZER, + grn_ctx_get(ctx, "NormalizerAuto", -1)); + + GRN_RECORD_INIT(&PGrnKeywordIDs, + GRN_OBJ_VECTOR, + grn_obj_id(ctx, PGrnKeywordsTable)); +} + +void +PGrnFinalizeHighlightHTML(void) +{ + if (!PGrnKeywordsTable) + return; + + GRN_OBJ_FIN(ctx, &PGrnKeywordIDs); + + grn_obj_close(ctx, PGrnKeywordsTable); + PGrnKeywordsTable = NULL; +} + +static void +PGrnKeywordsTableUpdate(ArrayType *keywords) +{ + { + int i, n; + + GRN_BULK_REWIND(&PGrnKeywordIDs); + + n = ARR_DIMS(keywords)[0]; + for (i = 1; i <= n; i++) + { + Datum keywordDatum; + text *keyword; + bool isNULL; + grn_id id; + + keywordDatum = array_ref(keywords, 1, &i, -1, -1, false, + 'i', &isNULL); + if (isNULL) + continue; + + keyword = DatumGetTextPP(keywordDatum); + id = grn_table_add(ctx, PGrnKeywordsTable, + VARDATA_ANY(keyword), + VARSIZE_ANY_EXHDR(keyword), + NULL); + if (id == GRN_ID_NIL) + continue; + GRN_RECORD_PUT(ctx, &PGrnKeywordIDs, id); + } + } + + { + grn_table_cursor *cursor; 
+ grn_id id; + size_t nIDs; + + cursor = grn_table_cursor_open(ctx, + PGrnKeywordsTable, + NULL, 0, + NULL, 0, + 0, -1, 0); + if (!cursor) { + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("pgroonga: " + "failed to create cursor for PGrnKeywordsTable: %s", + ctx->errbuf))); + } + + nIDs = GRN_BULK_VSIZE(&PGrnKeywordIDs) / sizeof(grn_id); + while ((id = grn_table_cursor_next(ctx, cursor)) != GRN_ID_NIL) + { + size_t i; + bool specified = false; + + for (i = 0; i < nIDs; i++) + { + if (id == GRN_RECORD_VALUE_AT(&PGrnKeywordIDs, i)) + { + specified = true; + break; + } + } + + if (specified) + continue; + + grn_table_cursor_delete(ctx, cursor); + } + + grn_table_cursor_close(ctx, cursor); + } +} + +static text * +PGrnHighlightHTML(text *target) +{ + grn_obj buffer; + text *highlighted; + + GRN_TEXT_INIT(&buffer, 0); + + { + const char *openTag = "<span class=\"keyword\">"; + size_t openTagLength = strlen(openTag); + const char *closeTag = "</span>"; + size_t closeTagLength = strlen(closeTag); + const char *string; + size_t stringLength; + + string = VARDATA_ANY(target); + stringLength = VARSIZE_ANY_EXHDR(target); + + while (stringLength > 0) { +#define MAX_N_HITS 16 + grn_pat_scan_hit hits[MAX_N_HITS]; + const char *rest; + int i, nHits; + size_t previous = 0; + size_t chunkLength; + + nHits = grn_pat_scan(ctx, (grn_pat *)PGrnKeywordsTable, + string, stringLength, + hits, MAX_N_HITS, &rest); + for (i = 0; i < nHits; i++) { + if ((hits[i].offset - previous) > 0) { + grn_text_escape_xml(ctx, + &buffer, + string + previous, + hits[i].offset - previous); + } + GRN_TEXT_PUT(ctx, &buffer, openTag, openTagLength); + grn_text_escape_xml(ctx, + &buffer, + string + hits[i].offset, + hits[i].length); + GRN_TEXT_PUT(ctx, &buffer, closeTag, closeTagLength); + previous = hits[i].offset + hits[i].length; + } + + chunkLength = rest - string; + if ((chunkLength - previous) > 0) { + grn_text_escape_xml(ctx, + &buffer, + string + previous, + stringLength - previous); + } + 
stringLength -= chunkLength; + string = rest; +#undef MAX_N_HITS + } + } + + highlighted = cstring_to_text_with_len(GRN_TEXT_VALUE(&buffer), + GRN_TEXT_LEN(&buffer)); + GRN_OBJ_FIN(ctx, &buffer); + return highlighted; +} + +/** + * pgroonga.highlight_html(target text, keywords text[]) : text + */ +Datum +pgroonga_highlight_html(PG_FUNCTION_ARGS) +{ + text *target = PG_GETARG_TEXT_PP(0); + ArrayType *keywords = PG_GETARG_ARRAYTYPE_P(1); + text *highlighted; + + PGrnKeywordsTableUpdate(keywords); + highlighted = PGrnHighlightHTML(target); + + PG_RETURN_TEXT_P(highlighted); +} Added: src/pgrn_highlight_html.h (+4 -0) 100644 =================================================================== --- /dev/null +++ src/pgrn_highlight_html.h 2016-04-23 22:13:39 +0900 (c1da617) @@ -0,0 +1,4 @@ +#pragma once + +void PGrnInitializeHighlightHTML(void); +void PGrnFinalizeHighlightHTML(void); Modified: src/pgroonga.c (+6 -0) =================================================================== --- src/pgroonga.c 2016-04-23 21:35:46 +0900 (abba946) +++ src/pgroonga.c 2016-04-23 22:13:39 +0900 (7ffbc99) @@ -6,6 +6,7 @@ #include "pgrn_create.h" #include "pgrn_global.h" #include "pgrn_groonga.h" +#include "pgrn_highlight_html.h" #include "pgrn_jsonb.h" #include "pgrn_options.h" #include "pgrn_search.h" @@ -135,6 +136,7 @@ PG_FUNCTION_INFO_V1(pgroonga_score); PG_FUNCTION_INFO_V1(pgroonga_table_name); PG_FUNCTION_INFO_V1(pgroonga_command); PG_FUNCTION_INFO_V1(pgroonga_snippet_html); +PG_FUNCTION_INFO_V1(pgroonga_highlight_html); PG_FUNCTION_INFO_V1(pgroonga_match_term_text); PG_FUNCTION_INFO_V1(pgroonga_match_term_text_array); @@ -261,6 +263,8 @@ PGrnOnProcExit(int code, Datum arg) { grn_obj *db; + PGrnFinalizeHighlightHTML(); + PGrnFinalizeJSONB(); PGrnFinalizeMatchSequentialSearchData(); @@ -364,6 +368,8 @@ _PG_init(void) PGrnInitializePrefixRKSequentialSearchData(); PGrnInitializeJSONB(); + + PGrnInitializeHighlightHTML(); } static grn_id Modified: src/pgroonga.h (+1 -0) 
=================================================================== --- src/pgroonga.h 2016-04-23 21:35:46 +0900 (b29bd62) +++ src/pgroonga.h 2016-04-23 22:13:39 +0900 (f916b86) @@ -56,6 +56,7 @@ extern Datum PGDLLEXPORT pgroonga_score(PG_FUNCTION_ARGS); extern Datum PGDLLEXPORT pgroonga_table_name(PG_FUNCTION_ARGS); extern Datum PGDLLEXPORT pgroonga_command(PG_FUNCTION_ARGS); extern Datum PGDLLEXPORT pgroonga_snippet_html(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_highlight_html(PG_FUNCTION_ARGS); extern Datum PGDLLEXPORT pgroonga_match_term_text(PG_FUNCTION_ARGS); extern Datum PGDLLEXPORT pgroonga_match_term_text_array(PG_FUNCTION_ARGS); -------------- next part -------------- HTMLの添付ファイルを保管しました...Download