[Groonga-commit] pgroonga/pgroonga at 6f3b998 [master] Add pgroonga.highlight_html

Zurück zum Archiv-Index

Kouhei Sutou null+****@clear*****
Sat Apr 23 22:13:39 JST 2016


Kouhei Sutou	2016-04-23 22:13:39 +0900 (Sat, 23 Apr 2016)

  New Revision: 6f3b998d6f112aecaddf04960e757d800bafff54
  https://github.com/pgroonga/pgroonga/commit/6f3b998d6f112aecaddf04960e757d800bafff54

  Message:
    Add pgroonga.highlight_html

  Added files:
    expected/function/command/after-truncate.out
    expected/function/command/select.out
    expected/function/highlight-html/different-size-keyword.out
    expected/function/highlight-html/escape.out
    expected/function/highlight-html/multibyte.out
    expected/function/highlight-html/multiple-keywords.out
    expected/function/highlight-html/one-keyword.out
    expected/function/snippet-html/keywords/multiple.out
    expected/function/snippet-html/keywords/one.out
    sql/function/command/after-truncate.sql
    sql/function/command/select.sql
    sql/function/highlight-html/different-size-keyword.sql
    sql/function/highlight-html/escape.sql
    sql/function/highlight-html/multibyte.sql
    sql/function/highlight-html/multiple-keywords.sql
    sql/function/highlight-html/one-keyword.sql
    sql/function/snippet-html/keywords/multiple.sql
    sql/function/snippet-html/keywords/one.sql
    src/pgrn_highlight_html.c
    src/pgrn_highlight_html.h
  Modified files:
    CMakeLists.txt
    Makefile
    pgroonga--1.0.6--1.0.7.sql
    pgroonga.sql
    src/pgroonga.c
    src/pgroonga.h

  Modified: CMakeLists.txt (+1 -0)
===================================================================
--- CMakeLists.txt    2016-04-23 21:35:46 +0900 (c3ef069)
+++ CMakeLists.txt    2016-04-23 22:13:39 +0900 (520aa6c)
@@ -62,6 +62,7 @@ set(PGRN_SOURCES
   "src/pgrn_create.c"
   "src/pgrn_global.c"
   "src/pgrn_groonga.c"
+  "src/pgrn_highlight_html.c"
   "src/pgrn_jsonb.c"
   "src/pgrn_options.c"
   "src/pgrn_snippet_html.c"

  Modified: Makefile (+1 -0)
===================================================================
--- Makefile    2016-04-23 21:35:46 +0900 (f161ea8)
+++ Makefile    2016-04-23 22:13:39 +0900 (b2e5e1d)
@@ -9,6 +9,7 @@ SRCS =						\
 	src/pgrn_create.c			\
 	src/pgrn_global.c			\
 	src/pgrn_groonga.c			\
+	src/pgrn_highlight_html.c		\
 	src/pgrn_jsonb.c			\
 	src/pgrn_options.c			\
 	src/pgrn_snippet_html.c			\

  Added: expected/function/command/after-truncate.out (+18 -0) 100644
===================================================================
--- /dev/null
+++ expected/function/command/after-truncate.out    2016-04-23 22:13:39 +0900 (a7166f4)
@@ -0,0 +1,18 @@
+CREATE TABLE memos (
+  content text
+);
+CREATE INDEX pgroonga_index ON memos USING pgroonga (content);
+TRUNCATE memos;
+INSERT INTO memos VALUES ('PostgreSQL is a RDBMS.');
+INSERT INTO memos VALUES ('Groonga is fast full text search engine.');
+INSERT INTO memos VALUES ('PGroonga is a PostgreSQL extension that uses Groonga.');
+SELECT pgroonga.command('select ' ||
+                        pgroonga.table_name('pgroonga_index') ||
+                        ' --output_columns content')::json->>1
+    AS body;
+                                                                                body                                                                                
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ [[[3],[["content","LongText"]],["PostgreSQL is a RDBMS."],["Groonga is fast full text search engine."],["PGroonga is a PostgreSQL extension that uses Groonga."]]]
+(1 row)
+
+DROP TABLE memos;

  Added: expected/function/command/select.out (+17 -0) 100644
===================================================================
--- /dev/null
+++ expected/function/command/select.out    2016-04-23 22:13:39 +0900 (cbffc30)
@@ -0,0 +1,17 @@
+CREATE TABLE memos (
+  content text
+);
+INSERT INTO memos VALUES ('PostgreSQL is a RDBMS.');
+INSERT INTO memos VALUES ('Groonga is fast full text search engine.');
+INSERT INTO memos VALUES ('PGroonga is a PostgreSQL extension that uses Groonga.');
+CREATE INDEX pgroonga_index ON memos USING pgroonga (content);
+SELECT pgroonga.command('select ' ||
+                        pgroonga.table_name('pgroonga_index') ||
+                        ' --output_columns content')::json->>1
+    AS body;
+                                                                                body                                                                                
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ [[[3],[["content","LongText"]],["PostgreSQL is a RDBMS."],["Groonga is fast full text search engine."],["PGroonga is a PostgreSQL extension that uses Groonga."]]]
+(1 row)
+
+DROP TABLE memos;

  Added: expected/function/highlight-html/different-size-keyword.out (+8 -0) 100644
===================================================================
--- /dev/null
+++ expected/function/highlight-html/different-size-keyword.out    2016-04-23 22:13:39 +0900 (f375f55)
@@ -0,0 +1,8 @@
+SELECT pgroonga.highlight_html(
+  '100㍉メートル',
+  ARRAY['ミリ']);
+               highlight_html               
+--------------------------------------------
+ 100<span class="keyword">㍉</span>メートル
+(1 row)
+

  Added: expected/function/highlight-html/escape.out (+9 -0) 100644
===================================================================
--- /dev/null
+++ expected/function/highlight-html/escape.out    2016-04-23 22:13:39 +0900 (53578fc)
@@ -0,0 +1,9 @@
+SELECT pgroonga.highlight_html(
+  '<p>Groonga is a fast and accurate full text search engine based on ' ||
+  'inverted index.</p>',
+  ARRAY['Groonga']);
+                                                         highlight_html                                                          
+---------------------------------------------------------------------------------------------------------------------------------
+ &lt;p&gt;<span class="keyword">Groonga</span> is a fast and accurate full text search engine based on inverted index.&lt;/p&gt;
+(1 row)
+

  Added: expected/function/highlight-html/multibyte.out (+9 -0) 100644
===================================================================
--- /dev/null
+++ expected/function/highlight-html/multibyte.out    2016-04-23 22:13:39 +0900 (b0bbe00)
@@ -0,0 +1,9 @@
+SELECT pgroonga.highlight_html(
+  'Groongaは転置索引を用いた高速・高精度な全文検索エンジンであり、' ||
+  '登録された文書をすぐに検索結果に反映できます。',
+  ARRAY['検索']);
+                                                                             highlight_html                                                                              
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Groongaは転置索引を用いた高速・高精度な全文<span class="keyword">検索</span>エンジンであり、登録された文書をすぐに<span class="keyword">検索</span>結果に反映できます。
+(1 row)
+

  Added: expected/function/highlight-html/multiple-keywords.out (+19 -0) 100644
===================================================================
--- /dev/null
+++ expected/function/highlight-html/multiple-keywords.out    2016-04-23 22:13:39 +0900 (61776c2)
@@ -0,0 +1,19 @@
+SELECT pgroonga.highlight_html(
+  'Groonga is a fast and accurate full text search engine based on ' ||
+  'inverted index. One of the characteristics of Groonga is that a ' ||
+  'newly registered document instantly appears in search results. ' ||
+  'Also, Groonga allows updates without read locks. These characteristics ' ||
+  'result in superior performance on real-time applications.' ||
+  '\n' ||
+  '\n' ||
+  'Groonga is also a column-oriented database management system (DBMS). ' ||
+  'Compared with well-known row-oriented systems, such as MySQL and ' ||
+  'PostgreSQL, column-oriented systems are more suited for aggregate ' ||
+  'queries. Due to this advantage, Groonga can cover weakness of ' ||
+  'row-oriented systems.',
+  ARRAY['fast', 'PostgreSQL']);
+                                                                                                                                                                                                                                                                                                                                      highlight_html                                                                                                                                                                                                                                                                                                                                      
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Groonga is a <span class="keyword">fast</span> and accurate full text search engine based on inverted index. One of the characteristics of Groonga is that a newly registered document instantly appears in search results. Also, Groonga allows updates without read locks. These characteristics result in superior performance on real-time applications.\n\nGroonga is also a column-oriented database management system (DBMS). Compared with well-known row-oriented systems, such as MySQL and <span class="keyword">PostgreSQL</span>, column-oriented systems are more suited for aggregate queries. Due to this advantage, Groonga can cover weakness of row-oriented systems.
+(1 row)
+

  Added: expected/function/highlight-html/one-keyword.out (+19 -0) 100644
===================================================================
--- /dev/null
+++ expected/function/highlight-html/one-keyword.out    2016-04-23 22:13:39 +0900 (c771ce7)
@@ -0,0 +1,19 @@
+SELECT pgroonga.highlight_html(
+  'Groonga is a fast and accurate full text search engine based on ' ||
+  'inverted index. One of the characteristics of Groonga is that a ' ||
+  'newly registered document instantly appears in search results. ' ||
+  'Also, Groonga allows updates without read locks. These characteristics ' ||
+  'result in superior performance on real-time applications.' ||
+  '\n' ||
+  '\n' ||
+  'Groonga is also a column-oriented database management system (DBMS). ' ||
+  'Compared with well-known row-oriented systems, such as MySQL and ' ||
+  'PostgreSQL, column-oriented systems are more suited for aggregate ' ||
+  'queries. Due to this advantage, Groonga can cover weakness of ' ||
+  'row-oriented systems.',
+  ARRAY['Groonga']);
+                                                                                                                                                                                                                                                                                                                                                                                 highlight_html                                                                                                                                                                                                                                                                                                                                                                                  
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <span class="keyword">Groonga</span> is a fast and accurate full text search engine based on inverted index. One of the characteristics of <span class="keyword">Groonga</span> is that a newly registered document instantly appears in search results. Also, <span class="keyword">Groonga</span> allows updates without read locks. These characteristics result in superior performance on real-time applications.\n\n<span class="keyword">Groonga</span> is also a column-oriented database management system (DBMS). Compared with well-known row-oriented systems, such as MySQL and PostgreSQL, column-oriented systems are more suited for aggregate queries. Due to this advantage, <span class="keyword">Groonga</span> can cover weakness of row-oriented systems.
+(1 row)
+

  Added: expected/function/snippet-html/keywords/multiple.out (+20 -0) 100644
===================================================================
--- /dev/null
+++ expected/function/snippet-html/keywords/multiple.out    2016-04-23 22:13:39 +0900 (7e36d5f)
@@ -0,0 +1,20 @@
+SELECT unnest(pgroonga.snippet_html(
+  'Groonga is a fast and accurate full text search engine based on ' ||
+  'inverted index. One of the characteristics of Groonga is that a ' ||
+  'newly registered document instantly appears in search results. ' ||
+  'Also, Groonga allows updates without read locks. These characteristics ' ||
+  'result in superior performance on real-time applications.' ||
+  '\n' ||
+  '\n' ||
+  'Groonga is also a column-oriented database management system (DBMS). ' ||
+  'Compared with well-known row-oriented systems, such as MySQL and ' ||
+  'PostgreSQL, column-oriented systems are more suited for aggregate ' ||
+  'queries. Due to this advantage, Groonga can cover weakness of ' ||
+  'row-oriented systems.',
+  ARRAY['fast', 'PostgreSQL']));
+                                                                                                                unnest                                                                                                                 
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Groonga is a <span class="keyword">fast</span> and accurate full text search engine based on inverted index. One of the characteristics of Groonga is that a newly registered document instantly appears in search results. Also, Gro
+ ase management system (DBMS). Compared with well-known row-oriented systems, such as MySQL and <span class="keyword">PostgreSQL</span>, column-oriented systems are more suited for aggregate queries. Due to this advantage, Groonga
+(2 rows)
+

  Added: expected/function/snippet-html/keywords/one.out (+21 -0) 100644
===================================================================
--- /dev/null
+++ expected/function/snippet-html/keywords/one.out    2016-04-23 22:13:39 +0900 (63e3320)
@@ -0,0 +1,21 @@
+SELECT unnest(pgroonga.snippet_html(
+  'Groonga is a fast and accurate full text search engine based on ' ||
+  'inverted index. One of the characteristics of Groonga is that a ' ||
+  'newly registered document instantly appears in search results. ' ||
+  'Also, Groonga allows updates without read locks. These characteristics ' ||
+  'result in superior performance on real-time applications.' ||
+  '\n' ||
+  '\n' ||
+  'Groonga is also a column-oriented database management system (DBMS). ' ||
+  'Compared with well-known row-oriented systems, such as MySQL and ' ||
+  'PostgreSQL, column-oriented systems are more suited for aggregate ' ||
+  'queries. Due to this advantage, Groonga can cover weakness of ' ||
+  'row-oriented systems.',
+  ARRAY['Groonga']));
+                                                                                                                               unnest                                                                                                                               
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <span class="keyword">Groonga</span> is a fast and accurate full text search engine based on inverted index. One of the characteristics of <span class="keyword">Groonga</span> is that a newly registered document instantly appears in search results. Also, Gro
+ t read locks. These characteristics result in superior performance on real-time applications.\n\n<span class="keyword">Groonga</span> is also a column-oriented database management system (DBMS). Compared with well-known row-orien
+ ted systems, such as MySQL and PostgreSQL, column-oriented systems are more suited for aggregate queries. Due to this advantage, <span class="keyword">Groonga</span> can cover weakness of row-oriented systems.
+(3 rows)
+

  Modified: pgroonga--1.0.6--1.0.7.sql (+6 -0)
===================================================================
--- pgroonga--1.0.6--1.0.7.sql    2016-04-23 21:35:46 +0900 (e69de29)
+++ pgroonga--1.0.6--1.0.7.sql    2016-04-23 22:13:39 +0900 (87416d7)
@@ -0,0 +1,6 @@
+CREATE FUNCTION pgroonga.highlight_html(target text, keywords text[])
+	RETURNS text
+	AS 'MODULE_PATHNAME', 'pgroonga_highlight_html'
+	LANGUAGE C
+	VOLATILE
+	STRICT;

  Modified: pgroonga.sql (+7 -0)
===================================================================
--- pgroonga.sql    2016-04-23 21:35:46 +0900 (4cfc3c1)
+++ pgroonga.sql    2016-04-23 22:13:39 +0900 (9c30f6d)
@@ -30,6 +30,13 @@ CREATE FUNCTION pgroonga.snippet_html(target text, keywords text[])
 	VOLATILE
 	STRICT;
 
+CREATE FUNCTION pgroonga.highlight_html(target text, keywords text[])
+	RETURNS text
+	AS 'MODULE_PATHNAME', 'pgroonga_highlight_html'
+	LANGUAGE C
+	VOLATILE
+	STRICT;
+
 CREATE FUNCTION pgroonga.match_term(target text, term text)
 	RETURNS bool
 	AS 'MODULE_PATHNAME', 'pgroonga_match_term_text'

  Added: sql/function/command/after-truncate.sql (+18 -0) 100644
===================================================================
--- /dev/null
+++ sql/function/command/after-truncate.sql    2016-04-23 22:13:39 +0900 (eba0d6f)
@@ -0,0 +1,18 @@
+CREATE TABLE memos (
+  content text
+);
+
+CREATE INDEX pgroonga_index ON memos USING pgroonga (content);
+
+TRUNCATE memos;
+
+INSERT INTO memos VALUES ('PostgreSQL is a RDBMS.');
+INSERT INTO memos VALUES ('Groonga is fast full text search engine.');
+INSERT INTO memos VALUES ('PGroonga is a PostgreSQL extension that uses Groonga.');
+
+SELECT pgroonga.command('select ' ||
+                        pgroonga.table_name('pgroonga_index') ||
+                        ' --output_columns content')::json->>1
+    AS body;
+
+DROP TABLE memos;

  Added: sql/function/command/select.sql (+16 -0) 100644
===================================================================
--- /dev/null
+++ sql/function/command/select.sql    2016-04-23 22:13:39 +0900 (742202a)
@@ -0,0 +1,16 @@
+CREATE TABLE memos (
+  content text
+);
+
+INSERT INTO memos VALUES ('PostgreSQL is a RDBMS.');
+INSERT INTO memos VALUES ('Groonga is fast full text search engine.');
+INSERT INTO memos VALUES ('PGroonga is a PostgreSQL extension that uses Groonga.');
+
+CREATE INDEX pgroonga_index ON memos USING pgroonga (content);
+
+SELECT pgroonga.command('select ' ||
+                        pgroonga.table_name('pgroonga_index') ||
+                        ' --output_columns content')::json->>1
+    AS body;
+
+DROP TABLE memos;

  Added: sql/function/highlight-html/different-size-keyword.sql (+3 -0) 100644
===================================================================
--- /dev/null
+++ sql/function/highlight-html/different-size-keyword.sql    2016-04-23 22:13:39 +0900 (66032ce)
@@ -0,0 +1,3 @@
+SELECT pgroonga.highlight_html(
+  '100㍉メートル',
+  ARRAY['ミリ']);

  Added: sql/function/highlight-html/escape.sql (+4 -0) 100644
===================================================================
--- /dev/null
+++ sql/function/highlight-html/escape.sql    2016-04-23 22:13:39 +0900 (e4c8172)
@@ -0,0 +1,4 @@
+SELECT pgroonga.highlight_html(
+  '<p>Groonga is a fast and accurate full text search engine based on ' ||
+  'inverted index.</p>',
+  ARRAY['Groonga']);

  Added: sql/function/highlight-html/multibyte.sql (+4 -0) 100644
===================================================================
--- /dev/null
+++ sql/function/highlight-html/multibyte.sql    2016-04-23 22:13:39 +0900 (fee281c)
@@ -0,0 +1,4 @@
+SELECT pgroonga.highlight_html(
+  'Groongaは転置索引を用いた高速・高精度な全文検索エンジンであり、' ||
+  '登録された文書をすぐに検索結果に反映できます。',
+  ARRAY['検索']);

  Added: sql/function/highlight-html/multiple-keywords.sql (+14 -0) 100644
===================================================================
--- /dev/null
+++ sql/function/highlight-html/multiple-keywords.sql    2016-04-23 22:13:39 +0900 (709aaba)
@@ -0,0 +1,14 @@
+SELECT pgroonga.highlight_html(
+  'Groonga is a fast and accurate full text search engine based on ' ||
+  'inverted index. One of the characteristics of Groonga is that a ' ||
+  'newly registered document instantly appears in search results. ' ||
+  'Also, Groonga allows updates without read locks. These characteristics ' ||
+  'result in superior performance on real-time applications.' ||
+  '\n' ||
+  '\n' ||
+  'Groonga is also a column-oriented database management system (DBMS). ' ||
+  'Compared with well-known row-oriented systems, such as MySQL and ' ||
+  'PostgreSQL, column-oriented systems are more suited for aggregate ' ||
+  'queries. Due to this advantage, Groonga can cover weakness of ' ||
+  'row-oriented systems.',
+  ARRAY['fast', 'PostgreSQL']);

  Added: sql/function/highlight-html/one-keyword.sql (+14 -0) 100644
===================================================================
--- /dev/null
+++ sql/function/highlight-html/one-keyword.sql    2016-04-23 22:13:39 +0900 (9eb45b1)
@@ -0,0 +1,14 @@
+SELECT pgroonga.highlight_html(
+  'Groonga is a fast and accurate full text search engine based on ' ||
+  'inverted index. One of the characteristics of Groonga is that a ' ||
+  'newly registered document instantly appears in search results. ' ||
+  'Also, Groonga allows updates without read locks. These characteristics ' ||
+  'result in superior performance on real-time applications.' ||
+  '\n' ||
+  '\n' ||
+  'Groonga is also a column-oriented database management system (DBMS). ' ||
+  'Compared with well-known row-oriented systems, such as MySQL and ' ||
+  'PostgreSQL, column-oriented systems are more suited for aggregate ' ||
+  'queries. Due to this advantage, Groonga can cover weakness of ' ||
+  'row-oriented systems.',
+  ARRAY['Groonga']);

  Added: sql/function/snippet-html/keywords/multiple.sql (+14 -0) 100644
===================================================================
--- /dev/null
+++ sql/function/snippet-html/keywords/multiple.sql    2016-04-23 22:13:39 +0900 (e6a7c57)
@@ -0,0 +1,14 @@
+SELECT unnest(pgroonga.snippet_html(
+  'Groonga is a fast and accurate full text search engine based on ' ||
+  'inverted index. One of the characteristics of Groonga is that a ' ||
+  'newly registered document instantly appears in search results. ' ||
+  'Also, Groonga allows updates without read locks. These characteristics ' ||
+  'result in superior performance on real-time applications.' ||
+  '\n' ||
+  '\n' ||
+  'Groonga is also a column-oriented database management system (DBMS). ' ||
+  'Compared with well-known row-oriented systems, such as MySQL and ' ||
+  'PostgreSQL, column-oriented systems are more suited for aggregate ' ||
+  'queries. Due to this advantage, Groonga can cover weakness of ' ||
+  'row-oriented systems.',
+  ARRAY['fast', 'PostgreSQL']));

  Added: sql/function/snippet-html/keywords/one.sql (+14 -0) 100644
===================================================================
--- /dev/null
+++ sql/function/snippet-html/keywords/one.sql    2016-04-23 22:13:39 +0900 (e3a30df)
@@ -0,0 +1,14 @@
+SELECT unnest(pgroonga.snippet_html(
+  'Groonga is a fast and accurate full text search engine based on ' ||
+  'inverted index. One of the characteristics of Groonga is that a ' ||
+  'newly registered document instantly appears in search results. ' ||
+  'Also, Groonga allows updates without read locks. These characteristics ' ||
+  'result in superior performance on real-time applications.' ||
+  '\n' ||
+  '\n' ||
+  'Groonga is also a column-oriented database management system (DBMS). ' ||
+  'Compared with well-known row-oriented systems, such as MySQL and ' ||
+  'PostgreSQL, column-oriented systems are more suited for aggregate ' ||
+  'queries. Due to this advantage, Groonga can cover weakness of ' ||
+  'row-oriented systems.',
+  ARRAY['Groonga']));

  Added: src/pgrn_highlight_html.c (+198 -0) 100644
===================================================================
--- /dev/null
+++ src/pgrn_highlight_html.c    2016-04-23 22:13:39 +0900 (e4e7f95)
@@ -0,0 +1,198 @@
+#include "pgroonga.h"
+
+#include "pgrn_global.h"
+#include "pgrn_groonga.h"
+#include "pgrn_highlight_html.h"
+
+#include <catalog/pg_type.h>
+#include <utils/array.h>
+#include <utils/builtins.h>
+
+static grn_ctx *ctx = &PGrnContext;
+static grn_obj *PGrnKeywordsTable = NULL;
+static grn_obj PGrnKeywordIDs;
+
+void
+PGrnInitializeHighlightHTML(void)
+{
+	PGrnKeywordsTable = grn_table_create(ctx, NULL, 0, NULL,
+										 GRN_OBJ_TABLE_PAT_KEY,
+										 grn_ctx_at(ctx, GRN_DB_SHORT_TEXT),
+										 0);
+	grn_obj_set_info(ctx,
+					 PGrnKeywordsTable,
+					 GRN_INFO_NORMALIZER,
+					 grn_ctx_get(ctx, "NormalizerAuto", -1));
+
+	GRN_RECORD_INIT(&PGrnKeywordIDs,
+					GRN_OBJ_VECTOR,
+					grn_obj_id(ctx, PGrnKeywordsTable));
+}
+
+void
+PGrnFinalizeHighlightHTML(void)
+{
+	if (!PGrnKeywordsTable)
+		return;
+
+	GRN_OBJ_FIN(ctx, &PGrnKeywordIDs);
+
+	grn_obj_close(ctx, PGrnKeywordsTable);
+	PGrnKeywordsTable = NULL;
+}
+
+static void
+PGrnKeywordsTableUpdate(ArrayType *keywords)
+{
+	{
+		int i, n;
+
+		GRN_BULK_REWIND(&PGrnKeywordIDs);
+
+		n = ARR_DIMS(keywords)[0];
+		for (i = 1; i <= n; i++)
+		{
+			Datum keywordDatum;
+			text *keyword;
+			bool isNULL;
+			grn_id id;
+
+			keywordDatum = array_ref(keywords, 1, &i, -1, -1, false,
+									 'i', &isNULL);
+			if (isNULL)
+				continue;
+
+			keyword = DatumGetTextPP(keywordDatum);
+			id = grn_table_add(ctx, PGrnKeywordsTable,
+							   VARDATA_ANY(keyword),
+							   VARSIZE_ANY_EXHDR(keyword),
+							   NULL);
+			if (id == GRN_ID_NIL)
+				continue;
+			GRN_RECORD_PUT(ctx, &PGrnKeywordIDs, id);
+		}
+	}
+
+	{
+		grn_table_cursor *cursor;
+		grn_id id;
+		size_t nIDs;
+
+		cursor = grn_table_cursor_open(ctx,
+									   PGrnKeywordsTable,
+									   NULL, 0,
+									   NULL, 0,
+									   0, -1, 0);
+		if (!cursor) {
+			ereport(ERROR,
+					(errcode(ERRCODE_OUT_OF_MEMORY),
+					 errmsg("pgroonga: "
+							"failed to create cursor for PGrnKeywordsTable: %s",
+							ctx->errbuf)));
+		}
+
+		nIDs = GRN_BULK_VSIZE(&PGrnKeywordIDs) / sizeof(grn_id);
+		while ((id = grn_table_cursor_next(ctx, cursor)) != GRN_ID_NIL)
+		{
+			size_t i;
+			bool specified = false;
+
+			for (i = 0; i < nIDs; i++)
+			{
+				if (id == GRN_RECORD_VALUE_AT(&PGrnKeywordIDs, i))
+				{
+					specified = true;
+					break;
+				}
+			}
+
+			if (specified)
+				continue;
+
+			grn_table_cursor_delete(ctx, cursor);
+		}
+
+		grn_table_cursor_close(ctx, cursor);
+	}
+}
+
+static text *
+PGrnHighlightHTML(text *target)
+{
+	grn_obj buffer;
+	text *highlighted;
+
+	GRN_TEXT_INIT(&buffer, 0);
+
+	{
+		const char *openTag = "<span class=\"keyword\">";
+		size_t openTagLength = strlen(openTag);
+		const char *closeTag = "</span>";
+		size_t closeTagLength = strlen(closeTag);
+		const char *string;
+		size_t stringLength;
+
+		string = VARDATA_ANY(target);
+		stringLength = VARSIZE_ANY_EXHDR(target);
+
+		while (stringLength > 0) {
+#define MAX_N_HITS 16
+			grn_pat_scan_hit hits[MAX_N_HITS];
+			const char *rest;
+			int i, nHits;
+			size_t previous = 0;
+			size_t chunkLength;
+
+			nHits = grn_pat_scan(ctx, (grn_pat *)PGrnKeywordsTable,
+								 string, stringLength,
+								 hits, MAX_N_HITS, &rest);
+			for (i = 0; i < nHits; i++) {
+				if ((hits[i].offset - previous) > 0) {
+					grn_text_escape_xml(ctx,
+										&buffer,
+										string + previous,
+										hits[i].offset - previous);
+				}
+				GRN_TEXT_PUT(ctx, &buffer, openTag, openTagLength);
+				grn_text_escape_xml(ctx,
+									&buffer,
+									string + hits[i].offset,
+									hits[i].length);
+				GRN_TEXT_PUT(ctx, &buffer, closeTag, closeTagLength);
+				previous = hits[i].offset + hits[i].length;
+			}
+
+			chunkLength = rest - string;
+			if ((chunkLength - previous) > 0) {
+				grn_text_escape_xml(ctx,
+									&buffer,
+									string + previous,
+									stringLength - previous);
+			}
+			stringLength -= chunkLength;
+			string = rest;
+#undef MAX_N_HITS
+		}
+	}
+
+	highlighted = cstring_to_text_with_len(GRN_TEXT_VALUE(&buffer),
+										   GRN_TEXT_LEN(&buffer));
+	GRN_OBJ_FIN(ctx, &buffer);
+	return highlighted;
+}
+
+/**
+ * pgroonga.highlight_html(target text, keywords text[]) : text
+ */
+Datum
+pgroonga_highlight_html(PG_FUNCTION_ARGS)
+{
+	text *target = PG_GETARG_TEXT_PP(0);
+	ArrayType *keywords = PG_GETARG_ARRAYTYPE_P(1);
+	text *highlighted;
+
+	PGrnKeywordsTableUpdate(keywords);
+	highlighted = PGrnHighlightHTML(target);
+
+	PG_RETURN_TEXT_P(highlighted);
+}

  Added: src/pgrn_highlight_html.h (+4 -0) 100644
===================================================================
--- /dev/null
+++ src/pgrn_highlight_html.h    2016-04-23 22:13:39 +0900 (c1da617)
@@ -0,0 +1,4 @@
+#pragma once
+
+void PGrnInitializeHighlightHTML(void);
+void PGrnFinalizeHighlightHTML(void);

  Modified: src/pgroonga.c (+6 -0)
===================================================================
--- src/pgroonga.c    2016-04-23 21:35:46 +0900 (abba946)
+++ src/pgroonga.c    2016-04-23 22:13:39 +0900 (7ffbc99)
@@ -6,6 +6,7 @@
 #include "pgrn_create.h"
 #include "pgrn_global.h"
 #include "pgrn_groonga.h"
+#include "pgrn_highlight_html.h"
 #include "pgrn_jsonb.h"
 #include "pgrn_options.h"
 #include "pgrn_search.h"
@@ -135,6 +136,7 @@ PG_FUNCTION_INFO_V1(pgroonga_score);
 PG_FUNCTION_INFO_V1(pgroonga_table_name);
 PG_FUNCTION_INFO_V1(pgroonga_command);
 PG_FUNCTION_INFO_V1(pgroonga_snippet_html);
+PG_FUNCTION_INFO_V1(pgroonga_highlight_html);
 
 PG_FUNCTION_INFO_V1(pgroonga_match_term_text);
 PG_FUNCTION_INFO_V1(pgroonga_match_term_text_array);
@@ -261,6 +263,8 @@ PGrnOnProcExit(int code, Datum arg)
 	{
 		grn_obj *db;
 
+		PGrnFinalizeHighlightHTML();
+
 		PGrnFinalizeJSONB();
 
 		PGrnFinalizeMatchSequentialSearchData();
@@ -364,6 +368,8 @@ _PG_init(void)
 	PGrnInitializePrefixRKSequentialSearchData();
 
 	PGrnInitializeJSONB();
+
+	PGrnInitializeHighlightHTML();
 }
 
 static grn_id

  Modified: src/pgroonga.h (+1 -0)
===================================================================
--- src/pgroonga.h    2016-04-23 21:35:46 +0900 (b29bd62)
+++ src/pgroonga.h    2016-04-23 22:13:39 +0900 (f916b86)
@@ -56,6 +56,7 @@ extern Datum PGDLLEXPORT pgroonga_score(PG_FUNCTION_ARGS);
 extern Datum PGDLLEXPORT pgroonga_table_name(PG_FUNCTION_ARGS);
 extern Datum PGDLLEXPORT pgroonga_command(PG_FUNCTION_ARGS);
 extern Datum PGDLLEXPORT pgroonga_snippet_html(PG_FUNCTION_ARGS);
+extern Datum PGDLLEXPORT pgroonga_highlight_html(PG_FUNCTION_ARGS);
 
 extern Datum PGDLLEXPORT pgroonga_match_term_text(PG_FUNCTION_ARGS);
 extern Datum PGDLLEXPORT pgroonga_match_term_text_array(PG_FUNCTION_ARGS);
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Zurück zum Archiv-Index