null+****@clear*****
null+****@clear*****
2010年 9月 6日 (月) 15:55:19 JST
Kouhei Sutou 2010-09-06 06:55:19 +0000 (Mon, 06 Sep 2010) New Revision: c365d69a531a97d8ccc6c6fa1eef6351a9c330cb Log: work with UTF-8 BOM input. #448 Added files: test/unit/command/test-bom.rb Modified files: src/groonga.c test/unit/command/Makefile.am Modified: src/groonga.c (+13 -3) =================================================================== --- src/groonga.c 2010-09-04 09:39:22 +0000 (5960c81) +++ src/groonga.c 2010-09-06 06:55:19 +0000 (855b6c0) @@ -166,8 +166,9 @@ show_version(void) #define BUFSIZE 0x1000000 inline static int -prompt(char *buf) +prompt(grn_ctx *ctx, char *buf) { + static int the_first_read = GRN_TRUE; int len; if (!batchmode) { #ifdef HAVE_LIBEDIT @@ -204,6 +205,15 @@ prompt(char *buf) len = 0; } } + if (the_first_read && len > 0) { + const char bom[] = {0xef, 0xbb, 0xbf}; + if (GRN_CTX_GET_ENCODING(ctx) == GRN_ENC_UTF8 && + len > 3 && !memcmp(buf, bom, 3)) { + memmove(buf, buf + 3, len - 3); + len -= 3; + } + the_first_read = GRN_FALSE; + } return len; } @@ -631,7 +641,7 @@ do_alone(int argc, char **argv) if (!rc) { char *buf = GRN_TEXT_VALUE(&text); int len; - while ((len = prompt(buf))) { + while ((len = prompt(ctx, buf))) { uint32_t size = len - 1; grn_ctx_send(ctx, buf, size, 0); if (ctx->stat == GRN_CTX_QUIT) { break; } @@ -715,7 +725,7 @@ g_client(int argc, char **argv) char *buf = GRN_TEXT_VALUE(&text); int len; if (batchmode) { BATCHMODE(ctx); } - while ((len = prompt(buf))) { + while ((len = prompt(ctx, buf))) { uint32_t size = len - 1; grn_ctx_send(ctx, buf, size, 0); rc = ctx->rc; Modified: test/unit/command/Makefile.am (+2 -2) =================================================================== --- test/unit/command/Makefile.am 2010-09-04 09:39:22 +0000 (4596f5a) +++ test/unit/command/Makefile.am 2010-09-06 06:55:19 +0000 (020194b) @@ -1,4 +1,4 @@ EXTRA_DIST = \ test-config-file.rb \ - test-option-help.rb \ - test-option-pid-file.rb + test-option.rb \ + test-option-bom.rb Added: test/unit/command/test-bom.rb (+46 -0) 100644 =================================================================== --- /dev/null +++ test/unit/command/test-bom.rb 2010-09-06 06:55:19 +0000 (e8ffa67) @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2010 Kouhei Sutou <kou****@clear*****> +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License version 2.1 as published by the Free Software Foundation. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +class BOMTest < Test::Unit::TestCase + include GroongaTestUtils + + def setup + setup_database_path + @input_file = File.join(@tmp_dir, "commands") + end + + def teardown + teardown_database_path + end + + def test_no_bom + open(@input_file, "w") do |file| + file.puts("defrag") + end + assert_equal("[[0,0.0,0.0],true]\n", + run_groonga("--file", @input_file, "-n", @database_path)) + end + + def test_bom + open(@input_file, "w") do |file| + file.print("\xef\xbb\xbf") + file.puts("defrag") + end + assert_equal("[[0,0.0,0.0],true]\n", + run_groonga("--file", @input_file, "-n", @database_path)) + end +end