NIIBE Yutaka
gniib****@fsij*****
2010年 7月 6日 (火) 12:20:18 JST
janitor/mkdepgraph-cleanup branch に入れた mkdepgraph の変更です。 anthy/logger の利用は明らかな間違い。stderr に出力してユーザに見せなくては。 anthy/conf は使う必要無し。 anthy/ruleparser も使わないで済ませました。 Don't use anthy/ruleparser, anthy/logger and anthy/conf for mkdepgraph. 2010-07-06 NIIBE Yutaka <gniib****@fsij*****> * depgraph/Makefile.am (DEPWORDS): Change order (as same as read order in master.depword). (CLEANFILES): Added all.depword. (all.depword): Added a rule to generate. * depgraph/master.depword: Comment out all include. * depgraph/mkdepgraph.c (parse_dep): Call fprintf instead of anthy_log. (check_nodes): Likewise. (get_tokens): New. (init_depword_tab): Simply use fopen/fgets/fclose. (init_indep_word_seq_tab): Likewise. (parse_indep): Error check when realloc fails. (main): Don't use anthy/conf. diff --git a/depgraph/Makefile.am b/depgraph/Makefile.am index 94e75b4..713cfcd 100644 --- a/depgraph/Makefile.am +++ b/depgraph/Makefile.am @@ -1,11 +1,9 @@ # Files -DEPWORDS = master.depword noun.depword v.depword \ - av.depword a.depword\ - ajv.depword noun-variant.depword \ - fix.depword conjugate.depword \ - conjugate.table +DEPWORDS = conjugate.table conjugate.depword fix.depword noun.depword \ + noun-variant.depword av.depword v.depword \ + a.depword ajv.depword master.depword INCLUDES = -I$(top_srcdir)/ -DSRCDIR=\"$(srcdir)\" -CLEANFILES = anthy.dep +CLEANFILES = anthy.dep all.depword EXTRA_DIST = indepword.txt $(DEPWORDS) # Generate the dictionary @@ -13,7 +11,10 @@ noinst_PROGRAMS = mkdepgraph mkdepgraph_SOURCES = mkdepgraph.c mkdepgraph_LDADD = ../src-main/libanthy.la ../src-worddic/libanthydic.la -anthy.dep : mkdepgraph $(DEPWORDS) +anthy.dep : mkdepgraph all.depword indepword.txt ./mkdepgraph -noinst_DATA = anthy.dep +all.depword: $(DEPWORDS) + cat $(DEPWORDS) | sed -n /^[^#]/p > $@ + +noinst_DATA = anthy.dep all.depword diff --git a/depgraph/master.depword b/depgraph/master.depword index 5962ec6..bb5be41 100644 --- a/depgraph/master.depword +++ b/depgraph/master.depword @@ -5,23 +5,23 @@ # indepword.txtに各品詞ごとの最初のノードが定義してある。 # # 活用表 -\\include conjugate.table +# \\include conjugate.table # 活用するもの -\\include conjugate.depword +# \\include conjugate.depword # 活用しないもの -\\include fix.depword +# \\include fix.depword # # 名詞 -\\include noun.depword -\\include noun-variant.depword +# \\include noun.depword +# \\include noun-variant.depword # 副詞 -\\include av.depword +# \\include av.depword # 動詞 -\\include v.depword +# \\include v.depword # 形容詞 -\\include a.depword +# \\include a.depword # 形容動詞 -\\include ajv.depword +# \\include ajv.depword # # @接続詞 "" Sy@ Sy@間投助詞 diff --git a/depgraph/mkdepgraph.c b/depgraph/mkdepgraph.c index 5842946..4cc4778 100644 --- a/depgraph/mkdepgraph.c +++ b/depgraph/mkdepgraph.c @@ -27,22 +27,12 @@ #include <string.h> #include <stdlib.h> -#include <anthy/alloc.h> -#include <anthy/conf.h> -#include <anthy/ruleparser.h> #include <anthy/xstr.h> -#include <anthy/logger.h> #include <anthy/splitter.h> #include <anthy/anthy.h> #include <anthy/depgraph.h> #include <anthy/diclib.h> -#ifndef SRCDIR -#define SRCDIR "." -#endif - -static int verbose; - static struct dep_node* gNodes; static char** gNodeNames; static int nrNodes; @@ -210,8 +200,9 @@ parse_dep(char **tokens, int nr) /* 遷移条件がない時は警告を出して、空の遷移条件を追加する */ if (nr_strs == 0) { char *s; - anthy_log(0, "node %s has a branch without any transition condition.\n", - tokens[0]); + + fprintf (stderr, "node %s has a branch without any transition condition.\n", + tokens[0]); s = strdup(""); strs[0] = anthy_cstr_to_xstr(s, ANTHY_EUC_JP_ENCODING); nr_strs = 1; @@ -238,52 +229,93 @@ check_nodes(void) int i; for (i = 1; i < nrNodes; i++) { if (gNodes[i].nr_branch == 0) { - anthy_log(0, "node %s has no branch.\n", gNodeNames); + fprintf (stderr, "node %s has no branch.\n", gNodeNames[i]); } } } - static int +get_tokens (char *buf, char **tokens, int n) +{ + int i; + char *p = buf; + + for (i = 0; i < n; i++) + { + tokens[i] = p; + p = strchr (p, ' '); + if (p == NULL) + return i + 1; + + *p++ = '\0'; + } + + return -1; +} + +#define MAX_TOKEN 256 +#define BUFSIZE 1024 +#define DEPWORD_INPUT_FILENAME "all.depword" +#define INDEPWORD_INPUT_FILENAME "indepword.txt" + +static void init_depword_tab(void) { - const char *fn; - char **tokens; - int nr; + FILE *fp; + char buf[BUFSIZE]; + int lineno = 0; - /* id 0 を空ノードに割当てる */ - get_node_id_by_name("@"); + get_node_id_by_name ("@"); - /**/ - fn = anthy_conf_get_str("DEPWORD"); - if (!fn) { - anthy_log(0, "Dependent word dictionary is unspecified.\n"); - return -1; - } - if (anthy_open_file(fn) == -1) { - anthy_log(0, "Failed to open dep word dict (%s).\n", fn); - return -1; - } - /* 一行ずつ付属語グラフを読む */ - while (!anthy_read_line(&tokens, &nr)) { - parse_dep(tokens, nr); - anthy_free_line(); - } - anthy_close_file(); - check_nodes(); - return 0; + if ((fp = fopen (DEPWORD_INPUT_FILENAME, "r")) == NULL) + { + fprintf (stderr, "Failed to open (%s).\n", DEPWORD_INPUT_FILENAME); + exit (1); + } + + while (fgets (buf, BUFSIZE, fp) != NULL) + { + char *tokens[MAX_TOKEN]; + int nr; + char *p; + + if ((p = strchr (buf, '\n')) == NULL) + goto error; + + *p = '\0'; + + lineno++; + nr = get_tokens (buf, tokens, MAX_TOKEN); + if (nr < 0) + { + error: + fprintf (stderr, "Too long line (%d): ignored\n", lineno); + continue; + } + + parse_dep (tokens, nr); + } + + fclose (fp); + + check_nodes (); } static void -parse_indep(char **tokens, int nr) +parse_indep (char **tokens, int nr, int lineno) { if (nr < 2) { - printf("Syntex error in indepword defs" - " :%d.\n", anthy_get_line_number()); - return ; + fprintf(stderr, "%d: Syntex error in indepword defs.\n", lineno); + return; } - gRules = realloc(gRules, sizeof(struct wordseq_rule)*(nrRules+1)); + + gRules = (struct wordseq_rule*)realloc (gRules, sizeof(struct wordseq_rule)*(nrRules+1)); + if (gRules == NULL) + { + fprintf (stderr, "%d: malloc failed.\n", lineno); + exit (1); + } /* 行の先頭には品詞の名前が入っている */ gRules[nrRules].wt = anthy_init_wtype_by_name(tokens[0]); @@ -291,38 +323,47 @@ parse_indep(char **tokens, int nr) /* その次にはノード名が入っている */ gRules[nrRules].node_id = get_node_id_by_name(tokens[1]); - if (verbose) { - printf("%d (%s)\n", nrRules, tokens[0]); - } - nrRules ++; } /** 自立語からの遷移表 */ -static int -init_indep_word_seq_tab(void) +static void +init_indep_word_seq_tab (void) { - const char *fn; - char **tokens; - int nr; - - fn = anthy_conf_get_str("INDEPWORD"); - if (!fn){ - printf("independent word dict unspecified.\n"); - return -1; - } - if (anthy_open_file(fn) == -1) { - printf("Failed to open indep word dict (%s).\n", fn); - return -1; - } - /* ファイルを一行ずつ読む */ - while (!anthy_read_line(&tokens, &nr)) { - parse_indep(tokens, nr); - anthy_free_line(); - } - anthy_close_file(); + FILE *fp; + char buf[BUFSIZE]; + int lineno = 0; + + if ((fp = fopen (INDEPWORD_INPUT_FILENAME, "r")) == NULL) + { + fprintf (stderr, "Failed to open (%s).\n", INDEPWORD_INPUT_FILENAME); + exit (1); + } - return 0; + while (fgets (buf, BUFSIZE, fp) != NULL) + { + char *tokens[MAX_TOKEN]; + int nr; + char *p; + + if ((p = strchr (buf, '\n')) == NULL) + goto error; + + *p = '\0'; + if (buf[0] == '#') + continue; + + lineno++; + nr = get_tokens (buf, tokens, MAX_TOKEN); + if (nr < 0) + { + error: + fprintf (stderr, "Too long line (%d): ignored\n", lineno); + continue; + } + + parse_indep (tokens, nr, lineno); + } } /* @@ -425,12 +466,8 @@ write_file(const char* file_name) int main(int argc, char* argv[]) { - /* 付属語辞書を読み込んでファイルに書き出す */ - anthy_conf_override("CONFFILE", "../anthy-conf"); - anthy_conf_override("ANTHYDIR", SRCDIR "/../depgraph/"); + anthy_init_wtypes (); - anthy_init_wtypes(); - anthy_do_conf_init(); /* 付属語グラフ */ init_depword_tab(); /* 自立語からの遷移表 */ --