mkdepgraph cleanup (Anthy-dev 3822) - Anthy

janitor/mkdepgraph-cleanup branch に入れた mkdepgraph の変更です。
anthy/logger の利用は明らかな間違い。stderr に出力してユーザに見せなくては。
anthy/conf は使う必要無し。
anthy/ruleparser も使わないで済ませました。

Don't use anthy/ruleparser, anthy/logger and anthy/conf for mkdepgraph.

2010-07-06  NIIBE Yutaka  <gniib****@fsij*****>

	* depgraph/Makefile.am (DEPWORDS): Change order (as same as
        read order in master.depword).
	(CLEANFILES): Added all.depword.
	(all.depword): Added a rule to generate.

	* depgraph/master.depword: Comment out all include.

	* depgraph/mkdepgraph.c (parse_dep): Call fprintf instead of
          anthy_log.
	(check_nodes): Likewise.
	(get_tokens): New.
	(init_depword_tab): Simply use fopen/fgets/fclose.
	(init_indep_word_seq_tab): Likewise.
	(parse_indep): Error check when realloc fails.
	(main): Don't use anthy/conf.

diff --git a/depgraph/Makefile.am b/depgraph/Makefile.am
index 94e75b4..713cfcd 100644
--- a/depgraph/Makefile.am
+++ b/depgraph/Makefile.am
@@ -1,11 +1,9 @@
 # Files
-DEPWORDS = master.depword noun.depword v.depword \
- av.depword a.depword\
- ajv.depword noun-variant.depword \
- fix.depword conjugate.depword \
- conjugate.table
+DEPWORDS = conjugate.table conjugate.depword fix.depword noun.depword \
+	   noun-variant.depword av.depword v.depword \
+	   a.depword  ajv.depword master.depword
 INCLUDES = -I$(top_srcdir)/ -DSRCDIR=\"$(srcdir)\"
-CLEANFILES = anthy.dep
+CLEANFILES = anthy.dep all.depword
 EXTRA_DIST = indepword.txt $(DEPWORDS)

 # Generate the dictionary
@@ -13,7 +11,10 @@ noinst_PROGRAMS = mkdepgraph
 mkdepgraph_SOURCES = mkdepgraph.c
 mkdepgraph_LDADD =  ../src-main/libanthy.la ../src-worddic/libanthydic.la

-anthy.dep : mkdepgraph $(DEPWORDS)
+anthy.dep : mkdepgraph all.depword indepword.txt
 	./mkdepgraph

-noinst_DATA = anthy.dep
+all.depword: $(DEPWORDS)
+	cat $(DEPWORDS) | sed -n /^[^#]/p > $@
+
+noinst_DATA = anthy.dep all.depword
diff --git a/depgraph/master.depword b/depgraph/master.depword
index 5962ec6..bb5be41 100644
--- a/depgraph/master.depword
+++ b/depgraph/master.depword
@@ -5,23 +5,23 @@
 # indepword.txtに各品詞ごとの最初のノードが定義してある。
 #
 # 活用表
-\\include conjugate.table
+# \\include conjugate.table
 # 活用するもの
-\\include conjugate.depword
+# \\include conjugate.depword
 # 活用しないもの
-\\include fix.depword
+# \\include fix.depword
 #
 # 名詞
-\\include noun.depword
-\\include noun-variant.depword
+# \\include noun.depword
+# \\include noun-variant.depword
 # 副詞
-\\include av.depword
+# \\include av.depword
 # 動詞
-\\include v.depword
+# \\include v.depword
 # 形容詞
-\\include a.depword
+# \\include a.depword
 # 形容動詞
-\\include ajv.depword
+# \\include ajv.depword
 #
 #
 @接続詞 "" Sy@ Sy@間投助詞
diff --git a/depgraph/mkdepgraph.c b/depgraph/mkdepgraph.c
index 5842946..4cc4778 100644
--- a/depgraph/mkdepgraph.c
+++ b/depgraph/mkdepgraph.c
@@ -27,22 +27,12 @@
 #include <string.h>
 #include <stdlib.h>

-#include <anthy/alloc.h>
-#include <anthy/conf.h>
-#include <anthy/ruleparser.h>
 #include <anthy/xstr.h>
-#include <anthy/logger.h>
 #include <anthy/splitter.h>
 #include <anthy/anthy.h>
 #include <anthy/depgraph.h>
 #include <anthy/diclib.h>

-#ifndef SRCDIR
-#define SRCDIR "."
-#endif
-
-static int verbose;
-
 static struct dep_node* gNodes;
 static char** gNodeNames;
 static int nrNodes;
@@ -210,8 +200,9 @@ parse_dep(char **tokens, int nr)
   /* 遷移条件がない時は警告を出して、空の遷移条件を追加する */
   if (nr_strs == 0) {
     char *s;
-    anthy_log(0, "node %s has a branch without any transition condition.\n",
-	      tokens[0]);
+
+    fprintf (stderr, "node %s has a branch without any transition condition.\n",
+	     tokens[0]);
     s = strdup("");
     strs[0] = anthy_cstr_to_xstr(s, ANTHY_EUC_JP_ENCODING);
     nr_strs = 1;
@@ -238,52 +229,93 @@ check_nodes(void)
   int i;
   for (i = 1; i < nrNodes; i++) {
     if (gNodes[i].nr_branch == 0) {
-      anthy_log(0, "node %s has no branch.\n", gNodeNames);
+      fprintf (stderr, "node %s has no branch.\n", gNodeNames[i]);
     }
   }
 }

-
 static int
+get_tokens (char *buf, char **tokens, int n)
+{
+  int i;
+  char *p = buf;
+
+  for (i = 0; i < n; i++)
+    {
+      tokens[i] = p;
+      p = strchr (p, ' ');
+      if (p == NULL)
+	return i + 1;
+
+      *p++ = '\0';
+    }
+
+  return -1;
+}
+
+#define MAX_TOKEN 256
+#define BUFSIZE 1024
+#define DEPWORD_INPUT_FILENAME "all.depword"
+#define INDEPWORD_INPUT_FILENAME "indepword.txt"
+
+static void
 init_depword_tab(void)
 {
-  const char *fn;
-  char **tokens;
-  int nr;
+  FILE *fp;
+  char buf[BUFSIZE];
+  int lineno = 0;

-  /* id 0 を空ノードに割当てる */
-  get_node_id_by_name("@");
+  get_node_id_by_name ("@");

-  /**/
-  fn = anthy_conf_get_str("DEPWORD");
-  if (!fn) {
-    anthy_log(0, "Dependent word dictionary is unspecified.\n");
-    return -1;
-  }
-  if (anthy_open_file(fn) == -1) {
-    anthy_log(0, "Failed to open dep word dict (%s).\n", fn);
-    return -1;
-  }
-  /* 一行ずつ付属語グラフを読む */
-  while (!anthy_read_line(&tokens, &nr)) {
-    parse_dep(tokens, nr);
-    anthy_free_line();
-  }
-  anthy_close_file();
-  check_nodes();
-  return 0;
+  if ((fp = fopen (DEPWORD_INPUT_FILENAME, "r")) == NULL)
+    {
+      fprintf (stderr, "Failed to open (%s).\n", DEPWORD_INPUT_FILENAME);
+      exit (1);
+    }
+
+  while (fgets (buf, BUFSIZE, fp) != NULL)
+    {
+      char *tokens[MAX_TOKEN];
+      int nr;
+      char *p;
+
+      if ((p = strchr (buf, '\n')) == NULL)
+	goto error;
+
+      *p = '\0';
+
+      lineno++;
+      nr = get_tokens (buf, tokens, MAX_TOKEN);
+      if (nr < 0)
+	{
+	error:
+	  fprintf (stderr, "Too long line (%d): ignored\n", lineno);
+	  continue;
+	}
+
+      parse_dep (tokens, nr);
+    }
+
+  fclose (fp);
+
+  check_nodes ();
 }


 static void
-parse_indep(char **tokens, int nr)
+parse_indep (char **tokens, int nr, int lineno)
 {
   if (nr < 2) {
-    printf("Syntex error in indepword defs"
-	   " :%d.\n", anthy_get_line_number());
-    return ;
+    fprintf(stderr, "%d: Syntex error in indepword defs.\n", lineno);
+    return;
   }
-  gRules = realloc(gRules, sizeof(struct wordseq_rule)*(nrRules+1));
+
+  gRules = (struct wordseq_rule*)realloc (gRules, sizeof(struct wordseq_rule)*(nrRules+1));
+  if (gRules == NULL)
+    {
+      fprintf (stderr, "%d: malloc failed.\n", lineno);
+      exit (1);
+    }

   /* 行の先頭には品詞の名前が入っている */
   gRules[nrRules].wt = anthy_init_wtype_by_name(tokens[0]);
@@ -291,38 +323,47 @@ parse_indep(char **tokens, int nr)
   /* その次にはノード名が入っている */
   gRules[nrRules].node_id = get_node_id_by_name(tokens[1]);

-  if (verbose) {
-    printf("%d (%s)\n", nrRules, tokens[0]);
-  }
-
   nrRules ++;
 }

 /** 自立語からの遷移表 */
-static int
-init_indep_word_seq_tab(void)
+static void
+init_indep_word_seq_tab (void)
 {
-  const char *fn;
-  char **tokens;
-  int nr;
-
-  fn = anthy_conf_get_str("INDEPWORD");
-  if (!fn){
-    printf("independent word dict unspecified.\n");
-    return -1;
-  }
-  if (anthy_open_file(fn) == -1) {
-    printf("Failed to open indep word dict (%s).\n", fn);
-    return -1;
-  }
-  /* ファイルを一行ずつ読む */
-  while (!anthy_read_line(&tokens, &nr)) {
-    parse_indep(tokens, nr);
-    anthy_free_line();
-  }
-  anthy_close_file();
+  FILE *fp;
+  char buf[BUFSIZE];
+  int lineno = 0;
+
+  if ((fp = fopen (INDEPWORD_INPUT_FILENAME, "r")) == NULL)
+    {
+      fprintf (stderr, "Failed to open (%s).\n", INDEPWORD_INPUT_FILENAME);
+      exit (1);
+    }

-  return 0;
+  while (fgets (buf, BUFSIZE, fp) != NULL)
+    {
+      char *tokens[MAX_TOKEN];
+      int nr;
+      char *p;
+
+      if ((p = strchr (buf, '\n')) == NULL)
+	goto error;
+
+      *p = '\0';
+      if (buf[0] == '#')
+	continue;
+
+      lineno++;
+      nr = get_tokens (buf, tokens, MAX_TOKEN);
+      if (nr < 0)
+	{
+	error:
+	  fprintf (stderr, "Too long line (%d): ignored\n", lineno);
+	  continue;
+	}
+
+      parse_indep (tokens, nr, lineno);
+    }
 }

 /*
@@ -425,12 +466,8 @@ write_file(const char* file_name)
 int
 main(int argc, char* argv[])
 {
-  /* 付属語辞書を読み込んでファイルに書き出す */
-  anthy_conf_override("CONFFILE", "../anthy-conf");
-  anthy_conf_override("ANTHYDIR", SRCDIR "/../depgraph/");
+  anthy_init_wtypes ();

-  anthy_init_wtypes();
-  anthy_do_conf_init();
   /* 付属語グラフ */
   init_depword_tab();
   /* 自立語からの遷移表 */
-- 



Anthy

[Anthy-dev 3822] mkdepgraph cleanup