[Anthy-dev 2320] r5rs: write/ss 実装

Zurück zum Archiv-Index

Jun Inoue jun.l****@gmail*****
2005年 8月 31日 (水) 19:10:02 JST


prime.scm の動作を追ってるときに、write/ss が欲しくなったので書きまし
た。 = の siod 互換追加も <s>わざわざ外すのがだるかったから</s> 含めてあ
ります。ちなみに shiro さんの syntax + apply の話からの類推で bug ではな
く extension と見なしました。
read/ss を実装してないのでテストは肉眼で確認するしかありません。一時ファ
イルにダンプして文字列として読み込めばいいっぽいですが、やってません。

それと、以前 apply パッチに含めたテストですが、最後のは eq? であってま
す。R5RS には "The resulting list is always newly allocated, except that
it shares structure with the last list argument." とあって、SRFI-38 にも
あるように、この "shares structure" というのは eq? という意味だと思われ
ます。

-- 
Jun Inoue
jun.l****@gmail*****
-------------- next part --------------
diff -ur sigscheme/debug.c ../.r5rs/sigscheme/debug.c
--- sigscheme/debug.c	2005-08-29 22:53:36.000000000 -0700
+++ ../.r5rs/sigscheme/debug.c	2005-08-31 02:55:22.000000000 -0700
@@ -51,13 +51,47 @@
     UNKNOWN
 };
 
+#if SCM_USE_SRFI38
+typedef size_t hashval_t;
+typedef struct {
+    ScmObj key;
+    int datum;
+} hash_entry;
+
+typedef struct {
+    size_t size;                /* capacity; MUST be a power of 2 */
+    size_t used;                /* population */
+    hash_entry *ents;
+} hash_table;
+
+typedef struct {
+    hash_table seen;            /* a table of seen objects */
+    int next_index;             /* the next index to use for #N# */
+} write_ss_context;
+#endif
+
 /*=======================================
   File Local Macro Declarations
 =======================================*/
+#if SCM_USE_SRFI38
+#define INTERESTINGP(obj)  \
+    (CONSP(obj) \
+     || (STRINGP(obj) && SCM_STRING_LEN(obj)) \
+     || VECTORP(obj))
+#define SCM_INVALID NULL
+#define OCCUPIED(ent)      (!EQ((ent)->key, SCM_INVALID))
+#define HASH_EMPTY(table)  (!(table).used)
+#define DEFINING_DATUM      (-1)
+#define GET_DEFINDEX(x)    ((unsigned)(x) >> 1)
+#endif
 
 /*=======================================
   Variable Declarations
 =======================================*/
+#if SCM_USE_SRFI38
+/* list of shared structures in the object we're writing out */
+static write_ss_context *write_ss_ctx;
+#endif
 
 /*=======================================
   File Local Function Declarations
@@ -70,6 +104,13 @@
 static void print_port(FILE *f, ScmObj port, enum OutputType otype);
 static void print_etc(FILE *f, ScmObj obj, enum  OutputType otype);
 
+#if SCM_USE_SRFI38
+static void hash_grow(hash_table *tab);
+static hash_entry *hash_lookup(hash_table *tab, ScmObj key, int insert);
+static void write_ss_scan(ScmObj obj, write_ss_context *ctx);
+static int  get_shared_index(ScmObj obj);
+#endif
+
 /*=======================================
    Function Implementations
 =======================================*/
@@ -107,6 +148,21 @@
 
 static void print_ScmObj_internal(FILE *f, ScmObj obj, enum OutputType otype)
 {
+#if SCM_USE_SRFI38
+    if (INTERESTINGP(obj)) {
+        int index = get_shared_index(obj);
+        if (index > 0) {
+            /* defined datum */
+            fprintf(f, "#%d#", index);
+            return;
+        }
+        if (index < 0) {
+            /* defining datum, with the new index negated */
+            fprintf(f, "#%d=", -index);
+            /* Print it; the next time it'll be defined. */
+        }
+    }
+#endif
     switch (SCM_TYPE(obj)) {
     case ScmInt:
         fprintf(f, "%d", SCM_INT_VALUE(obj));
@@ -229,42 +285,54 @@
 static void print_list(FILE *f, ScmObj list, enum OutputType otype)
 {
     ScmObj car = SCM_NULL;
-    ScmObj cdr = SCM_NULL;
-    ScmObj tmp = SCM_NULL;
+#if SCM_USE_SRFI38
+    int index;
+    int necessary_close_parens = 1;
+  cheap_recursion:
+#endif
 
     /* print left parenthesis */
     fprintf(f, "(");
 
-    /* get car and cdr */
-    car = CAR(list);
-    cdr = CDR(list);
-    
-    /* print car */
-    print_ScmObj_internal(f, car, otype);
-    if (!NULLP(cdr))
-        fprintf(f, " ");
-
-    /* print else for-each */
-    for (tmp = cdr; ; tmp = CDR(tmp)) {
-        if (CONSP(tmp)) {
-            print_ScmObj_internal(f, CAR(tmp), otype);
-            if (NULLP(CDR(tmp))) {
-                fprintf(f, ")");
-                return;
-            } else {
-                if (!NULLP(CDR(tmp)))
-                    fprintf(f, " ");
-            }
-        } else {
-            if (!NULLP(tmp)) {
-                fprintf(f, ". ");
-                print_ScmObj_internal(f, tmp, otype);
-            }
-
-            fprintf(f, ")");
-            return;
+    for (;;) {
+        car = CAR(list);
+        print_ScmObj_internal(f, car, otype);
+        list = CDR(list);
+        if (!CONSP(list))
+            break;
+        fputs(" ", f);
+
+#if SCM_USE_SRFI38
+        /* See if the next pair is shared.  Note that the case
+         * where the first pair is shared is handled in
+         * print_ScmObj_internal(). */
+        index = get_shared_index(list);
+        if (index > 0) {
+            /* defined datum */
+            fprintf(f, ". #%d#", index);
+            goto close_parens_and_return;
         }
+        if (index < 0) {
+            /* defining datum, with the new index negated */
+            fprintf(f, ". #%d=", -index);
+            necessary_close_parens++;
+            goto cheap_recursion;
+        }
+#endif
+    }
+
+    /* last item */
+    if (!NULLP(list)) {
+        fputs(" . ", f);
+        /* Callee takes care of shared data. */
+        print_ScmObj_internal(f, list, otype);
     }
+
+#if SCM_USE_SRFI38
+  close_parens_and_return:
+    while (necessary_close_parens--)
+#endif
+        fputc(')', f);
 }
 
 static void print_vector(FILE *f, ScmObj vec, enum OutputType otype)
@@ -331,3 +399,157 @@
     else if (EQ(obj, SCM_UNDEF))
         fprintf(f, "#<undef>");
 }
+
+#if SCM_USE_SRFI38
+
+static void hash_grow(hash_table *tab)
+{
+    size_t old_size = tab->size;
+    size_t new_size = old_size * 2;
+    size_t i;
+    hash_entry *old_ents = tab->ents;
+    hash_entry *new_ent;
+
+    tab->ents = calloc(new_size, sizeof(hash_entry));
+    tab->size = new_size;
+
+    for (i=0; i < old_size; i++) {
+        /* Don't change the last argument, or hash_lookup() will call
+         * us again. */
+        new_ent = hash_lookup(tab, old_ents[i].key, 0);
+        *new_ent = old_ents[i];
+    }
+
+    free (old_ents);
+}
+
+/**
+ * @return A pointer to the entry, or NULL if not found.
+ */
+static hash_entry *hash_lookup(hash_table *tab, ScmObj key, int insert)
+{
+    size_t i;
+    unsigned hashval;
+    hash_entry *ent;
+
+    /* If we have > 32 bits, we'll discard some of them.  The lower
+     * bits are zeroed for alignment or used for tag bits, and in the
+     * latter case, the tag can only take 3 values: pair, string, or
+     * vector.  We'll drop these bits.  KEYs are expected to be
+     * pointers into the heap, so their higher bis are probably
+     * uniform.  I haven't confirmed either's validity, though. */
+    hashval = (unsigned)key;
+    if (sizeof(hashval) > 4) {
+        hashval /= sizeof(ScmObjInternal);
+        hashval &= 0xffffffff;
+    }
+
+    hashval *= 2654435761UL; /* golden ratio hash */
+
+    /* We probe linearly, since a) speed isn't a primary concern for
+     * SigScheme, and b) having a table of primes only for this
+     * purpose is probably just a waste. */
+    for (i=0; i < tab->size; i++) {
+        ent = &(tab->ents)[(hashval + i) & (tab->size - 1)];
+        if (!OCCUPIED(ent)) {
+            if (insert) {
+                /* used > size * 2/3 --> overpopulated, grow table */
+                if (tab->used * 3 > tab->size * 2) {
+                    hash_grow(tab);
+                    return hash_lookup(tab, key, 1);
+                }
+                ent->key = key;
+                tab->used++;
+            }
+            return NULL;
+        }
+        if (EQ(ent->key, key))
+            return ent;
+    }
+
+    /* A linear probe should always find a slot. */
+    abort();
+}
+
+/**
+ * Find out what non-atomic objects a structure shares within itself.
+ * @param obj The object in question, or a part of it.
+ * @param ctx Where to put the scan results.
+ */
+static void write_ss_scan(ScmObj obj, write_ss_context *ctx)
+{
+    int i;
+    hash_entry *ent;
+    /* (for-each mark-as-seen-or-return-if-familiar obj) */
+    while (CONSP(obj)) {
+        ent = hash_lookup(&ctx->seen, obj, 1);
+        if (ent) {
+            ent->datum = DEFINING_DATUM;
+            return;
+        }
+        write_ss_scan(CAR(obj), ctx);
+        obj = CDR(obj);
+    }
+
+    if (VECTORP(obj)) {
+        ent = hash_lookup(&ctx->seen, obj, 1);
+        if (ent) {
+            ent->datum = DEFINING_DATUM;
+            return;
+        }
+        for (i=0; i < SCM_VECTOR_LEN(obj); i++)
+            write_ss_scan(SCM_VECTOR_CREF(obj, i), ctx);
+        return;
+    }
+    if (STRINGP(obj) && SCM_STRING_LEN(obj)) {
+        ent = hash_lookup(&ctx->seen, obj, 1);
+	if (ent) {
+	    ent->datum = DEFINING_DATUM;
+	    return;
+	}
+    }
+}
+
+/**
+ * @return The index for obj, if it's a defined datum.  If it's a
+ *         defining datum, allocate an index for it and return the
+ *         *additive inverse* of the index.  If obj is nondefining,
+ *         return zero.
+ */
+static int get_shared_index(ScmObj obj)
+{
+    hash_entry *ent;
+
+    if (write_ss_ctx) {
+        ent = hash_lookup(&write_ss_ctx->seen, obj, 0);
+
+        if (ent->datum == DEFINING_DATUM) {
+            ent->datum = write_ss_ctx->next_index++;
+            return - (ent->datum);
+        }
+        return ent->datum;
+    }
+    return 0;
+}
+
+void SigScm_WriteToPortWithSharedStructure(ScmObj port, ScmObj obj)
+{
+    write_ss_context ctx = {{0}};
+
+    ctx.next_index = 1;
+    ctx.seen.size = 1 << 8;     /* arbitrary initial size */
+    ctx.seen.ents = calloc(ctx.seen.size, sizeof(hash_entry));
+
+    write_ss_scan(obj, &ctx);
+
+    /* If no structure is shared, we do a normal write. */
+    if (!HASH_EMPTY(ctx.seen))
+        write_ss_ctx = &ctx;
+
+    SigScm_WriteToPort(port, obj);
+    
+    write_ss_ctx = NULL;
+    free(ctx.seen.ents);
+}
+
+#endif /* SCM_USE_SRFI38 */
diff -ur sigscheme/main.c ../.r5rs/sigscheme/main.c
--- sigscheme/main.c	2005-08-29 22:53:37.000000000 -0700
+++ ../.r5rs/sigscheme/main.c	2005-08-31 02:38:18.000000000 -0700
@@ -77,7 +77,11 @@
          s_exp = SigScm_Read(stdin_port))
     {
         result = ScmOp_eval(s_exp, SCM_NULL);
-        SigScm_DisplayToPort(stdout_port, result);
+#if SCM_USE_SRFI38
+        SigScm_WriteToPortWithSharedStructure(stdout_port, result);
+#else
+        SigScm_WriteToPort(stdout_port, result);
+#endif
         printf("\nsscm> ");
     }
     
diff -ur sigscheme/operations.c ../.r5rs/sigscheme/operations.c
--- sigscheme/operations.c	2005-08-29 22:53:37.000000000 -0700
+++ ../.r5rs/sigscheme/operations.c	2005-08-31 02:20:19.000000000 -0700
@@ -354,9 +354,11 @@
     if CHECK_2_ARGS(args)
         SigScm_Error("= : Wrong number of arguments\n");
 
+#if !SCM_COMPAT_SIOD
     /* type check */
     if (FALSEP(ScmOp_numberp(CAR(args))))
         SigScm_ErrorObj("= : number required but got ", CAR(args));
+#endif
 
     /* Get first value */
     val = SCM_INT_VALUE(CAR(args));
@@ -364,8 +366,11 @@
     /* compare following value */
     for (args = CDR(args); !NULLP(args); args = CDR(args)) {
         obj = CAR(args);
+
+#if !SCM_COMPAT_SIOD
         if (FALSEP(ScmOp_numberp(obj)))
             SigScm_ErrorObj("= : number required but got ", obj);
+#endif
 
         if (SCM_INT_VALUE(obj) != val)
         {
@@ -2015,6 +2020,9 @@
 #if SCM_USE_SRFI8
 #include "operations-srfi8.c"
 #endif
+#if SCM_USE_SRFI38
+#include "operations-srfi38.c"
+#endif
 #if SCM_COMPAT_SIOD
 #include "operations-siod.c"
 #endif
diff -ur sigscheme/sigscheme.c ../.r5rs/sigscheme/sigscheme.c
--- sigscheme/sigscheme.c	2005-08-29 22:53:37.000000000 -0700
+++ ../.r5rs/sigscheme/sigscheme.c	2005-08-31 02:19:20.000000000 -0700
@@ -319,6 +319,12 @@
     =======================================================================*/
     Scm_RegisterFuncRawListTailRec("receive", ScmOp_SRFI8_receive);
 #endif
+#if SCM_USE_SRFI38
+    /*=======================================================================
+      SRFI-8 Procedure
+    =======================================================================*/
+    Scm_RegisterFuncEvaledList("write-with-shared-structure", ScmOp_SRFI38_write_with_shared_structure);
+#endif
 
 #if SCM_COMPAT_SIOD
     /*=======================================================================
diff -ur sigscheme/sigscheme.h ../.r5rs/sigscheme/sigscheme.h
--- sigscheme/sigscheme.h	2005-08-29 22:53:37.000000000 -0700
+++ ../.r5rs/sigscheme/sigscheme.h	2005-08-31 02:23:59.000000000 -0700
@@ -66,8 +66,9 @@
    Macro Declarations
 =======================================*/
 #define SCM_USE_EUCJP           1  /* use EUC-JP as internal encoding */
-#define SCM_USE_SRFI1           0  /* use SRFI-1 procedures writtein in C */
-#define SCM_USE_SRFI8           1  /* use SRFI-8 receive procedure writtein in C */
+#define SCM_USE_SRFI1           0  /* use SRFI-1 procedures written in C */
+#define SCM_USE_SRFI8           1  /* use SRFI-8 receive procedure written in C */
+#define SCM_USE_SRFI38          1  /* use SRFI-38 write/ss written in C */
 #define SCM_USE_NONSTD_FEATURES 1  /* use Non-R5RS standard features */
 #define SCM_COMPAT_SIOD         1  /* use SIOD compatible features */
 #define SCM_COMPAT_SIOD_BUGS    1  /* emulate the buggy behaviors of SIOD */
@@ -333,6 +334,9 @@
 void SigScm_Display(ScmObj obj);
 void SigScm_WriteToPort(ScmObj port, ScmObj obj);
 void SigScm_DisplayToPort(ScmObj port, ScmObj obj);
+#if SCM_USE_SRFI38
+void SigScm_WriteToPortWithSharedStructure(ScmObj port, ScmObj obj);
+#endif
 
 #if SCM_USE_SRFI1
 /* operations-srfi1.c */
@@ -348,6 +352,9 @@
 /* operations-srfi8.c */
 ScmObj ScmOp_SRFI8_receive(ScmObj args, ScmObj *envp);
 #endif
+#if SCM_USE_SRFI38
+ScmObj ScmOp_SRFI38_write_with_shared_structure(ScmObj arg, ScmObj env);
+#endif
 #if SCM_COMPAT_SIOD
 /* operations-siod.c */
 ScmObj ScmOp_symbol_boundp(ScmObj obj);
diff -ur sigscheme/test/test-list.scm ../.r5rs/sigscheme/test/test-list.scm
--- sigscheme/test/test-list.scm	2005-08-21 04:48:30.000000000 -0700
+++ ../.r5rs/sigscheme/test/test-list.scm	2005-08-31 02:20:30.000000000 -0700
@@ -53,7 +53,7 @@
 (define z '(why))
 (assert-equal? "append test4" '(n o d o car why . ta) (append w x y () z 'ta))
 (assert-equal? "append test5" '(n o) w)	; test non-destructiveness
-(assert-equal? "append test6" x (cdr (append '((Calpis hosi-)) x))) ; share last
+(assert-eq? "append test6" x (cdr (append '((Calpis hosi-)) x))) ; share last
 
 ; reverse
 (assert-equal? "reverse test1" '(c b a) (reverse '(a b c)))


Anthy-dev メーリングリストの案内
Zurück zum Archiv-Index