Projects : gscm : gscm_glibc_build_fix

gscm/src/gscm.c

Dir - Raw

1/**************
2 * Gales Scheme
3 *
4 * A Scheme interpreter for Unix striving for simplicity, soundness, minimal
5 * artificial restrictions, and R5RS compliance with strict error checking.
6 *
7 * J. Welsh
8 * January 2017 - March 2023
9 */
10
11/* Define this if your system supports pipe2 and the O_CLOEXEC flag. Currently this is manually switched because I want to find out if there are any relevant systems that don't. (In other words - if you have to remove this to build, please let me know!) */
12#define HAVE_PIPE2
13
14/* make glibc define MAP_ANONYMOUS */
15#define _BSD_SOURCE
16/* make glibc 2.12+ define O_CLOEXEC */
17#define _POSIX_C_SOURCE 200809L
18
19#include <errno.h>
20#include <limits.h>
21#include <math.h>
22#include <setjmp.h>
23
24#include <fcntl.h>
25#include <unistd.h>
26#include <poll.h>
27#include <sys/mman.h>
28#include <sys/wait.h>
29
30#include <sys/socket.h>
31#include <sys/un.h>
32#include <netinet/in.h>
33
34#ifndef MAP_ANON
35#define MAP_ANON MAP_ANONYMOUS
36#endif
37
38/* stdio.h dependencies listed explicitly */
39int snprintf(char *, size_t, const char *, ...); /* to be replaced */
40
41/* stdlib.h dependencies listed explicitly */
42void abort(void);
43
44/* string.h dependencies listed explicitly */
45size_t strlen(const char *);
46char *strerror(int);
47void *memcpy(void *, const void *, size_t);
48void *memset(void *, int, size_t);
49int memcmp(const void *, const void *, size_t);
50
51/* Newer syscalls which glibc is cranky about declaring: let's not play along with its feature test macro games more than strictly necessary. */
52pid_t vfork(void);
53int fdatasync(int);
54int pipe2(int[2], int);
55
56#include "gscm.h"
57
58
59/******************
60 * Memory structure
61 */
62
63/* The Scheme heap is an array of N-bit cells where N is the size of a machine
64 * address. */
65
66typedef size_t value;
67typedef value (*builtin_func_t)(value args);
68typedef unsigned char uchar;
69typedef unsigned long ulong;
70typedef unsigned int uint;
71
72/* Principal type tag: three most significant bits of cell */
73#define TAG_BITS 3
74
75#define T_SPECIAL 0 /* Special values listed below */
76#define T_MOVED 1 /* "Broken heart" pointer to GC moved object */
77#define T_IMMUT_PAIR 2 /* Pointer to car with cdr following */
78#define T_PAIR 3
79#define T_CHARACTER 4 /* Character in least significant byte */
80#define T_FIXNUM 5 /* N-3 bit two's complement signed integer */
81#define T_EXTENDED 6 /* Pointer to extended object */
82#define T_EXT_HEADER 7 /* Extended type header */
83
84/* Special values indicated by T_SPECIAL. Since that's zero, these can be
85 * compared with values directly. */
86#define SC_NULL 0
87#define SC_TRUE 1
88#define SC_FALSE 2
89#define SC_EOF 3
90#define SC_NULL_ENV 4
91#define SC_REPORT_ENV 5
92#define SC_GSCM_ENV 6
93#define SC_INTERACT_ENV 7
94#define SC_TOPLEVEL_ENV 8
95/* Inaccessible from Scheme */
96#define UNDEFINED 9
97#define RD_CLOSEPAREN 10 /* Returned internally by reader subroutines */
98#define RD_DOT 11
99
100/* T_SPECIAL is also implicitly (ab)used for return addresses (EV_*, RD_* and
101 * so on) and loop counters on the stack. GC doesn't have to know what they
102 * really are as long as it treats them as immediate values. */
103
104/* Extended objects consist of a header cell (T_EXT_HEADER) containing extended
105 * type information followed by possibly untagged data cells, depending on
106 * type. The four bits following the principal tag in the header are the
107 * extended type tag: */
108#define T_IMMUT_STRING 0x0
109#define T_STRING 0x1
110#define T_IMMUT_VECTOR 0x2
111#define T_VECTOR 0x3
112#define T_VARIABLE_REF 0x4
113#define T_SYMBOL 0x5
114#define T_BUILTIN 0x6
115#define T_PROCEDURE 0x7
116#define T_CONTINUATION 0x8
117#define T_PROMISE 0x9
118#define T_PORT 0xA
119#define T_FLONUM 0xB /* is_number assumes all numbers from here */
120#define T_BIGNUM 0xC
121#define T_NEG_BIGNUM 0xD
122#define T_RATIONAL 0xE
123#define T_COMPLEX 0xF
124
125/* Tags for types with immutable variants, both principal and extended, must
126 * be equal to the bitwise OR of 1 with the immutable variant. That is, the
127 * least significant tag bit is the mutability flag, where applicable. */
128
129/* Symbols, strings, vectors, and bignums store their length in the header as
130 * an N-7 bit unsigned integer. For vectors and bignums, that many cells
131 * follow. Strings and symbols are packed, so ceil(length/(N/8)) cells follow.
132 * Lexical variable references store the argument index in this space.
133 *
134 * Example for 32-bit systems:
135 * - Pointers/fixnums have 29 bits
136 * - Max heap size is 2^29 = 512M cells of 4 bytes = 2 GiB (4 during GC)
137 * - Longest string is 2^25 characters = 32 MiB
138 * - Longest vector is 2^25 cells = 128 MiB (not counting any pointer targets)
139 * - Longest bignum is 2^25 cells = 2^30 bits for a magnitute ~ 10^10^8
140 *
141 * If the size limits are a problem, the length could be stored in an untagged
142 * or fixnum cell after the header. */
143
144#if __SIZEOF_POINTER__ == 8
145#define VAL_BITS 61
146#define EXT_VAL_BITS 57
147#define FIXNUM_MAX 0x0FFFFFFFFFFFFFFF
148#define FIXNUM_MIN -0x1000000000000000
149#define EXT_LENGTH_MAX 0x01FFFFFFFFFFFFFF
150#define packed_str_len(bytes) (((bytes) + 7) >> 3)
151#define FLONUM_CELLS 1
152
153#elif __SIZEOF_POINTER__ == 4
154#define VAL_BITS 29
155#define EXT_VAL_BITS 25
156#define FIXNUM_MAX 0x0FFFFFFF
157#define FIXNUM_MIN -0x10000000
158#define EXT_LENGTH_MAX 0x01FFFFFF
159#define packed_str_len(bytes) (((bytes) + 3) >> 2)
160#define FLONUM_CELLS 2
161
162#else
163#error Unsupported pointer size
164#endif
165
166#define tag(v) (((value)(v)) >> VAL_BITS)
167#define add_tag(v, t) ((v) | (((value)(t)) << VAL_BITS))
168#define untag(v) ((((value)(v)) << 3) >> 3)
169#define untag_signed(v) (((long) (((value)(v)) << 3)) >> 3)
170#define ext_tag(v) (((v) >> EXT_VAL_BITS) & 0xF)
171#define ext_add_tag(v, t) ((v) | ((value)(t) << EXT_VAL_BITS) | \
172 (((value)T_EXT_HEADER) << VAL_BITS))
173#define ext_untag(v) ((((value)(v)) << 7) >> 7)
174#define ext_untag_signed(v) (((long) (((value)(v)) << 7)) >> 7)
175/* WARNING: add_tag/ext_add_tag assume v's tag bits are zero */
176
177static value car(value);
178static value cdr(value);
179
180
181/******************
182 * Scheme registers
183 */
184
185/* General purpose */
186static value r0, r1, r2, r3, r4, r5, r6;
187/* Special purpose */
188static value r_stack, r_spool, r_error_cont, r_signal_handler, r_compiler,
189 r_compiler_expr, r_input_port, r_output_port, r_dump;
190static enum {
191 f_none,
192 f_compile,
193 f_apply,
194 f_force,
195 f_call_with_values,
196 f_values,
197} r_flag;
198
199/* Register aliases to make usage more readable. Some rules for validation:
200 * - A subroutine may use a single register under different aliases, but before
201 * it is read or used as an argument under one alias, it must have been:
202 * - Assigned or declared as a parameter under the same alias, and
203 * - Not meanwhile assigned under a different alias.
204 * - Parameter registers must be distinct.
205 */
206#define R_EXPR r0 /* expression being evaluated */
207#define R_ARGS r0 /* arguments to apply procedure to */
208
209#define R_ENV r1 /* evaluation environment */
210#define R_PROC r1 /* procedure to apply */
211#define R_PORT r1 /* argument to I/O routines */
212#define R_ARG r1
213
214#define R_RESULT r2 /* subroutine return value */
215#define R_LEXEME r2
216#define R_FORMALS r2
217#define R_WIND_TO r2
218
219#define R_VARNAME r3
220#define R_TAIL r3 /* last pair of a list being built */
221#define R_LCA r3
222
223#define R_OPERANDS r4
224#define R_SECOND_LAST r4
225
226#define R_CAR r5 /* argument to cons or push */
227
228#define R_CDR r6 /* argument to cons */
229#define R_ITER r6
230
231
232/*****************
233 * Syscall helpers
234 */
235
236static int open_cloexec(const char *path, int flags) {
237#ifdef HAVE_PIPE2
238 return open(path, flags | O_CLOEXEC, 0666);
239#else
240 /* Non-atomic version for systems lacking O_CLOEXEC. This doesn't currently matter, but if we go multi-threaded, we'll need locking for this and any exec family calls. And exec locking would be hard to enforce if this were linkable into larger programs. So basically there's a choice between portability or support for complexity. */
241 int fd = open(path, flags, 0666);
242 if (fd != -1) fcntl(fd, F_SETFD, FD_CLOEXEC);
243 return fd;
244#endif
245}
246
247static int pipe_cloexec(int pipefd[2]) {
248#ifdef HAVE_PIPE2
249 return pipe2(pipefd, O_CLOEXEC);
250#else
251 /* Non-atomic version for systems lacking pipe2 (see O_CLOEXEC comments above). */
252 if (pipe(pipefd) == -1) return -1;
253 fcntl(pipefd[0], F_SETFD, FD_CLOEXEC);
254 fcntl(pipefd[1], F_SETFD, FD_CLOEXEC);
255 return 0;
256#endif
257}
258
259/* Reliably catching close errors is NOT POSSIBLE on Linux and others. The call
260 * may block and be interrupted by a signal handler, yet cannot be retried as
261 * the FD is deallocated early. HPUX at least has the atypical behavior of
262 * leaving the FD open, so it would leak. Should figure out where exactly close
263 * can block. */
264static void blind_close(int fd) {
265 int saved_errno = errno;
266 close(fd);
267 errno = saved_errno;
268}
269
270static int poll1(int fd, short events, int timeout) {
271 int r;
272 struct pollfd sp;
273 sp.fd = fd;
274 sp.events = events;
275 while ((r = poll(&sp, 1, timeout)) == -1)
276 if (errno != EAGAIN && errno != EINTR) sc_perror();
277 return r;
278}
279
280static int write_all(int fd, const char *buf, ssize_t len) {
281 ssize_t n;
282 while ((n = write(fd, buf, len)) < len) {
283 if (n != -1) len -= n, buf += n;
284 else if (errno == EAGAIN || errno == EWOULDBLOCK)
285 poll1(fd, POLLOUT, -1);
286 else if (errno != EINTR) return -1;
287 }
288 return 0;
289}
290
291void sc_write_error(const char *msg) {
292 size_t len = strlen(msg);
293 if (len) write_all(2, msg, len);
294}
295#define write_err sc_write_error
296
297static void flush_all(void);
298
299__attribute__((noreturn))
300void sc_exit(int status) {
301 flush_all();
302 _exit(status);
303}
304
305
306/****************
307 * Error handling
308 */
309
310/* Failsafe error handler */
311
312__attribute__((noreturn))
313static void fatal(const char *msg) {
314 write_err("FATAL: ");
315 write_err(msg);
316 write_err("\n");
317 sc_exit(1);
318}
319
320__attribute__((noreturn))
321static void fatal1(const char *msg, const char *detail) {
322 write_err("FATAL: ");
323 write_err(msg);
324 write_err(": ");
325 write_err(detail);
326 write_err("\n");
327 sc_exit(1);
328}
329
330__attribute__((noreturn))
331void sc_error(const char *msg) { sc_error1(msg, UNDEFINED); }
332
333__attribute__((noreturn))
334void sc_perror(void) { sc_error(strerror(errno)); }
335
336__attribute__((noreturn))
337void sc_perror1(value detail) { sc_error1(strerror(errno), detail); }
338
339static int chkp(int r) { if (r == -1) sc_perror(); return r; }
340
341static const char *fmt_ulong_dec(ulong);
342
343__attribute__((noreturn))
344void sc_assert_fail(const char *file, ulong line, const char *func,
345 const char *expr) {
346 const char *sep = ": ";
347 static int aborting = 0;
348 if (!aborting) flush_all();
349 aborting = 1;
350 write_err("Assertion failed: ");
351 write_err(file); write_err(sep);
352 write_err(fmt_ulong_dec(line)); write_err(sep);
353 write_err(func); write_err(sep);
354 write_err(expr); write_err("\n");
355 abort();
356}
357
358/* various common errors */
359
360__attribute__((noreturn))
361static void not_a_number(value v) { sc_error1("not a number:", v); }
362
363
364/*******************************
365 * Garbage collector & allocator
366 */
367
368/* Heap discipline:
369 *
370 * This garbage collector uses the stop-and-copy (Minsky-Fenichel-Yochelson)
371 * method. Because it relocates values into a new heap and is triggered by
372 * allocation, any function that directly or indirectly calls sc_malloc cannot
373 * keep pointer types (T_PAIR, T_IMMUT_PAIR, T_EXTENDED) in local variables
374 * across such calls, as the addresses may be invalidated. The Scheme stack,
375 * registers, or otherwise statically stored variables registered as roots must
376 * be used instead.
377 *
378 * Such functions will generally be constructors and take their arguments
379 * through the stack or registers. Notably included are push and cons.
380 * Specifically not included are pop, peek, drop, car, cdr, set_car and
381 * set_cdr.
382 *
383 * The reward for this trouble is fast and compacting garbage collection.
384 */
385
386static value *heap, *new_heap;
387static value heap_size, free_ptr;
388
389#define ROOTS_ALLOC 48
390static value *roots[ROOTS_ALLOC];
391static value roots_fill;
392
393static void gc_root(value *handle) {
394 if (roots_fill >= ROOTS_ALLOC) fatal("insufficient ROOTS_ALLOC");
395 roots[roots_fill] = handle;
396 ++roots_fill;
397}
398
399static value ext_obj_size(value header) {
400 switch (ext_tag(header)) {
401 case T_IMMUT_STRING:
402 case T_STRING: return 1 + packed_str_len(ext_untag(header));
403 case T_IMMUT_VECTOR:
404 case T_VECTOR: return 1 + ext_untag(header);
405 case T_VARIABLE_REF: return 2;
406 case T_SYMBOL: return 1 + packed_str_len(ext_untag(header));
407 case T_BUILTIN: return 3;
408 case T_PROCEDURE: return 4;
409 case T_CONTINUATION: return 3;
410 case T_PROMISE: return 3;
411 case T_PORT: return 6;
412 case T_FLONUM: return 1 + FLONUM_CELLS;
413 case T_BIGNUM:
414 case T_NEG_BIGNUM: return 1 + ext_untag(header);
415 case T_RATIONAL: return 3;
416 case T_COMPLEX: return 3;
417 default: fatal("BUG: invalid extended tag");
418 }
419}
420
421/* Process one cell (in either a root or the new heap), returning number of
422 * cells to advance */
423static value scan_cell(value *scan_val) {
424 int scan_tag = tag(*scan_val);
425 value ptr, old_val, length;
426 assert(scan_tag != T_MOVED);
427 switch (scan_tag) {
428 case T_IMMUT_PAIR:
429 case T_PAIR:
430 case T_EXTENDED:
431 ptr = untag(*scan_val);
432 old_val = heap[ptr];
433 if (tag(old_val) == T_MOVED)
434 *scan_val = add_tag(untag(old_val), scan_tag);
435 else {
436 *scan_val = add_tag(free_ptr, scan_tag);
437 length = (scan_tag == T_EXTENDED) ? ext_obj_size(old_val) : 2;
438 memcpy(&new_heap[free_ptr], &heap[ptr], length*sizeof(value));
439 heap[ptr] = add_tag(free_ptr, T_MOVED);
440 free_ptr += length;
441 }
442 return 1;
443 case T_EXT_HEADER:
444 switch (ext_tag(*scan_val)) {
445 /* For compound types, skip the header and scan each element */
446 case T_IMMUT_VECTOR:
447 case T_VECTOR:
448 case T_VARIABLE_REF:
449 case T_PROCEDURE:
450 case T_CONTINUATION:
451 case T_PROMISE:
452 case T_PORT:
453 case T_RATIONAL:
454 case T_COMPLEX:
455 return 1;
456 /* Otherwise skip the whole blob */
457 default:
458 return ext_obj_size(*scan_val);
459 }
460 default:
461 /* All other principal types are immediate values */
462 return 1;
463 }
464}
465
466uint sc_gc_verbose = 0, sc_gc_thrash_factor = 16;
467
468void sc_gc(void) {
469 value root, scan_ptr, *tmp;
470 if (sc_gc_verbose) {
471 static ulong gc_count = 0;
472 write_err("GC: cycle ");
473 write_err(fmt_ulong_dec(++gc_count));
474 write_err(" | ");
475 }
476 free_ptr = 0;
477 for (root = 0; root < roots_fill; ++root) scan_cell(roots[root]);
478 for (scan_ptr = 0; scan_ptr < free_ptr;
479 scan_ptr += scan_cell(&new_heap[scan_ptr]))
480 assert(free_ptr <= heap_size);
481 tmp = heap;
482 heap = new_heap;
483 new_heap = tmp;
484 if (sc_gc_verbose) {
485 /* using floating point to avoid overflow */
486 double live_bytes = free_ptr*sizeof(value);
487 double live_pct = 100.*free_ptr/heap_size;
488 write_err(fmt_ulong_dec(free_ptr));
489 write_err(" cells | ");
490 write_err(fmt_ulong_dec((live_bytes+1023.)/1024.));
491 write_err("K | ");
492 write_err(fmt_ulong_dec(live_pct));
493 write_err(".");
494 write_err(fmt_ulong_dec(((unsigned)(10.*live_pct))%10));
495 write_err("% live\n");
496 }
497}
498
499static value sc_malloc(size_t cells) {
500 value result = free_ptr;
501 free_ptr += cells;
502 if (free_ptr > heap_size) {
503 sc_gc();
504 result = free_ptr;
505 free_ptr += cells;
506 if (free_ptr > (heap_size - heap_size/sc_gc_thrash_factor)) {
507 /* Clear registers in hopes of freeing space. While not guaranteed,
508 * this can help simple cases like recovering the REPL after a
509 * runaway recursion. */
510 r0 = r1 = r2 = r3 = r4 = r5 = r6 = r_stack = SC_NULL;
511 sc_error("out of memory");
512 }
513 }
514 return result;
515}
516
517
518/*************************
519 * Scheme stack operations
520 */
521
522/* Push R_CAR onto the stack (no other side effects) */
523static void push(void) {
524 value new_stack = sc_malloc(2);
525 heap[new_stack] = R_CAR;
526 heap[new_stack+1] = r_stack;
527 r_stack = add_tag(new_stack, T_PAIR);
528}
529
530/* Shorthand to push an arbitrary value */
531#define PUSH(val) { R_CAR = (val); push(); }
532
533/* Remove the top of the stack */
534static void drop(void) {
535 r_stack = cdr(r_stack);
536}
537
538/* Return the top of the stack */
539static value peek(void) {
540 return car(r_stack);
541}
542
543/* Remove and return the top of the stack */
544static value pop(void) {
545 value v = car(r_stack);
546 r_stack = cdr(r_stack);
547 return v;
548}
549
550
551/***************************************************
552 * Builtin type constructors, predicates & accessors
553 */
554
555static int is_ext_type(value v, uint t) {
556 return tag(v) == T_EXTENDED && ext_tag(heap[untag(v)]) == t;
557}
558
559static int is_mutable(value v) {
560 int t = tag(v);
561 if (t != T_EXTENDED) return t == T_PAIR;
562 t = ext_tag(heap[untag(v)]);
563 return t == T_STRING || t == T_VECTOR;
564}
565
566/* Booleans */
567
568static value boolean(int b) { return b ? SC_TRUE : SC_FALSE; }
569static int is_boolean(value v) { return v == SC_TRUE || v == SC_FALSE; }
570
571/* Pairs & lists */
572
573/* Return a new pair from the values of R_CAR and R_CDR */
574static value cons(void) {
575 value p = sc_malloc(2);
576 heap[p] = R_CAR;
577 heap[p+1] = R_CDR;
578 return add_tag(p, T_PAIR);
579}
580static value cons_immutable(void) {
581 value p = sc_malloc(2);
582 heap[p] = R_CAR;
583 heap[p+1] = R_CDR;
584 return add_tag(p, T_IMMUT_PAIR);
585}
586static int is_pair(value v) { return (tag(v) | 1) == T_PAIR; }
587static value car(value p) {
588 assert(is_pair(p));
589 return heap[untag(p)];
590}
591static value cdr(value p) {
592 assert(is_pair(p));
593 return heap[untag(p)+1];
594}
595static void set_car(value p, value v) {
596 assert(is_pair(p));
597 heap[untag(p)] = v;
598}
599static void set_cdr(value p, value v) {
600 assert(is_pair(p));
601 heap[untag(p)+1] = v;
602}
603static value safe_car(value p) {
604 if (!is_pair(p)) sc_error1("not a pair:", p);
605 return car(p);
606}
607static value safe_cdr(value p) {
608 if (!is_pair(p)) sc_error1("not a pair:", p);
609 return cdr(p);
610}
611#define cadr(x) car(cdr(x))
612
613/* Safely compute the length of a list, returning -1 if not a proper list */
614static long safe_list_length(value v) {
615 /* Floyd's cycle-finding algorithm */
616 value slow = v, fast = v, length = 0;
617 while (is_pair(fast)) {
618 slow = cdr(slow);
619 fast = cdr(fast);
620 length++;
621 if (!is_pair(fast)) break;
622 fast = cdr(fast);
623 if (fast == slow) return -1; /* cycle */
624 length++;
625 }
626 if (fast != SC_NULL) return -1; /* improper list or not a pair */
627 return length;
628}
629static int is_list(value v) { return safe_list_length(v) >= 0; }
630
631/* Compute the length of a proper list */
632static value list_length(value l) {
633 value length = 0;
634 for (; l != SC_NULL; l = cdr(l)) length++;
635 return length;
636}
637
638/* Find the first node shared by two proper lists; that is, the LCA of two
639 * nodes in the parent-pointer tree rooted at the empty list. */
640static value lowest_common_ancestor(value a, value b) {
641 value al = list_length(a), bl = list_length(b);
642 if (al != bl) {
643 if (al > bl)
644 do a = cdr(a), --al; while (al > bl);
645 else
646 do b = cdr(b), --bl; while (bl > al);
647 }
648 while (a != b) a = cdr(a), b = cdr(b);
649 return a;
650}
651
652/* Numbers */
653
654static value fixnum_zero, fixnum_one;
655
656/* Not bounds checked! */
657static value fixnum(long n) { return add_tag(untag(n), T_FIXNUM); }
658static int is_fixnum(value v) { return tag(v) == T_FIXNUM; }
659static long fixnum_val(value v) {
660 assert(is_fixnum(v));
661 return untag_signed(v);
662}
663static ulong unsigned_fixnum_val(value v) {
664 assert(is_fixnum(v));
665 return untag(v);
666}
667static long safe_fixnum_val(value v) {
668 if (is_fixnum(v)) return untag_signed(v);
669 sc_error1("not an integer or out of bounds:", v);
670}
671
672static value flonum(double x) {
673 value f = sc_malloc(1 + FLONUM_CELLS);
674 heap[f] = ext_add_tag(0, T_FLONUM);
675 /* strict aliasing?
676 *((double *)&heap[f+1]) = x; */
677 memcpy(&heap[f+1], &x, sizeof x);
678 return add_tag(f, T_EXTENDED);
679}
680static int is_flonum(value v) { return is_ext_type(v, T_FLONUM); }
681static double flonum_val(value f) {
682 /* strict aliasing?
683 return *((double *)&heap[untag(f)+1]); */
684 double x;
685 assert(is_flonum(f));
686 memcpy(&x, &heap[untag(f)+1], sizeof x);
687 return x;
688}
689
690static value make_bignum_uninit(value len, int neg) {
691 value ptr;
692 if (len > EXT_LENGTH_MAX) sc_error("length too large for bignum");
693 ptr = sc_malloc(1 + len);
694 heap[ptr] = ext_add_tag(len, T_BIGNUM | neg);
695 return add_tag(ptr, T_EXTENDED);
696}
697static int is_bignum(value v) {
698 return tag(v) == T_EXTENDED && (ext_tag(heap[untag(v)]) | 1) ==
699 T_NEG_BIGNUM;
700}
701static value bignum_len(value n) {
702 assert(is_bignum(n));
703 return ext_untag(heap[untag(n)]);
704}
705static value bignum_ref(value n, value k) {
706 assert(k < bignum_len(n));
707 return heap[untag(n)+k+1];
708}
709static void bignum_set(value n, value k, value word) {
710 assert(k < bignum_len(n));
711 assert(is_fixnum(word));
712 heap[untag(n)+k+1] = word;
713}
714static int is_bignum_negative(value n) {
715 assert(is_bignum(n));
716 return ext_tag(heap[untag(n)]) & 1;
717}
718static value bignum_set_negative(value n) {
719 assert(is_bignum(n));
720 heap[untag(n)] |= (1UL << EXT_VAL_BITS);
721 return n;
722}
723/* Truncate bignum in place (consider carefully how GC works) */
724static value bignum_truncate(value n, value len) {
725 assert(len <= bignum_len(n));
726 value ptr = untag(n);
727 heap[ptr] = ext_add_tag(len, ext_tag(heap[ptr]));
728 return n;
729}
730
731static int is_rational(value v) { return is_ext_type(v, T_RATIONAL); }
732
733static int is_exact(value v) {
734 return is_fixnum(v) || is_bignum(v) || is_rational(v);
735}
736static int is_number(value v) {
737 return is_fixnum(v) ||
738 (tag(v) == T_EXTENDED && ext_tag(heap[untag(v)]) >= T_FLONUM);
739}
740static int is_integer(value v) {
741 if (is_fixnum(v) || is_bignum(v)) return 1;
742 if (is_flonum(v)) {
743 double f = flonum_val(v);
744 return f == nearbyint(f);
745 }
746 return 0;
747}
748
749/* Characters */
750
751static value character(uchar c) { return add_tag(c, T_CHARACTER); }
752static int is_character(value v) { return tag(v) == T_CHARACTER; }
753static uchar safe_char_val(value c) {
754 if (!is_character(c)) sc_error1("not a character:", c);
755 return (uchar)c;
756}
757#define char_val(c) ((uchar)(c))
758
759/* Convert ASCII characters to upper/lowercase */
760static uchar uc(uchar c) {
761 if (c >= 'a' && c <= 'z') return c - 0x20;
762 return c;
763}
764static uchar lc(uchar c) {
765 if (c >= 'A' && c <= 'Z') return c + 0x20;
766 return c;
767}
768
769/* Strings */
770
771static value alloc_string(value len) {
772 if (len > EXT_LENGTH_MAX)
773 sc_error("length negative or too large for string");
774 return sc_malloc(1 + packed_str_len(len));
775}
776static value make_string_uninit(value len) {
777 value ptr = alloc_string(len);
778 heap[ptr] = ext_add_tag(len, T_STRING);
779 return add_tag(ptr, T_EXTENDED);
780}
781static value make_immutable_string(value len) {
782 value ptr = alloc_string(len);
783 heap[ptr] = ext_add_tag(len, T_IMMUT_STRING);
784 return add_tag(ptr, T_EXTENDED);
785}
786static int is_string(value v) {
787 return tag(v) == T_EXTENDED && (ext_tag(heap[untag(v)]) | 1) == T_STRING;
788}
789static int is_mutable_string(value v) {
790 return tag(v) == T_EXTENDED && ext_tag(heap[untag(v)]) == T_STRING;
791}
792static int is_symbol(value);
793static uchar * string_buf(value s) {
794 assert(is_string(s) || is_symbol(s));
795 return (uchar *)&heap[untag(s)+1];
796}
797/* C thinks strings are made of signed chars for some reason... */
798static char * c_string_buf(value s) {
799 assert(is_string(s) || is_symbol(s));
800 return (char *)string_buf(s);
801}
802static value string_len(value s) {
803 assert(is_string(s) || is_symbol(s));
804 return ext_untag(heap[untag(s)]);
805}
806/* Construct string from null-terminated C string not on the Scheme heap */
807static value string(const char *c_str) {
808 value len = strlen(c_str);
809 value str = make_string_uninit(len);
810 memcpy(string_buf(str), c_str, len);
811 return str;
812}
813static value make_string(value len, uchar fill) {
814 value s = make_string_uninit(len);
815 memset(string_buf(s), fill, len);
816 return s;
817}
818/* Construct immutable copy of string or symbol in R_EXPR */
819static value string_copy_immutable(void) {
820 value len = string_len(R_EXPR), ptr = alloc_string(len);
821 heap[ptr] = ext_add_tag(len, T_IMMUT_STRING);
822 memcpy(heap+ptr+1, string_buf(R_EXPR), len);
823 return add_tag(ptr, T_EXTENDED);
824}
825/* Construct copy of string in R_EXPR */
826static value string_copy(void) {
827 value len = string_len(R_EXPR);
828 value result = make_string_uninit(len);
829 memcpy(string_buf(result), string_buf(R_EXPR), len);
830 return result;
831}
832/* Construct copy of string in R_EXPR with null byte appended */
833static value string_append_null(void) {
834 value len = string_len(R_EXPR);
835 value result = make_string_uninit(len + 1);
836 uchar *buf = string_buf(result);
837 memcpy(buf, string_buf(R_EXPR), len);
838 buf[len] = '\0';
839 return result;
840}
841/* Truncate string in place (consider carefully how GC works) */
842static void string_truncate(value s, value len) {
843 assert(len <= string_len(s));
844 value ptr = untag(s);
845 heap[ptr] = ext_add_tag(len, ext_tag(heap[ptr]));
846}
847
848/* Symbols */
849
850static value symbols; /* interning list */
851
852/* Frequently used symbols */
853static value s_lambda, s_quote, s_quasiquote, s_unquote, s_unquote_splicing,
854 s_if, s_set, s_begin, s_letrec, s_define, s_delay, s_literal,
855 s_open_paren, s_close_paren, s_dot, s_open_vector, s_identifier,
856 s_named_char, s_abbrev, s_number, s_truncate, s_overwrite,
857 s_append, s_sync, s_data_sync;
858
859static value find_symbol(const uchar *buf, value len) {
860 value iter, sym;
861 /* some type checks skipped because interning list is not (directly) user
862 * modifiable */
863 for (iter = symbols; iter != SC_NULL; iter = cdr(iter)) {
864 sym = car(iter);
865 if (len == ext_untag(heap[untag(sym)]) &&
866 memcmp(buf, &heap[untag(sym)+1], len) == 0)
867 return sym;
868 }
869 return SC_NULL;
870}
871/* Get symbol from a null-terminated C string not on the Scheme heap, not
872 * converting case (side effects: R_CAR R_CDR) */
873static value symbol(const char *c_str) {
874 value len = strlen(c_str);
875 value sym = find_symbol((uchar *)c_str, len);
876 if (sym != SC_NULL) return sym;
877 value sym_ptr = sc_malloc(1 + packed_str_len(len));
878 heap[sym_ptr] = ext_add_tag(len, T_SYMBOL);
879 memcpy(&heap[sym_ptr+1], c_str, len);
880 R_CAR = add_tag(sym_ptr, T_EXTENDED);
881 R_CDR = symbols;
882 symbols = cons();
883 return R_CAR;
884}
885/* Get symbol from a Scheme string in R_CAR, not converting case
886 * (side effects: R_CAR R_CDR) */
887static value string_to_symbol(void) {
888 value len = string_len(R_CAR);
889 value sym = find_symbol(string_buf(R_CAR), len);
890 if (sym != SC_NULL) return sym;
891 value sym_ptr = sc_malloc(1 + packed_str_len(len));
892 heap[sym_ptr] = ext_add_tag(len, T_SYMBOL);
893 memcpy(&heap[sym_ptr+1], string_buf(R_CAR), len);
894 R_CAR = add_tag(sym_ptr, T_EXTENDED);
895 R_CDR = symbols;
896 symbols = cons();
897 return R_CAR;
898}
899static int is_symbol(value v) { return is_ext_type(v, T_SYMBOL); }
900
901/* Vectors */
902
903static value alloc_vector(value len) {
904 if (len > EXT_LENGTH_MAX)
905 sc_error("length negative or too large for vector");
906 return sc_malloc(1 + len);
907}
908/* Uninitialized constructors: caller must fill without further allocation */
909static value make_vector_uninit(value len) {
910 value vec = alloc_vector(len);
911 heap[vec] = ext_add_tag(len, T_VECTOR);
912 return add_tag(vec, T_EXTENDED);
913}
914static value make_immutable_vector(value len) {
915 value vec = alloc_vector(len);
916 heap[vec] = ext_add_tag(len, T_IMMUT_VECTOR);
917 return add_tag(vec, T_EXTENDED);
918}
919/* Build a new vector with each element initialized to R_EXPR */
920static value make_vector(value len) {
921 value vec = make_vector_uninit(len), *p;
922 for (p = heap+untag(vec)+1; len; --len, ++p) *p = R_EXPR;
923 return vec;
924}
925/* Build a new vector by reversing the elements of proper list R_EXPR */
926static value rev_list_to_vec(void) {
927 value len = list_length(R_EXPR),
928 vec = make_vector_uninit(len),
929 *p = heap+untag(vec)+len;
930 for (; R_EXPR != SC_NULL; --p, R_EXPR = cdr(R_EXPR)) *p = car(R_EXPR);
931 return vec;
932}
933static int is_vector(value v) {
934 return tag(v) == T_EXTENDED && (ext_tag(heap[untag(v)]) | 1) == T_VECTOR;
935}
936static int is_mutable_vector(value v) {
937 return tag(v) == T_EXTENDED && ext_tag(heap[untag(v)]) == T_VECTOR;
938}
939static value vector_len(value v) {
940 assert(is_vector(v));
941 return ext_untag(heap[untag(v)]);
942}
943static value vector_ref(value v, value k) {
944 assert(k < vector_len(v));
945 return heap[untag(v)+k+1];
946}
947static void vector_set(value v, value k, value obj) {
948 assert(k < vector_len(v));
949 heap[untag(v)+k+1] = obj;
950}
951
952/* Builtin procedures */
953
954static value builtin(const char *name, builtin_func_t func) {
955 value b = sc_malloc(3);
956 heap[b] = ext_add_tag(0, T_BUILTIN);
957 heap[b+1] = (value)name;
958 heap[b+2] = (value)func;
959 return add_tag(b, T_EXTENDED);
960}
961static int is_builtin(value v) { return is_ext_type(v, T_BUILTIN); }
962static const char * builtin_name(value b) {
963 return (char *)heap[untag(b)+1];
964}
965static builtin_func_t builtin_func(value b) {
966 return (builtin_func_t)heap[untag(b)+2];
967}
968
969/* Compound procedures */
970
971/* Return a new procedure object from lambda expression operands in R_OPERANDS
972 * and environment in R_ENV.
973 * Side effects: R_OPERANDS R_CAR R_CDR */
974static value procedure(void) {
975 value p, arity;
976 arity = car(R_OPERANDS);
977 if (is_fixnum(arity)) {
978 /* Compiler annotated parameter list attributes to save a traversal */
979 R_OPERANDS = cdr(R_OPERANDS);
980 }
981 else {
982 /* ...this traversal (still needed for bootstrapping), which in turn
983 * saves traversing each time the procedure is applied */
984 p = arity; /* parameter list */
985 arity = 0;
986 for (; is_pair(p); p = cdr(p)) arity++;
987 if (p == SC_NULL) arity = fixnum(arity);
988 else {
989 /* improper (variadic) */
990 assert(is_symbol(p));
991 arity = (value)(-1L - (long)arity);
992 }
993 }
994 p = sc_malloc(4);
995 heap[p] = ext_add_tag(ext_untag(arity), T_PROCEDURE);
996 heap[p+1] = car(R_OPERANDS); /* parameter list */
997 heap[p+2] = cdr(R_OPERANDS); /* body */
998 heap[p+3] = R_ENV;
999 return add_tag(p, T_EXTENDED);
1000}
1001static int is_compound_proc(value v) { return is_ext_type(v, T_PROCEDURE); }
1002static long proc_arity(value p) { return ext_untag_signed(heap[untag(p)]); }
1003static value proc_params(value p) { return heap[untag(p)+1]; }
1004static value proc_body(value p) { return heap[untag(p)+2]; }
1005static value proc_env(value p) { return heap[untag(p)+3]; }
1006
1007/* Continuations */
1008
1009static value current_continuation(void) {
1010 value cont = sc_malloc(3);
1011 heap[cont] = ext_add_tag(0, T_CONTINUATION);
1012 heap[cont+1] = r_stack;
1013 heap[cont+2] = r_spool;
1014 return add_tag(cont, T_EXTENDED);
1015}
1016static int is_continuation(value v) { return is_ext_type(v, T_CONTINUATION); }
1017static value continuation_stack(value c) { return heap[untag(c)+1]; }
1018static value continuation_spool(value c) { return heap[untag(c)+2]; }
1019
1020static int is_procedure(value v) {
1021 return is_builtin(v) || is_compound_proc(v) || is_continuation(v);
1022}
1023
1024/* Promises */
1025
1026/* Construct a promise from an expression in R_EXPR and environment in R_ENV */
1027static value promise(void) {
1028 value p = sc_malloc(3);
1029 heap[p] = ext_add_tag(0, T_PROMISE);
1030 heap[p+1] = R_EXPR;
1031 heap[p+2] = R_ENV;
1032 return add_tag(p, T_EXTENDED);
1033}
1034static int is_promise(value v) { return is_ext_type(v, T_PROMISE); }
1035static int promise_done(value p) { return heap[untag(p)] & 1; }
1036static value promise_value(value p) { return heap[untag(p)+1]; }
1037static value promise_env(value p) { return heap[untag(p)+2]; }
1038static void promise_memoize(value p, value v) {
1039 value ptr = untag(p);
1040 heap[ptr] = ext_add_tag(1, T_PROMISE);
1041 heap[ptr+1] = v;
1042 heap[ptr+2] = SC_NULL; /* release to GC */
1043}
1044
1045/* Ports */
1046
1047static value stdin_port, stdout_port;
1048
1049#define DEFAULT_R_BUF 4096
1050#define DEFAULT_W_BUF 4096
1051
1052/* Flags in header */
1053#define PORT_OUTPUT_BIT 1
1054#define PORT_SOCKET_BIT 2
1055#define PORT_EOF_BIT 4
1056
1057/* Fields */
1058#define PORT_FD 1
1059#define PORT_START 2
1060#define PORT_FILL 3
1061#define PORT_BUF 4
1062#define PORT_COUNTERPART 5
1063
1064/* Construct unidirectional port. Side effects: R_RESULT */
1065static value make_port(int fd, int is_output, long buf_size) {
1066 value port, *p;
1067 if (buf_size < 1) sc_error("buffer size must be at least one");
1068 R_RESULT = make_string_uninit(buf_size);
1069 port = sc_malloc(6);
1070 p = heap+port;
1071 p[0] = ext_add_tag(is_output ? PORT_OUTPUT_BIT : 0, T_PORT);
1072 p[PORT_FD] = fixnum(fd);
1073 p[PORT_START] = fixnum(0);
1074 p[PORT_FILL] = fixnum(0);
1075 p[PORT_BUF] = R_RESULT;
1076 p[PORT_COUNTERPART] = SC_NULL;
1077 return add_tag(port, T_EXTENDED);
1078}
1079/* Construct input port in r0 and output port in r1 from socket file
1080 * descriptor. Side effects: R_RESULT */
1081static void make_socket_ports(int fd, value rbuf_size, value wbuf_size) {
1082 value *p;
1083 chkp(fcntl(fd, F_SETFL, O_NONBLOCK));
1084 r0 = make_port(fd, 0, rbuf_size);
1085 r1 = make_port(fd, 1, wbuf_size);
1086 /* Cross-reference the two directions so the underlying FD can be closed
1087 * promptly when both ports are. */
1088 p = heap+untag(r0);
1089 p[0] |= PORT_SOCKET_BIT;
1090 p[PORT_COUNTERPART] = r1;
1091 p = heap+untag(r1);
1092 p[0] |= PORT_SOCKET_BIT;
1093 p[PORT_COUNTERPART] = r0;
1094}
1095
1096static int is_port(value v) { return is_ext_type(v, T_PORT); }
1097static int is_input_port(value v) {
1098 value header;
1099 if (tag(v) != T_EXTENDED) return 0;
1100 header = heap[untag(v)];
1101 return ext_tag(header) == T_PORT && (header & PORT_OUTPUT_BIT) == 0;
1102}
1103static int is_output_port(value v) {
1104 value header;
1105 if (tag(v) != T_EXTENDED) return 0;
1106 header = heap[untag(v)];
1107 return ext_tag(header) == T_PORT && (header & PORT_OUTPUT_BIT) != 0;
1108}
1109
1110static int set_port_closed(value *p) {
1111 int fd = fixnum_val(p[PORT_FD]);
1112 /* Set an invalid FD so writes to a closed port are caught by the kernel
1113 * with no extra cost in the normal case. Disable buffering so they're
1114 * caught immediately. */
1115 p[PORT_FD] = fixnum(-1);
1116 p[PORT_START] = p[PORT_FILL] = fixnum(0);
1117 string_truncate(p[PORT_BUF], 1);
1118 if (p[PORT_COUNTERPART] == SC_NULL) return close(fd);
1119 heap[untag(p[PORT_COUNTERPART])+PORT_COUNTERPART] = SC_NULL;
1120 p[PORT_COUNTERPART] = SC_NULL;
1121 return 0;
1122}
1123static ssize_t fill_input_port(value *p, int nonblock) {
1124 int fd = fixnum_val(p[PORT_FD]);
1125 uchar *buf = string_buf(p[PORT_BUF]);
1126 value len = string_len(p[PORT_BUF]);
1127 ssize_t n;
1128 while ((n = read(fd, buf, len)) < 0) {
1129 if (errno == EINTR) continue;
1130 if (errno == EAGAIN || errno == EWOULDBLOCK) {
1131 if (nonblock) return -1;
1132 poll1(fd, POLLIN, -1); continue;
1133 }
1134 if (fd == -1) sc_error("input port closed");
1135 sc_perror();
1136 }
1137 p[PORT_START] = fixnum(0);
1138 p[PORT_FILL] = fixnum(n);
1139 return n;
1140}
1141static void flush_output_port(value *p) {
1142 int fd = fixnum_val(p[PORT_FD]);
1143 long fill = fixnum_val(p[PORT_FILL]);
1144 assert(fill > 0); /* zero-length write unspecified on non-regular files */
1145 assert((ulong)fill <= string_len(p[PORT_BUF]));
1146 p[PORT_FILL] = fixnum(0);
1147 if (write_all(fd, c_string_buf(p[PORT_BUF]), fill) == -1) {
1148 int saved;
1149 if (fd == -1) sc_error("output port closed");
1150 /* Probably no sensible way to recover from write errors, so force the
1151 * port closed. XXX Closing standard streams is a concern (i.e. a
1152 * subsequent open gets FD 1 or 2 and terminal output goes to the file
1153 * unexpectedly), except: 1) the interpreter writes to stdout through
1154 * the port object only; 2) the open-subprocess extension always pipes
1155 * the child's stdout; 3) there's no port for stderr. But these are
1156 * fragile assumptions. */
1157 saved = errno; set_port_closed(p); errno = saved;
1158 sc_perror();
1159 }
1160}
1161
1162static void flush_if_needed(value port) {
1163 value *p = heap+untag(port);
1164 if (fixnum_val(p[PORT_FILL]) > 0) flush_output_port(p);
1165}
1166static void close_port(value port) {
1167 value *p = heap+untag(port), header = p[0];
1168 int fd = fixnum_val(p[PORT_FD]);
1169 if (fd == -1) return;
1170 if (header & PORT_OUTPUT_BIT) flush_if_needed(port);
1171 if (header & PORT_SOCKET_BIT)
1172 shutdown(fd, header & PORT_OUTPUT_BIT ? SHUT_WR : SHUT_RD);
1173 chkp(set_port_closed(p));
1174}
1175static value read_char(value port) {
1176 value *p = heap+untag(port), start = p[PORT_START];
1177 uchar *buf = string_buf(p[PORT_BUF]);
1178 if (start == p[PORT_FILL]) {
1179 if (p[0] & PORT_EOF_BIT) { p[0] ^= PORT_EOF_BIT; return SC_EOF; }
1180 if (!fill_input_port(p, 0)) return SC_EOF;
1181 start = 0;
1182 }
1183 else start = untag(start);
1184 p[PORT_START] = fixnum(start+1);
1185 return character(buf[start]);
1186}
1187static value peek_char(value port) {
1188 value *p = heap+untag(port), start = p[PORT_START];
1189 uchar *buf = string_buf(p[PORT_BUF]);
1190 if (start == p[PORT_FILL]) {
1191 /* EOF is not always permanent, e.g. on a tty, so the condition must be
1192 * saved specially for the next peek or read. */
1193 if (p[0] & PORT_EOF_BIT) return SC_EOF;
1194 if (!fill_input_port(p, 0)) { p[0] |= PORT_EOF_BIT; return SC_EOF; }
1195 start = 0;
1196 }
1197 else start = untag(start);
1198 return character(buf[start]);
1199}
1200static value input_port_ready(value port) {
1201 value *p;
1202 int fd;
1203 p = heap+untag(port);
1204 fd = fixnum_val(p[PORT_FD]);
1205 if (p[PORT_START] < p[PORT_FILL]) return SC_TRUE;
1206 if (fd == -1) sc_error("input port closed");
1207 if (!poll1(fd, POLLIN, 0)) return SC_FALSE;
1208 /* XXX Linux poll/select are broken and have false positives for
1209 * readability, at least for sockets, so we try a nonblocking read. But
1210 * this doesn't work for regular files! Seems marginally better to break
1211 * "the next READ-CHAR operation on the given PORT is guaranteed not to
1212 * hang" than have CHAR-READY? itself hang. Alternately, djb's SIGALARM
1213 * hack could be used. */
1214 if (p[0] & PORT_SOCKET_BIT && fill_input_port(p, 1) == -1) return SC_FALSE;
1215 return SC_TRUE;
1216}
1217/* Barbarous relic from writing the lexer based on stdio/ungetc */
1218#define EOF (-1)
1219static void put_back_char(int c) {
1220 value *p;
1221 assert(is_port(R_PORT));
1222 p = heap+untag(R_PORT);
1223 if (c == EOF) p[0] |= PORT_EOF_BIT;
1224 else {
1225 value start = untag(p[PORT_START]);
1226 assert(start);
1227 --start;
1228 string_buf(p[PORT_BUF])[start] = c;
1229 p[PORT_START] = fixnum(start);
1230 }
1231}
1232static void write_char(uchar c) {
1233 value *p, fill, len;
1234 uchar *buf;
1235 assert(is_port(R_PORT));
1236 p = heap+untag(R_PORT);
1237 fill = untag(p[PORT_FILL]);
1238 len = string_len(p[PORT_BUF]);
1239 assert(fill < len);
1240 buf = string_buf(p[PORT_BUF]);
1241 buf[fill] = c;
1242 ++fill;
1243 p[PORT_FILL] = fixnum(fill);
1244 if (fill == len) flush_output_port(p);
1245}
1246
1247static int stdout_ready;
1248static void flush_all(void) {
1249 /* TODO */
1250 if (stdout_ready) flush_if_needed(stdout_port);
1251}
1252
1253static void write_cstr(const char *s) { for (; *s; ++s) write_char(*s); }
1254static void write_str(value s) { /* also for symbols */
1255 value len = string_len(s);
1256 uchar *buf = string_buf(s);
1257 assert(is_string(s) || is_symbol(s));
1258 for (; len; --len, ++buf) write_char(*buf);
1259}
1260static void write_str_quoted(value s) {
1261 value i, len = string_len(s);
1262 uchar *buf = string_buf(s);
1263 write_char('"');
1264 for (i = 0; i < len; i++) {
1265 uchar c = buf[i];
1266 if (c == '"' || c == '\\') write_char('\\');
1267 write_char(c);
1268 }
1269 write_char('"');
1270}
1271static void newline(void) { write_char('\n'); }
1272
1273/* Environments
1274 *
1275 * An environment is a list of lexical frames followed by global frames.
1276 *
1277 * A lexical frame is a vector of which the first element is the list of
1278 * symbols naming the variables (possibly improper, as in a lambda expression),
1279 * and the remaining elements are the corresponding values.
1280 *
1281 * A global frame is a list of (symbol . value) binding pairs. */
1282
1283static value r5rs_env, gscm_env, interaction_env, toplevel_env;
1284
1285static void check_mutable_env(value env, value name) {
1286 if (env != interaction_env) {
1287 assert(env == r5rs_env || env == gscm_env || env == toplevel_env);
1288 sc_error1("variable in immutable environment:", name);
1289 }
1290}
1291
1292/* Construct a new lexical frame for the application of the procedure in R_PROC
1293 * to the freshly allocated argument list in R_ARGS (no other side effects) */
1294static value make_lex_frame(void) {
1295 value k, frame, args, arity, fixed_arity;
1296 long encoded_arity = proc_arity(R_PROC);
1297 if (encoded_arity < 0) {
1298 arity = (value)(-encoded_arity);
1299 fixed_arity = arity - 1;
1300 }
1301 else {
1302 arity = (value)encoded_arity;
1303 fixed_arity = arity;
1304 }
1305 frame = make_vector_uninit(1 + arity);
1306 vector_set(frame, 0, proc_params(R_PROC));
1307 args = R_ARGS;
1308 for (k = 1; k <= fixed_arity; k++) {
1309 if (args == SC_NULL) sc_error("too few arguments");
1310 vector_set(frame, k, car(args));
1311 args = cdr(args);
1312 }
1313 if (fixed_arity < arity) vector_set(frame, k, args);
1314 else if (args != SC_NULL) sc_error("too many arguments");
1315 return frame;
1316}
1317
1318/* Construct a new lexical frame for a LETREC binding list in r2, that is, bind
1319 * the given names to not-yet-defined values. The name list is constructed in
1320 * reverse order.
1321 * Side effects: r2 R_CAR R_CDR */
1322static value make_letrec_frame(void) {
1323 /* TODO optimize: transpose the binding list? */
1324 value k, len, frame;
1325 R_CDR = SC_NULL;
1326 len = 1;
1327 for (; r2 != SC_NULL; r2 = cdr(r2)) {
1328 len++;
1329 R_CAR = car(car(r2));
1330 R_CDR = cons();
1331 }
1332 frame = make_vector_uninit(len);
1333 vector_set(frame, 0, R_CDR);
1334 for (k = 1; k < len; k++)
1335 vector_set(frame, k, UNDEFINED);
1336 return frame;
1337}
1338
1339/* Add a new binding for R_CAR to R_CDR to the topmost frame of global R_ENV.
1340 * Side effects: R_CAR R_CDR */
1341static void extend_global_env(void) {
1342 R_CAR = cons(); /* new binding */
1343 R_CDR = car(R_ENV); /* top frame */
1344 assert(is_pair(R_CDR) || R_CDR == SC_NULL);
1345 R_CDR = cons();
1346 set_car(R_ENV, R_CDR);
1347}
1348
1349/* Construct a new global frame containing copies of the bindings in the frame
1350 * in R_EXPR. Side effects: R_CAR R_CDR R_EXPR R_TAIL R_RESULT */
1351static value copy_global_frame(void) {
1352 value temp;
1353 R_CAR = R_CDR = SC_NULL;
1354 R_TAIL = R_RESULT = cons();
1355 for (; R_EXPR != SC_NULL; R_EXPR = cdr(R_EXPR)) {
1356 temp = car(R_EXPR);
1357 R_CAR = car(temp); R_CDR = cdr(temp);
1358 R_CAR = cons(); /* copied binding */
1359 R_CDR = SC_NULL;
1360 temp = cons();
1361 set_cdr(R_TAIL, temp);
1362 R_TAIL = temp;
1363 }
1364 return cdr(R_RESULT);
1365}
1366
1367static value global_frame_lookup(value name, value frame) {
1368 value binding;
1369 for (; frame != SC_NULL; frame = cdr(frame)) {
1370 binding = car(frame);
1371 if (car(binding) == name) return binding;
1372 }
1373 return SC_FALSE;
1374}
1375
1376static value lex_frame_lookup(value name, value frame) {
1377 value names, index;
1378 index = 1;
1379 for (names = vector_ref(frame, 0); is_pair(names); names = cdr(names)) {
1380 if (car(names) == name) goto found;
1381 index++;
1382 }
1383 if (names != name) return 0;
1384found:
1385 if (vector_ref(frame, 1) == UNDEFINED) /* see LETREC */
1386 sc_error1("undefined variable:", name);
1387 return index;
1388}
1389
1390static value env_lookup(value name, value env) {
1391 value frame, binding, index;
1392 assert(is_symbol(name));
1393 for (; env != SC_NULL; env = cdr(env)) {
1394 frame = car(env);
1395 if (is_vector(frame)) {
1396 index = lex_frame_lookup(name, frame);
1397 if (index) return vector_ref(frame, index);
1398 }
1399 else {
1400 binding = global_frame_lookup(name, frame);
1401 if (binding != SC_FALSE) return cdr(binding);
1402 }
1403 }
1404 sc_error1("unbound variable:", name);
1405}
1406
1407static void env_lookup_set(value name, value env, value new) {
1408 value frame, binding, index;
1409 assert(is_symbol(name));
1410 for (; env != SC_NULL; env = cdr(env)) {
1411 frame = car(env);
1412 if (is_vector(frame)) {
1413 index = lex_frame_lookup(name, frame);
1414 if (index) {
1415 vector_set(frame, index, new);
1416 return;
1417 }
1418 }
1419 else {
1420 binding = global_frame_lookup(name, frame);
1421 if (binding != SC_FALSE) {
1422 check_mutable_env(env, name);
1423 set_cdr(binding, new);
1424 return;
1425 }
1426 }
1427 }
1428 sc_error1("unbound variable:", name);
1429}
1430
1431/* Variable references: created by compiler to memoize environment lookups */
1432
1433static int is_variable_ref(value v) { return is_ext_type(v, T_VARIABLE_REF); }
1434
1435/* Return an unresolved variable reference for a symbol in R_CAR */
1436static value make_variable_ref() {
1437 assert(is_symbol(R_CAR));
1438 value ref = sc_malloc(2);
1439 heap[ref] = ext_add_tag(0, T_VARIABLE_REF);
1440 heap[ref+1] = R_CAR;
1441 return add_tag(ref, T_EXTENDED);
1442}
1443
1444/* Look up an unresolved variable reference and memoize */
1445static void resolve_variable_ref(value ref, value env, int mutable) {
1446 value ptr, name, frame, height, binding, index;
1447 ptr = untag(ref);
1448 name = heap[ptr+1];
1449 assert(is_symbol(name));
1450 height = 0;
1451 for (; env != SC_NULL; env = cdr(env)) {
1452 frame = car(env);
1453 if (is_vector(frame)) {
1454 index = lex_frame_lookup(name, frame);
1455 if (index) {
1456 if (height > FIXNUM_MAX)
1457 /* maybe possible on small architectures */
1458 sc_error("environment too deep");
1459 heap[ptr] = ext_add_tag(index, T_VARIABLE_REF);
1460 heap[ptr+1] = add_tag(height, T_FIXNUM);
1461 return;
1462 }
1463 }
1464 else {
1465 binding = global_frame_lookup(name, frame);
1466 if (binding != SC_FALSE) {
1467 if (mutable) check_mutable_env(env, name);
1468 heap[ptr+1] = binding;
1469 return;
1470 }
1471 }
1472 height++;
1473 }
1474 sc_error1("unbound variable:", name);
1475}
1476
1477static value variable_ref_get(value ref, value env) {
1478 value ptr, contents, height;
1479 ptr = untag(ref);
1480retry:
1481 contents = heap[ptr+1];
1482 if (is_pair(contents)) /* global */
1483 return cdr(contents);
1484 else if (is_fixnum(contents)) { /* lexical */
1485 for (height = fixnum_val(contents); height; height--)
1486 env = cdr(env);
1487 return vector_ref(car(env), ext_untag(heap[ptr]));
1488 }
1489 else { /* unresolved */
1490 resolve_variable_ref(ref, env, 0);
1491 goto retry;
1492 }
1493}
1494
1495static void variable_ref_set(value ref, value env, value new) {
1496 value ptr, contents, height;
1497 ptr = untag(ref);
1498retry:
1499 contents = heap[ptr+1];
1500 if (is_pair(contents)) /* global */
1501 set_cdr(contents, new);
1502 else if (is_fixnum(contents)) { /* lexical */
1503 for (height = fixnum_val(contents); height; height--)
1504 env = cdr(env);
1505 vector_set(car(env), ext_untag(heap[ptr]), new);
1506 }
1507 else { /* unresolved */
1508 resolve_variable_ref(ref, env, 1);
1509 goto retry;
1510 }
1511}
1512
1513
1514/***********
1515 * Debugging
1516 */
1517
1518static void shallow_print(void);
1519
1520void sc_dump(value v) {
1521 r_dump = v;
1522 PUSH(R_CAR);
1523 PUSH(R_EXPR);
1524 PUSH(R_PORT);
1525 R_EXPR = r_dump;
1526 R_PORT = stdout_port;
1527 shallow_print();
1528 newline();
1529 R_PORT = pop();
1530 R_EXPR = pop();
1531 R_CAR = pop();
1532 r_dump = SC_NULL;
1533}
1534
1535
1536/****************
1537 * Core evaluator
1538 *
1539 * The evaluator is a set of subroutines delimited by labels, with "switch"
1540 * cases serving as pushable return addresses. (Caution is needed in case of
1541 * nested switches or "break".) Properly tail recursive calls are where "goto"
1542 * is used rather than CALL, that is, a new return address is not pushed.
1543 * Nothing else may be left on the subroutine's stack frame in these cases!
1544 */
1545
1546/* Shorthand for non-tail subroutine calls. Beware of the register side effects
1547 * or confusing RETURN with C return. */
1548#define CALL(subroutine_label, return_address) \
1549 { R_CAR = return_address; push(); goto subroutine_label; }
1550#define RETURN(val) { R_RESULT = (val); goto dispatch; }
1551
1552/* Return addresses */
1553#define EV_DONE 0
1554#define EV_COMPILE_RESULT 1
1555#define EV_CALL_OPERATOR 2
1556#define EV_CALL_LOOP 3
1557#define EV_UNWIND_LOOP 4
1558#define EV_REWIND_LOOP 5
1559#define EV_SEQ_LOOP 6
1560#define EV_IF_PREDICATE 7
1561#define EV_SET_RESULT 8
1562#define EV_LETREC_LOOP 9
1563#define EV_DEFINE_RESULT 10
1564#define EV_FORCE_RESULT 11
1565#define EV_CALL_WITH_VALUES 12
1566
1567static const char *err_context;
1568static jmp_buf err_longjmp_env;
1569
1570/* Takes expression in R_EXPR and environment in R_ENV */
1571static void evaluator(void) {
1572 value k;
1573 if (setjmp(err_longjmp_env)) goto APPLY;
1574 if (r_compiler) CALL(COMPILE, EV_DONE);
1575 CALL(EVAL, EV_DONE);
1576dispatch:
1577 switch (pop()) {
1578 case EV_DONE:
1579 assert(r_stack == SC_NULL);
1580 r_error_cont = SC_NULL;
1581 break;
1582
1583COMPILE:
1584 /* Compile expression R_EXPR then evaluate in environment R_ENV */
1585 PUSH(R_ENV);
1586 R_CAR = R_EXPR;
1587 R_CDR = SC_NULL;
1588 R_ARGS = cons();
1589 R_PROC = r_compiler;
1590 CALL(APPLY, EV_COMPILE_RESULT);
1591 case EV_COMPILE_RESULT:
1592 R_EXPR = R_RESULT;
1593 R_ENV = pop();
1594 goto EVAL;
1595
1596EVAL:
1597 /* Evaluate expression R_EXPR in environment R_ENV */
1598 err_context = "eval";
1599 if (is_pair(R_EXPR)) { /* Combination */
1600 R_OPERANDS = cdr(R_EXPR);
1601 R_EXPR = car(R_EXPR);
1602 if (is_symbol(R_EXPR)) {
1603 if (R_EXPR == s_lambda) RETURN(procedure());
1604 if (R_EXPR == s_if) goto IF;
1605 if (R_EXPR == s_set) goto SET;
1606 if (R_EXPR == s_begin) goto EVAL_BODY;
1607 if (R_EXPR == s_letrec) goto LETREC;
1608 if (R_EXPR == s_quote) RETURN(car(R_OPERANDS));
1609 if (R_EXPR == s_define) goto DEFINE;
1610 if (R_EXPR == s_delay) goto DELAY;
1611 }
1612 goto EVAL_CALL;
1613 }
1614 if (is_variable_ref(R_EXPR)) /* Cacheable variable reference */
1615 RETURN(variable_ref_get(R_EXPR, R_ENV));
1616 if (is_symbol(R_EXPR))
1617 /* Slow and stupid variable lookup: replacing symbols in the
1618 * expression tree with variable references is done by the
1619 * compiler, so this is needed to bootstrap */
1620 RETURN(env_lookup(R_EXPR, R_ENV));
1621 assert(is_number(R_EXPR) ||
1622 is_boolean(R_EXPR) ||
1623 is_character(R_EXPR) ||
1624 is_string(R_EXPR) ||
1625 /* not valid Scheme, but allowed in compiler output */
1626 R_EXPR == SC_NULL ||
1627 is_vector(R_EXPR));
1628 RETURN(R_EXPR); /* Self-evaluating */
1629
1630EVAL_CALL:
1631 /* Procedure call (operator operand ...)
1632 * Evaluate operator in R_EXPR and each operand in R_OPERANDS, build
1633 * argument list and apply in R_ENV. */
1634 PUSH(R_OPERANDS);
1635 PUSH(R_ENV);
1636 CALL(EVAL, EV_CALL_OPERATOR);
1637 case EV_CALL_OPERATOR:
1638 R_ENV = pop();
1639 R_CAR = R_RESULT;
1640 R_OPERANDS = pop();
1641 push(); /* evaluated operator */
1642 R_CAR = R_CDR = SC_NULL;
1643 R_TAIL = cons(); /* arg list tail pointer */
1644 PUSH(R_TAIL); /* arg list head pointer */
1645 PUSH(R_ENV);
1646 for (; R_OPERANDS != SC_NULL; R_OPERANDS = cdr(R_OPERANDS)) {
1647 PUSH(R_OPERANDS);
1648 PUSH(R_TAIL);
1649 R_EXPR = car(R_OPERANDS);
1650 CALL(EVAL, EV_CALL_LOOP);
1651 case EV_CALL_LOOP:
1652 R_CAR = R_RESULT;
1653 R_TAIL = pop();
1654 R_OPERANDS = pop();
1655 R_ENV = peek();
1656 R_CDR = SC_NULL;
1657 R_CDR = cons();
1658 set_cdr(R_TAIL, R_CDR);
1659 R_TAIL = R_CDR;
1660 }
1661 drop(); /* environment */
1662 R_ARGS = cdr(pop()); /* arg list head pointer */
1663 R_PROC = pop(); /* evaluated operator */
1664 goto APPLY;
1665
1666APPLY:
1667 /* Extend the lexical environment of procedure R_PROC by binding its
1668 * formal parameters to arguments in the freshly allocated list R_ARGS,
1669 * then evaluate its body in the new environment. */
1670 if (is_builtin(R_PROC)) {
1671 err_context = builtin_name(R_PROC);
1672 r_flag = f_none;
1673 R_RESULT = (builtin_func(R_PROC))(R_ARGS);
1674 /* Builtins cannot call back into the evaluator as that would break
1675 * tail recursion and enable unlimited recursion on the C stack.
1676 * Instead they can set a flag to signal a tail call to a given
1677 * subroutine. */
1678 switch (r_flag) {
1679 case f_none: RETURN(R_RESULT);
1680 case f_compile: goto COMPILE;
1681 case f_apply: goto APPLY;
1682 case f_force: goto FORCE;
1683 case f_call_with_values: goto CALL_WITH_VALUES;
1684 /* optimization, see RETURN_VALUES */
1685 case f_values: goto VALUES;
1686 }
1687 }
1688 err_context = "apply";
1689 if (is_compound_proc(R_PROC)) {
1690 R_OPERANDS = proc_body(R_PROC);
1691 R_CAR = make_lex_frame();
1692 R_CDR = proc_env(R_PROC);
1693 R_ENV = cons();
1694 goto EVAL_BODY;
1695 }
1696 if (is_continuation(R_PROC)) goto APPLY_CONTINUATION;
1697 sc_error1("not a procedure:", R_PROC);
1698
1699APPLY_CONTINUATION:
1700 /* Return the value(s) R_ARGS to the continuation R_PROC, restoring its
1701 * stack and applying any thunks registered to exit the current dynamic
1702 * extent and re-enter the captured one. */
1703 R_WIND_TO = continuation_spool(R_PROC);
1704 if (r_spool != R_WIND_TO) {
1705 R_LCA = lowest_common_ancestor(r_spool, R_WIND_TO);
1706 r_stack = SC_NULL;
1707 PUSH(R_ARGS);
1708 PUSH(R_PROC);
1709 /* Unwind: apply "after" thunks from the current extent up to (but
1710 * not including) the narrowest common extent */
1711 while (r_spool != R_LCA) {
1712 assert(r_spool != SC_NULL);
1713 /* XXX ^ possible to violate if thunk escapes? */
1714 R_PROC = cdr(car(r_spool));
1715 r_spool = cdr(r_spool);
1716 R_ARGS = SC_NULL;
1717 PUSH(R_LCA);
1718 CALL(APPLY, EV_UNWIND_LOOP);
1719 case EV_UNWIND_LOOP:
1720 R_LCA = pop();
1721 }
1722 /* Rewind: apply "before" thunks down to the captured extent
1723 * starting below the common extent */
1724 R_WIND_TO = continuation_spool(peek());
1725 for (r_spool = R_WIND_TO; r_spool != R_LCA; r_spool = cdr(r_spool))
1726 PUSH(r_spool);
1727 while (r_spool != R_WIND_TO) {
1728 R_PROC = car(car(peek()));
1729 R_ARGS = SC_NULL;
1730 PUSH(R_WIND_TO);
1731 CALL(APPLY, EV_REWIND_LOOP);
1732 case EV_REWIND_LOOP:
1733 R_WIND_TO = pop();
1734 r_spool = pop();
1735 }
1736 R_PROC = pop();
1737 R_ARGS = pop();
1738 assert(r_stack == SC_NULL);
1739 }
1740 r_stack = continuation_stack(R_PROC);
1741 VALUES:
1742 if (peek() == EV_CALL_WITH_VALUES) {
1743 drop();
1744 goto CALL_WITH_VALUES_CONT;
1745 }
1746 if (R_ARGS == SC_NULL) sc_error("no value for ordinary continuation");
1747 if (cdr(R_ARGS) != SC_NULL)
1748 sc_error1("multiple values for ordinary continuation:", R_ARGS);
1749 RETURN(car(R_ARGS));
1750
1751EVAL_BODY:
1752 /* Evaluate one or more commands/expressions. (No definitions; we don't
1753 * need to distinguish sequence from body, as internal definitions are
1754 * converted to letrec by the compiler.)
1755 * Paramters: R_OPERANDS R_ENV */
1756 PUSH(R_ENV);
1757 assert(R_OPERANDS != SC_NULL);
1758 for (; cdr(R_OPERANDS) != SC_NULL; R_OPERANDS = cdr(R_OPERANDS)) {
1759 R_EXPR = car(R_OPERANDS);
1760 PUSH(R_OPERANDS);
1761 CALL(EVAL, EV_SEQ_LOOP);
1762 case EV_SEQ_LOOP:
1763 R_OPERANDS = pop();
1764 R_ENV = peek();
1765 }
1766 drop(); /* environment */
1767 R_EXPR = car(R_OPERANDS);
1768 goto EVAL;
1769
1770IF:
1771 /* (if predicate consequent [alternate])
1772 * Parameters: R_OPERANDS R_ENV */
1773 R_EXPR = car(R_OPERANDS); /* predicate */
1774 R_OPERANDS = cdr(R_OPERANDS);
1775 R_CAR = car(R_OPERANDS); /* consequent */
1776 R_OPERANDS = cdr(R_OPERANDS);
1777 push(); /* consequent */
1778 PUSH(R_OPERANDS); /* (alternate) */
1779 PUSH(R_ENV);
1780 CALL(EVAL, EV_IF_PREDICATE);
1781 case EV_IF_PREDICATE:
1782 R_ENV = pop();
1783 if (R_RESULT != SC_FALSE) {
1784 drop(); /* (alternate) */
1785 R_EXPR = pop(); /* consequent */
1786 goto EVAL;
1787 }
1788 R_EXPR = pop(); /* (alternate) */
1789 drop(); /* consequent */
1790 if (R_EXPR != SC_NULL) {
1791 R_EXPR = car(R_EXPR); /* alternate */
1792 goto EVAL;
1793 }
1794 RETURN(SC_NULL);
1795
1796SET:
1797 /* (set! variable value)
1798 * Parameters: R_OPERANDS R_ENV */
1799 err_context = "set!";
1800 R_CAR = car(R_OPERANDS); /* variable name/ref */
1801 R_EXPR = cadr(R_OPERANDS); /* value expression */
1802 push();
1803 PUSH(R_ENV);
1804 CALL(EVAL, EV_SET_RESULT);
1805 case EV_SET_RESULT:
1806 R_ENV = pop();
1807 R_CAR = pop(); /* variable name/ref */
1808 if (is_variable_ref(R_CAR))
1809 variable_ref_set(R_CAR, R_ENV, R_RESULT);
1810 else /* Slow and stupid lookup for bootstrap, as in EVAL */
1811 env_lookup_set(R_CAR, R_ENV, R_RESULT);
1812 RETURN(SC_NULL);
1813
1814LETREC:
1815 /* (letrec ((var init) ...) body)
1816 * Parameters: R_OPERANDS R_ENV */
1817 r2 = R_ARGS = car(R_OPERANDS); /* binding specifiers */
1818 PUSH(cdr(R_OPERANDS)); /* body */
1819 R_CAR = make_letrec_frame(); /* new frame */
1820 k = vector_len(R_CAR);
1821 R_CDR = R_ENV;
1822 R_ENV = cons(); /* new environment */
1823 /* Evaluate initializers in the new environment */
1824 PUSH(R_ENV);
1825 for (; R_ARGS != SC_NULL; R_ARGS = cdr(R_ARGS)) {
1826 k--;
1827 PUSH(k);
1828 PUSH(R_ARGS);
1829 R_EXPR = car(cdr(car(R_ARGS)));
1830 CALL(EVAL, EV_LETREC_LOOP);
1831 case EV_LETREC_LOOP:
1832 R_ARGS = pop();
1833 k = pop();
1834 R_ENV = peek();
1835 vector_set(car(R_ENV), k, R_RESULT);
1836 /* Trick: all variables in a frame are considered UNDEFINED if the
1837 * first one is. (Checking this is cheap due to memoized variable
1838 * refs.) Since we're filling in the frame backwards, to match the
1839 * reversed name list from make_letrec_frame, we catch uses of
1840 * undefined variables in the initializers without needing to store
1841 * their results in a temporary list here and then copy. */
1842 }
1843 drop();
1844 assert(k == 1);
1845 /* Evaluate body in the now populated environment */
1846 R_OPERANDS = pop(); /* body */
1847 goto EVAL_BODY;
1848
1849DEFINE:
1850 /* (define variable value)
1851 * Paramters: R_OPERANDS R_ENV */
1852 if (R_ENV != interaction_env) {
1853 err_context = "define";
1854 sc_error("not allowed in this environment");
1855 }
1856 PUSH(car(R_OPERANDS)); /* variable name */
1857 R_EXPR = car(cdr(R_OPERANDS)); /* value expression */
1858 CALL(EVAL, EV_DEFINE_RESULT);
1859 case EV_DEFINE_RESULT:
1860 /* XXX is this supposed to not handle variable refs? */
1861 R_ENV = interaction_env;
1862 R_CAR = pop(); /* variable name */
1863 R_EXPR = global_frame_lookup(R_CAR, car(R_ENV));
1864 if (R_EXPR == SC_FALSE) {
1865 R_CDR = R_RESULT;
1866 extend_global_env();
1867 }
1868 else set_cdr(R_EXPR, R_RESULT);
1869 RETURN(SC_NULL);
1870
1871DELAY:
1872 /* (delay expr)
1873 * Parameters: R_OPERANDS R_ENV */
1874 R_EXPR = car(R_OPERANDS);
1875 RETURN(promise());
1876
1877FORCE:
1878 /* Parameters: R_EXPR: promise */
1879 if (!is_promise(R_EXPR)) sc_error1("not a promise:", R_EXPR);
1880 if (promise_done(R_EXPR)) RETURN(promise_value(R_EXPR));
1881 PUSH(R_EXPR);
1882 R_ENV = promise_env(R_EXPR);
1883 R_EXPR = promise_value(R_EXPR);
1884 CALL(EVAL, EV_FORCE_RESULT);
1885 case EV_FORCE_RESULT:
1886 R_EXPR = pop();
1887 /* If promise forces itself recursively, keep the first result */
1888 if (promise_done(R_EXPR)) RETURN(promise_value(R_EXPR));
1889 promise_memoize(R_EXPR, R_RESULT);
1890 RETURN(R_RESULT);
1891
1892CALL_WITH_VALUES:
1893 /* Parameters: R_PROC: producer, R_ARGS: consumer */
1894 PUSH(R_ARGS);
1895 R_ARGS = SC_NULL;
1896 CALL(APPLY, EV_CALL_WITH_VALUES);
1897 case EV_CALL_WITH_VALUES:
1898 /* Producer returned a single value normally */
1899 R_CAR = R_RESULT;
1900 R_CDR = SC_NULL;
1901 R_ARGS = cons();
1902 CALL_WITH_VALUES_CONT:
1903 /* Producer returned by calling a continuation */
1904 R_PROC = pop();
1905 goto APPLY;
1906
1907 }
1908}
1909
1910/* Internal error signaller: similar in form to an evaluator subroutine, but
1911 * callable from downstack C functions. */
1912__attribute__((noreturn))
1913void sc_error1(const char *msg, value detail) {
1914 static int in_handler = 0;
1915 const char *sep = ": ";
1916 if (r_error_cont != SC_NULL) {
1917 /* Hook installed by toplevel. As it's a captured continuation,
1918 * unwinding from where the error occurred happens in the usual way. */
1919 R_PROC = r_error_cont;
1920 /* Mirroring toplevel, fall back to the default if an error is
1921 * recursively raised in the handler (or the allocations here). If a
1922 * handler is restored using SET-ERROR-HANDLER!, r_error_cont is
1923 * restored alongside. */
1924 r_error_cont = SC_NULL;
1925 R_CDR = SC_NULL;
1926 if (detail != UNDEFINED) {
1927 R_CAR = detail;
1928 R_CDR = cons();
1929 }
1930 if (err_context) {
1931 value cl = strlen(err_context), sl = strlen(sep), ml = strlen(msg);
1932 uchar *buf = string_buf(R_CAR = make_string_uninit(cl + sl + ml));
1933 memcpy(buf, err_context, cl); buf += cl;
1934 memcpy(buf, sep, sl); buf += sl;
1935 memcpy(buf, msg, ml);
1936 }
1937 else R_CAR = string(msg);
1938 R_CAR = cons();
1939 R_CDR = SC_NULL;
1940 R_ARGS = cons();
1941 longjmp(err_longjmp_env, 1);
1942 }
1943 else if (stdout_port && !in_handler) {
1944 /* Default handler: print and halt */
1945 in_handler = 1; /* fall back to fatal if this too raises an error */
1946 R_PORT = stdout_port;
1947 write_cstr("ERROR [fallback]: ");
1948 if (err_context) {
1949 write_cstr(err_context);
1950 write_cstr(sep);
1951 }
1952 write_cstr(msg);
1953 if (detail != UNDEFINED) {
1954 write_char(' ');
1955 R_EXPR = detail;
1956 shallow_print();
1957 }
1958 newline();
1959 sc_exit(1);
1960 }
1961 else fatal(msg); /* Not initialized, or loop */
1962}
1963
1964
1965/*****************
1966 * Lexical scanner
1967 */
1968
1969/* Initial buffer allocation for token types that need it */
1970#define DEFAULT_LEXBUF_SIZE 32
1971
1972static value lexeme_length;
1973static void lexbuf_init(void) {
1974 lexeme_length = 0;
1975 R_LEXEME = make_string_uninit(DEFAULT_LEXBUF_SIZE);
1976}
1977static void lexbuf_append(uchar c) {
1978 value buf_length = string_len(R_LEXEME);
1979 if (lexeme_length == buf_length) {
1980 value new_length = buf_length * 2;
1981 if (new_length > EXT_LENGTH_MAX) {
1982 new_length = EXT_LENGTH_MAX;
1983 if (lexeme_length == new_length) sc_error("token too long");
1984 }
1985 value new_buf = make_string_uninit(new_length);
1986 memcpy(string_buf(new_buf), string_buf(R_LEXEME), buf_length);
1987 R_LEXEME = new_buf;
1988 }
1989 string_buf(R_LEXEME)[lexeme_length] = c;
1990 lexeme_length++;
1991}
1992static void lexbuf_done(void) { string_truncate(R_LEXEME, lexeme_length); }
1993
1994static int is_letter(int c) {
1995 return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
1996}
1997static int is_digit(int c) { return (c >= '0' && c <= '9'); }
1998static int in_str(int c, const char *s) {
1999 for (; *s; s++) if (*s == c) return 1;
2000 return 0;
2001}
2002static int is_whitespace(int c) { return in_str(c, " \t\n\f\r"); }
2003static int is_delimiter(int c) { return c == EOF || in_str(c, " \t\n\f\r()\";"); }
2004static int is_special_initial(int c) { return in_str(c, "!$%&*/:<=>?^_~"); }
2005static int is_special_subsequent(int c) { return in_str(c, "+-.@"); }
2006
2007typedef enum {
2008 tok_eof,
2009 tok_literal,
2010 tok_open_paren,
2011 tok_close_paren,
2012 tok_dot,
2013 tok_open_vector,
2014 tok_identifier,
2015 tok_named_char,
2016 tok_abbrev,
2017 tok_number,
2018} token_type;
2019
2020typedef enum {
2021 lex_start,
2022 lex_comment,
2023 lex_sharp,
2024 lex_bool,
2025 lex_comma,
2026 lex_dot,
2027 lex_dot2,
2028 lex_dot3,
2029 lex_ident,
2030 lex_string,
2031 lex_string_escape,
2032 lex_char,
2033 lex_char2,
2034 lex_named_char,
2035 lex_plus,
2036 lex_minus,
2037 lex_number,
2038} lexer_state;
2039
2040/* Finite state machine to read a token from R_PORT. Returns the token type and
2041 * sets R_LEXEME to the value, if applicable: the expanded symbol for the
2042 * quoting abbreviations, and a string for identifiers, named characters, and
2043 * numbers. */
2044
2045static token_type read_token(void) {
2046 lexer_state state = lex_start;
2047 uchar saved_char = 0;
2048 R_LEXEME = SC_NULL;
2049#define TRANSITION(s) { state = s; continue; }
2050#define PUT_BACK put_back_char(c)
2051 for (;;) {
2052 int c;
2053 value cv = read_char(R_PORT);
2054 c = (cv == SC_EOF) ? EOF : char_val(cv);
2055
2056 switch (state) {
2057 case lex_start:
2058 switch (c) {
2059 case EOF: return tok_eof;
2060 case '(': return tok_open_paren;
2061 case ')': return tok_close_paren;
2062 case '\'': R_LEXEME = s_quote; return tok_abbrev;
2063 case '`': R_LEXEME = s_quasiquote; return tok_abbrev;
2064 case '#': TRANSITION(lex_sharp);
2065 case ',': TRANSITION(lex_comma);
2066 case '.': TRANSITION(lex_dot);
2067 case ';': TRANSITION(lex_comment);
2068 case '"': lexbuf_init(); TRANSITION(lex_string);
2069 case '+': TRANSITION(lex_plus);
2070 case '-': TRANSITION(lex_minus);
2071 default:
2072 if (is_whitespace(c)) continue;
2073 lexbuf_init();
2074 if (is_letter(c) || is_special_initial(c)) {
2075 lexbuf_append(lc(c)); TRANSITION(lex_ident);
2076 }
2077 if (is_digit(c)) {
2078 lexbuf_append(c); TRANSITION(lex_number);
2079 }
2080 sc_error1("bad character at start of token:", character(c));
2081 }
2082 case lex_comment:
2083 if (c == '\n') TRANSITION(lex_start);
2084 if (c == EOF) return tok_eof;
2085 continue;
2086 case lex_sharp:
2087 switch (lc(c)) {
2088 case '(': return tok_open_vector;
2089 case 't': R_LEXEME = SC_TRUE; TRANSITION(lex_bool);
2090 case 'f': R_LEXEME = SC_FALSE; TRANSITION(lex_bool);
2091 case 'e':
2092 case 'i':
2093 case 'b':
2094 case 'o':
2095 case 'd':
2096 case 'x': lexbuf_init(); lexbuf_append('#'); lexbuf_append(c);
2097 TRANSITION(lex_number);
2098 case '\\': TRANSITION(lex_char);
2099 default: sc_error("bad # sequence");
2100 }
2101 case lex_bool:
2102 PUT_BACK;
2103 if (!is_delimiter(c)) sc_error("bad # sequence");
2104 return tok_literal;
2105 case lex_comma:
2106 if (c == '@') { R_LEXEME = s_unquote_splicing; return tok_abbrev; }
2107 PUT_BACK; R_LEXEME = s_unquote; return tok_abbrev;
2108 case lex_dot:
2109 if (is_delimiter(c)) { PUT_BACK; return tok_dot; }
2110 if (c == '.') TRANSITION(lex_dot2);
2111 lexbuf_init(); lexbuf_append('.'); lexbuf_append(c);
2112 TRANSITION(lex_number);
2113 case lex_dot2:
2114 if (c != '.') { PUT_BACK; sc_error("bad . sequence"); }
2115 TRANSITION(lex_dot3);
2116 case lex_dot3:
2117 PUT_BACK;
2118 if (is_delimiter(c)) {
2119 R_LEXEME = string("..."); return tok_identifier;
2120 }
2121 sc_error("bad . sequence");
2122 case lex_ident:
2123 if (is_letter(c) || is_special_initial(c) ||
2124 is_digit(c) || is_special_subsequent(c)) {
2125 lexbuf_append(lc(c)); continue;
2126 }
2127 PUT_BACK;
2128 if (is_delimiter(c)) { lexbuf_done(); return tok_identifier; }
2129 sc_error("bad identifier");
2130 case lex_string:
2131 switch (c) {
2132 case EOF: sc_error("unexpected end-of-file in string");
2133 case '"': lexbuf_done(); return tok_literal;
2134 case '\\': TRANSITION(lex_string_escape);
2135 default: lexbuf_append(c); continue;
2136 }
2137 case lex_string_escape:
2138 switch (c) {
2139 case EOF: sc_error("unexpected end-of-file in string");
2140 case '"':
2141 case '\\': lexbuf_append(c); TRANSITION(lex_string);
2142 default: sc_error("bad escape in string");
2143 }
2144 case lex_char:
2145 if (c == EOF) sc_error("unexpected end-of-file in character");
2146 saved_char = c; TRANSITION(lex_char2);
2147 case lex_char2:
2148 if (is_delimiter(c)) {
2149 PUT_BACK; R_LEXEME = character(saved_char); return tok_literal;
2150 }
2151 lexbuf_init(); lexbuf_append(lc(saved_char)); lexbuf_append(lc(c));
2152 TRANSITION(lex_named_char);
2153 case lex_named_char:
2154 if (is_delimiter(c)) {
2155 PUT_BACK;
2156 lexbuf_done();
2157 return tok_named_char;
2158 }
2159 lexbuf_append(lc(c)); continue;
2160 case lex_plus:
2161 if (is_delimiter(c)) {
2162 PUT_BACK; R_LEXEME = string("+"); return tok_identifier;
2163 }
2164 lexbuf_init(); lexbuf_append('+'); lexbuf_append(c);
2165 TRANSITION(lex_number);
2166 case lex_minus:
2167 if (is_delimiter(c)) {
2168 PUT_BACK; R_LEXEME = string("-"); return tok_identifier;
2169 }
2170 lexbuf_init(); lexbuf_append('-'); lexbuf_append(c);
2171 TRANSITION(lex_number);
2172 case lex_number:
2173 if (is_delimiter(c)) {
2174 PUT_BACK;
2175 lexbuf_done();
2176 return tok_number;
2177 }
2178 lexbuf_append(c); continue;
2179 }
2180 }
2181}
2182
2183
2184/******************
2185 * Bootstrap reader
2186 */
2187
2188/* Read a value from R_PORT, using a predictive parser for Scheme's LL(1)
2189 * grammar (report section 7.1.2). The sole purpose is to parse the compiler
2190 * and library code at startup (though this started out as the only reader).
2191 * Does not handle named characters or any numeric syntax beyond plain decimal
2192 * fixnums.
2193 *
2194 * O(n) runtime except for symbols, as interning is currently proportional to
2195 * the symbol table size for each one. Implemented as subroutines calling on
2196 * the Scheme stack, much like the evaluator, so there is no overflow hazard or
2197 * nesting depth limit other than available heap space. */
2198
2199/* Minimal base-10 fixnum decoder */
2200static value str_to_fixnum(value s) {
2201 uchar *p = string_buf(s);
2202 value len = string_len(s), neg = 0, acc = 0;
2203 if (!len) goto err;
2204 if (*p == '-') {
2205 neg = 1; --len; ++p;
2206 if (!len) goto err;
2207 }
2208 for (; len; --len, ++p) {
2209 if (!is_digit(*p)) goto err;
2210 if (acc > FIXNUM_MAX/10) goto err;
2211 acc = 10*acc + (*p - '0');
2212 }
2213 if (acc > FIXNUM_MAX) goto err;
2214 return fixnum(neg ? -acc : acc);
2215err:
2216 sc_error("bad number token");
2217}
2218
2219/* Return addresses */
2220#define RD_DONE 0
2221#define RD_LIST_FIRST 1
2222#define RD_LIST_LOOP 2
2223#define RD_LIST_DOT 3
2224#define RD_ABBREV 4
2225#define RD_VEC_LOOP 5
2226
2227static value sc_read(void) {
2228 token_type t;
2229 CALL(datum, RD_DONE);
2230
2231dispatch:
2232 switch (pop()) {
2233 case RD_DONE:
2234 break;
2235
2236datum:
2237 t = read_token();
2238 switch (t) {
2239 case tok_eof: RETURN(SC_EOF);
2240 case tok_literal: RETURN(R_LEXEME);
2241 case tok_open_paren: goto list;
2242 case tok_close_paren: RETURN(RD_CLOSEPAREN);
2243 case tok_dot: RETURN(RD_DOT);
2244 case tok_open_vector: goto vector;
2245 case tok_identifier: R_CAR = R_LEXEME; RETURN(string_to_symbol());
2246 case tok_named_char: sc_error("named characters unsupported");
2247 case tok_abbrev: goto abbrev;
2248 case tok_number: RETURN(str_to_fixnum(R_LEXEME));
2249 }
2250
2251list:
2252 CALL(datum, RD_LIST_FIRST);
2253 case RD_LIST_FIRST:
2254 if (R_RESULT == RD_CLOSEPAREN) RETURN(SC_NULL);
2255 if (R_RESULT == RD_DOT) sc_error("dotted list without first item");
2256 if (R_RESULT == SC_EOF) sc_error("unexpected end-of-file in list");
2257 R_CAR = R_RESULT;
2258 R_CDR = SC_NULL;
2259 R_CAR = cons();
2260 push(); /* list head */
2261 for (;;) {
2262 push(); /* list tail */
2263 CALL(datum, RD_LIST_LOOP);
2264 case RD_LIST_LOOP:
2265 if (R_RESULT == RD_CLOSEPAREN) {
2266 drop(); /* list tail */
2267 RETURN(pop()); /* list head */
2268 }
2269 if (R_RESULT == RD_DOT) {
2270 CALL(datum, RD_LIST_DOT);
2271 case RD_LIST_DOT:
2272 if (R_RESULT == RD_CLOSEPAREN)
2273 sc_error("dotted list without last item");
2274 if (R_RESULT == RD_DOT) sc_error("extra dot in dotted list");
2275 if (R_RESULT == SC_EOF)
2276 sc_error("unexpected end-of-file in list");
2277 PUSH(R_RESULT)
2278 t = read_token();
2279 R_RESULT = pop();
2280 R_CAR = pop(); /* list tail */
2281 if (t == tok_close_paren) {
2282 set_cdr(R_CAR, R_RESULT);
2283 RETURN(pop()); /* list head */
2284 }
2285 if (t == tok_eof) sc_error("unexpected end-of-file in list");
2286 sc_error("excess item in tail of dotted list");
2287 }
2288 if (R_RESULT == SC_EOF) sc_error("unexpected end-of-file in list");
2289 R_CAR = R_RESULT;
2290 R_CDR = SC_NULL;
2291 R_CAR = cons();
2292 R_CDR = pop(); /* list tail */
2293 set_cdr(R_CDR, R_CAR);
2294 }
2295
2296abbrev: /* 'x -> (quote x) etc. */
2297 PUSH(R_LEXEME) /* expanded abbrev symbol */
2298 CALL(datum, RD_ABBREV);
2299 case RD_ABBREV:
2300 if (R_RESULT == RD_CLOSEPAREN)
2301 sc_error("unexpected close-paren in abbreviation");
2302 if (R_RESULT == RD_DOT)
2303 sc_error("unexpected dot in abbreviation");
2304 if (R_RESULT == SC_EOF)
2305 sc_error("unexpected end-of-file in abbreviation");
2306 R_CAR = R_RESULT;
2307 R_CDR = SC_NULL;
2308 R_CDR = cons();
2309 R_CAR = pop(); /* expanded abbrev symbol */
2310 RETURN(cons());
2311
2312vector:
2313 /* First build a list */
2314 R_CAR = SC_NULL;
2315 for (;;) {
2316 push(); /* list head */
2317 CALL(datum, RD_VEC_LOOP);
2318 case RD_VEC_LOOP:
2319 if (R_RESULT == RD_CLOSEPAREN) {
2320 /* Then copy to a new vector while un-reversing */
2321 R_EXPR = pop(); /* list head */
2322 RETURN(rev_list_to_vec());
2323 }
2324 if (R_RESULT == RD_DOT) sc_error("unexpected dot in vector");
2325 if (R_RESULT == SC_EOF)
2326 sc_error("unexpected end-of-file in vector");
2327 R_CAR = R_RESULT;
2328 R_CDR = pop(); /* list head */
2329 R_CAR = cons();
2330 }
2331
2332 }
2333 if (R_RESULT == RD_CLOSEPAREN) sc_error("unexpected close-paren");
2334 if (R_RESULT == RD_DOT) sc_error("unexpected dot");
2335 return R_RESULT;
2336}
2337
2338
2339/*****************
2340 * Number printers
2341 */
2342
2343static char fmt_buf[128]; /* TODO justify size */
2344static const char *fmt_fixnum_dec(long val) {
2345 int i = sizeof(fmt_buf) - 1, neg = 0;
2346 /* TODO null termination is convenient here but perhaps not ideal */
2347 fmt_buf[i] = 0;
2348 if (val < 0) { neg = 1; val = -val; }
2349 do {
2350 --i; assert(i);
2351 fmt_buf[i] = '0' + (val % 10);
2352 val /= 10;
2353 } while (val);
2354 if (neg) fmt_buf[--i] = '-';
2355 return fmt_buf+i;
2356}
2357static const char *fmt_ulong_dec(ulong val) {
2358 int i = sizeof(fmt_buf) - 1;
2359 fmt_buf[i] = 0;
2360 do {
2361 --i; assert(i >= 0);
2362 fmt_buf[i] = '0' + (val % 10);
2363 val /= 10;
2364 } while (val);
2365 return fmt_buf+i;
2366}
2367static const char *fmt_fixnum_hex(long val) {
2368 int i = sizeof(fmt_buf) - 1, neg = 0;
2369 fmt_buf[i] = 0;
2370 if (val < 0) { neg = 1; val = -val; }
2371 do {
2372 --i; assert(i);
2373 fmt_buf[i] = "0123456789abcdef"[val & 0xf];
2374 val >>= 4;
2375 } while (val);
2376 if (neg) fmt_buf[--i] = '-';
2377 return fmt_buf+i;
2378}
2379static const char *fmt_fixnum_oct(long val) {
2380 int i = sizeof(fmt_buf) - 1, neg = 0;
2381 fmt_buf[i] = 0;
2382 if (val < 0) { neg = 1; val = -val; }
2383 do {
2384 --i; assert(i);
2385 fmt_buf[i] = '0' + (val & 7);
2386 val >>= 3;
2387 } while (val);
2388 if (neg) fmt_buf[--i] = '-';
2389 return fmt_buf+i;
2390}
2391static const char *fmt_fixnum_bin(long val) {
2392 int i = sizeof(fmt_buf) - 1, neg = 0;
2393 fmt_buf[i] = 0;
2394 if (val < 0) { neg = 1; val = -val; }
2395 do {
2396 --i; assert(i);
2397 fmt_buf[i] = '0' + (val & 1);
2398 val >>= 1;
2399 } while (val);
2400 if (neg) fmt_buf[--i] = '-';
2401 return fmt_buf+i;
2402}
2403static const char *fmt_ulong_bin(ulong val) {
2404 int i = sizeof(fmt_buf) - 1;
2405 fmt_buf[i] = 0;
2406 do {
2407 --i; assert(i);
2408 fmt_buf[i] = '0' + (val & 1);
2409 val >>= 1;
2410 } while (val);
2411 return fmt_buf+i;
2412}
2413static const char *fmt_flonum_dec(double val) {
2414 /* TODO follow up on R5RS citations 3 and 5 */
2415 if ((size_t)snprintf(fmt_buf, sizeof fmt_buf, "%.15g", val) >=
2416 sizeof fmt_buf)
2417 sc_error("BUG: flonum formatting truncated");
2418 return fmt_buf;
2419}
2420
2421/****************************
2422 * Fallback (shallow) printer
2423 */
2424
2425/* Print the value in R_EXPR to R_PORT, using "write" style (quoting strings
2426 * and characters) but not expanding named characters or looking inside
2427 * compound objects. (This used to be the real printer, implemented as
2428 * recursive subroutines on the Scheme stack like the reader and evaluator, but
2429 * is now just for low-level debug and fallback error handlers.) */
2430
2431static void shallow_print(void) {
2432 int t = tag(R_EXPR);
2433 if (t == T_SPECIAL) {
2434 const char *s;
2435 if (R_EXPR == SC_NULL) s = "()";
2436 else if (R_EXPR == SC_TRUE) s = "#t";
2437 else if (R_EXPR == SC_FALSE) s = "#f";
2438 else if (R_EXPR == SC_EOF) s = "#EOF";
2439 else if (R_EXPR == SC_NULL_ENV) s = "#ENVSPEC:NULL";
2440 else if (R_EXPR == SC_REPORT_ENV) s = "#ENVSPEC:SCHEME-REPORT";
2441 else if (R_EXPR == SC_GSCM_ENV) s = "#ENVSPEC:GALES-SCHEME";
2442 else if (R_EXPR == SC_INTERACT_ENV) s = "#ENVSPEC:INTERACTION";
2443 else if (R_EXPR == SC_TOPLEVEL_ENV) s = "#ENVSPEC:TOPLEVEL";
2444 else if (R_EXPR == UNDEFINED) s = "#UNDEFINED";
2445 else if (R_EXPR == RD_CLOSEPAREN) s = "#RDSENTINEL:CLOSEPAREN";
2446 else if (R_EXPR == RD_DOT) s = "#RDSENTINEL:DOT";
2447 else fatal("BUG: invalid special in shallow_print");
2448 write_cstr(s);
2449 }
2450 else if (t == T_IMMUT_PAIR) write_cstr("#IMMUTABLE-PAIR");
2451 else if (t == T_PAIR) write_cstr("#PAIR");
2452 else if (t == T_CHARACTER) { write_cstr("#\\"); write_char(R_EXPR); }
2453 else if (t == T_FIXNUM) write_cstr(fmt_fixnum_dec(fixnum_val(R_EXPR)));
2454 else if (t == T_EXTENDED) {
2455 t = ext_tag(heap[untag(R_EXPR)]);
2456 if ((t | 1) == T_STRING) write_str_quoted(R_EXPR);
2457 else if ((t | 1) == T_VECTOR) {
2458 if (t == T_VECTOR) write_cstr("#VECTOR:");
2459 else write_cstr("#IMMUTABLE-VECTOR:");
2460 write_cstr(fmt_fixnum_dec(vector_len(R_EXPR)));
2461 }
2462 else if (t == T_SYMBOL) write_str(R_EXPR);
2463 else if (t == T_BUILTIN) {
2464 write_cstr("#BUILTIN:");
2465 write_cstr(builtin_name(R_EXPR));
2466 }
2467 else if (t == T_PROCEDURE) write_cstr("#PROCEDURE");
2468 else if (t == T_CONTINUATION) write_cstr("#CONTINUATION");
2469 else if (t == T_PROMISE) write_cstr("#PROMISE");
2470 else if (t == T_PORT) write_cstr("#PORT");
2471 else if (t == T_FLONUM) write_cstr("#FLONUM");
2472 else if (t == T_BIGNUM) write_cstr("#BIGNUM");
2473 else if (t == T_RATIONAL) write_cstr("#RATIONAL");
2474 else if (t == T_COMPLEX) write_cstr("#COMPLEX");
2475 else if (t == T_VARIABLE_REF) write_cstr("#VARIABLE-REF");
2476 else fatal("BUG: invalid extended tag in shallow_print");
2477 }
2478 else fatal("BUG: invalid tag in shallow_print");
2479}
2480
2481
2482/********************
2483 * Builtin procedures
2484 */
2485
2486/* Argument wrangling helpers for builtins */
2487
2488static void require_args(value args) {
2489 if (args == SC_NULL) sc_error("too few arguments");
2490}
2491
2492static void no_args(value args) {
2493 if (args != SC_NULL) sc_error("too many arguments");
2494}
2495
2496static value extract_arg(value *args) {
2497 require_args(*args);
2498 value arg = car(*args);
2499 *args = cdr(*args);
2500 return arg;
2501}
2502
2503static value final_arg(value args) {
2504 require_args(args);
2505 no_args(cdr(args));
2506 return car(args);
2507}
2508
2509static value require_input_port(value arg) {
2510 if (!is_input_port(arg)) sc_error("not an input port"); return arg;
2511}
2512
2513static value require_output_port(value arg) {
2514 if (!is_output_port(arg)) sc_error("not an output port"); return arg;
2515}
2516
2517static value opt_final_in_port_arg(value args) {
2518 return require_input_port(args == SC_NULL ? r_input_port :
2519 final_arg(args));
2520}
2521
2522static value opt_final_out_port_arg(value args) {
2523 return require_output_port(args == SC_NULL ? r_output_port :
2524 final_arg(args));
2525}
2526
2527static value require_symbol(value arg) {
2528 if (!is_symbol(arg)) sc_error1("not a symbol:", arg);
2529 return arg;
2530}
2531
2532static value require_string(value arg) {
2533 if (!is_string(arg)) sc_error1("not a string:", arg);
2534 return arg;
2535}
2536
2537static value require_mutable_string(value arg) {
2538 if (!is_mutable_string(arg)) {
2539 if (is_string(arg)) sc_error1("immutable string:", arg);
2540 sc_error1("not a string:", arg);
2541 }
2542 return arg;
2543}
2544
2545static value require_stringlike(value arg) {
2546 if (!(is_string(arg) || is_symbol(arg)))
2547 sc_error1("not a string or symbol:", arg);
2548 return arg;
2549}
2550
2551static value require_vector(value arg) {
2552 if (!is_vector(arg)) sc_error1("not a vector:", arg);
2553 return arg;
2554}
2555
2556static value require_mutable_vector(value arg) {
2557 if (!is_mutable_vector(arg)) {
2558 if (is_vector(arg)) sc_error1("immutable vector:", arg);
2559 sc_error1("not a vector:", arg);
2560 }
2561 return arg;
2562}
2563
2564static value require_fixnum(value arg) {
2565 if (!is_fixnum(arg)) sc_error1("not a fixnum:", arg);
2566 return arg;
2567}
2568
2569static value require_procedure(value arg) {
2570 if (!is_procedure(arg)) sc_error1("not a procedure:", arg);
2571 return arg;
2572}
2573
2574#define BUILTIN(name) static value name(value args)
2575
2576/* Mnemonic for multi-valued returns, i.e. passing multiple values to the
2577 * current continuation. f_values is strictly an optimization; we could just as
2578 * well set R_PROC to current_continuation() and r_flag to f_apply.
2579 * The arg list must be newly allocated! */
2580#define RETURN_VALUES(args) { \
2581 R_ARGS = args; \
2582 r_flag = f_values; \
2583 return SC_NULL; \
2584}
2585
2586/* 6.1 Equivalence predicates */
2587
2588BUILTIN(builtin_is_eq) {
2589 value a = extract_arg(&args);
2590 return boolean(a == final_arg(args));
2591}
2592
2593/* 6.2.5 Numerical operations */
2594
2595BUILTIN(builtin_is_number) { return boolean(is_number(final_arg(args))); }
2596BUILTIN(builtin_is_integer) { return boolean(is_integer(final_arg(args))); }
2597BUILTIN(builtin_is_exact) { return boolean(is_exact(final_arg(args))); }
2598BUILTIN(builtin_is_inexact) { return boolean(is_flonum(final_arg(args))); }
2599
2600/* 6.3.1 Booleans */
2601
2602BUILTIN(builtin_not) { return boolean(final_arg(args) == SC_FALSE); }
2603BUILTIN(builtin_is_boolean) { return boolean(is_boolean(final_arg(args))); }
2604
2605/* 6.3.2 Pairs and lists */
2606
2607BUILTIN(builtin_is_pair) { return boolean(is_pair(final_arg(args))); }
2608BUILTIN(builtin_cons) {
2609 R_CAR = extract_arg(&args);
2610 R_CDR = final_arg(args);
2611 return cons();
2612}
2613
2614BUILTIN(builtin_car) { return safe_car(final_arg(args)); }
2615BUILTIN(builtin_cdr) { return safe_cdr(final_arg(args)); }
2616
2617BUILTIN(builtin_caar) { return safe_car(builtin_car(args)); }
2618BUILTIN(builtin_cadr) { return safe_car(builtin_cdr(args)); }
2619BUILTIN(builtin_cdar) { return safe_cdr(builtin_car(args)); }
2620BUILTIN(builtin_cddr) { return safe_cdr(builtin_cdr(args)); }
2621
2622BUILTIN(builtin_caaar) { return safe_car(builtin_caar(args)); }
2623BUILTIN(builtin_caadr) { return safe_car(builtin_cadr(args)); }
2624BUILTIN(builtin_cadar) { return safe_car(builtin_cdar(args)); }
2625BUILTIN(builtin_caddr) { return safe_car(builtin_cddr(args)); }
2626BUILTIN(builtin_cdaar) { return safe_cdr(builtin_caar(args)); }
2627BUILTIN(builtin_cdadr) { return safe_cdr(builtin_cadr(args)); }
2628BUILTIN(builtin_cddar) { return safe_cdr(builtin_cdar(args)); }
2629BUILTIN(builtin_cdddr) { return safe_cdr(builtin_cddr(args)); }
2630
2631BUILTIN(builtin_caaaar) { return safe_car(builtin_caaar(args)); }
2632BUILTIN(builtin_caaadr) { return safe_car(builtin_caadr(args)); }
2633BUILTIN(builtin_caadar) { return safe_car(builtin_cadar(args)); }
2634BUILTIN(builtin_caaddr) { return safe_car(builtin_caddr(args)); }
2635BUILTIN(builtin_cadaar) { return safe_car(builtin_cdaar(args)); }
2636BUILTIN(builtin_cadadr) { return safe_car(builtin_cdadr(args)); }
2637BUILTIN(builtin_caddar) { return safe_car(builtin_cddar(args)); }
2638BUILTIN(builtin_cadddr) { return safe_car(builtin_cdddr(args)); }
2639BUILTIN(builtin_cdaaar) { return safe_cdr(builtin_caaar(args)); }
2640BUILTIN(builtin_cdaadr) { return safe_cdr(builtin_caadr(args)); }
2641BUILTIN(builtin_cdadar) { return safe_cdr(builtin_cadar(args)); }
2642BUILTIN(builtin_cdaddr) { return safe_cdr(builtin_caddr(args)); }
2643BUILTIN(builtin_cddaar) { return safe_cdr(builtin_cdaar(args)); }
2644BUILTIN(builtin_cddadr) { return safe_cdr(builtin_cdadr(args)); }
2645BUILTIN(builtin_cdddar) { return safe_cdr(builtin_cddar(args)); }
2646BUILTIN(builtin_cddddr) { return safe_cdr(builtin_cdddr(args)); }
2647
2648BUILTIN(builtin_set_car) {
2649 value p = extract_arg(&args);
2650 value val = final_arg(args);
2651 if (tag(p) != T_PAIR) {
2652 if (tag(p) == T_IMMUT_PAIR) sc_error("immutable pair");
2653 sc_error("not a pair");
2654 }
2655 set_car(p, val);
2656 return SC_NULL;
2657}
2658BUILTIN(builtin_set_cdr) {
2659 value p = extract_arg(&args);
2660 value val = final_arg(args);
2661 if (tag(p) != T_PAIR) {
2662 if (tag(p) == T_IMMUT_PAIR) sc_error("immutable pair");
2663 sc_error("not a pair");
2664 }
2665 set_cdr(p, val);
2666 return SC_NULL;
2667}
2668
2669BUILTIN(builtin_is_null) { return boolean(final_arg(args) == SC_NULL); }
2670BUILTIN(builtin_is_list) { return boolean(is_list(final_arg(args))); }
2671
2672BUILTIN(builtin_length) {
2673 long len = safe_list_length(final_arg(args));
2674 if (len < 0) sc_error("not a list");
2675 return fixnum(len);
2676}
2677
2678/* 6.3.3 Symbols */
2679
2680BUILTIN(builtin_is_symbol) { return boolean(is_symbol(final_arg(args))); }
2681
2682BUILTIN(builtin_sym_to_str) {
2683 /* TODO use immutability to avoid copying */
2684 R_EXPR = require_symbol(final_arg(args));
2685 return string_copy_immutable();
2686}
2687
2688BUILTIN(builtin_str_to_sym) {
2689 R_CAR = require_string(final_arg(args));
2690 return string_to_symbol();
2691}
2692
2693/* 6.3.4 Characters */
2694
2695BUILTIN(builtin_is_char) { return boolean(is_character(final_arg(args))); }
2696
2697#define CHAR1 uchar a = safe_char_val(final_arg(args));
2698#define CHAR2 uchar a = safe_char_val(extract_arg(&args)); \
2699 uchar b = safe_char_val(final_arg(args));
2700
2701BUILTIN(builtin_char_eq) { CHAR2 return boolean(a == b); }
2702BUILTIN(builtin_char_lt) { CHAR2 return boolean(a < b); }
2703BUILTIN(builtin_char_gt) { CHAR2 return boolean(a > b); }
2704BUILTIN(builtin_char_le) { CHAR2 return boolean(a <= b); }
2705BUILTIN(builtin_char_ge) { CHAR2 return boolean(a >= b); }
2706BUILTIN(builtin_char_ci_eq) { CHAR2 return boolean(lc(a) == lc(b)); }
2707BUILTIN(builtin_char_ci_lt) { CHAR2 return boolean(lc(a) < lc(b)); }
2708BUILTIN(builtin_char_ci_gt) { CHAR2 return boolean(lc(a) > lc(b)); }
2709BUILTIN(builtin_char_ci_le) { CHAR2 return boolean(lc(a) <= lc(b)); }
2710BUILTIN(builtin_char_ci_ge) { CHAR2 return boolean(lc(a) >= lc(b)); }
2711
2712BUILTIN(builtin_char_is_alpha) {
2713 CHAR1 return boolean((a >= 'A' && a <= 'Z') || (a >= 'a' && a <= 'z'));
2714}
2715BUILTIN(builtin_char_is_num) {
2716 CHAR1 return boolean(a >= '0' && a <= '9');
2717}
2718BUILTIN(builtin_char_is_white) { CHAR1 return boolean(is_whitespace(a)); }
2719BUILTIN(builtin_char_is_upper) { CHAR1 return boolean(a >= 'A' && a <= 'Z'); }
2720BUILTIN(builtin_char_is_lower) { CHAR1 return boolean(a >= 'a' && a <= 'z'); }
2721
2722BUILTIN(builtin_char_to_int) { CHAR1 return fixnum(a); }
2723
2724BUILTIN(builtin_int_to_char) {
2725 long n = safe_fixnum_val(final_arg(args));
2726 if (n < 0 || n > 255) sc_error1("out of bounds:", fixnum(n));
2727 return character(n);
2728}
2729
2730BUILTIN(builtin_char_upcase) { CHAR1 return character(uc(a)); }
2731BUILTIN(builtin_char_downcase) { CHAR1 return character(lc(a)); }
2732
2733/* 6.3.5 Strings */
2734
2735BUILTIN(builtin_is_str) { return boolean(is_string(final_arg(args))); }
2736
2737BUILTIN(builtin_make_str) {
2738 long len = safe_fixnum_val(extract_arg(&args));
2739 uchar fill = (args == SC_NULL) ? ' ' : safe_char_val(final_arg(args));
2740 return make_string(len, fill);
2741}
2742
2743BUILTIN(builtin_str_length) {
2744 return fixnum(string_len(require_string(final_arg(args))));
2745}
2746
2747BUILTIN(builtin_str_ref) {
2748 value s = require_string(extract_arg(&args));
2749 value k = final_arg(args);
2750 value k_unsigned = safe_fixnum_val(k);
2751 /* see builtin_vec_ref comments */
2752 if (k_unsigned >= string_len(s)) sc_error1("out of bounds:", k);
2753 return character(string_buf(s)[k_unsigned]);
2754}
2755
2756BUILTIN(builtin_str_set) {
2757 value s = require_mutable_string(extract_arg(&args));
2758 value k = extract_arg(&args);
2759 uchar new_char = safe_char_val(final_arg(args));
2760 value k_unsigned = safe_fixnum_val(k);
2761 /* see builtin_vec_ref comments */
2762 if (k_unsigned >= string_len(s)) sc_error1("out of bounds:", k);
2763 string_buf(s)[k_unsigned] = new_char;
2764 return SC_NULL;
2765}
2766
2767#define STR2 value a = require_string(extract_arg(&args)); \
2768 value b = require_string(final_arg(args)); \
2769 size_t a_len = string_len(a), b_len = string_len(b); \
2770 uchar *a_buf = string_buf(a), *b_buf = string_buf(b);
2771
2772BUILTIN(builtin_str_eq) {
2773 STR2
2774 if (a_len != b_len) return SC_FALSE;
2775 return boolean(memcmp(a_buf, b_buf, a_len) == 0);
2776}
2777
2778#define STRCMP \
2779 STR2 int cmp = memcmp(a_buf, b_buf, (a_len < b_len) ? a_len : b_len);
2780
2781BUILTIN(builtin_str_lt) {
2782 STRCMP return boolean(cmp < 0 || (cmp == 0 && a_len < b_len));
2783}
2784BUILTIN(builtin_str_gt) {
2785 STRCMP return boolean(cmp > 0 || (cmp == 0 && a_len > b_len));
2786}
2787BUILTIN(builtin_str_le) {
2788 STRCMP return boolean(cmp < 0 || (cmp == 0 && a_len <= b_len));
2789}
2790BUILTIN(builtin_str_ge) {
2791 STRCMP return boolean(cmp > 0 || (cmp == 0 && a_len >= b_len));
2792}
2793
2794static int memcmp_ci(const void *s1, const void *s2, size_t n) {
2795 const uchar *b1 = s1, *b2 = s2;
2796 uchar c1, c2;
2797 size_t i;
2798 for (i = 0; i < n; i++) {
2799 c1 = lc(b1[i]);
2800 c2 = lc(b2[i]);
2801 if (c1 < c2) return -1;
2802 if (c1 > c2) return 1;
2803 }
2804 return 0;
2805}
2806
2807BUILTIN(builtin_str_ci_eq) {
2808 STR2
2809 if (a_len != b_len) return SC_FALSE;
2810 return boolean(memcmp_ci(a_buf, b_buf, a_len) == 0);
2811}
2812
2813#define STRCMP_CI STR2 \
2814 int cmp = memcmp_ci(a_buf, b_buf, (a_len < b_len) ? a_len : b_len);
2815
2816BUILTIN(builtin_str_ci_lt) {
2817 STRCMP_CI return boolean(cmp < 0 || (cmp == 0 && a_len < b_len));
2818}
2819BUILTIN(builtin_str_ci_gt) {
2820 STRCMP_CI return boolean(cmp > 0 || (cmp == 0 && a_len > b_len));
2821}
2822BUILTIN(builtin_str_ci_le) {
2823 STRCMP_CI return boolean(cmp < 0 || (cmp == 0 && a_len <= b_len));
2824}
2825BUILTIN(builtin_str_ci_ge) {
2826 STRCMP_CI return boolean(cmp > 0 || (cmp == 0 && a_len >= b_len));
2827}
2828
2829BUILTIN(builtin_substr) {
2830 value len = string_len(R_EXPR = require_string(extract_arg(&args))),
2831 start = extract_arg(&args), end = final_arg(args),
2832 start_unsigned = safe_fixnum_val(start),
2833 end_unsigned = safe_fixnum_val(end);
2834 if (start_unsigned > len) sc_error1("start out of bounds:", start);
2835 if (end_unsigned > len) sc_error1("end out of bounds:", end);
2836 if (end_unsigned < start_unsigned) sc_error("end less than start");
2837 len = end_unsigned - start_unsigned;
2838 R_RESULT = make_string_uninit(len);
2839 memcpy(string_buf(R_RESULT), string_buf(R_EXPR)+start_unsigned, len);
2840 return R_RESULT;
2841}
2842
2843BUILTIN(builtin_str_append) {
2844 value p, s, len = 0;
2845 uchar *buf;
2846 R_ARGS = args;
2847 for (p = R_ARGS; p != SC_NULL; p = cdr(p)) {
2848 len += string_len(require_string(car(p)));
2849 if (len > EXT_LENGTH_MAX) sc_error("length too large for string");
2850 }
2851 R_RESULT = make_string_uninit(len);
2852 buf = string_buf(R_RESULT);
2853 for (p = R_ARGS; p != SC_NULL; p = cdr(p)) {
2854 s = car(p);
2855 len = string_len(s);
2856 memcpy(buf, string_buf(s), len);
2857 buf += len;
2858 }
2859 return R_RESULT;
2860}
2861
2862BUILTIN(builtin_list_to_str) {
2863 long len, i;
2864 value s;
2865 uchar *buf;
2866 R_ARGS = final_arg(args);
2867 len = safe_list_length(R_ARGS);
2868 if (len < 0) sc_error("not a list");
2869 s = make_string_uninit(len);
2870 buf = string_buf(s);
2871 for (i = 0; i < len; i++) {
2872 buf[i] = safe_char_val(car(R_ARGS));
2873 R_ARGS = cdr(R_ARGS);
2874 }
2875 return s;
2876}
2877
2878BUILTIN(builtin_str_copy) {
2879 R_EXPR = require_string(final_arg(args));
2880 return string_copy();
2881}
2882
2883BUILTIN(builtin_str_fill) {
2884 value s = require_mutable_string(extract_arg(&args));
2885 memset(string_buf(s), safe_char_val(final_arg(args)), string_len(s));
2886 return SC_NULL;
2887}
2888
2889/* 6.3.6 Vectors */
2890
2891BUILTIN(builtin_is_vector) { return boolean(is_vector(final_arg(args))); }
2892
2893BUILTIN(builtin_make_vector) {
2894 long len = safe_fixnum_val(extract_arg(&args));
2895 R_EXPR = (args == SC_NULL) ? SC_NULL : final_arg(args);
2896 return make_vector(len);
2897}
2898
2899BUILTIN(builtin_vec_length) {
2900 value vec = require_vector(final_arg(args));
2901 return fixnum(vector_len(vec));
2902}
2903
2904BUILTIN(builtin_vec_ref) {
2905 value vec = require_vector(extract_arg(&args));
2906 value k = final_arg(args);
2907 value k_unsigned = safe_fixnum_val(k);
2908 if (k_unsigned >= vector_len(vec)) sc_error1("out of bounds:", k);
2909 /* We don't need to also check for negative k: as value is an unsigned
2910 * type, the assignment from long causes a negative to be seen as a
2911 * positive greater than the longest allowed vector length.
2912 * XXX: are there weird machines where this isn't true? */
2913 return vector_ref(vec, k_unsigned);
2914}
2915
2916BUILTIN(builtin_vec_set) {
2917 value vec = require_mutable_vector(extract_arg(&args));
2918 value k = extract_arg(&args);
2919 value obj = final_arg(args);
2920 value k_unsigned = safe_fixnum_val(k);
2921 if (k_unsigned >= vector_len(vec)) sc_error1("out of bounds:", k);
2922 vector_set(vec, k_unsigned, obj);
2923 return SC_NULL;
2924}
2925
2926BUILTIN(builtin_list_to_vec) {
2927 long len;
2928 value vec, *p;
2929 R_ARGS = final_arg(args);
2930 len = safe_list_length(R_ARGS);
2931 if (len < 0) sc_error("not a list");
2932 vec = make_vector_uninit(len);
2933 p = heap + untag(vec) + 1;
2934 for (; len; --len, ++p, R_ARGS = cdr(R_ARGS)) *p = car(R_ARGS);
2935 return vec;
2936}
2937
2938BUILTIN(builtin_vec_fill) {
2939 value vec = require_mutable_vector(extract_arg(&args));
2940 value fill = final_arg(args);
2941 value len = vector_len(vec), i;
2942 for (i = 0; i < len; i++) vector_set(vec, i, fill);
2943 return SC_NULL;
2944}
2945
2946/* 6.4 Control features */
2947
2948BUILTIN(builtin_is_procedure) { return boolean(is_procedure(final_arg(args))); }
2949
2950BUILTIN(builtin_force) {
2951 R_EXPR = final_arg(args);
2952 r_flag = f_force;
2953 return SC_NULL;
2954}
2955
2956BUILTIN(builtin_call_cc) {
2957 R_PROC = require_procedure(final_arg(args));
2958 R_CAR = current_continuation();
2959 R_CDR = SC_NULL;
2960 R_ARGS = cons();
2961 r_flag = f_apply;
2962 return SC_NULL;
2963}
2964
2965BUILTIN(builtin_values) RETURN_VALUES(args)
2966
2967BUILTIN(builtin_call_with_values) {
2968 R_PROC = extract_arg(&args);
2969 R_ARGS = final_arg(args);
2970 r_flag = f_call_with_values;
2971 return SC_NULL;
2972}
2973
2974/* 6.5 Eval */
2975
2976BUILTIN(builtin_eval) {
2977 R_EXPR = extract_arg(&args);
2978 value e = final_arg(args);
2979 switch (e) {
2980 case SC_NULL_ENV:
2981 R_ENV = SC_NULL; break;
2982 case SC_REPORT_ENV:
2983 R_ENV = r5rs_env; break;
2984 case SC_GSCM_ENV:
2985 R_ENV = gscm_env; break;
2986 case SC_INTERACT_ENV:
2987 R_ENV = interaction_env; break;
2988 case SC_TOPLEVEL_ENV:
2989 R_ENV = toplevel_env; break;
2990 default:
2991 sc_error1("not an environment specifier:", e);
2992 }
2993 r_flag = f_compile;
2994 return SC_NULL;
2995}
2996
2997BUILTIN(builtin_report_env) {
2998 if (safe_fixnum_val(final_arg(args)) != 5)
2999 sc_error("unsupported version");
3000 return SC_REPORT_ENV;
3001}
3002BUILTIN(builtin_null_env) {
3003 if (safe_fixnum_val(final_arg(args)) != 5)
3004 sc_error("unsupported version");
3005 return SC_NULL_ENV;
3006}
3007BUILTIN(builtin_interaction_env) {
3008 no_args(args);
3009 return SC_INTERACT_ENV;
3010}
3011
3012/* 6.6.1 Ports */
3013
3014BUILTIN(builtin_is_port) {
3015 return boolean(is_port(final_arg(args)));
3016}
3017BUILTIN(builtin_is_in_port) {
3018 return boolean(is_input_port(final_arg(args)));
3019}
3020BUILTIN(builtin_is_out_port) {
3021 return boolean(is_output_port(final_arg(args)));
3022}
3023
3024BUILTIN(builtin_current_in_port) { no_args(args); return r_input_port; }
3025BUILTIN(builtin_current_out_port) { no_args(args); return r_output_port; }
3026
3027BUILTIN(builtin_open_in_file) {
3028 int fd;
3029 R_EXPR = require_string(final_arg(args));
3030 fd = open_cloexec(c_string_buf(string_append_null()), O_RDONLY);
3031 if (fd == -1) sc_perror1(R_EXPR);
3032 return make_port(fd, 0, DEFAULT_R_BUF);
3033}
3034
3035BUILTIN(builtin_open_out_file) {
3036 int fd, flags = O_WRONLY | O_CREAT;
3037 value if_exists;
3038 R_EXPR = require_string(extract_arg(&args));
3039 if (args == SC_NULL) if_exists = s_truncate;
3040 else if_exists = final_arg(args);
3041
3042 if (if_exists == s_truncate) flags |= O_TRUNC;
3043 else if (if_exists == s_overwrite) ;
3044 else if (if_exists == s_append) flags |= O_APPEND;
3045 else sc_error("invalid if-exists option");
3046
3047 fd = open_cloexec(c_string_buf(string_append_null()), flags);
3048 if (fd == -1) sc_perror1(R_EXPR);
3049 return make_port(fd, 1, DEFAULT_W_BUF);
3050}
3051
3052BUILTIN(builtin_close_in_port) {
3053 close_port(require_input_port(final_arg(args)));
3054 return SC_NULL;
3055}
3056
3057BUILTIN(builtin_close_out_port) {
3058 close_port(require_output_port(final_arg(args)));
3059 return SC_NULL;
3060}
3061
3062/* 6.6.2 Input */
3063
3064BUILTIN(builtin_read_char) { return read_char(opt_final_in_port_arg(args)); }
3065
3066BUILTIN(builtin_peek_char) { return peek_char(opt_final_in_port_arg(args)); }
3067
3068BUILTIN(builtin_is_eof) { return boolean(final_arg(args) == SC_EOF); }
3069
3070BUILTIN(builtin_is_char_ready) {
3071 return input_port_ready(opt_final_in_port_arg(args));
3072}
3073
3074/* 6.6.3 Output */
3075
3076BUILTIN(builtin_write_char) {
3077 uchar c = safe_char_val(extract_arg(&args));
3078 R_PORT = opt_final_out_port_arg(args);
3079 write_char(c);
3080 return SC_NULL;
3081}
3082
3083/* Gales Scheme extensions */
3084
3085BUILTIN(builtin_gscm_env) { no_args(args); return SC_GSCM_ENV; }
3086
3087BUILTIN(builtin_is_immutable) { return boolean(!is_mutable(final_arg(args))); }
3088
3089BUILTIN(builtin_cons_immutable) {
3090 R_CAR = extract_arg(&args);
3091 R_CDR = final_arg(args);
3092 return cons_immutable();
3093}
3094
3095BUILTIN(builtin_str_copy_immutable) {
3096 R_EXPR = require_string(final_arg(args));
3097 return string_copy_immutable();
3098}
3099
3100BUILTIN(builtin_vec_copy_immutable) {
3101 value len;
3102 R_EXPR = require_vector(final_arg(args));
3103 len = vector_len(R_EXPR);
3104 R_RESULT = make_immutable_vector(len);
3105 memcpy(heap+untag(R_RESULT)+1, heap+untag(R_EXPR)+1, len*sizeof(value));
3106 return R_RESULT;
3107}
3108
3109BUILTIN(builtin_flush_out_port) {
3110 value port = require_output_port(args == SC_NULL ? r_output_port :
3111 extract_arg(&args)), *p = heap+untag(port);
3112 int fd = fixnum_val(p[PORT_FD]);
3113 if (fd == -1) sc_error("output port closed");
3114 flush_if_needed(port);
3115 if (args != SC_NULL) {
3116 value opt = final_arg(args);
3117 if (opt == s_sync) { if (fsync(fd)) goto sync_err; }
3118 else if (opt == s_data_sync) { if (fdatasync(fd)) goto sync_err; }
3119 else sc_error1("invalid option:", opt);
3120 }
3121 return SC_NULL;
3122sync_err:
3123 if (errno == EINVAL) sc_error("synchronization not possible");
3124 else {
3125 /* As in flush_output_port: no good way to recover from output errors,
3126 * but the kernel won't necessarily continue returning errors, so close
3127 * the port. In practice, the mistake of retrying a failed fsync has
3128 * caused data loss in PostgreSQL (broken durability guarantee). */
3129 int saved = errno; set_port_closed(p); errno = saved;
3130 sc_perror();
3131 }
3132}
3133
3134BUILTIN(builtin_gc) {
3135 no_args(args);
3136 sc_gc();
3137 return fixnum(free_ptr);
3138}
3139
3140BUILTIN(builtin_is_fixnum) { return boolean(is_fixnum(final_arg(args))); }
3141
3142BUILTIN(builtin_fx_eq) {
3143 value a = require_fixnum(extract_arg(&args));
3144 return boolean(a == require_fixnum(final_arg(args)));
3145}
3146
3147BUILTIN(builtin_fx_lt) {
3148 long a = safe_fixnum_val(extract_arg(&args));
3149 return boolean(a < safe_fixnum_val(final_arg(args)));
3150}
3151
3152BUILTIN(builtin_fx_le) {
3153 long a = safe_fixnum_val(extract_arg(&args));
3154 return boolean(a <= safe_fixnum_val(final_arg(args)));
3155}
3156
3157BUILTIN(builtin_fx_lt_unsigned) {
3158 value a = require_fixnum(extract_arg(&args));
3159 return boolean(a < require_fixnum(final_arg(args)));
3160}
3161
3162BUILTIN(builtin_fx_le_unsigned) {
3163 value a = require_fixnum(extract_arg(&args));
3164 return boolean(a <= require_fixnum(final_arg(args)));
3165}
3166
3167/* inputs left tagged: valid for wrapping and bitwise ops */
3168#define FXFOLD(op, init) { \
3169 ulong acc = init; \
3170 for (; args != SC_NULL; args = cdr(args)) \
3171 acc = acc op require_fixnum(car(args)); \
3172 return fixnum(acc); \
3173}
3174
3175BUILTIN(builtin_fx_add_wrap) FXFOLD(+, 0)
3176
3177BUILTIN(builtin_fx_add_carry) {
3178 long acc = untag_signed(require_fixnum(extract_arg(&args)));
3179 acc += untag_signed(require_fixnum(extract_arg(&args)));
3180 if (args != SC_NULL) acc += untag_signed(require_fixnum(final_arg(args)));
3181 R_CDR = SC_NULL; R_CAR = fixnum(acc >> VAL_BITS); /* high word (carry) */
3182 R_CDR = cons(); R_CAR = fixnum(acc); /* low word */
3183 RETURN_VALUES(cons());
3184}
3185
3186BUILTIN(builtin_fx_add_carry_unsigned) {
3187 ulong acc = untag(require_fixnum(extract_arg(&args)));
3188 acc += untag(require_fixnum(extract_arg(&args)));
3189 if (args != SC_NULL) acc += untag(require_fixnum(final_arg(args)));
3190 R_CDR = SC_NULL; R_CAR = fixnum(acc >> VAL_BITS); /* high word (carry) */
3191 R_CDR = cons(); R_CAR = fixnum(acc); /* low word */
3192 RETURN_VALUES(cons());
3193}
3194
3195BUILTIN(builtin_fx_sub_wrap) {
3196 ulong acc = require_fixnum(extract_arg(&args));
3197 if (args == SC_NULL) return fixnum(-acc);
3198 do {
3199 acc -= require_fixnum(car(args));
3200 args = cdr(args);
3201 } while (args != SC_NULL);
3202 return fixnum(acc);
3203}
3204
3205BUILTIN(builtin_fx_sub_borrow_unsigned) {
3206 ulong acc = untag(require_fixnum(extract_arg(&args)));
3207 acc -= untag(require_fixnum(extract_arg(&args)));
3208 if (args != SC_NULL) acc -= untag(require_fixnum(final_arg(args)));
3209 R_CDR = SC_NULL; R_CAR = fixnum(-(((long)acc) >> VAL_BITS));
3210 R_CDR = cons(); R_CAR = fixnum(acc);
3211 RETURN_VALUES(cons());
3212}
3213
3214BUILTIN(builtin_fx_mul_wrap) FXFOLD(*, 1)
3215
3216BUILTIN(builtin_fx_mul_carry) {
3217 ulong a = untag_signed(require_fixnum(extract_arg(&args)));
3218 ulong b = untag_signed(require_fixnum(final_arg(args)));
3219 sc_wide_mul_signed(&a, &b);
3220 R_CDR = SC_NULL; R_CAR = fixnum(b << TAG_BITS | tag(a)); /* high word */
3221 R_CDR = cons(); R_CAR = fixnum(a); /* low word */
3222 RETURN_VALUES(cons());
3223}
3224
3225BUILTIN(builtin_fx_mul_carry_unsigned) {
3226 ulong a = untag(require_fixnum(extract_arg(&args)));
3227 ulong b = untag(require_fixnum(final_arg(args)));
3228 sc_wide_mul(&a, &b);
3229 R_CDR = SC_NULL; R_CAR = fixnum(b << TAG_BITS | tag(a)); /* high word */
3230 R_CDR = cons(); R_CAR = fixnum(a); /* low word */
3231 RETURN_VALUES(cons());
3232}
3233
3234BUILTIN(builtin_fxnot) {
3235 return fixnum(~require_fixnum(final_arg(args)));
3236}
3237
3238BUILTIN(builtin_fxand) FXFOLD(&, -1)
3239BUILTIN(builtin_fxior) FXFOLD(|, 0)
3240BUILTIN(builtin_fxxor) FXFOLD(^, 0)
3241
3242BUILTIN(builtin_fxif) {
3243 ulong mask = require_fixnum(extract_arg(&args));
3244 ulong a = require_fixnum(extract_arg(&args));
3245 ulong b = require_fixnum(final_arg(args));
3246 return fixnum(b ^ (mask & (a ^ b)));
3247 /* equivalent to (mask & a) | (~mask & b) */
3248}
3249
3250BUILTIN(builtin_fxmaj) {
3251 ulong a = require_fixnum(extract_arg(&args));
3252 ulong b = require_fixnum(extract_arg(&args));
3253 ulong c = require_fixnum(final_arg(args));
3254 return fixnum((a & (b | c)) | (b & c));
3255 /* equivalent to (a & b) | (a & c) | (b & c) */
3256}
3257
3258BUILTIN(builtin_fxshift) {
3259 long a = untag_signed(require_fixnum(extract_arg(&args)));
3260 long bits = untag_signed(require_fixnum(final_arg(args)));
3261 if (bits < 0) {
3262 if (bits <= -VAL_BITS) bits = -VAL_BITS+1;
3263 a >>= -bits;
3264 }
3265 else {
3266 if (bits >= VAL_BITS) a = 0;
3267 else a <<= bits;
3268 }
3269 return fixnum(a);
3270}
3271
3272BUILTIN(builtin_fxshift_unsigned) {
3273 ulong a = require_fixnum(extract_arg(&args));
3274 long bits = untag_signed(require_fixnum(final_arg(args)));
3275 if (bits < 0) {
3276 if (bits <= -VAL_BITS) a = 0;
3277 else a = untag(a) >> -bits;
3278 }
3279 else {
3280 if (bits >= VAL_BITS) a = 0;
3281 else a <<= bits;
3282 }
3283 return fixnum(a);
3284}
3285
3286BUILTIN(builtin_fxlength_unsigned) {
3287 /* TODO check existing interface alternatives */
3288 return fixnum(sc_bit_length(untag(require_fixnum(final_arg(args)))));
3289}
3290
3291/** (open-subprocess PROGRAM . ARGS) -> (values PID IN-PORT OUT-PORT)
3292 *
3293 * Executes PROGRAM in a Unix subprocess with the given arguments, returning
3294 * its process ID along with input and output ports piped to its standard
3295 * output and input streams respectively. Does not redirect standard error. By
3296 * convention, the first ARG should be the executable filename.
3297 *
3298 * This is intended to be fast and hygienic: it does not invoke the system
3299 * shell, perform a PATH search, pass through environment variables, or leak
3300 * file descriptors associated with ports previously opened in Scheme.
3301 *
3302 * Signals an error if a system-defined limit is reached, per fork(2) (or any
3303 * argument is not a string).
3304 *
3305 * The type of the returned PID is not specified, but must be composed of
3306 * standard types with unambiguous external representation.
3307 *
3308 * See also: wait-subprocess */
3309BUILTIN(builtin_open_subprocess) {
3310 value n_args = 0, i;
3311 char *path, **argv, *envp[] = {NULL};
3312 pid_t pid;
3313 int out_pipe[2], in_pipe[2];
3314
3315 require_args(args);
3316 r1 = args;
3317 /* begin allocation: null-terminated strings and argv */
3318 for (r2 = r1; r2 != SC_NULL; r2 = cdr(r2)) {
3319 R_EXPR = require_string(car(r2));
3320 R_EXPR = string_append_null();
3321 set_car(r2, R_EXPR);
3322 n_args++;
3323 }
3324 n_args--; /* program path not counted as argument */
3325 /* Caution: allocating C blob on the Scheme heap. Must not be reachable
3326 * from the roots, which in turn excludes further allocation while it's in
3327 * use. */
3328 argv = (void*)&heap[sc_malloc(n_args+1)];
3329 /* end allocation */
3330 path = c_string_buf(car(r1));
3331 r1 = cdr(r1); /* program args */
3332 for (i = 0; i < n_args; i++) {
3333 argv[i] = c_string_buf(car(r1));
3334 r1 = cdr(r1);
3335 }
3336 argv[i] = NULL;
3337
3338 if (pipe_cloexec(out_pipe)) goto err1;
3339 if (pipe_cloexec(in_pipe)) goto err2;
3340 /* Use vfork so child creation can be fast, and possible on non-overcommit
3341 * systems, even when parent is large. Any signal handlers must not corrupt
3342 * the parent if invoked in the child. See http://ewontfix.com/7/. */
3343 if ((pid = vfork()) == -1) goto err3;
3344 if (!pid) { /* child */
3345 while (dup2(out_pipe[0], 0) == -1) if (errno != EINTR) _exit(errno);
3346 while (dup2(in_pipe[1], 1) == -1) if (errno != EINTR) _exit(errno);
3347 execve(path, argv, envp);
3348 _exit(errno);
3349 }
3350 blind_close(out_pipe[0]);
3351 blind_close(in_pipe[1]);
3352
3353 /* resume allocation */
3354 R_CDR = SC_NULL; R_CAR = make_port(out_pipe[1], 1, DEFAULT_W_BUF);
3355 R_CDR = cons(); R_CAR = make_port(in_pipe[0], 0, DEFAULT_R_BUF);
3356 R_CDR = cons(); R_CAR = string(fmt_ulong_dec(pid));
3357 /* ^ pid_t can't be guaranteed to fit in a fixnum, so stringify. I can't
3358 * quite decipher POSIX here but it seems safe to assume it fits in a long
3359 * and is positive on success. */
3360 RETURN_VALUES(cons());
3361
3362err3:
3363 blind_close(in_pipe[0]);
3364 blind_close(in_pipe[1]);
3365err2:
3366 blind_close(out_pipe[0]);
3367 blind_close(out_pipe[1]);
3368err1:
3369 sc_perror();
3370}
3371
3372/** (wait-subprocess [PID]) -> STATUS
3373 *
3374 * Blocks until a subprocess has terminated, releases the associated resources,
3375 * and returns either the nonnegative integer exit status for normal exit or
3376 * the negative signal number for termination by signal.
3377 *
3378 * PID identifies the process to wait for; it must compare "equal?" to a PID
3379 * previously returned by open-subprocess for which status has not yet been
3380 * retrieved. If omitted, any subprocess is waited for. */
3381BUILTIN(builtin_wait_subprocess) {
3382 int status;
3383 pid_t pid;
3384 if (args == SC_NULL) pid = -1;
3385 else {
3386 /* dedicated parser for stringified PIDs (see above), yuck */
3387 value s = require_string(final_arg(args));
3388 value len = string_len(s), i;
3389 const uchar *b = string_buf(s);
3390 ulong acc = 0;
3391 if (!len) goto invalid;
3392 for (i = 0; i < len; i++) {
3393 uchar digit = b[i] - '0';
3394 if (digit > 9) goto invalid;
3395 if (acc > ULONG_MAX/10) goto invalid;
3396 acc *= 10;
3397 if (acc + digit < acc) goto invalid;
3398 acc += digit;
3399 }
3400 pid = acc;
3401 if ((ulong)pid != acc || pid < 0) goto invalid;
3402 goto start;
3403invalid:
3404 sc_error1("invalid PID:", s);
3405 }
3406start:
3407 if (waitpid(pid, &status, 0) == -1) {
3408 if (errno == EINTR) goto start;
3409 sc_perror();
3410 }
3411 if (WIFEXITED(status)) return fixnum(WEXITSTATUS(status));
3412 if (WIFSIGNALED(status)) return fixnum(-WTERMSIG(status));
3413 sc_error("unknown status type"); /* shouldn't happen */
3414}
3415
3416BUILTIN(builtin_read_token) {
3417 R_PORT = opt_final_in_port_arg(args);
3418 switch (read_token()) {
3419 case tok_eof: return SC_EOF;
3420 case tok_literal: R_CAR = s_literal; break;
3421 case tok_open_paren: R_CAR = s_open_paren; break;
3422 case tok_close_paren: R_CAR = s_close_paren; break;
3423 case tok_dot: R_CAR = s_dot; break;
3424 case tok_open_vector: R_CAR = s_open_vector; break;
3425 case tok_identifier: R_CAR = s_identifier; break;
3426 case tok_named_char: R_CAR = s_named_char; break;
3427 case tok_abbrev: R_CAR = s_abbrev; break;
3428 case tok_number: R_CAR = s_number; break;
3429 }
3430 R_CDR = R_LEXEME;
3431 return cons();
3432}
3433
3434BUILTIN(builtin_write_string) {
3435 value s = extract_arg(&args);
3436 R_PORT = opt_final_out_port_arg(args);
3437 write_str(require_stringlike(s));
3438 return SC_NULL;
3439}
3440
3441BUILTIN(builtin_write_string_quoted) {
3442 value s = extract_arg(&args);
3443 R_PORT = opt_final_out_port_arg(args);
3444 write_str_quoted(require_stringlike(s));
3445 return SC_NULL;
3446}
3447
3448/* Private builtins exposed to the toplevel and compiler only */
3449
3450#define assert_args(n) (assert(list_length(args) == (n)))
3451
3452/* Debug access to the privileged environment */
3453BUILTIN(builtin_toplevel_env) { no_args(args); return SC_TOPLEVEL_ENV; }
3454
3455/* (define-r5rs symbol obj)
3456 *
3457 * Binds a variable in the otherwise immutable (scheme-report-environment 5)
3458 * as well as the interaction environment. */
3459BUILTIN(builtin_define_r5rs) {
3460 R_CAR = R_VARNAME = require_symbol(extract_arg(&args));
3461 R_CDR = R_EXPR = final_arg(args);
3462 assert(global_frame_lookup(R_CAR, car(r5rs_env)) == SC_FALSE);
3463 assert(global_frame_lookup(R_CAR, car(interaction_env)) == SC_FALSE);
3464 R_ENV = r5rs_env;
3465 extend_global_env();
3466
3467 R_CAR = R_VARNAME;
3468 R_CDR = R_EXPR;
3469 R_ENV = interaction_env;
3470 extend_global_env();
3471 return SC_NULL;
3472}
3473
3474/* (define-gscm symbol obj)
3475 *
3476 * Binds a variable in the otherwise immutable (gales-scheme-environment) as
3477 * well as the interaction environment. */
3478BUILTIN(builtin_define_gscm) {
3479 value binding;
3480 R_VARNAME = require_symbol(extract_arg(&args));
3481 R_EXPR = final_arg(args);
3482 assert(global_frame_lookup(R_VARNAME, car(r5rs_env)) == SC_FALSE);
3483
3484 /* need to be able to upgrade ERROR on startup */
3485 binding = global_frame_lookup(R_VARNAME, car(gscm_env));
3486 if (binding == SC_FALSE) {
3487 R_CAR = R_VARNAME;
3488 R_CDR = R_EXPR;
3489 R_ENV = gscm_env;
3490 extend_global_env();
3491 }
3492 else set_cdr(binding, R_EXPR);
3493
3494 binding = global_frame_lookup(R_VARNAME, car(interaction_env));
3495 if (binding == SC_FALSE) {
3496 R_CAR = R_VARNAME;
3497 R_CDR = R_EXPR;
3498 R_ENV = interaction_env;
3499 extend_global_env();
3500 }
3501 else set_cdr(binding, R_EXPR);
3502 return SC_NULL;
3503}
3504
3505BUILTIN(builtin_set_in_port) {
3506 r_input_port = require_input_port(final_arg(args));
3507 return SC_NULL;
3508}
3509
3510BUILTIN(builtin_set_out_port) {
3511 r_output_port = require_output_port(final_arg(args));
3512 return SC_NULL;
3513}
3514
3515BUILTIN(builtin_push_winding) {
3516 err_context = "dynamic-wind";
3517 R_CAR = args;
3518 require_procedure(extract_arg(&args));
3519 set_cdr(R_CAR, require_procedure(final_arg(args)));
3520 R_CDR = r_spool;
3521 r_spool = cons();
3522 return SC_NULL;
3523}
3524
3525BUILTIN(builtin_variable_ref) {
3526 R_CAR = car(args);
3527 assert(cdr(args) == SC_NULL);
3528 return make_variable_ref();
3529}
3530
3531BUILTIN(builtin_apply_unchecked) {
3532 assert_args(2);
3533 R_PROC = car(args);
3534 R_ARGS = cadr(args);
3535 r_flag = f_apply;
3536 return SC_NULL;
3537}
3538
3539BUILTIN(builtin_car_unchecked) { assert_args(1); return car(car(args)); }
3540BUILTIN(builtin_cdr_unchecked) { assert_args(1); return cdr(car(args)); }
3541BUILTIN(builtin_set_car_unchecked) {
3542 assert_args(2); set_car(car(args), cadr(args)); return SC_NULL;
3543}
3544BUILTIN(builtin_set_cdr_unchecked) {
3545 assert_args(2); set_cdr(car(args), cadr(args)); return SC_NULL;
3546}
3547
3548BUILTIN(builtin_str_ref_unchecked) {
3549 assert_args(2); return character(
3550 string_buf(car(args))[fixnum_val(cadr(args))]);
3551}
3552BUILTIN(builtin_vec_ref_unchecked) {
3553 assert_args(2); return vector_ref(car(args), fixnum_val(cadr(args)));
3554}
3555
3556BUILTIN(builtin_fx_add_unchecked) {
3557 assert_args(2); return fixnum(unsigned_fixnum_val(car(args)) +
3558 unsigned_fixnum_val(cadr(args)));
3559}
3560BUILTIN(builtin_fx_sub_unchecked) {
3561 assert_args(2); return fixnum(unsigned_fixnum_val(car(args)) -
3562 unsigned_fixnum_val(cadr(args)));
3563}
3564BUILTIN(builtin_fx_eq_unchecked) {
3565 assert_args(2); assert(is_fixnum(car(args)) && is_fixnum(cadr(args)));
3566 return boolean(car(args) == cadr(args));
3567}
3568BUILTIN(builtin_fx_lt_unchecked) {
3569 assert_args(2); return boolean(fixnum_val(car(args)) <
3570 fixnum_val(cadr(args)));
3571}
3572BUILTIN(builtin_fx_le_unchecked) {
3573 assert_args(2); return boolean(fixnum_val(car(args)) <=
3574 fixnum_val(cadr(args)));
3575}
3576BUILTIN(builtin_fx_neg_unchecked) {
3577 assert_args(1); return fixnum(-fixnum_val(car(args)));
3578}
3579BUILTIN(builtin_is_fx_neg_unchecked) {
3580 assert_args(1); return boolean(fixnum_val(car(args)) < 0);
3581}
3582
3583BUILTIN(builtin_fx_div_unsigned_unchecked) {
3584 /* unsigned as / and % are implementation-defined on negatives */
3585 ulong a, b, q;
3586 assert_args(2);
3587 a = unsigned_fixnum_val(car(args));
3588 b = unsigned_fixnum_val(cadr(args));
3589 assert(b != 0);
3590 /* the compiler had better recognize this as one division... */
3591 q = a/b;
3592 a = a%b;
3593 R_CDR = SC_NULL; R_CAR = fixnum(a);
3594 R_CDR = cons(); R_CAR = fixnum(q);
3595 RETURN_VALUES(cons());
3596}
3597
3598BUILTIN(builtin_fx_div_ext_unsigned_unchecked) {
3599 /* unsigned as / and % are implementation-defined on negatives */
3600 ulong a_lo, a_hi, b;
3601 assert_args(3);
3602 a_lo = unsigned_fixnum_val(car(args)); args = cdr(args);
3603 a_hi = unsigned_fixnum_val(car(args)); args = cdr(args);
3604 b = unsigned_fixnum_val(car(args));
3605 assert(b > a_hi); /* so quotient fits in fixnum */
3606 a_lo |= a_hi << VAL_BITS;
3607 a_hi >>= TAG_BITS;
3608 sc_div_extended(&a_lo, &a_hi, b);
3609 R_CDR = SC_NULL; R_CAR = fixnum(a_lo); /* remainder */
3610 R_CDR = cons(); R_CAR = fixnum(a_hi); /* quotient */
3611 RETURN_VALUES(cons());
3612}
3613
3614BUILTIN(builtin_fixnum_to_dec_unchecked) {
3615 assert_args(1); return string(fmt_fixnum_dec(fixnum_val(car(args))));
3616}
3617BUILTIN(builtin_fixnum_to_hex_unchecked) {
3618 assert_args(1); return string(fmt_fixnum_hex(fixnum_val(car(args))));
3619}
3620BUILTIN(builtin_fixnum_to_oct_unchecked) {
3621 assert_args(1); return string(fmt_fixnum_oct(fixnum_val(car(args))));
3622}
3623BUILTIN(builtin_fixnum_to_bin_unchecked) {
3624 assert_args(1); return string(fmt_fixnum_bin(fixnum_val(car(args))));
3625}
3626BUILTIN(builtin_fixnum_to_bin_unsigned_unchecked) {
3627 assert_args(1);
3628 return string(fmt_ulong_bin(unsigned_fixnum_val(car(args))));
3629}
3630BUILTIN(builtin_flonum_to_dec_unchecked) {
3631 assert_args(1); return string(fmt_flonum_dec(flonum_val(car(args))));
3632}
3633
3634/* Minimal error builtin to be replaced on startup, e.g. in case of compile
3635 * errors in the toplevel */
3636BUILTIN(builtin_error) {
3637 value msg = require_string(extract_arg(&args));
3638 R_PORT = stdout_port;
3639 write_cstr("ERROR [startup]: ");
3640 write_str(msg);
3641 if (args != SC_NULL) {
3642 write_char(' ');
3643 R_EXPR = car(args);
3644 shallow_print();
3645 }
3646 newline();
3647 sc_exit(1);
3648}
3649
3650BUILTIN(builtin_set_err_cont) {
3651 value h = final_arg(args);
3652 if (!is_continuation(h)) sc_error("not a continuation");
3653 r_error_cont = h;
3654 return SC_NULL;
3655}
3656
3657BUILTIN(builtin_socket_ports) {
3658 make_socket_ports(safe_fixnum_val(final_arg(args)),
3659 DEFAULT_R_BUF, DEFAULT_W_BUF);
3660 R_CDR = SC_NULL; R_CAR = r1;
3661 R_CDR = cons(); R_CAR = r0;
3662 RETURN_VALUES(cons());
3663}
3664
3665static union {
3666 struct sockaddr sa;
3667 struct sockaddr_in sin;
3668 struct sockaddr_un sun;
3669} sa;
3670
3671static socklen_t sa_len;
3672
3673/* Fill sa/sa_len from a Scheme IPv4 address structure */
3674static void build_sockaddr_in(value addr) {
3675 value ip = require_vector(safe_car(addr)),
3676 port = safe_fixnum_val(safe_car(cdr(addr))), i, byte;
3677 uchar *port_buf = (uchar *)&sa.sin.sin_port,
3678 *addr_buf = (uchar *)&sa.sin.sin_addr;
3679 if (port > 65535) sc_error1("port number out of range:", car(cdr(addr)));
3680 memset(&sa.sin, 0, sizeof sa.sin);
3681 sa.sin.sin_family = AF_INET;
3682 port_buf[0] = port >> 8;
3683 port_buf[1] = port & 0xFF;
3684 if (vector_len(ip) != 4) sc_error("bad address length");
3685 for (i = 0; i < 4; ++i) {
3686 byte = safe_fixnum_val(vector_ref(ip, i));
3687 if (byte > 255)
3688 sc_error1("address byte out of range:", vector_ref(ip, i));
3689 addr_buf[i] = byte;
3690 }
3691 sa_len = sizeof sa.sin;
3692}
3693
3694/* Fill sa/sa_len from a Scheme Unix-domain address structure (string) */
3695static void build_sockaddr_un(value addr) {
3696 value path = require_string(addr), len = string_len(path), i;
3697 uchar *buf = string_buf(path);
3698 if (len > sizeof sa.sun.sun_path) sc_error("oversize pathname");
3699 /* initial NUL allowed for Linux abstract sockets */
3700 if (len && buf[0])
3701 for (i = 1; i < len; i++)
3702 if (!buf[i]) sc_error("NUL byte in pathname");
3703 memset(&sa.sun, 0, sizeof sa.sun);
3704 sa.sun.sun_family = AF_UNIX;
3705 memcpy(&sa.sun.sun_path, string_buf(path), len);
3706 sa_len = offsetof(struct sockaddr_un, sun_path) + len;
3707}
3708
3709/* Construct immutable Scheme address structure from a struct sockaddr_* in
3710 * sa/sa_len. Side effects: R_CAR R_CDR */
3711static value parse_sockaddr(void) {
3712 if (sa.sa.sa_family == AF_INET) {
3713 int i;
3714 uchar *port_buf = (uchar *)&sa.sin.sin_port,
3715 *addr_buf = (uchar *)&sa.sin.sin_addr;
3716 R_CDR = SC_NULL;
3717 R_CAR = fixnum((port_buf[0] << 8) + port_buf[1]);
3718 R_CDR = cons_immutable();
3719 R_CAR = make_immutable_vector(4);
3720 for (i = 0; i < 4; ++i) vector_set(R_CAR, i, fixnum(addr_buf[i]));
3721 return cons_immutable();
3722 }
3723 else if (sa.sa.sa_family == AF_UNIX) {
3724 value path, path_len;
3725 if (sa_len > sizeof sa.sun) sc_error("oversize pathname?!");
3726 /* XXX Linuxism; the data returned for unnamed sockets is unspecified
3727 * in the standards */
3728 if (sa_len == sizeof(sa_family_t)) return SC_FALSE;
3729 /* Possible somewhere? */
3730 if (sa_len <= offsetof(struct sockaddr_un, sun_path)) return SC_FALSE;
3731 path_len = sa_len - offsetof(struct sockaddr_un, sun_path);
3732 /* Some implementations are so rude as to append a trailing NUL and
3733 * include it in the length. But a singular NUL is a valid abstract
3734 * socket name on Linux. */
3735 if (path_len > 1 && sa.sun.sun_path[0] && !sa.sun.sun_path[path_len-1])
3736 --path_len;
3737 path = make_immutable_string(path_len);
3738 memcpy(string_buf(path), sa.sun.sun_path, path_len);
3739 return path;
3740 }
3741 sc_error("unknown address family");
3742}
3743
3744static value unbound_socket(int domain, int type) {
3745 return fixnum(chkp(socket(domain, type, 0)));
3746}
3747
3748static value bound_socket(int domain, int type, int reuse) {
3749 int fd = chkp(socket(domain, type, 0));
3750 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof reuse) ||
3751 bind(fd, &sa.sa, sa_len)) {
3752 blind_close(fd);
3753 sc_perror();
3754 }
3755 return fixnum(fd);
3756}
3757
3758BUILTIN(builtin_inet_stream_sock) {
3759 if (args != SC_NULL) {
3760 build_sockaddr_in(final_arg(args));
3761 return bound_socket(AF_INET, SOCK_STREAM, 0);
3762 }
3763 return unbound_socket(AF_INET, SOCK_STREAM);
3764}
3765
3766BUILTIN(builtin_inet_dgram_sock) {
3767 if (args != SC_NULL) {
3768 build_sockaddr_in(final_arg(args));
3769 return bound_socket(AF_INET, SOCK_DGRAM, 0);
3770 }
3771 return unbound_socket(AF_INET, SOCK_DGRAM);
3772}
3773
3774BUILTIN(builtin_unix_stream_sock) {
3775 if (args != SC_NULL) {
3776 build_sockaddr_un(final_arg(args));
3777 return bound_socket(AF_UNIX, SOCK_STREAM, 0);
3778 }
3779 return unbound_socket(AF_UNIX, SOCK_STREAM);
3780}
3781
3782BUILTIN(builtin_unix_dgram_sock) {
3783 if (args != SC_NULL) {
3784 build_sockaddr_un(final_arg(args));
3785 return bound_socket(AF_UNIX, SOCK_DGRAM, 0);
3786 }
3787 return unbound_socket(AF_UNIX, SOCK_DGRAM);
3788}
3789
3790BUILTIN(builtin_getsockname) {
3791 uint fd = safe_fixnum_val(final_arg(args));
3792 sa_len = sizeof sa;
3793 chkp(getsockname(fd, &sa.sa, &sa_len));
3794 return parse_sockaddr();
3795}
3796
3797BUILTIN(builtin_getpeername) {
3798 uint fd = safe_fixnum_val(final_arg(args));
3799 sa_len = sizeof sa;
3800 chkp(getpeername(fd, &sa.sa, &sa_len));
3801 return parse_sockaddr();
3802}
3803
3804BUILTIN(builtin_connect_inet) {
3805 uint fd = safe_fixnum_val(extract_arg(&args));
3806 build_sockaddr_in(final_arg(args));
3807 chkp(connect(fd, &sa.sa, sa_len));
3808 return SC_NULL;
3809}
3810
3811BUILTIN(builtin_connect_unix) {
3812 uint fd = safe_fixnum_val(extract_arg(&args));
3813 build_sockaddr_un(final_arg(args));
3814 chkp(connect(fd, &sa.sa, sa_len));
3815 return SC_NULL;
3816}
3817
3818BUILTIN(builtin_listen) {
3819 uint fd = safe_fixnum_val(extract_arg(&args));
3820 long backlog = safe_fixnum_val(final_arg(args));
3821 if (backlog < 0) sc_error("negative backlog");
3822 if (backlog > INT_MAX) backlog = INT_MAX;
3823 chkp(listen(fd, backlog));
3824 return SC_NULL;
3825}
3826
3827BUILTIN(builtin_accept) {
3828 uint fd = safe_fixnum_val(final_arg(args));
3829 return fixnum(chkp(accept(fd, 0, 0)));
3830}
3831
3832BUILTIN(builtin_close) {
3833 chkp(close(safe_fixnum_val(final_arg(args))));
3834 return SC_NULL;
3835}
3836
3837BUILTIN(builtin_is_flonum) { return boolean(is_flonum(final_arg(args))); }
3838
3839/* NB: "if the value being converted is in the range of values that can be
3840 * represented but cannot be represented exactly, the result is either the
3841 * nearest higher or nearest lower value, chosen in an implementation-defined
3842 * manner." -C89 */
3843BUILTIN(builtin_flonum_unchecked) {
3844 assert_args(1); return flonum(fixnum_val(car(args)));
3845}
3846BUILTIN(builtin_flonum_unsigned_unchecked) {
3847 assert_args(1); return flonum(unsigned_fixnum_val(car(args)));
3848}
3849
3850BUILTIN(builtin_flo_eq_unchecked) {
3851 assert_args(2);
3852 return boolean(flonum_val(car(args)) == flonum_val(cadr(args)));
3853}
3854BUILTIN(builtin_flo_lt_unchecked) {
3855 assert_args(2);
3856 return boolean(flonum_val(car(args)) < flonum_val(cadr(args)));
3857}
3858BUILTIN(builtin_flo_le_unchecked) {
3859 assert_args(2);
3860 return boolean(flonum_val(car(args)) <= flonum_val(cadr(args)));
3861}
3862BUILTIN(builtin_flo_neg_unchecked) {
3863 assert_args(1); return flonum(-flonum_val(car(args)));
3864}
3865BUILTIN(builtin_is_flo_neg_unchecked) {
3866 assert_args(1); return boolean(flonum_val(car(args)) < 0);
3867}
3868
3869#define FLONUM_OP2(op) { \
3870 assert_args(2); \
3871 return flonum(flonum_val(car(args)) op flonum_val(cadr(args))); \
3872}
3873
3874BUILTIN(builtin_flo_add_unchecked) FLONUM_OP2(+)
3875BUILTIN(builtin_flo_sub_unchecked) FLONUM_OP2(-)
3876BUILTIN(builtin_flo_mul_unchecked) FLONUM_OP2(*)
3877BUILTIN(builtin_flo_div_unchecked) FLONUM_OP2(/)
3878
3879BUILTIN(builtin_flo_quotient_unchecked) {
3880 assert_args(2);
3881 return flonum(trunc(flonum_val(car(args)) / flonum_val(cadr(args))));
3882}
3883
3884BUILTIN(builtin_flo_remainder_unchecked) {
3885 double a, b;
3886 assert_args(2);
3887 a = flonum_val(car(args));
3888 b = flonum_val(cadr(args));
3889 return flonum(a < 0 ? -fmod(-a, fabs(b)) : fmod(a, fabs(b)));
3890}
3891
3892BUILTIN(builtin_frac_exp_unchecked) {
3893 int e;
3894 double frac;
3895 assert_args(1);
3896 frac = frexp(flonum_val(car(args)), &e);
3897 R_CDR = SC_NULL; R_CAR = fixnum(e);
3898 R_CDR = cons(); R_CAR = flonum(frac);
3899 RETURN_VALUES(cons());
3900}
3901
3902BUILTIN(builtin_load_exp_unchecked) {
3903 assert_args(2);
3904 return flonum(ldexp(flonum_val(car(args)), fixnum_val(cadr(args))));
3905}
3906
3907BUILTIN(builtin_is_inf_unchecked) {
3908 double d;
3909 assert_args(1);
3910 d = flonum_val(car(args));
3911 return boolean(d == HUGE_VAL || d == -HUGE_VAL);
3912}
3913
3914BUILTIN(builtin_flo_to_fix_unchecked) {
3915 double d;
3916 assert_args(1);
3917 d = flonum_val(car(args));
3918 assert(fabs(d) <= (double)(1L << VAL_BITS));
3919 /* ^ Catches overflow of double to long conversion, which is UB, though
3920 * not of long to fixnum (how tight the check can be made is not yet clear
3921 * to me.) */
3922 return fixnum(d);
3923}
3924
3925#define MATH_FUNC(f) { \
3926 assert_args(1); return flonum(f(flonum_val(car(args)))); \
3927}
3928BUILTIN(builtin_floor) MATH_FUNC(floor)
3929BUILTIN(builtin_ceiling) MATH_FUNC(ceil)
3930BUILTIN(builtin_truncate) MATH_FUNC(trunc)
3931BUILTIN(builtin_round) MATH_FUNC(nearbyint)
3932BUILTIN(builtin_exp) MATH_FUNC(exp)
3933BUILTIN(builtin_log) MATH_FUNC(log)
3934BUILTIN(builtin_sin) MATH_FUNC(sin)
3935BUILTIN(builtin_cos) MATH_FUNC(cos)
3936BUILTIN(builtin_tan) MATH_FUNC(tan)
3937BUILTIN(builtin_asin) MATH_FUNC(asin)
3938BUILTIN(builtin_acos) MATH_FUNC(acos)
3939BUILTIN(builtin_atan) MATH_FUNC(atan)
3940BUILTIN(builtin_atan2) {
3941 assert_args(2);
3942 return flonum(atan2(flonum_val(car(args)), flonum_val(cadr(args))));
3943}
3944BUILTIN(builtin_sqrt) MATH_FUNC(sqrt)
3945
3946BUILTIN(builtin_rev_list_to_vec_unchecked) {
3947 assert_args(1);
3948 R_EXPR = car(args);
3949 return rev_list_to_vec();
3950}
3951
3952BUILTIN(builtin_is_builtin) {
3953 return boolean(is_builtin(final_arg(args)));
3954}
3955BUILTIN(builtin_builtin_name) {
3956 value b = final_arg(args);
3957 if (!is_builtin(b)) sc_error("not a builtin");
3958 return string(builtin_name(b));
3959}
3960BUILTIN(builtin_is_promise) {
3961 return boolean(is_promise(final_arg(args)));
3962}
3963BUILTIN(builtin_is_continuation) {
3964 return boolean(is_continuation(final_arg(args)));
3965}
3966
3967BUILTIN(builtin_make_bignum) {
3968 assert_args(1);
3969 /* Returning uninitialized is safe for the garbage collector: bignums are
3970 * not scanned internally, though the words do keep their fixnum tags. Of
3971 * course, used memory is still being exposed; the privileged bignum
3972 * library is responsible for fully initializing or truncating. */
3973 return make_bignum_uninit(fixnum_val(car(args)), 0);
3974}
3975BUILTIN(builtin_is_bignum) {
3976 assert_args(1); return boolean(is_bignum(car(args)));
3977}
3978BUILTIN(builtin_is_bignum_negative) {
3979 assert_args(1); return boolean(is_bignum_negative(car(args)));
3980}
3981BUILTIN(builtin_bignum_set_negative) {
3982 assert_args(1); return bignum_set_negative(car(args));
3983}
3984BUILTIN(builtin_bignum_ref) {
3985 assert_args(2); return bignum_ref(car(args), fixnum_val(cadr(args)));
3986}
3987BUILTIN(builtin_bignum_set) {
3988 value bn;
3989 assert_args(3);
3990 bn = car(args); args = cdr(args);
3991 bignum_set(bn, fixnum_val(car(args)), cadr(args));
3992 return SC_NULL;
3993}
3994BUILTIN(builtin_bignum_length) {
3995 assert_args(1); return fixnum(bignum_len(car(args)));
3996}
3997BUILTIN(builtin_bignum_truncate) {
3998 assert_args(2); return bignum_truncate(car(args), fixnum_val(cadr(args)));
3999}
4000
4001/* Construct bignum from signed fixnum, not demoting. */
4002BUILTIN(builtin_bignum) {
4003 value bn, word, word_sign_bit, word_sign_ext;
4004 assert_args(1);
4005 /* branch-free conversion from two's complement to sign-magnitude */
4006 word = fixnum_val(car(args));
4007 word_sign_bit = word >> ((8*sizeof word)-1);
4008 word_sign_ext = ((long)word) >> ((8*sizeof word)-1);
4009 word = (word ^ word_sign_ext) + word_sign_bit;
4010 bn = make_bignum_uninit(1, word_sign_bit);
4011 bignum_set(bn, 0, fixnum(word));
4012 return bn;
4013}
4014
4015/* Construct bignum from unsigned fixnum, not demoting. */
4016BUILTIN(builtin_bignum_unsigned) {
4017 value bn, word;
4018 assert_args(1);
4019 word = car(args);
4020 bn = make_bignum_uninit(1, 0);
4021 bignum_set(bn, 0, word);
4022 return bn;
4023}
4024
4025/* Construct bignum from 2-word signed quantity, normalizing and demoting to
4026 * fixnum when possible. */
4027BUILTIN(builtin_bignum2) {
4028 value bn;
4029 long lo, hi;
4030 int neg = 0;
4031 assert_args(2);
4032 lo = fixnum_val(car(args));
4033 hi = fixnum_val(cadr(args));
4034 /* in signed fixnum range if high word is sign extension of low */
4035 if (lo >> (VAL_BITS - 1) == hi) return fixnum(lo);
4036 if (hi < 0) {
4037 /* convert from two's complement to sign-magnitude */
4038 neg = 1;
4039 /* capture carry bit in the tag by setting it to all ones prior to
4040 * complement */
4041 lo = -(lo | (-1L << VAL_BITS));
4042 hi = ~(ulong)hi + (((ulong)lo) >> VAL_BITS);
4043 }
4044 if (hi == 0) {
4045 /* need to drop high word to normalize */
4046 bn = make_bignum_uninit(1, neg);
4047 bignum_set(bn, 0, fixnum(lo));
4048 }
4049 else {
4050 /* both words significant */
4051 bn = make_bignum_uninit(2, neg);
4052 bignum_set(bn, 0, fixnum(lo));
4053 bignum_set(bn, 1, fixnum(hi));
4054 }
4055 return bn;
4056}
4057
4058
4059/****************
4060 * Initialization
4061 */
4062
4063/* Construct a builtin and define it in the top frame of R_ENV.
4064 * Side effects: R_CAR R_CDR */
4065static void add_builtin(const char *name, builtin_func_t func) {
4066 R_CAR = symbol(name);
4067 R_CDR = builtin(name, func);
4068 assert(global_frame_lookup(R_CAR, car(R_ENV)) == SC_FALSE);
4069 extend_global_env();
4070}
4071
4072/* Define a variable in the top frame of R_ENV.
4073 * Side effects: R_EXPR R_CAR R_CDR */
4074static void add_variable(const char *name, value val) {
4075 R_EXPR = val;
4076 R_CAR = symbol(name);
4077 R_CDR = R_EXPR;
4078 assert(global_frame_lookup(R_CAR, car(R_ENV)) == SC_FALSE);
4079 extend_global_env();
4080}
4081
4082/* Side effects: R_RESULT */
4083static value open_lib_file(const char *filename) {
4084 int fd = open_cloexec(filename, O_RDONLY);
4085 if (fd == -1) fatal1(filename, strerror(errno));
4086 return make_port(fd, 0, DEFAULT_R_BUF);
4087}
4088
4089uint sc_hugepages;
4090
4091void sc_init(value heap_alloc) {
4092 int mflags;
4093 assert(sizeof(value) == __SIZEOF_POINTER__);
4094 assert(sizeof(value) == sizeof(ulong));
4095
4096 mflags = MAP_PRIVATE | MAP_ANON;
4097 if (sc_hugepages) {
4098#ifdef MAP_HUGETLB
4099 mflags |= MAP_HUGETLB;
4100#else
4101 fatal("huge pages not supported");
4102#endif
4103 }
4104 heap = mmap(NULL, heap_alloc, PROT_READ | PROT_WRITE, mflags, -1, 0);
4105 if (heap == MAP_FAILED) fatal1("failed to map heap", strerror(errno));
4106 heap_size = heap_alloc / sizeof(value) / 2;
4107 new_heap = heap + heap_size;
4108
4109 gc_root(&r0);
4110 gc_root(&r1);
4111 gc_root(&r2);
4112 gc_root(&r3);
4113 gc_root(&r4);
4114 gc_root(&r5);
4115 gc_root(&r6);
4116 gc_root(&r_stack);
4117 gc_root(&r_spool);
4118 gc_root(&r_error_cont);
4119 gc_root(&r_signal_handler);
4120 gc_root(&r_compiler);
4121 gc_root(&r_compiler_expr);
4122 gc_root(&r_input_port);
4123 gc_root(&r_output_port);
4124 gc_root(&r_dump);
4125 gc_root(&stdin_port);
4126 gc_root(&stdout_port);
4127 gc_root(&symbols);
4128 gc_root(&s_lambda);
4129 gc_root(&s_quote);
4130 gc_root(&s_quasiquote);
4131 gc_root(&s_unquote);
4132 gc_root(&s_unquote_splicing);
4133 gc_root(&s_if);
4134 gc_root(&s_set);
4135 gc_root(&s_begin);
4136 gc_root(&s_letrec);
4137 gc_root(&s_define);
4138 gc_root(&s_delay);
4139 gc_root(&s_literal);
4140 gc_root(&s_open_paren);
4141 gc_root(&s_close_paren);
4142 gc_root(&s_dot);
4143 gc_root(&s_open_vector);
4144 gc_root(&s_identifier);
4145 gc_root(&s_named_char);
4146 gc_root(&s_abbrev);
4147 gc_root(&s_number);
4148 gc_root(&s_truncate);
4149 gc_root(&s_overwrite);
4150 gc_root(&s_append);
4151 gc_root(&s_sync);
4152 gc_root(&s_data_sync);
4153 gc_root(&r5rs_env);
4154 gc_root(&gscm_env);
4155 gc_root(&interaction_env);
4156 gc_root(&toplevel_env);
4157
4158 r_input_port = stdin_port = make_port(0, 0, DEFAULT_R_BUF);
4159 r_output_port = stdout_port = make_port(1, 1, DEFAULT_W_BUF);
4160 stdout_ready = 1;
4161 fixnum_zero = fixnum(0);
4162 fixnum_one = fixnum(1);
4163
4164 s_lambda = symbol("lambda");
4165 s_quote = symbol("quote");
4166 s_quasiquote = symbol("quasiquote");
4167 s_unquote = symbol("unquote");
4168 s_unquote_splicing = symbol("unquote-splicing");
4169 s_if = symbol("if");
4170 s_set = symbol("set!");
4171 s_begin = symbol("begin");
4172 s_letrec = symbol("letrec");
4173 s_define = symbol("define");
4174 s_delay = symbol("delay");
4175 s_literal = symbol("literal");
4176 s_open_paren = symbol("open-paren");
4177 s_close_paren = symbol("close-paren");
4178 s_dot = symbol("dot");
4179 s_open_vector = symbol("open-vector");
4180 s_identifier = symbol("identifier");
4181 s_named_char = symbol("named-char");
4182 s_abbrev = symbol("abbrev");
4183 s_number = symbol("number");
4184 s_truncate = symbol("truncate");
4185 s_overwrite = symbol("overwrite");
4186 s_append = symbol("append");
4187 s_sync = symbol("sync");
4188 s_data_sync = symbol("data-sync");
4189
4190 R_CAR = R_CDR = SC_NULL;
4191 R_ENV = r5rs_env = cons();
4192 add_builtin("eq?", builtin_is_eq);
4193 add_builtin("number?", builtin_is_number);
4194 add_builtin("complex?", builtin_is_number);
4195 add_builtin("real?", builtin_is_number);
4196 add_builtin("rational?", builtin_is_number);
4197 add_builtin("integer?", builtin_is_integer);
4198 add_builtin("exact?", builtin_is_exact);
4199 add_builtin("inexact?", builtin_is_inexact);
4200 add_builtin("not", builtin_not);
4201 add_builtin("boolean?", builtin_is_boolean);
4202 add_builtin("pair?", builtin_is_pair);
4203 add_builtin("cons", builtin_cons);
4204 add_builtin("car", builtin_car);
4205 add_builtin("cdr", builtin_cdr);
4206 add_builtin("caar", builtin_caar);
4207 add_builtin("cadr", builtin_cadr);
4208 add_builtin("cdar", builtin_cdar);
4209 add_builtin("cddr", builtin_cddr);
4210 add_builtin("caaar", builtin_caaar);
4211 add_builtin("caadr", builtin_caadr);
4212 add_builtin("cadar", builtin_cadar);
4213 add_builtin("caddr", builtin_caddr);
4214 add_builtin("cdaar", builtin_cdaar);
4215 add_builtin("cdadr", builtin_cdadr);
4216 add_builtin("cddar", builtin_cddar);
4217 add_builtin("cdddr", builtin_cdddr);
4218 add_builtin("caaaar", builtin_caaaar);
4219 add_builtin("caaadr", builtin_caaadr);
4220 add_builtin("caadar", builtin_caadar);
4221 add_builtin("caaddr", builtin_caaddr);
4222 add_builtin("cadaar", builtin_cadaar);
4223 add_builtin("cadadr", builtin_cadadr);
4224 add_builtin("caddar", builtin_caddar);
4225 add_builtin("cadddr", builtin_cadddr);
4226 add_builtin("cdaaar", builtin_cdaaar);
4227 add_builtin("cdaadr", builtin_cdaadr);
4228 add_builtin("cdadar", builtin_cdadar);
4229 add_builtin("cdaddr", builtin_cdaddr);
4230 add_builtin("cddaar", builtin_cddaar);
4231 add_builtin("cddadr", builtin_cddadr);
4232 add_builtin("cdddar", builtin_cdddar);
4233 add_builtin("cddddr", builtin_cddddr);
4234 add_builtin("set-car!", builtin_set_car);
4235 add_builtin("set-cdr!", builtin_set_cdr);
4236 add_builtin("null?", builtin_is_null);
4237 add_builtin("list?", builtin_is_list);
4238 add_builtin("length", builtin_length);
4239 add_builtin("symbol?", builtin_is_symbol);
4240 add_builtin("symbol->string", builtin_sym_to_str);
4241 add_builtin("string->symbol", builtin_str_to_sym);
4242 add_builtin("char?", builtin_is_char);
4243 add_builtin("char=?", builtin_char_eq);
4244 add_builtin("char<?", builtin_char_lt);
4245 add_builtin("char>?", builtin_char_gt);
4246 add_builtin("char<=?", builtin_char_le);
4247 add_builtin("char>=?", builtin_char_ge);
4248 add_builtin("char-ci=?", builtin_char_ci_eq);
4249 add_builtin("char-ci<?", builtin_char_ci_lt);
4250 add_builtin("char-ci>?", builtin_char_ci_gt);
4251 add_builtin("char-ci<=?", builtin_char_ci_le);
4252 add_builtin("char-ci>=?", builtin_char_ci_ge);
4253 add_builtin("char-alphabetic?", builtin_char_is_alpha);
4254 add_builtin("char-numeric?", builtin_char_is_num);
4255 add_builtin("char-whitespace?", builtin_char_is_white);
4256 add_builtin("char-upper-case?", builtin_char_is_upper);
4257 add_builtin("char-lower-case?", builtin_char_is_lower);
4258 add_builtin("char->integer", builtin_char_to_int);
4259 add_builtin("integer->char", builtin_int_to_char);
4260 add_builtin("char-upcase", builtin_char_upcase);
4261 add_builtin("char-downcase", builtin_char_downcase);
4262 add_builtin("string?", builtin_is_str);
4263 add_builtin("make-string", builtin_make_str);
4264 add_builtin("string-length",builtin_str_length);
4265 add_builtin("string-ref", builtin_str_ref);
4266 add_builtin("string-set!", builtin_str_set);
4267 add_builtin("string=?", builtin_str_eq);
4268 add_builtin("string<?", builtin_str_lt);
4269 add_builtin("string>?", builtin_str_gt);
4270 add_builtin("string<=?", builtin_str_le);
4271 add_builtin("string>=?", builtin_str_ge);
4272 add_builtin("string-ci=?", builtin_str_ci_eq);
4273 add_builtin("string-ci<?", builtin_str_ci_lt);
4274 add_builtin("string-ci>?", builtin_str_ci_gt);
4275 add_builtin("string-ci<=?", builtin_str_ci_le);
4276 add_builtin("string-ci>=?", builtin_str_ci_ge);
4277 add_builtin("substring", builtin_substr);
4278 add_builtin("string-append",builtin_str_append);
4279 add_builtin("list->string", builtin_list_to_str);
4280 add_builtin("string-copy", builtin_str_copy);
4281 add_builtin("string-fill!", builtin_str_fill);
4282 add_builtin("vector?", builtin_is_vector);
4283 add_builtin("make-vector", builtin_make_vector);
4284 add_builtin("vector-length",builtin_vec_length);
4285 add_builtin("vector-ref", builtin_vec_ref);
4286 add_builtin("vector-set!", builtin_vec_set);
4287 add_builtin("list->vector", builtin_list_to_vec);
4288 add_builtin("vector-fill!", builtin_vec_fill);
4289 add_builtin("procedure?", builtin_is_procedure);
4290 add_builtin("force", builtin_force);
4291 add_builtin("call-with-current-continuation", builtin_call_cc);
4292 add_builtin("call/cc", builtin_call_cc);
4293 add_builtin("values", builtin_values);
4294 add_builtin("call-with-values", builtin_call_with_values);
4295 add_builtin("eval", builtin_eval);
4296 add_builtin("scheme-report-environment", builtin_report_env);
4297 add_builtin("null-environment", builtin_null_env);
4298 add_builtin("interaction-environment", builtin_interaction_env);
4299 add_builtin("port?", builtin_is_port);
4300 add_builtin("input-port?", builtin_is_in_port);
4301 add_builtin("output-port?", builtin_is_out_port);
4302 add_builtin("current-input-port", builtin_current_in_port);
4303 add_builtin("current-output-port", builtin_current_out_port);
4304 add_builtin("open-input-file", builtin_open_in_file);
4305 add_builtin("open-output-file", builtin_open_out_file);
4306 add_builtin("close-input-port", builtin_close_in_port);
4307 add_builtin("close-output-port", builtin_close_out_port);
4308 add_builtin("read-char", builtin_read_char);
4309 add_builtin("peek-char", builtin_peek_char);
4310 add_builtin("eof-object?", builtin_is_eof);
4311 add_builtin("char-ready?", builtin_is_char_ready);
4312 add_builtin("write-char", builtin_write_char);
4313
4314 /* Immutable environment for extensions */
4315 R_CAR = SC_NULL; R_CDR = r5rs_env;
4316 R_ENV = gscm_env = cons();
4317 add_builtin("gales-scheme-environment", builtin_gscm_env);
4318 add_builtin("immutable?", builtin_is_immutable);
4319 add_builtin("cons/immutable", builtin_cons_immutable);
4320 add_builtin("string-copy/immutable", builtin_str_copy_immutable);
4321 add_builtin("vector-copy/immutable", builtin_vec_copy_immutable);
4322 add_builtin("flush-output-port", builtin_flush_out_port);
4323 add_builtin("error", builtin_error);
4324 add_builtin("gc", builtin_gc);
4325 add_variable("*fixnum-width*", fixnum(VAL_BITS));
4326 add_variable("*greatest-fixnum*", fixnum(FIXNUM_MAX));
4327 add_variable("*least-fixnum*", fixnum(FIXNUM_MIN));
4328 add_builtin("fixnum?", builtin_is_fixnum);
4329 add_builtin("fx=", builtin_fx_eq);
4330 add_builtin("fx<", builtin_fx_lt);
4331 add_builtin("fx<=", builtin_fx_le);
4332 add_builtin("fx</unsigned", builtin_fx_lt_unsigned);
4333 add_builtin("fx<=/unsigned", builtin_fx_le_unsigned);
4334 add_builtin("fx+/wrap", builtin_fx_add_wrap);
4335 add_builtin("fx+/carry", builtin_fx_add_carry);
4336 add_builtin("fx+/carry-unsigned", builtin_fx_add_carry_unsigned);
4337 add_builtin("fx-/wrap", builtin_fx_sub_wrap);
4338 add_builtin("fx-/borrow-unsigned", builtin_fx_sub_borrow_unsigned);
4339 add_builtin("fx*/wrap", builtin_fx_mul_wrap);
4340 add_builtin("fx*/carry", builtin_fx_mul_carry);
4341 add_builtin("fx*/carry-unsigned", builtin_fx_mul_carry_unsigned);
4342 add_builtin("fxnot", builtin_fxnot);
4343 add_builtin("fxand", builtin_fxand);
4344 add_builtin("fxior", builtin_fxior);
4345 add_builtin("fxxor", builtin_fxxor);
4346 add_builtin("fxif", builtin_fxif);
4347 add_builtin("fxmaj", builtin_fxmaj);
4348 add_builtin("fxshift", builtin_fxshift);
4349 add_builtin("fxshift/unsigned", builtin_fxshift_unsigned);
4350 add_builtin("fxlength/unsigned", builtin_fxlength_unsigned);
4351 add_builtin("open-subprocess", builtin_open_subprocess);
4352 add_builtin("wait-subprocess", builtin_wait_subprocess);
4353 add_builtin("read-token", builtin_read_token);
4354 add_builtin("write-string", builtin_write_string);
4355 add_builtin("write-string/quoted", builtin_write_string_quoted);
4356
4357 /* The interaction environment is a mutable copy of the Scheme report
4358 * environment plus extensions */
4359 R_EXPR = car(r5rs_env);
4360 R_CAR = copy_global_frame(); R_CDR = SC_NULL;
4361 interaction_env = cons();
4362 /* XXX there's probably no reason for these to be separate frames */
4363 R_EXPR = car(gscm_env);
4364 R_CAR = copy_global_frame(); R_CDR = interaction_env;
4365 interaction_env = cons();
4366
4367 /* Privileged environment for the compiler and toplevel code */
4368 R_CAR = SC_NULL; R_CDR = gscm_env;
4369 R_ENV = toplevel_env = cons();
4370 add_builtin("toplevel-environment", builtin_toplevel_env);
4371 add_variable("*max-parameters*", fixnum(EXT_LENGTH_MAX >> 1));
4372 /* ^ sign-encoded arity must fit in procedure header; frame index must fit
4373 * in variable ref header */
4374 add_builtin("define-r5rs", builtin_define_r5rs);
4375 add_builtin("define-gscm", builtin_define_gscm);
4376 add_builtin("set-input-port!", builtin_set_in_port);
4377 add_builtin("set-output-port!", builtin_set_out_port);
4378 add_builtin("push-winding!", builtin_push_winding);
4379 add_builtin("variable-ref", builtin_variable_ref);
4380 add_builtin("apply/unchecked", builtin_apply_unchecked);
4381 add_builtin("car/unchecked", builtin_car_unchecked);
4382 add_builtin("cdr/unchecked", builtin_cdr_unchecked);
4383 add_builtin("set-car/unchecked!", builtin_set_car_unchecked);
4384 add_builtin("set-cdr/unchecked!", builtin_set_cdr_unchecked);
4385 add_builtin("string-ref/unchecked", builtin_str_ref_unchecked);
4386 add_builtin("vector-ref/unchecked", builtin_vec_ref_unchecked);
4387 add_builtin("fx+/unchecked", builtin_fx_add_unchecked);
4388 add_builtin("fx-/unchecked", builtin_fx_sub_unchecked);
4389 add_builtin("fx=/unchecked", builtin_fx_eq_unchecked);
4390 add_builtin("fx</unchecked", builtin_fx_lt_unchecked);
4391 add_builtin("fx<=/unchecked", builtin_fx_le_unchecked);
4392 add_builtin("fxneg/unchecked", builtin_fx_neg_unchecked);
4393 add_builtin("fxnegative/unchecked?", builtin_is_fx_neg_unchecked);
4394 add_builtin("fxdiv/unsigned/unchecked", builtin_fx_div_unsigned_unchecked);
4395 add_builtin("fxdiv/ext/unsigned/unchecked",
4396 builtin_fx_div_ext_unsigned_unchecked);
4397 add_builtin("fixnum->dec/unchecked", builtin_fixnum_to_dec_unchecked);
4398 add_builtin("fixnum->hex/unchecked", builtin_fixnum_to_hex_unchecked);
4399 add_builtin("fixnum->oct/unchecked", builtin_fixnum_to_oct_unchecked);
4400 add_builtin("fixnum->bin/unchecked", builtin_fixnum_to_bin_unchecked);
4401 add_builtin("fixnum->bin/unsigned/unchecked",
4402 builtin_fixnum_to_bin_unsigned_unchecked);
4403 add_builtin("flonum->dec/unchecked", builtin_flonum_to_dec_unchecked);
4404 add_builtin("set-error-continuation!", builtin_set_err_cont);
4405 add_builtin("inet-stream-socket", builtin_inet_stream_sock);
4406 add_builtin("inet-dgram-socket", builtin_inet_dgram_sock);
4407 add_builtin("unix-stream-socket", builtin_unix_stream_sock);
4408 add_builtin("unix-dgram-socket", builtin_unix_dgram_sock);
4409 add_builtin("socket-ports", builtin_socket_ports);
4410 add_builtin("getsockname", builtin_getsockname);
4411 add_builtin("getpeername", builtin_getpeername);
4412 add_builtin("connect-inet", builtin_connect_inet);
4413 add_builtin("connect-unix", builtin_connect_unix);
4414 add_builtin("listen", builtin_listen);
4415 add_builtin("accept", builtin_accept);
4416 add_builtin("close", builtin_close);
4417 add_builtin("flonum?", builtin_is_flonum);
4418 add_builtin("flonum/unchecked", builtin_flonum_unchecked);
4419 add_builtin("flonum/unsigned/unchecked", builtin_flonum_unsigned_unchecked);
4420 add_builtin("flo=/unchecked", builtin_flo_eq_unchecked);
4421 add_builtin("flo</unchecked", builtin_flo_lt_unchecked);
4422 add_builtin("flo<=/unchecked", builtin_flo_le_unchecked);
4423 add_builtin("floneg/unchecked", builtin_flo_neg_unchecked);
4424 add_builtin("flonegative/unchecked?", builtin_is_flo_neg_unchecked);
4425 add_builtin("flo+/unchecked", builtin_flo_add_unchecked);
4426 add_builtin("flo-/unchecked", builtin_flo_sub_unchecked);
4427 add_builtin("flo*/unchecked", builtin_flo_mul_unchecked);
4428 add_builtin("flodiv/unchecked", builtin_flo_div_unchecked);
4429 add_builtin("floquotient/unchecked", builtin_flo_quotient_unchecked);
4430 add_builtin("floremainder/unchecked", builtin_flo_remainder_unchecked);
4431 add_builtin("fraction/exponent/unchecked", builtin_frac_exp_unchecked);
4432 add_builtin("load-exponent/unchecked", builtin_load_exp_unchecked);
4433 add_builtin("inf/unchecked?", builtin_is_inf_unchecked);
4434 add_builtin("flonum->fixnum/unchecked", builtin_flo_to_fix_unchecked);
4435 add_builtin("floor/unchecked", builtin_floor);
4436 add_builtin("ceiling/unchecked", builtin_ceiling);
4437 add_builtin("truncate/unchecked", builtin_truncate);
4438 add_builtin("round/unchecked", builtin_round);
4439 add_builtin("exp/unchecked", builtin_exp);
4440 add_builtin("log/unchecked", builtin_log);
4441 add_builtin("sin/unchecked", builtin_sin);
4442 add_builtin("cos/unchecked", builtin_cos);
4443 add_builtin("tan/unchecked", builtin_tan);
4444 add_builtin("asin/unchecked", builtin_asin);
4445 add_builtin("acos/unchecked", builtin_acos);
4446 add_builtin("atan/unchecked", builtin_atan);
4447 add_builtin("atan2/unchecked", builtin_atan2);
4448 add_builtin("sqrt/unchecked", builtin_sqrt);
4449 add_builtin("reverse-list->vector/unchecked", builtin_rev_list_to_vec_unchecked);
4450 add_builtin("builtin?", builtin_is_builtin);
4451 add_builtin("builtin-name", builtin_builtin_name);
4452 add_builtin("promise?", builtin_is_promise);
4453 add_builtin("continuation?", builtin_is_continuation);
4454 add_builtin("make-bignum", builtin_make_bignum);
4455 add_builtin("bignum?", builtin_is_bignum);
4456 add_builtin("bignum-negative?", builtin_is_bignum_negative);
4457 add_builtin("bignum-set-negative!", builtin_bignum_set_negative);
4458 add_builtin("bignum-ref", builtin_bignum_ref);
4459 add_builtin("bignum-set!", builtin_bignum_set);
4460 add_builtin("bignum-length", builtin_bignum_length);
4461 add_builtin("bignum", builtin_bignum);
4462 add_builtin("bignum/unsigned", builtin_bignum_unsigned);
4463 add_builtin("bignum2", builtin_bignum2);
4464 add_builtin("bignum-truncate!", builtin_bignum_truncate);
4465
4466 R_PORT = open_lib_file(GSCMLIB "/compiler.scm");
4467 err_context = "compiler";
4468 r_compiler_expr = sc_read();
4469 if (r_compiler_expr == SC_EOF) fatal("EOF reading compiler code");
4470 close_port(R_PORT);
4471 R_EXPR = r_compiler_expr;
4472 R_ENV = toplevel_env;
4473 evaluator();
4474 r_compiler = R_RESULT;
4475 /* Self-compile, for the speed benefit of variable refs */
4476 R_EXPR = r_compiler_expr;
4477 R_ENV = toplevel_env;
4478 r_compiler_expr = SC_NULL;
4479 evaluator();
4480 r_compiler = R_RESULT;
4481}
4482
4483int sc_toplevel(int argc, char **argv) {
4484 int i;
4485 R_CDR = SC_NULL;
4486 for (i=argc-1; i>=0; --i) {
4487 R_CAR = string(argv[i]);
4488 R_CDR = cons();
4489 }
4490 R_ENV = interaction_env;
4491 add_variable("*args*", R_CDR);
4492
4493 R_PORT = open_lib_file(GSCMLIB "/toplevel.scm");
4494 err_context = "toplevel";
4495 R_EXPR = sc_read();
4496 if (R_EXPR == SC_EOF) fatal("EOF reading toplevel code");
4497 close_port(R_PORT);
4498 R_ENV = toplevel_env;
4499 evaluator();
4500 flush_all();
4501 return fixnum_val(R_RESULT);
4502}