📄 cordbscs.c
字号:
/* * Copyright (c) 1993-1994 by Xerox Corporation. All rights reserved. * * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. * * Permission is hereby granted to use or copy this program * for any purpose, provided the above notices are retained on all copies. * Permission to modify the code and to distribute modified code is granted, * provided the above notices are retained, and a notice that the code was * modified is included with the above copyright notice. * * Author: Hans-J. Boehm (boehm@parc.xerox.com) *//* Boehm, October 3, 1994 5:19 pm PDT */# include "gc.h"# include "cord.h"# include <stdlib.h># include <stdio.h># include <string.h>/* An implementation of the cord primitives. These are the only *//* Functions that understand the representation. We perform only *//* minimal checks on arguments to these functions. Out of bounds *//* arguments to the iteration functions may result in client functions *//* invoked on garbage data. In most cases, client functions should be *//* programmed defensively enough that this does not result in memory *//* smashes. */ typedef void (* oom_fn)(void);oom_fn CORD_oom_fn = (oom_fn) 0;# define OUT_OF_MEMORY { if (CORD_oom_fn != (oom_fn) 0) (*CORD_oom_fn)(); \ ABORT("Out of memory\n"); }# define ABORT(msg) { fprintf(stderr, "%s\n", msg); abort(); }typedef unsigned long word;typedef union { struct Concatenation { char null; char header; char depth; /* concatenation nesting depth. */ unsigned char left_len; /* Length of left child if it is sufficiently */ /* short; 0 otherwise. */# define MAX_LEFT_LEN 255 word len; CORD left; /* length(left) > 0 */ CORD right; /* length(right) > 0 */ } concatenation; struct Function { char null; char header; char depth; /* always 0 */ char left_len; /* always 0 */ word len; CORD_fn fn; void * client_data; } function; struct Generic { char null; char header; char depth; char left_len; word len; } generic; char string[1];} CordRep;# define CONCAT_HDR 1 # define FN_HDR 4# define SUBSTR_HDR 6 /* Substring nodes are a special case of function nodes. */ /* The client_data field is known to point to a substr_args */ /* structure, and the function is either CORD_apply_access_fn */ /* or CORD_index_access_fn. *//* The following may be applied only to function and concatenation nodes: */#define IS_CONCATENATION(s) (((CordRep *)s)->generic.header == CONCAT_HDR)#define IS_FUNCTION(s) ((((CordRep *)s)->generic.header & FN_HDR) != 0)#define IS_SUBSTR(s) (((CordRep *)s)->generic.header == SUBSTR_HDR)#define LEN(s) (((CordRep *)s) -> generic.len)#define DEPTH(s) (((CordRep *)s) -> generic.depth)#define GEN_LEN(s) (CORD_IS_STRING(s) ? strlen(s) : LEN(s))#define LEFT_LEN(c) ((c) -> left_len != 0? \ (c) -> left_len \ : (CORD_IS_STRING((c) -> left) ? \ (c) -> len - GEN_LEN((c) -> right) \ : LEN((c) -> left)))#define SHORT_LIMIT (sizeof(CordRep) - 1) /* Cords shorter than this are C strings *//* Dump the internal representation of x to stdout, with initial *//* indentation level n. */void CORD_dump_inner(CORD x, unsigned n){ register size_t i; for (i = 0; i < (size_t)n; i++) { fputs(" ", stdout); } if (x == 0) { fputs("NIL\n", stdout); } else if (CORD_IS_STRING(x)) { for (i = 0; i <= SHORT_LIMIT; i++) { if (x[i] == '\0') break; putchar(x[i]); } if (x[i] != '\0') fputs("...", stdout); putchar('\n'); } else if (IS_CONCATENATION(x)) { register struct Concatenation * conc = &(((CordRep *)x) -> concatenation); printf("Concatenation: %p (len: %d, depth: %d)\n", x, (int)(conc -> len), (int)(conc -> depth)); CORD_dump_inner(conc -> left, n+1); CORD_dump_inner(conc -> right, n+1); } else /* function */{ register struct Function * func = &(((CordRep *)x) -> function); if (IS_SUBSTR(x)) printf("(Substring) "); printf("Function: %p (len: %d): ", x, (int)(func -> len)); for (i = 0; i < 20 && i < func -> len; i++) { putchar((*(func -> fn))(i, func -> client_data)); } if (i < func -> len) fputs("...", stdout); putchar('\n'); }}/* Dump the internal representation of x to stdout */void CORD_dump(CORD x){ CORD_dump_inner(x, 0); fflush(stdout);}CORD CORD_cat_char_star(CORD x, const char * y, size_t leny){ register size_t result_len; register size_t lenx; register int depth; if (x == CORD_EMPTY) return(y); if (leny == 0) return(x); if (CORD_IS_STRING(x)) { lenx = strlen(x); result_len = lenx + leny; if (result_len <= SHORT_LIMIT) { register char * result = GC_MALLOC_ATOMIC(result_len+1); if (result == 0) OUT_OF_MEMORY; memcpy(result, x, lenx); memcpy(result + lenx, y, leny); result[result_len] = '\0'; return((CORD) result); } else { depth = 1; } } else { register CORD right; register CORD left; register char * new_right; register size_t right_len; lenx = LEN(x); if (leny <= SHORT_LIMIT/2 && IS_CONCATENATION(x) && CORD_IS_STRING(right = ((CordRep *)x) -> concatenation.right)) { /* Merge y into right part of x. */ if (!CORD_IS_STRING(left = ((CordRep *)x) -> concatenation.left)) { right_len = lenx - LEN(left); } else if (((CordRep *)x) -> concatenation.left_len != 0) { right_len = lenx - ((CordRep *)x) -> concatenation.left_len; } else { right_len = strlen(right); } result_len = right_len + leny; /* length of new_right */ if (result_len <= SHORT_LIMIT) { new_right = GC_MALLOC_ATOMIC(result_len + 1); memcpy(new_right, right, right_len); memcpy(new_right + right_len, y, leny); new_right[result_len] = '\0'; y = new_right; leny = result_len; x = left; lenx -= right_len; /* Now fall through to concatenate the two pieces: */ } if (CORD_IS_STRING(x)) { depth = 1; } else { depth = DEPTH(x) + 1; } } else { depth = DEPTH(x) + 1; } result_len = lenx + leny; } { /* The general case; lenx, result_len is known: */ register struct Concatenation * result; result = GC_NEW(struct Concatenation); if (result == 0) OUT_OF_MEMORY; result->header = CONCAT_HDR; result->depth = depth; if (lenx <= MAX_LEFT_LEN) result->left_len = lenx; result->len = result_len; result->left = x; result->right = y; if (depth >= MAX_DEPTH) { return(CORD_balance((CORD)result)); } else { return((CORD) result); } }}CORD CORD_cat(CORD x, CORD y){ register size_t result_len; register int depth; register size_t lenx; if (x == CORD_EMPTY) return(y); if (y == CORD_EMPTY) return(x); if (CORD_IS_STRING(y)) { return(CORD_cat_char_star(x, y, strlen(y))); } else if (CORD_IS_STRING(x)) { lenx = strlen(x); depth = DEPTH(y) + 1; } else { register int depthy = DEPTH(y); lenx = LEN(x); depth = DEPTH(x) + 1; if (depthy >= depth) depth = depthy + 1; } result_len = lenx + LEN(y); { register struct Concatenation * result; result = GC_NEW(struct Concatenation); if (result == 0) OUT_OF_MEMORY; result->header = CONCAT_HDR; result->depth = depth; if (lenx <= MAX_LEFT_LEN) result->left_len = lenx; result->len = result_len; result->left = x; result->right = y; if (depth >= MAX_DEPTH) { return(CORD_balance((CORD)result)); } else { return((CORD) result); } }}CORD CORD_from_fn(CORD_fn fn, void * client_data, size_t len){ if (len <= 0) return(0); if (len <= SHORT_LIMIT) { register char * result; register size_t i; char buf[SHORT_LIMIT+1]; register char c; for (i = 0; i < len; i++) { c = (*fn)(i, client_data); if (c == '\0') goto gen_case; buf[i] = c; } buf[i] = '\0'; result = GC_MALLOC_ATOMIC(len+1); if (result == 0) OUT_OF_MEMORY; strcpy(result, buf); result[len] = '\0'; return((CORD) result); } gen_case: { register struct Function * result; result = GC_NEW(struct Function); if (result == 0) OUT_OF_MEMORY; result->header = FN_HDR; /* depth is already 0 */ result->len = len; result->fn = fn; result->client_data = client_data; return((CORD) result); }}size_t CORD_len(CORD x){ if (x == 0) { return(0); } else { return(GEN_LEN(x)); }}struct substr_args { CordRep * sa_cord; size_t sa_index;};char CORD_index_access_fn(size_t i, void * client_data){ register struct substr_args *descr = (struct substr_args *)client_data; return(((char *)(descr->sa_cord))[i + descr->sa_index]);}char CORD_apply_access_fn(size_t i, void * client_data){ register struct substr_args *descr = (struct substr_args *)client_data; register struct Function * fn_cord = &(descr->sa_cord->function); return((*(fn_cord->fn))(i + descr->sa_index, fn_cord->client_data));}/* A version of CORD_substr that simply returns a function node, thus *//* postponing its work. The fourth argument is a function that may *//* be used for efficient access to the ith character. *//* Assumes i >= 0 and i + n < length(x). */CORD CORD_substr_closure(CORD x, size_t i, size_t n, CORD_fn f){ register struct substr_args * sa = GC_NEW(struct substr_args); CORD result; if (sa == 0) OUT_OF_MEMORY; sa->sa_cord = (CordRep *)x; sa->sa_index = i; result = CORD_from_fn(f, (void *)sa, n); ((CordRep *)result) -> function.header = SUBSTR_HDR; return (result);}# define SUBSTR_LIMIT (10 * SHORT_LIMIT) /* Substrings of function nodes and flat strings shorter than */ /* this are flat strings. Othewise we use a functional */ /* representation, which is significantly slower to access. *//* A version of CORD_substr that assumes i >= 0, n > 0, and i + n < length(x).*/CORD CORD_substr_checked(CORD x, size_t i, size_t n){ if (CORD_IS_STRING(x)) { if (n > SUBSTR_LIMIT) { return(CORD_substr_closure(x, i, n, CORD_index_access_fn)); } else { register char * result = GC_MALLOC_ATOMIC(n+1); if (result == 0) OUT_OF_MEMORY; strncpy(result, x+i, n); result[n] = '\0'; return(result); } } else if (IS_CONCATENATION(x)) { register struct Concatenation * conc = &(((CordRep *)x) -> concatenation); register size_t left_len; register size_t right_len; left_len = LEFT_LEN(conc); right_len = conc -> len - left_len; if (i >= left_len) { if (n == right_len) return(conc -> right); return(CORD_substr_checked(conc -> right, i - left_len, n)); } else if (i+n <= left_len) { if (n == left_len) return(conc -> left); return(CORD_substr_checked(conc -> left, i, n)); } else { /* Need at least one character from each side. */ register CORD left_part; register CORD right_part; register size_t left_part_len = left_len - i; if (i == 0) { left_part = conc -> left; } else { left_part = CORD_substr_checked(conc -> left, i, left_part_len); } if (i + n == right_len + left_len) { right_part = conc -> right; } else { right_part = CORD_substr_checked(conc -> right, 0, n - left_part_len); } return(CORD_cat(left_part, right_part)); } } else /* function */ { if (n > SUBSTR_LIMIT) { if (IS_SUBSTR(x)) { /* Avoid nesting substring nodes. */ register struct Function * f = &(((CordRep *)x) -> function); register struct substr_args *descr = (struct substr_args *)(f -> client_data); return(CORD_substr_closure((CORD)descr->sa_cord, i + descr->sa_index, n, f -> fn)); } else { return(CORD_substr_closure(x, i, n, CORD_apply_access_fn)); } } else { char * result; register struct Function * f = &(((CordRep *)x) -> function); char buf[SUBSTR_LIMIT+1]; register char * p = buf; register char c; register int j; register int lim = i + n; for (j = i; j < lim; j++) { c = (*(f -> fn))(j, f -> client_data); if (c == '\0') { return(CORD_substr_closure(x, i, n, CORD_apply_access_fn)); } *p++ = c; } *p = '\0'; result = GC_MALLOC_ATOMIC(n+1); if (result == 0) OUT_OF_MEMORY; strcpy(result, buf); return(result); } }}CORD CORD_substr(CORD x, size_t i, size_t n){ register size_t len = CORD_len(x); if (i >= len || n <= 0) return(0); /* n < 0 is impossible in a correct C implementation, but */ /* quite possible under SunOS 4.X. */ if (i + n > len) n = len - i;# ifndef __STDC__ if (i < 0) ABORT("CORD_substr: second arg. negative"); /* Possible only if both client and C implementation are buggy. */ /* But empirically this happens frequently. */# endif return(CORD_substr_checked(x, i, n));}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -