📄 gdk_storage.mx
字号:
if (b->hheap && (b->batCopiedtodisk == 0 || b->batDirty || b->hheapdirty)) if (b->htype && b->hvarsized) { if (err == 0) err = HEAPsave(b->hheap, nme, "hheap"); } if (b->theap && (b->batCopiedtodisk == 0 || b->batDirty || b->theapdirty)) if (b->ttype && b->tvarsized) { if (err == 0) err = HEAPsave(b->theap, nme, "theap"); } /* correct heap modes such that they are ACID respecting */ DESCsetmodes(bd, b); if (err == 0) if (b->batCopiedtodisk == 0 || b->batDirty || b->batDirtydesc) { DESCclean(b); b->batCopiedtodisk = 1; err = BATsavedesc(b, nme); IODEBUG THRprintf(GDKout, "#BATsavedesc(%s,free=" SZFMT ") = %d\n", nme, b->batBuns->free, err); } if (b->hheap) GDKfree(b->hheap); if (b->theap) GDKfree(b->theap); if (err == 0) { bd->batCopiedtodisk = 1; DESCclean(bd); return bd; } return NULL;}@-TODO: move to gdk_bbp.mx@cBAT *BATload_intern(bat i){ bat bid = ABS(i); str nme = BBP_physical(bid); BAT *b = DESCload(bid); int ret = 0; int vv = 1; if (b == NULL) { return NULL; } /* LOAD bun heap */ if (b->htype != TYPE_void || b->ttype != TYPE_void) { vv = 0; ret = HEAPload(b->batBuns, nme, "buns", b->batRestricted == BAT_READ); if (ret < 0) { return NULL; } } else { b->batBuns->base = (char *) 1; } /* LOAD head heap */ if (ATOMvarsized(b->htype) && ret >= 0) { ret |= HEAPload(b->hheap, nme, "hheap", b->batRestricted == BAT_READ); if (ret < 0) { HEAPfree(b->batBuns); return NULL; } if (BATatoms[b->htype].atomHeapCheck == HEAP_check) { HEAP_init(b->hheap, b->htype); } else if (ATOMstorage(b->htype) == TYPE_str) { strCleanHash(b->hheap, (b->GDKversion != GDKLIBRARY)); /* ensure consistency */ } } /* LOAD tail heap */ if (ATOMvarsized(b->ttype) && ret >= 0) { ret |= HEAPload(b->theap, nme, "theap", b->batRestricted == BAT_READ); if (ret < 0) { if (b->hheap) HEAPfree(b->hheap); HEAPfree(b->batBuns); return NULL; } if (BATatoms[b->ttype].atomHeapCheck == HEAP_check) { HEAP_init(b->theap, b->ttype); } else if (ATOMstorage(b->ttype) == TYPE_str) { strCleanHash(b->theap, (b->GDKversion != GDKLIBRARY)); /* ensure consistency */ } } /* handle fatal errors */ if (ret < 0) { if (!vv) HEAPfree(b->batBuns); if (b->hheap) HEAPfree(b->hheap); if (b->theap) HEAPfree(b->theap); return NULL; } if (b->GDKversion != GDKLIBRARY) { b->GDKversion = GDKLIBRARY; b->batDirtydesc = 1; } /* initialize descriptor */ b->batMapdirty = ret; /* heap mode changed during load? */ b->batDirtydesc = FALSE; b->batParentid = 0; b->batSharecnt = 0; DELTAload(b); /* load succeeded; register it in BBP */ BBPcacheit(b); if (!DELTAdirty(b)) { ALIGNcommit(b); } b->batDirtydesc |= b->batMapdirty; /* if some heap mode changed, make desc dirty */ b->batMapbuns = b->batBuns->storage; if (b->hheap) b->batMaphheap = b->hheap->storage; if (b->theap) b->batMaptheap = b->theap->storage; if ((b->batRestricted == BAT_WRITE && (GDKdebug & 2)) || (GDKdebug & 8)) { ++b->batSharecnt; BATpropcheck(b, BATPROPS_CHECK); --b->batSharecnt; } return (i < 0) ? BATmirror(b) : b;}BAT *BATload(str nme){ str s = strrchr(nme, DIR_SEP); bat i = BBPindex(s ? s + 1 : nme); BAT *b = BBP_cache(i); if (i == 0 || b != NULL) { return b; /* inexistent bat or already loaded */ } return BATload_intern(i);}@}@- BATdeleteThe new behavior is to let the routine produce warnings but always succeed.rationale: on a delete, we must get rid of *all* the files. We do not have to careabout preserving them or be too much concerned if a file that had to be deleted wasnot found (end result is still that it does not exist). The past behavior to deletesome files and then fail was erroneous. The BAT would continue to exist with anincorrect disk status, causing havoc later on.NT forces us to close all files before deleting them; in case of memory mappedfiles this means that we have to unload the BATs before deleting. This isenforced now.@{@cintBATdelete(BAT *b){ bat bid = ABS(b->batCacheid); str o = BBP_physical(bid); BAT *loaded = BBP_cache(bid); int vv = (!b->htype && !b->ttype); if (loaded) { b = loaded; HASHdestroy(b); GDKfree(BATmirror(b)); BBP_cache(-b->batCacheid) = NULL; } if (b->batCopiedtodisk) { IODEBUG THRprintf(GDKout, "#BATdelete %s\n", o); if (GDKunlink(BATDIR, o, "desc")) { GDKwarning("BATdelete(%s): descriptor\n", BATgetId(b)); } } if (b->batCopiedtodisk || (b->batBuns->storage & STORE_MMAP)) { if ((b->htype != TYPE_void || b->ttype != TYPE_void) && HEAPdelete(b->batBuns, o, "buns") && b->batCopiedtodisk) GDKwarning("BATdelete(%s): bun heap\n", BATgetId(b)); } else if (b->batBuns->base && !vv) { HEAPfree(b->batBuns); } if (b->hheap) { if (b->batCopiedtodisk || (b->hheap->storage & STORE_MMAP)) { if (HEAPdelete(b->hheap, o, "hheap") && b->batCopiedtodisk) GDKwarning("BATdelete(%s): head heap\n", BATgetId(b)); } else { HEAPfree(b->hheap); } } if (b->theap) { if (b->batCopiedtodisk || (b->theap->storage & STORE_MMAP)) { if (HEAPdelete(b->theap, o, "theap") && b->batCopiedtodisk) GDKwarning("BATdelete(%s): tail heap\n", BATgetId(b)); } else { HEAPfree(b->theap); } } b->batCopiedtodisk = FALSE; return 0;}@@}@+ Printing and debuggingPrinting BATs is based on the multi-join on heads. The multijoinexploits all possible Monet properties and accelerators. Dueto this property, the n-ary table printing is quite fast andcan be used for producing ASCII dumps of large tables.It all works with hooks. The multijoin routine finds matching rangesof rows. For each found match in a column it first calls a value-routinehook. This routine we use to format a substring.For each found match-tuple (the Cartesian product of all matchesacross columns) a match routine hook is called. We use this routineto print a line.Due to this setup, we only format each value once, though itmight participate in many lines (due to the Cartesian product).The multijoin is quite complex, and we use a @%col_format_t@struct to keep track of column specific data.The multiprint can indicate arbitrary orderings. This is doneby passing a pattern-string that matches the following regexp:@verbatim "[X:] Y0 {,Yi}"@end verbatimwhere X and Yi are column numbers, @strong{starting at 1} for the firstBAT parameter.The table ordering has two aspects:@enumerate@item (1) the order in which the matches appear (a.k.a. the major ordering). This is equivalent to the order of the head values of the BATs (as we match=multijoin on head value).@item (2) within each match, the order in which the Cartesian product is produced. This is used to sub-order on the tail values of the BATs = the columns in the table.@end enumerateConcerning (1), the multijoin limits itself to *respecting*the order one one elected BAT, that can be identified with X.Using this, a major ordering on tail value can be enforced,by first passing "Bx.reverse.sort.reverse" (BAT ordered on tail).As the multijoin will respect the order of X, its tail valueswill be printed in sorted order.Concerning sub-ordering on other columns (2), the multijoinitself employs qsort() to order the Cartesian product onthe matched tail values.@{@c#define LINE(s, X) do { \ int n=X-1; \ if (stream_write(s, "#", 1, 1) != 1) \ break; \ while(n-->0) \ if (stream_write(s, "-", 1, 1) != 1) \ break; \ if (!stream_errnr(s)) \ stream_write(s, "#\n", 2, 1); \ } while (0)#define TABS(s, X) do { \ int n=X; \ while (n-->0) \ if (stream_write(s, "\t", 1, 1) != 1) \ break; \ } while (0)typedef int (*strFcn) (str *s, int *len, ptr val);typedef struct { int tabs; /* tab width of output */ strFcn format; /* tostr function */ /* dynamic fields, set by print_format */ str buf; /* tail value as string */ str tpe; /* type of this column as string */ int size; /* size of buf */ int len; /* strlen(buf) */} col_format_t;static intprint_nil(char **dst, int *len, ptr dummy){ (void) dummy; if (*len < 3) { if (*dst) GDKfree(*dst); *dst = (char *) GDKmalloc(*len = 40); } strcpy(*dst, "nil"); return 3;}#define printfcn(b) ((b->ttype==TYPE_void && b->tseqbase==oid_nil)?\ print_nil:BATatoms[b->ttype].atomToStr)static intprint_tabwidth(BAT *b, str title, col_format_t *c){ strFcn tostr = printfcn(b); size_t cnt = BATcount(b); int max, t = BATttype(b); c->tpe = ATOMname(b->ttype); c->buf = (char *) GDKmalloc(c->size = strLen(title)); max = (int) MAX((2 + strlen(c->tpe)), strlen(title)); if (t >= 0 && t < GDKatomcnt && tostr) { size_t off = BUNindex(b, BUNfirst(b)); int k; size_t j, i, probe = MIN(cnt, MAX(200, MIN(1024, cnt / 100))); for (i = 0; i < probe; i++) { j = off + ((probe == cnt) ? i : (rand() % MIN(16384, cnt))); k = (*tostr) (&c->buf, &c->size, BUNtail(b, BUNptr(b, j))); if (k > max) max = k; } } strcpy(c->buf, title); max += 2; /* account for ", " separator */ /* if (max > 60) max = 60; */ return 1 + (max - 1) / 8;}static voidprint_line(stream *s, col_format_t **l){ col_format_t *c = *(l++); if (stream_write(s, "[ ", 2, 1) != 1) return; if (c->format) { if (stream_write(s, c->buf, c->len, 1) != 1) return; if (stream_write(s, ",", 1, 1) != 1) return; TABS(s, c->tabs - ((c->len + 3) / 8)); if (stream_errnr(s)) return; if (c->tabs * 8 >= c->len + 3 && stream_write(s, " ", 1, 1) != 1) return; if (stream_write(s, " ", 1, 1) != 1) return; } for (c = *l; *(++l); c = *l) { if (!c->format) continue; if (stream_write(s, c->buf, c->len, 1) != 1) return; if (stream_write(s, ",", 1, 1) != 1) return; TABS(s, c->tabs - ((c->len + 3) / 8)); if (stream_errnr(s)) return; if (c->tabs * 8 >= c->len + 3 && stream_write(s, " ", 1, 1) != 1) return; if (stream_write(s, " ", 1, 1) != 1) return; } if (stream_write(s, c->buf, c->len, 1) != 1) return; TABS(s, c->tabs - ((c->len + 2) / 8)); if (stream_errnr(s)) return; stream_printf(s, " ]\n");}static voidprint_format(col_format_t *c, ptr v){ if (c->format) c->len = (*c->format) (&c->buf, &c->size, v);}@= print_head str buf = @1?argv[@2].tpe:argv[@2].buf; /* contains column title */ int len = (int) strlen(buf); if (stream_write(s, buf, len, 1) != 1) return -1; TABS(s, argv[@2].tabs-((@1+len-1)/8)); if (stream_errnr(s)) return -1;@= print_header k = 1; if (stream_write(s, "# ", 2, 1) != 1) return -1; if (argv[0].format) { @:print_head(@1,0)@ } for (;;) { @:print_head(@1,k)@ if (k++ >= argc) break; } if(@1) { if (stream_printf(s, " # type\n") < 0) return -1; } else { if (stream_printf(s, " # name\n") < 0) return -1; }@cstatic intprint_header(int argc, col_format_t *argv, stream *s){ int k; @:print_header(0)@ @:print_header(2)@ return 0;}@-The simple BAT printing routines make use of the complex case.@cintBATprint(BAT *b){ ERRORcheck(b == NULL, "BATprint: BAT expected"); return BATmultiprintf(GDKstdout, 2, &b, TRUE, 0, 1);}intBATprintf(stream *s, BAT *b){ ERRORcheck(b == NULL, "BATprintf: BAT expected"); return BATmultiprintf(s, 2, &b, TRUE, 0, 1);}@+ Multi-Bat PrintingThis routines uses the multi-join operation to printan n-ary table. Such a table is the reconstruction ofthe relational model from Monet's BATs, and consists ofall tail values of matching head-values in n-ary equijoin.@cintBATmultiprintf(stream *s, /* output stream */ int argc, /* #ncolumns = #nbats + */ BAT *argv[], /* the bats 2b printed */ int printhead, /* boolean: print the head column? */ int order, /* respect order of bat X (X=0 is none) */ int printorder /* boolean: print the orderby column? */ ){ col_format_t *c = (col_format_t *) alloca((unsigned) (argc * sizeof(col_format_t))); col_format_t **cp = (col_format_t **) alloca((unsigned) ((argc + 1) * sizeof(void *))); ColFcn *value_fcn = (ColFcn *) alloca((unsigned) (argc * sizeof(ColFcn))); int ret = 0, j, total = 0;@-Init the column descriptor of the head column.@c cp[argc] = NULL; /* terminator */ cp[0] = c; memset(c, 0, (argc--) * sizeof(col_format_t));@-Init the column descriptors of the tail columns.@c value_fcn[0] = (ColFcn) print_format; if (printhead) { BAT *b = BATmirror(argv[0]); total = c[0].tabs = print_tabwidth(b, b->tident, c + 0); c[0].format = printfcn(b); } for (j = 0; j < argc; j++, total += c[j].tabs) { cp[j + 1] = c + (j + 1); if (!printorder && order==j+1) c[j + 1].format = NULL; else c[j + 1].format = printfcn(argv[j]); c[j + 1].tabs = print_tabwidth(argv[j], argv[j]->tident, c + (j + 1)); value_fcn[j + 1] = (ColFcn) print_format; } total = 2 + (total * 8);@-Print the table header and then the multijoin.@c ret = -1; LINE(s, total); if (stream_errnr(s)) goto cleanup; if (print_header(argc, c, s) < 0) goto cleanup; LINE(s, total); if (stream_errnr(s)) goto cleanup; else if (argc == 1) { BAT *b = argv[0]; BUN p, q; BATloop(b, p, q) { print_format(cp[0], BUNhead(b, p)); print_format(cp[1], BUNtail(b, p)); print_line(s, cp); if (stream_errnr(s)) goto cleanup; } MULTIJOIN_LEAD(ret) = 1; MULTIJOIN_SORTED(ret) = (BAThordered(b) & 1); MULTIJOIN_KEY(ret) = b->hkey; MULTIJOIN_SYNCED(ret) = 1; } else { ret = BATmultijoin(argc, argv, (RowFcn) print_line, (void *) s, value_fcn, (void **) cp, order); }@-Cleanup.@c cleanup: for (j = 0; j <= argc; j++) { if (c[j].buf) GDKfree(c[j].buf); } return ret;}@}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -