📄 tablet.c
字号:
continue; offset = BUNindex(b, BUNfirst(b)) + as->offset; if (BATcount(b) != cnt || !BAThdense(b) || b->hseqbase != base) return oid_nil; fmt[i].p = BUNptr(b, offset); } return base;}intTABLETcreate_bats(Tablet * as){ Column *fmt = as->format; size_t i; size_t nr = 0; if (as->nr != ~(size_t)0) nr = as->nr; for (i = 0; i < as->nr_attrs; i++) { fmt[i].c = void_bat_create(fmt[i].adt, nr); if (!fmt[i].c) { GDKerror("ASCIIcreate_bats: Failed to create bat of size %d\n", as->nr); return -1; } } return 0;}BAT *TABLETcollect_bats(Tablet * as){ BAT *bats = BATnew(TYPE_str, TYPE_bat, as->nr_attrs); Column *fmt = as->format; size_t i; size_t cnt = BATcount(fmt[0].c); if (bats == NULL) return NULL; for (i = 0; i < as->nr_attrs; i++) { BUNins(bats, (ptr) fmt[i].name, (ptr) &fmt[i].c->batCacheid, FALSE); BATsetaccess(fmt[i].c, BAT_READ); if (cnt != BATcount(fmt[i].c)) { GDKerror("Error: counts are not equal\n"); return NULL; } } return bats;}static INLINE intmyisspace(int s){ return s == ' ' || s == '\t' || s == '\n';}static INLINE char *lstrip(char *s){ while (myisspace((int) *s)) { s++; } return s;}static INLINE char *rstrip(char *e){ e--; while (myisspace((int) *e)) { e--; } e++; if (e && *e) { *e = 0; } return e;}static INLINE char *find_quote(char *s, char quote){ while (*s != quote) s++; return s;}static INLINE char *rfind_quote(char *s, char quote){ while (*s != quote) s--; return s;}static INLINE intinsert_val(Column * fmt, char *s, char *e, char quote){ int res = 0; char *end = e; char bak = *e; ptr *adt; if (quote) { /* string needs the quotes included */ s = find_quote(s, quote); if (!s) { GDKerror("quote '%c' expected but not found in \"%s\"\n", quote, s); return -1; } s++; e = rfind_quote(e, quote); *e = 0; } else if (e > s) { s = lstrip(s); e = rstrip(e); } if (e < s) { e = s; } adt = fmt->frstr(fmt->extra, fmt->adt, s, e, quote); *end = bak; if (!adt) { GDKerror("value %s not inserted\n", s); return -1; } res = (BUNappend(fmt->c, adt, FALSE) == NULL); if (fmt->data != adt) GDKfree(adt); return res;}static INLINE char *skip_string(char *s, char quote){ int esc = 0; while (*s) { if (*s == '\\' && !esc) esc = 1; else if (*s == quote && !esc) break; else esc = 0; s++; } if (*s) s++; else return NULL; return s;}static INLINE intinsert_line(Tablet * as, char *line){ int res = 0; Column *fmt = as->format; char *s, *e = 0, quote = 0; size_t i; int first = 0; for (i = 0; i < as->nr_attrs && res == 0; i++) { first = 1; s = line; /* skip until separator */ while (*line) { if (*line == *(fmt[i].sep)){ if( fmt[i].sep[1] == 0 || strncmp(fmt[i].sep, line, fmt[i].seplen) == 0) { e = line; break; } } /* recognize fields starting with a quote */ if (first && (*line == '\"' || *line == '\'') && (line == s || *(line - 1) != '\\')) { quote = *line; line++; line = skip_string(line, quote); if (!line) { GDKerror("End of string (%c) missing in %s\n", quote, s); return -1; } first = 0; } else { if (first && !isspace((int) (*line))) first = 0; line++; } } if (!e && i == (as->nr_attrs-1)) e = line; if (e) { char *end = e; res |= insert_val(&fmt[i], s, e, quote); quote = 0; line = end + fmt[i].seplen; } else { GDKerror("missing sep %s line %d field %d\n", fmt->sep, BATcount(fmt->c), i); return -1; } e = 0; } return res;}static intTABLET_error(stream *s){ if (!stream_errnr(GDKerr)) { char *err = stream_error(s); stream_printf(GDKerr, "Stream error %s\n", err); /* use free as stream allocates out side GDK */ if (err) free(err); } return -1;}static INLINE intdump_line(char **buf, int *len, Column * fmt, stream *fd, size_t nr_attrs, size_t id){ size_t i; for (i = 0; i < nr_attrs; i++) { Column *f; char *p; int l; f = fmt + i; if (f->c) { p = (char *) bun_tail(f->c, id); l = f->tostr(f->extra, buf, len, f->adt, p); if (stream_write(fd, *buf, 1, l) != l) return TABLET_error(fd); } if (stream_write(fd, f->sep, 1, f->seplen) != f->seplen) return TABLET_error(fd); } return 0;}static INLINE intoutput_line(char **buf, int *len, Column * fmt, stream *fd, size_t nr_attrs, ptr id){ size_t i; for (i = 0; i < nr_attrs; i++) { if (fmt[i].c == NULL) continue; fmt[i].p = BUNfnd(fmt[i].c, id); if (fmt[i].p == 0) break; } if (i == nr_attrs) { for (i = 0; i < nr_attrs; i++) { Column *f; char *p; int l; f = fmt + i; if (f->c) { p = BUNtail(f->c, f->p); l = f->tostr(f->extra, buf, len, f->adt, p); if (stream_write(fd, *buf, 1, l) != l) return TABLET_error(fd); } if (stream_write(fd, f->sep, 1, f->seplen) != f->seplen) return TABLET_error(fd); } } return 0;}static INLINE intoutput_line_dense(char **buf, int *len, Column * fmt, stream *fd, size_t nr_attrs){ size_t i; for (i = 0; i < nr_attrs; i++) { Column *f = fmt + i; if (f->c) { char *p = BUNtail(f->c, f->p); int l = f->tostr(f->extra, buf, len, f->adt, p); if (stream_write(fd, *buf, 1, l) != l) return TABLET_error(fd); f->p = BUNnext(f->c, f->p); } if (stream_write(fd, f->sep, 1, f->seplen) != f->seplen) return TABLET_error(fd); } return 0;}static INLINE intoutput_line_lookup(char **buf, int *len, Column * fmt, stream *fd, size_t nr_attrs, size_t id){ size_t i; for (i = 0; i < nr_attrs; i++) { Column *f = fmt + i; if (f->c) { char *p = BUNtail(f->c, BUNptr(f->c, id +BUNindex(f->c, BUNfirst(f->c)))); int l = f->tostr(f->extra, buf, len, f->adt, p); if (stream_write(fd, *buf, 1, l) != l) return TABLET_error(fd); } if (stream_write(fd, f->sep, 1, f->seplen) != f->seplen) return TABLET_error(fd); } return 0;}static INLINE intread_more(bstream *in, stream *out, size_t n){ if (out) { do { /* query is not finished ask for more */ /* we need more query text */ if (bstream_next(in) < 0) return EOF; if (in->eof) { if (out && stream_write(out, PROMPT2, sizeof(PROMPT2) - 1, 1) == 1) stream_flush(out); in->eof = 0; /* we need more query text */ if (bstream_next(in) < 0) return EOF; } } while (in->len <= in->pos); } else { bstream_read(in, n); } return 1;}ssize_tTABLETload_file(Tablet * as, bstream *b, stream *out){ int res = 0, done = 0; size_t i = 0; char *sep = as->format[as->nr_attrs - 1].sep; int seplen = as->format[as->nr_attrs - 1].seplen; if (tablet_debug) stream_printf(GDKerr, "TABLETload_file\n"); while ((b->pos < b->len || !b->eof) && res == 0 && (as->nr == ~(size_t) 0 || i < as->nr)) { char *s, *end; if (b->pos >= b->len && read_more(b, out, b->size - (b->len - b->pos)) == EOF) { if (as->nr != ~(size_t)0 && i < as->nr) { GDKerror("ASCIIload_file: read error\n"); res = -1; } break; } end = b->buf + b->len; s = b->buf + b->pos; *end = '\0'; done = 0; while (s < end) { char *e = strstr(s, sep); if (e) { *e = '\0'; if (insert_line(as, s) < 0) { s = e + seplen; b->pos = (s - b->buf); res = -1; break; } s = e + seplen; done = 1; } else if (b->eof && s + as->nr_attrs < end) { if (insert_line(as, s) < 0) { s = end; b->pos = (s - b->buf); res = -1; break; } s = end; } else { if (!done) { /* nothing found in current buf * ie. need to enlarge */ size_t size = b->size; if (b->pos == 0 || (b->len - b->pos > b->size >> 1)) size <<= 4; if (read_more(b, out, size) == EOF) { GDKerror("ASCIIload_file: read error\n"); res = -1; break; } end = b->buf + b->len; s = b->buf + b->pos; *end = '\0'; continue; } break; } b->pos = (s - b->buf); i++; if (tablet_debug && (i % 100000) == 0) stream_printf(GDKerr, "inserted " SZFMT "\n", i); if (as->nr != ~(size_t) 0 && i >= as->nr) break; } } as->nr = i; if (res < 0) return res; return (ssize_t) as->nr;}static intdump_file(Tablet * as, stream *fd){ size_t i = 0; int len = BUFSIZ; char *buf = GDKmalloc(len); for (i = 0; i < as->nr; i++) { if (dump_line(&buf, &len, as->format, fd, as->nr_attrs, i) < 0) { GDKfree(buf); return -1; } if (tablet_debug && (i % 1000000) == 0) stream_printf(GDKerr, "dumped " SZFMT " lines\n", i); } GDKfree(buf); return 0;}static intoutput_file_default(Tablet * as, BAT *order, stream *fd){ int len = BUFSIZ, res = 0; char *buf = GDKmalloc(len); BUN p, q; size_t i = 0; size_t offset = BUNindex(order, BUNfirst(order)) + as->offset; for (q = BUNptr(order, offset + as->nr), p = BUNptr(order, offset); p < q; p = BUNnext(order, p)) { ptr h = BUNhead(order, p); if ((res = output_line(&buf, &len, as->format, fd, as->nr_attrs, h)) < 0) { GDKfree(buf); return res; } i++; if (tablet_debug && (i % 1000000) == 0) stream_printf(GDKerr, "dumped " SZFMT " lines\n", i); } GDKfree(buf); return res;}static intoutput_file_dense(Tablet * as, stream *fd){ int len = BUFSIZ, res = 0; char *buf = GDKmalloc(len); size_t i = 0; for (i = 0; i < as->nr; i++) { if ((res = output_line_dense(&buf, &len, as->format, fd, as->nr_attrs)) < 0) { GDKfree(buf); return res; } if (tablet_debug && (i % 1000000) == 0) stream_printf(GDKerr, "dumped " SZFMT " lines\n", i); } GDKfree(buf); return res;}static intoutput_file_ordered(Tablet * as, BAT *order, stream *fd, oid base){ int len = BUFSIZ, res = 0; char *buf = GDKmalloc(len); BUN p, q; size_t i = 0; size_t offset = BUNindex(order, BUNfirst(order)) + as->offset; for (q = BUNptr(order, offset + as->nr), p = BUNptr(order, offset); p < q; p = BUNnext(order, p)) { size_t h = *(oid *) BUNhead(order, p) - base; if ((res = output_line_lookup(&buf, &len, as->format, fd, as->nr_attrs, h)) < 0) { GDKfree(buf); return res; } if (tablet_debug && (i % 1000000) == 0) stream_printf(GDKerr, "dumped " SZFMT " lines\n", i); } GDKfree(buf); return res;}#define SIZE 1*1024*1024BAT *TABLETload(BAT *names, BAT *seps, BAT *types, char *datafile, size_t nr){ BAT *res; stream *s = open_rastream(datafile); bstream *b = NULL; if (s == NULL || stream_errnr(s)) { GDKerror("could not open file %s\n", datafile); if (s) { stream_destroy(s); } return NULL; } res = TABLETinput(names, seps, types, b = bstream_create(s, SIZE), NULL, nr); bstream_destroy(b); stream_close(s); stream_destroy(s); return res;}BAT *TABLETinput(BAT *names, BAT *seps, BAT *types, bstream *s, stream *out, size_t nr){ BAT *bats = NULL; Tablet as; as.nr_attrs = 0; as.nr = nr; if (create_loadformat(&as, names, seps, types) != ~(size_t) 0 && TABLETcreate_bats(&as) >= 0) { if (TABLETload_file(&as, s, out) >= 0) bats = TABLETcollect_bats(&as); } TABLETdestroy_format(&as); return bats;}voidTABLETdump(BAT *names, BAT *seps, BAT *bats, char *datafile, size_t nr){ Tablet as; as.nr_attrs = 0; as.nr = nr; if (create_dumpformat(&as, names, seps, bats) != ~(size_t) 0 && TABLETassign_BATs(&as, bats) != ~(size_t) 0) { stream *s = open_wastream(datafile); if (s != NULL && !stream_errnr(s) && dump_file(&as, s) >= 0) { stream_printf(GDKerr, "saved in %s\n", datafile); } if (s == NULL || stream_errnr(s)) { GDKerror("could not open file %s\n", datafile); } else { stream_close(s); } stream_destroy(s); } TABLETdestroy_format(&as);}intTABLEToutput_file(Tablet * as, BAT *order, stream *s){ oid base = oid_nil; size_t maxnr = BATcount(order); /* only set nr if it is zero or lower (bogus) to the maximum value * possible (BATcount), if already set within BATcount range, * preserve value such that for instance SQL's reply_size still * works */ if (as->nr == ~(size_t)0 || as->nr > maxnr) as->nr = maxnr; if ((base = check_BATs(as)) != oid_nil) { if (BAThdense(order) && order->hseqbase == base) return output_file_dense(as, s); else return output_file_ordered(as, order, s, base); } else { return output_file_default(as, order, s); }}size_tTABLEToutput(BAT *order, BAT *seps, BAT *bats, stream *s){ int res = 0; Tablet as; as.nr_attrs = 0; as.nr = ~(size_t) 0; if (create_dumpformat(&as, NULL, seps, bats) != ~(size_t) 0 && TABLETassign_BATs(&as, bats) != ~(size_t) 0) { res = TABLEToutput_file(&as, order, s); } TABLETdestroy_format(&as); if (res >= 0) return as.nr; return ~(size_t) 0;}static voidtablet_load(BAT **bats, BAT *names, BAT *seps, BAT *types, str datafile, int *N){ size_t nr = ~(size_t)0; if (*N >= 0) nr = *N; *bats = TABLETload(names, seps, types, datafile, nr);}static voidtablet_input(BAT **bats, BAT *names, BAT *seps, BAT *types, void **s, int *N){ bstream *b = NULL; size_t nr = ~(size_t)0; if (*N >= 0) nr = *N; *bats = TABLETinput(names, seps, types, b = bstream_create(*(stream **) s, SIZE), NULL, nr); bstream_destroy(b);}voidtablet_dump(BAT *names, BAT *seps, BAT *bats, str datafile, int *nr){ TABLETdump(names, seps, bats, datafile, *nr);}static voidtablet_output(BAT *order, BAT *seps, BAT *bats, void **s){ (void) TABLEToutput(order, seps, bats, *(stream **) s);}#line 1773 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/tablet.mx"strCMDtablet_load(int *ret, int *nameid, int *sepid, int *typeid, str *filename, int *nr){ BAT *names, *seps, *types, *bn; if ((names = BATdescriptor(*nameid)) == NULL) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -