📄 urlbox.mx
字号:
@' The contents of this file are subject to the MonetDB Public License@' Version 1.1 (the "License"); you may not use this file except in@' compliance with the License. You may obtain a copy of the License at@' http://monetdb.cwi.nl/Legal/MonetDBLicense-1.1.html@'@' Software distributed under the License is distributed on an "AS IS"@' basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the@' License for the specific language governing rights and limitations@' under the License.@'@' The Original Code is the MonetDB Database System.@'@' The Initial Developer of the Original Code is CWI.@' Portions created by CWI are Copyright (C) 1997-2007 CWI.@' All Rights Reserved.@f urlbox@a Martin Kersten@v 0.1@* URL boxThis module implements the flattened tree model for URLs.It is targeted at the GGLETICK student project@malmodule urlboxcomment "The URL box provides fast access toa large collection of url strings based on avertical fragmented representation.";pattern open():voidaddress URLBOXopencomment "Locate and open the URL box";pattern close():voidaddress URLBOXclosecomment "Close the URL box ";pattern destroy():voidaddress URLBOXdestroycomment "Destroy the URL box";pattern take(u:str):oidaddress URLBOXtakecomment "Get a handle for the URL";pattern deposit(u:str):void address URLBOXdepositcomment "Enter a new url into the box";command depositFile(fnme:str):voidaddress URLBOXdepositFile;pattern releaseAll():void address URLBOXreleaseAllcomment "Release all elements from the box";pattern release(u:str):void address URLBOXreleasecomment "Release a single URL value";pattern release(u:int):void address URLBOXreleaseOidcomment "Release a single URL value";pattern toString(u:int):str address URLBOXtoStringcomment "Get the string representation of an element in the box";pattern discard(name:str):void address URLBOXdiscardcomment "Remove the URL from the box";pattern discard(name:int):void address URLBOXdiscardOidcomment "Remove the URL from the box";pattern discard():void address URLBOXdiscardAllcomment "Remove all URLs from the box";pattern newIterator()(:int,:str)address URLBOXnewIteratorcomment "Locate next element in the box";pattern hasMoreElements()(:int,:str)address URLBOXhasMoreElementscomment "Locate next element in the box";command getLevel(i:int):bat[:int,:str]address URLBOXgetLevel;command getNames():bat[:int,:str]address URLBOXgetNames;command getCount():bat[:int,:lng]address URLBOXgetCount;command getCardinality():bat[:int,:lng]address URLBOXgetCardinality;command getSize():bat[:int,:lng]address URLBOXgetSize;@{pattern prelude():void address URLBOXpreludecomment "Initialize the URL box";urlbox.prelude();@-@+ Implementation@h#ifndef _URL_BOX_H#define _URL_BOX_H#include "mal.h"#include "mal_client.h"#include "mal_interpreter.h"#ifdef WIN32#ifndef LIBURLBOX#define urlbox_export extern __declspec(dllimport)#else#define urlbox_export extern __declspec(dllexport)#endif#else#define urlbox_export extern#endifurlbox_export str URLBOXprelude(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);urlbox_export str URLBOXopen(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);urlbox_export str URLBOXclose(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);urlbox_export str URLBOXdestroy(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);urlbox_export str URLBOXdeposit(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);urlbox_export str URLBOXtake(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);urlbox_export str URLBOXrelease(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);urlbox_export str URLBOXreleaseOid(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);urlbox_export str URLBOXreleaseAll(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);urlbox_export str URLBOXdiscard(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);urlbox_export str URLBOXdiscardOid(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);urlbox_export str URLBOXdiscardAll(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);urlbox_export str URLBOXtoString(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);urlbox_export str URLBOXnewIterator(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);urlbox_export str URLBOXhasMoreElements(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);#endif /* _URL_BOX_H */@- Module initializatonThe content of this box my only be changed by the Administrator.@= authorize { str tmp = NULL; rethrow("urlBox.@1", tmp, AUTHrequireAdmin()); }@c#include "mal_config.h"#include "urlbox.h"#include "mal_linker.h"#include "mal_authorize.h"#define MAXURLDEPTH 50static int urlDepth = 0;static BAT *urlBAT[MAXURLDEPTH];strURLBOXprelude(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci){ Box box; int depth; (void) mb; (void) stk; (void) pci; /* fool compiler */ @:authorize(prelude)@ box = openBox("urlbox"); if (box == 0) throw(MAL, "urlbox.prelude", "failed to open box"); /* if the box was already filled we can skip initialization */ for(depth=0; depth<MAXURLDEPTH; depth++) { urlBAT[depth]=0; } urlDepth= 0; return MAL_SUCCEED;}@- Operator implementation@cstrURLBOXopen(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci){ (void) mb; (void) stk; (void) pci; /* fool compiler */ @:authorize(open)@ if (openBox("urlbox") != 0) return MAL_SUCCEED; throw(MAL, "urlbox.open", "failed to open box");}strURLBOXclose(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci){ (void) mb; (void) stk; (void) pci; /* fool compiler */ @:authorize(close)@ if (closeBox("urlbox", TRUE) == 0) return MAL_SUCCEED; throw(MAL, "urlbox.close", "failed to close box");}strURLBOXdestroy(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci){ Box box; (void) mb; (void) stk; (void) pci; /* fool compiler */ @:OpenBox(destroy)@ destroyBox("urlbox"); return MAL_SUCCEED;}@-Access to a box calls for resolving the first parameterto a named box.@= OpenBox @:authorize(@1)@ box= findBox("urlbox"); if( box ==0) throw(MAL, "urlbox.@1","box is not open");@-The real work starts here. We have to insert an URL.@cintURLBOXchop(str url, str *parts){ char *s, *t; int depth=0; s= url; while( *s && *s != '\n'){ t= s+1; while(*t && *t !='\n' && *t!= '/') t++; if( *t ){ *t= 0; } else break; parts[depth++]= s; for( t++; *t && (*t == '\n' || *t== '/'); t++) ; s= t; } return depth;}strURLBOXinsert(char *tuple){ str parts[MAXURLDEPTH]; int i=0,depth; BAT *b; ptr p; int idx= 0,prv=0; char buf[128]; depth= URLBOXchop(tuple, parts); if( depth == 0) return MAL_SUCCEED; if( depth > urlDepth || urlBAT[0]== NULL){ for(i=0; i<=depth; i++){ /* make new bat */ snprintf(buf, 128, "urlbox_%d", i); b = BATdescriptor(BBPindex(buf)); if (b){ urlBAT[i] = b; continue; } b = BATnew(TYPE_int, TYPE_str, 1024); if (b == NULL) GDKfatal("urlbox.create: could not allocate."); BATkey(b,TRUE); BBPrename(b->batCacheid, buf); BATmode(b, PERSISTENT); BATcommit(b); urlBAT[i] = b; } urlDepth= depth; }@-Find the common prefix first@c p= BUNfnd(BATmirror(urlBAT[0]),parts[0]); if( p ) for( i=1; i<depth; i++){ /* printf("search [%d]:%s\n",i,parts[i]);*/ p= BUNfnd(BATmirror(urlBAT[i]),parts[i]); if( p == 0) break; prv= *(int*) p; } else i = 0;@-Insert the remainder as a new url string@c for( ; i<depth; i++){ /* printf("update [%d]:%s\n",i,parts[i]);*/ idx= (int) BATcount(urlBAT[i]); BUNins(urlBAT[i], (ptr) &prv, parts[i], FALSE); prv=idx; } return MAL_SUCCEED;}#define SIZE 1*1024*1024strURLBOXdepositFile(int *r, str *fnme){ stream *fs; bstream *bs; char *s,*t; int len=0; char buf[PATHLENGTH]; Client c= MCgetClient(); (void) r; if( **fnme == '/') snprintf(buf,PATHLENGTH,"%s",*fnme); else snprintf(buf,PATHLENGTH,"%s/%s",c->cwd,*fnme); /* later, handle directory separator */ fs= open_rastream(buf); if( fs == NULL || stream_errnr(fs) ) throw(MAL, "urlbox.deposit","File not accessible"); bs= bstream_create(fs,SIZE); if( bs == NULL) throw(MAL, "urlbox.deposit","Buffered file not available"); while( bstream_read(bs,bs->size-(bs->len-bs->pos)) != 0 && !stream_errnr(bs->s) ){ s= bs->buf; for( t=s; *t ; ){ while(t < bs->buf+bs->len && *t && *t != '\n') t++; if(t== bs->buf+bs->len || *t != '\n'){ /* read next block if possible after shift */ len = t-s; memcpy(bs->buf, s, len); bs->len = len; bs->pos = 0; break; } /* found a string to be processed */ *t = 0; URLBOXinsert(s); *t= '\n'; s= t+1; t= s; } } bstream_destroy(bs); stream_close(fs); stream_destroy(fs); return MAL_SUCCEED;}strURLBOXdeposit(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci){ str url; Box box; char tuple[2048]; (void) mb; @:OpenBox(deposit)@ url = (str) getArgValue(stk, pci, 1); if( strlen(url) <2048) strcpy(tuple,url); else throw(MAL, "urlbox.deposit","url too long"); return URLBOXinsert(url);}strURLBOXtake(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci){ str url, parts[MAXURLDEPTH]; Box box; @:OpenBox(take)@ url = (str) getArgValue(stk, pci, 1); url = GDKstrdup(url); URLBOXchop(url, parts); GDKfree(url); (void) mb; return MAL_SUCCEED;}strURLBOXrelease(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci){ str name; Box box; (void) mb; /* fool compiler */ @:OpenBox(release)@ name = (str) getArgValue(stk, pci, 1); if (releaseBox(box, name)) throw(MAL, "urlbox.release", "failed to release object from box"); return MAL_SUCCEED;}strURLBOXreleaseOid(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci){ str name; Box box; (void) mb; /* fool compiler */ @:OpenBox(release)@ name = (str) getArgValue(stk, pci, 1); if (releaseBox(box, name)) throw(MAL, "urlbox.release", "failed to release object from box"); return MAL_SUCCEED;}strURLBOXreleaseAll(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci){ Box box; (void) mb; (void) stk; (void) pci; /* fool compiler */ @:OpenBox(release)@ releaseAllBox(box); return MAL_SUCCEED;}strURLBOXdiscard(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci){ str name; Box box; (void) mb; /* fool compiler */ @:OpenBox(discard)@ name = (str) getArgValue(stk, pci, 1); if (discardBox(box, name) == 0) throw(MAL, "urlbox.discard", "failed to discard object from box"); return MAL_SUCCEED;}strURLBOXdiscardOid(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci){ str name; Box box; (void) mb; /* fool compiler */ @:OpenBox(discard)@ name = (str) getArgValue(stk, pci, 1); if (discardBox(box, name) == 0) throw(MAL, "urlbox.discard", "failed to discard object from box"); return MAL_SUCCEED;}strURLBOXdiscardAll(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci){ str name; Box box; (void) mb; /* fool compiler */ @:OpenBox(discard)@ name = (str) getArgValue(stk, pci, 1); if (discardBox(box, name) == 0) throw(MAL, "urlbox.discard", "failed to discard object from box"); return MAL_SUCCEED;}strURLBOXtoString(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci){ Box box; int i, len = 0; ValPtr v; str nme, s = 0; (void) mb; /* fool compiler */ @:OpenBox(toString)@ nme = (str) getArgValue(stk, pci, 1); i = findVariable(box->sym, nme); if (i < 0) throw(MAL, "urlbox.toString", "failed to take object from box"); v = &box->val->stk[i]; if (v->vtype == TYPE_str) s = v->val.sval; else (*BATatoms[v->vtype].atomToStr) (&s, &len, v); if (s == NULL) throw(MAL, "urlbox.toString", "illegal value"); VALset(&stk->stk[getArg(pci, 0)], TYPE_str, s); return MAL_SUCCEED;}strURLBOXnewIterator(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci){ Box box; lng *cursor; ValPtr v; (void) mb; /* fool compiler */ @:OpenBox(iterator)@ cursor = (lng *) getArgValue(stk, pci, 0); v = &stk->stk[getArg(pci, 1)]; nextBoxElement(box, cursor, v); return MAL_SUCCEED;}strURLBOXhasMoreElements(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci){ Box box; lng *cursor; ValPtr v; (void) mb; /* fool compiler */ @:OpenBox(iterator)@ cursor = (lng *) getArgValue(stk, pci, 0); v = &stk->stk[getArg(pci, 1)]; nextBoxElement(box, cursor, v); return MAL_SUCCEED;}strURLBOXgetLevel(int *r, int *level){ if( *level < 0 || *level >= urlDepth) throw(MAL, "urlbox.getLevel","Illegal level"); *r = urlBAT[*level]->batCacheid; BBPincref(*r,TRUE); return MAL_SUCCEED;}strURLBOXgetNames(int *r){ BAT *b; int i; b= BATnew(TYPE_int,TYPE_str, urlDepth+1); if( b== NULL) throw(MAL, "urlbox.getNames","Could not create table"); for(i=0; i<urlDepth; i++){ BUNins(b,&i, BBPname(urlBAT[i]->batCacheid), FALSE); } *r = b->batCacheid; BBPkeepref(*r); return MAL_SUCCEED;}strURLBOXgetCount(int *r){ BAT *b; int i; lng cnt; b= BATnew(TYPE_int,TYPE_lng, urlDepth+1); if( b== NULL) throw(MAL, "urlbox.getNames","Could not create table"); for(i=0; i<urlDepth; i++){ cnt = (lng) BATcount(urlBAT[i]); BUNins(b,&i, &cnt, FALSE); } *r = b->batCacheid; BBPkeepref(*r); return MAL_SUCCEED;}strURLBOXgetCardinality(int *r){ BAT *b, *bn; int i; lng cnt; b= BATnew(TYPE_int,TYPE_lng, urlDepth+1); if( b== NULL) throw(MAL, "urlbox.getNames","Could not create table"); for(i=0; i<urlDepth; i++){ bn = (BAT *) BATkunique(BATmirror(urlBAT[i])); cnt = (lng) BATcount(bn); BBPunfix(bn->batCacheid); BUNins(b,&i, &cnt, FALSE); } *r = b->batCacheid; BBPkeepref(*r); return MAL_SUCCEED;}/* #define ROUND_UP(x,y) ((y)*(((x)+(y)-1)/(y)))*/#define ROUND_UP(x,y) (x)strURLBOXgetSize(int *r){ BAT *b, *bn; int i; lng tot; size_t size; b= BATnew(TYPE_int,TYPE_lng, urlDepth+1); if( b== NULL) throw(MAL, "urlbox.getNames","Could not create table"); for(i=0; i<urlDepth; i++){ bn= urlBAT[i]; size = ROUND_UP(sizeof(BATstore), blksize); if (!VIEWparent(bn)) { size_t cnt = BATcapacity(bn); size += ROUND_UP(bn->batBuns->size, blksize); if (b->hheap) size += ROUND_UP(bn->hheap->size, blksize); if (b->theap) size += ROUND_UP(bn->theap->size, blksize); if (bn->hhash) size += ROUND_UP(sizeof(hash_t) * cnt, blksize); if (bn->thash) size += ROUND_UP(sizeof(hash_t) * cnt, blksize); } tot = size; BBPunfix(bn->batCacheid); BUNins(b,&i, &tot, FALSE); } *r = b->batCacheid; BBPkeepref(*r); return MAL_SUCCEED;}@}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -