📄 url.mx
字号:
@' The contents of this file are subject to the MonetDB Public License
@' Version 1.1 (the "License"); you may not use this file except in
@' compliance with the License. You may obtain a copy of the License at
@' http://monetdb.cwi.nl/Legal/MonetDBLicense-1.1.html
@'
@' Software distributed under the License is distributed on an "AS IS"
@' basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
@' License for the specific language governing rights and limitations
@' under the License.
@'
@' The Original Code is the MonetDB Database System.
@'
@' The Initial Developer of the Original Code is CWI.
@' Portions created by CWI are Copyright (C) 1997-2007 CWI.
@' All Rights Reserved.

@f url
@a M. Kersten
@a Y. Zhang
@v 1.2
@* The URL module
The URL module contains a collection of commands to manipulate
Uniform Resource Locators - references to resources on the World Wide Web -
represented as strings in Monet. A URL can represent
anything from a file or a directory to a complete movie.
This module is geared towards manipulation of the URL name only.
A complementary module can be used to gain access to the resource itself.[IOgate]

The core of the extension involves several operators to extract
portions of the URLs for further manipulation.
In particular, the domain, the server, the protocol, and the file
extension can be extracted without copying the complete URL from the heap
into a string variable first.

The commands provided are based on the corresponding Java class.

A future version should use a special atom, because this may save
considerable space.
Alternatively, break the URL strings into components
and represent them with a bunch of BATs.
An intermediate step would be to refine the atom STR; it would then be
possible to redefine hashing.
@mal
module url;

atom url:str;

command url(s:str):url
address URLnew
comment "Create an URL from a string literal";

command getAnchor(u:url) :str
address URLgetAnchor
comment "Extract the URL anchor (reference)";

command getBasename(u:url) :str
address URLgetBasename
comment "Extract the URL base file name";

command getContent(u:url) :str
address URLgetContent
comment "Get the URL resource in a local file";

command getContext(u:url) :str
address URLgetContext
comment "Get the path context of a URL ";

command getDirectory(u:url) :bat[:int,:str]
address URLgetDirectory
comment "Extract directory names from the URL";

command getDomain(u:url) :str
address URLgetDomain
comment "Extract Internet domain from the URL";

command getExtension(u:url) :str
address URLgetExtension
comment "Extract the file extension of the URL";

command getFile(u:url) :str
address URLgetFile
comment "Extract the last file name of the URL";

command getHost(u:url) :str
address URLgetHost
comment "Extract the server name from the URL";

command getPort(u:url) :str
address URLgetPort
comment "Extract the port id from the URL";

command getProtocol(u:url) :str
address URLgetProtocol
comment "Extract the protocol from the URL";

command getQuery(u:url) :str
address URLgetQuery
comment "Extract the query string from the URL";

command getQueryArg(u:url) :bat[:str,:str]
address URLgetQueryArg
comment "Extract argument mappings from the URL";

command getUser(u:url) :str
address URLgetUser
comment "Extract the user identity from the URL";

command getRobotURL(u:url) :str
address URLgetRobotURL
comment "Extract the location of the robot control file";

command isaURL(u:url) :bit
address URLisaURL
comment "Check conformity of the URL syntax";

command new(p:str,h:str,prt:int,f:str):url
address URLnew4
comment "Construct URL from protocol, host, 
port,and file";

command new(prot:str,host:str,fnme:str):url
address URLnew3
comment "Construct URL from protocol, host,and file";

command fromstr():url
address URLfromString
comment "Convert a string to an url. ";

command tostr():str
address URLtoString
comment "Convert url to string equivalent";
@-
@{
@-
In Monet 5 we have a different notion on how to administer BATs.
This variant of the macro hands the result back as a BAT cache id
(b->batCacheid) instead of a BAT pointer.
@= PseudoM5
	/* BATrename(b,"@1_@2");*/
	BATroles(b,"@1","@2");
	BATmode(b,TRANSIENT);
	*retval= b->batCacheid;
@* Implementation Code
The URLs are stored as strings without further optimization.
A more compact representation is feasible, but it has not yet been
considered worth the effort.

The Pseudo macro names the result BAT "@1_@2", labels its two columns
@1 and @2, marks it TRANSIENT and stores the BAT pointer in *retval.
@= Pseudo
	BATrename(b,"@1_@2");
	BATroles(b,"@1","@2");
	BATmode(b,TRANSIENT);
	*retval= b;
@-
The key action is to break a URL into its constituents.
Parsing is done for each individual request, because this way we
secure concurrent use from different threads.
@h
#ifndef URL_H
#define URL_H
#include <gdk.h>
#include <ctype.h>

/* A URL is represented as a plain string. */
typedef str url;

/* url_export decorates the symbols exported by this module.  On WIN32 it
 * selects dllexport while the library itself is built (LIBURL defined) and
 * dllimport otherwise; elsewhere it is a plain extern. */
#ifdef WIN32
#ifndef LIBURL
#define url_export extern __declspec(dllimport)
#else
#define url_export extern __declspec(dllexport)
#endif
#else
#define url_export extern
#endif

url_export str escape_str(str *retval, str s);
url_export str unescape_str(str *retval, str s);
#endif /* URL_H */
@c
#include "mal_config.h"
#include "url.h"
#include "mal.h"
#include "mal_exception.h"

/* Helper routines; their definitions are not part of this section. */
void getword(char *word, char *line, char stop);
char x2c(char *what);
void plustospace(char *str);

/* Entry points named by the "address" clauses of the MAL signatures above.
 * Results and arguments are passed by reference. */
url_export str URLnew(str *url, str *val);
url_export str URLgetAnchor(str *retval, str *val);
url_export str URLgetBasename(str *retval, str *t);
url_export str URLgetContent(str *retval, str *Str1);
url_export str URLgetContext(str *retval, str *val);
url_export str URLgetDirectory(int *ret, str *tv);
url_export str URLgetDomain(str *retval, str *tv);
url_export str URLgetExtension(str *retval, str *tv);
url_export str URLgetFile(str *retval, str *tv);
url_export str URLgetHost(str *retval, str *tv);
url_export str URLgetPort(str *retval, str *tv);
url_export str 
URLgetProtocol(str *retval, str *tv);url_export str URLgetQuery(str *retval, str *tv);url_export str URLgetQueryArg(int *ret, str *tv);url_export str URLgetUser(str *retval, str *tv);url_export str URLgetRobotURL(str *retval, str *tv);url_export str URLisaURL(bit *retval, str *tv);url_export str URLnew4(str *url, str *protocol, str *server, int *port, str *file);url_export str URLnew3(str *url, str *protocol, str *server, str *file);url_export int URLfromString(str src, int *len, str *url);url_export int URLtoString(str *s, int *len, str src);/* COMMAND "getAnchor": Extract an anchor (reference) from the URL * SIGNATURE: getAnchor(url) : str; */strurl_getAnchor(str *retval, /* put string: pointer to char here. */ url Str1 /* string: pointer to char. */ ){ str s, d; if (Str1 == 0) throw(ILLARG, "url.getAnchor", "url missing"); s = strchr(Str1, '#'); if (s == 0) s= (str) str_nil; d = (str) GDKmalloc(strlen(s) + 1); if (d) strcpy(d, s); *retval = d; return MAL_SUCCEED;}/* COMMAND "getBasename": Extract the base of the last file name of the URL, * thus, excluding the file extension. * SIGNATURE: getBasename(str) : str; */strurl_getBasename(str *retval, url t){ str d = 0, s; if (t == 0) throw(ILLARG, "url.getBasename", "url missing"); s = strrchr(t, '/'); if (s) s++; else s = (str) str_nil; d = (str) GDKmalloc(strlen(s) + 1); if (d) strcpy(d, s); s = strchr(d, '.'); if (s) *s = 0; *retval = d; return MAL_SUCCEED;}/* COMMAND "getContent": Retrieve the file referenced * SIGNATURE: getContent(str) : str; */strurl_getContent(str *retval, /* put string: pointer to char here. */ url Str1 /* string: pointer to char. 
*/ ){ /* TODO: getContent should not return a string */ if (!Str1) throw(ILLARG, "url.getContent", "url missing"); strcpy(*retval, "functions not implemented"); return MAL_SUCCEED;}/* COMMAND "getContext": Extract the path context from the URL * SIGNATURE: getContext(str) : str; */strurl_getContext(str *retval, url Str1){ str s, d; if (Str1 == 0) throw(ILLARG, "url.getContext", "url missing"); s = strstr(Str1, "://"); if (s) s += 3; else s = Str1; s = strchr(s, '/'); if (s == 0) s= (str) str_nil; d = GDKmalloc(strlen(Str1) - (s - Str1) + 1); if (d) strcpy(d, s); *retval = d; return MAL_SUCCEED;}/* COMMAND "getDirectory": Extract the directory names from the URL * SIGNATURE: getDirectory(str) : bat[int,str]; */strurl_getDirectory(BAT **retval, /* put pointer to BAT[int,str] record here. */ url t){ static char buf[1024]; char *s; int i = 0, k = 0; BAT *b = NULL; if (t == 0) throw(ILLARG, "url.getDirectory", "url missing"); while (*t && *t != ':') t++; t++; if (*t != '/') goto getDir_done; t++; if (*t != '/') goto getDir_done; t++; while (*t && *t != '/') t++; b = BATnew(TYPE_int, TYPE_str, 40); if (b == 0) throw(MAL, "url.getDirectory", "could not create BAT"); s = buf; for (t++; *t; t++) { if (*t == '/') { *s = 0; BUNins(b, &k, buf, FALSE); k++; s = buf; *s = 0; i = 0; continue; } *s++ = *t; if (i++ == 1023) throw(PARSE, "url.getDirectory","server name too long"); }getDir_done: @:Pseudo(dir,name)@ return MAL_SUCCEED;}/* COMMAND "getDomain": Extract the Internet domain from the URL * SIGNATURE: getDomain(str) : str; */strURLgetDomain(str *retval, str *u){ static char buf[1024]; char *b, *d, *s = buf; int i = 0; url t= *u; *retval = 0; s = (str)str_nil; if (t == 0) throw(ILLARG, "url.getDomain", "domain missing"); while (*t && *t != ':') t++; t++; if (*t != '/') goto getDomain_done; t++; if (*t != '/') goto getDomain_done; t++; b = buf; d = 0; for (; *t && *t != '/'; t++) { if (*t == '.') d = b; if (*t == ':') break; *b++ = *t; if (i++ == 1023) throw(PARSE, 
"url.getDomain", "server name too long\n"); } *b = 0; if (d) s = d + 1;getDomain_done: d = (char*) GDKmalloc(strlen(s) + 1); if (d) strcpy(d, s); *retval = d; return MAL_SUCCEED;}/* COMMAND "getExtension": Extract the file extension of the URL * SIGNATURE: getExtension(str) : str; */strurl_getExtension(str *retval, url t){ str d = 0, s; if (t == 0) throw(ILLARG, "url.getExtension", "url missing"); s = strrchr(t, '/'); if (s) { s++; s = strchr(s + 1, '.'); if (s) s++; else s = (str) str_nil; } else s = (str) str_nil; d = (str) GDKmalloc(strlen(s) + 1); if (d) strcpy(d, s); *retval = d; return MAL_SUCCEED;}/* COMMAND "getFile": Extract the last file name of the URL * SIGNATURE: getFile(str) : str; */strurl_getFile(str *retval, url t){ str d = 0, s; if (t == 0) throw(ILLARG, "url.getFile", "url missing"); s = strrchr(t, '/'); if (s) s++; else s = (str) str_nil; d = (str) GDKmalloc(strlen(s) + 1); if (d) strcpy(d, s); *retval = d; return MAL_SUCCEED;}@+ Url_getHostExtract the server identity from the URLSIGNATURE: getHost(str) : str;@cstrurl_getHost(str *retval, /* is GDKfree needed ? 
*/ url t){ static char buf[1024]; char *b, *d, *s; int i = 0; s = (str)str_nil; if (t == 0) throw(ILLARG, "url.getHost", "url missing"); while (*t && *t != ':') t++; t++; if (*t != '/') goto getHost_done; t++; if (*t != '/') goto getHost_done; t++; b = buf; s = buf; for (; *t && *t != '/'; t++) { *b++ = *t; if (i++ == 1023) throw(PARSE, "url.getHost", "server name too long"); } *b = 0;getHost_done: d = (char*) GDKmalloc(strlen(s) + 1); if (d) strcpy(d, s); *retval = d; return MAL_SUCCEED;}@+ url_getPort Extract the port id from the URL SIGNATURE: getPort(str) : str;@cstrurl_getPort(str *retval, url t){ static char buf[1024]; char *b, *d = 0, *s = buf; int i = 0; if (t == 0) throw(ILLARG, "url.getPort", "url missing"); s = (str)str_nil; while (*t && *t != ':') t++; t++; if (*t != '/') goto getPort_done; t++; if (*t != '/') goto getPort_done; t++; b = buf; for (; *t && *t != '/'; t++) { if (*t == ':') d = b; *b++ = *t; if (i++ == 1023) throw(PARSE, "url.getPort", "server name too long"); } *b = 0; if (d) s = d + 1; else s = (str)str_nil;getPort_done: d = (char*) GDKmalloc(strlen(s) + 1); if (d) strcpy(d, s); *retval = d; return MAL_SUCCEED;}/* COMMAND "getProtocol": Extract the protocol from the URL * SIGNATURE: getProtocol(str) : str; */strurl_getProtocol(str *retval, /* put string: pointer to char here. */ url t){ static char buf[1024]; char *b, *d = 0; int i = 0; if (t == 0) throw(ILLARG, "url.getProtocol", "url missing"); b = buf; for (; *t && *t != ':'; t++) { *b++ = *t; if (i++ == 1023) throw(PARSE, "url_getProtocol", "server name too long"); } *b = 0; d = (char*) GDKmalloc(strlen(buf) + 1); if (d) strcpy(d, buf); *retval = d; return MAL_SUCCEED;}strurl_getQuery(str *retval, url Str1){ char *s, *d; if (Str1 == 0) throw(ILLARG, "url.getQuery", "url missing"); s = strchr(Str1, '?'); if (s == 0) s= (str) str_nil; else s++; d = (char*) GDKmalloc(strlen(s) + 1); if (d) strcpy(d, s); *retval = d; return MAL_SUCCEED;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -