📄 batstr.mx
字号:
@' The contents of this file are subject to the MonetDB Public License
@' Version 1.1 (the "License"); you may not use this file except in
@' compliance with the License. You may obtain a copy of the License at
@' http://monetdb.cwi.nl/Legal/MonetDBLicense-1.1.html
@'
@' Software distributed under the License is distributed on an "AS IS"
@' basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
@' License for the specific language governing rights and limitations
@' under the License.
@'
@' The Original Code is the MonetDB Database System.
@'
@' The Initial Developer of the Original Code is CWI.
@' Portions created by CWI are Copyright (C) 1997-2007 CWI.
@' All Rights Reserved.

@f batstr
@a M.L. Kersten
@+ String multiplexes
[TODO: property propagations]
The collection of routines provided here are map operations
for the atom string primitives.

In line with the batcalc module, we assume that
if two bat operands are provided that they are already
aligned on the head. Moreover, the head of the BATs
are limited to :void, which can be cheaply realized using
the GRPsplit operation.
@{
@mal
command batcalc.length( s:bat[:oid,:str] ) :bat[:oid,:int]
address STRbatLength
comment "Return the length of a string.";

command batcalc.nbytes( s:bat[:oid,:str] ) :bat[:oid,:int]
address STRbatBytes
comment "Return the string length in bytes.";

command batcalc.match(b:bat[:oid,:str], pat:str):bat[:oid,:bit]
address STRbatmatchCst
comment "POSIX pattern matching against a string BAT";

command batcalc.==( l:bat[:oid,:str], r:bat[:oid,:str]) :bat[:oid,:bit]
address STRbatEqual
comment "Equate a bat of strings against each other";

command batcalc.==( l:bat[:oid,:str], r:str) :bat[:oid,:bit]
address STRbatEqualCst
comment "Equate a bat of strings against a singleton";

command batcalc.!=( l:bat[:oid,:str], r:bat[:oid,:str]) :bat[:oid,:bit]
address STRbatNotEqual
comment "Check a bat of strings for inequality against each other";

command batcalc.!=( l:bat[:oid,:str], r:str) :bat[:oid,:bit]
address STRbatNotEqualCst
comment "Check a bat of strings for inequality against a singleton";

command batcalc.toLower( s:bat[:oid,:str] ) :bat[:oid,:str]
address STRbatLower
comment "Convert a string to lower case.";

command batcalc.toUpper( s:bat[:oid,:str] ) :bat[:oid,:str]
address STRbatUpper
comment "Convert a string to upper case.";

command batcalc.trim( s:bat[:oid,:str] ) :bat[:oid,:str]
address STRbatStrip
comment "Strip whitespaces around a string.";

command batcalc.ltrim( s:bat[:oid,:str] ) :bat[:oid,:str]
address STRbatLtrim
comment "Strip whitespaces from start of a string.";

command batcalc.rtrim( s:bat[:oid,:str] ) :bat[:oid,:str]
address STRbatRtrim
comment "Strip whitespaces from end of a string.";

command batcalc.startsWith(s:bat[:oid,:str],prefix:bat[:oid,:str]):bat[:oid,:bit]
address STRbatPrefix
comment "Prefix check.";

command batcalc.startsWith(s:bat[:oid,:str],prefix:str):bat[:oid,:bit]
address STRbatPrefixcst
comment "Prefix check.";

command batcalc.endsWith( s:bat[:oid,:str], suffix:bat[:oid,:str] ) :bat[:oid,:bit]
address STRbatSuffix
comment "Suffix check.";

command batcalc.endsWith( s:bat[:oid,:str], suffix:str ) :bat[:oid,:bit]
address STRbatSuffixcst
comment "Suffix check.";

command batcalc.search( s:bat[:oid,:str], c:bat[:oid,:str] ) :bat[:oid,:int]
address STRbatstrSearch
comment "Search for a substring. Returns position, -1 if not found.";

command batcalc.search( s:bat[:oid,:str], c:str ) :bat[:oid,:int]
address STRbatstrSearchcst
comment "Search for a substring. Returns position, -1 if not found.";

command batcalc.r_search( s:bat[:oid,:str], c:bat[:oid,:str] ) :bat[:oid,:int]
address STRbatRstrSearch
comment "Reverse search for a substring. Returns position, -1 if not found.";

command batcalc.r_search( s:bat[:oid,:str], c:str ) :bat[:oid,:int]
address STRbatRstrSearchcst
comment "Reverse search for a substring. Returns position, -1 if not found.";

command batcalc.+( l:bat[:oid,:str], r:bat[:oid,:str]) :bat[:oid,:str]
address STRbatConcat
comment "Concatenate two strings.";

command batcalc.+( l:bat[:oid,:str], r:str) :bat[:oid,:str]
address STRbatConcatcst
comment "Concatenate a string to a bat of strings.";

command batcalc.+( l:str, r:bat[:oid,:str]) :bat[:oid,:str]
address STRcstConcatbat
comment "Concatenate two strings.";

command batcalc.string(b:bat[:oid,:str],offset:bat[:oid,:int]) :bat[:oid,:str]
address STRbatTail
comment "Return the tail s[offset..n] of a string s[0..n].";

command batcalc.string(b:bat[:oid,:str],offset:int) :bat[:oid,:str]
address STRbatTailcst
comment "Return the tail s[offset..n] of a string s[0..n].";

command batcalc.chrAt( s:bat[:oid,:str], index:bat[:oid,:int]) :bat[:oid,:chr]
address STRbatChrAt
comment "String array lookup operation.";

command batcalc.chrAt( s:bat[:oid,:str], index:int) :bat[:oid,:chr]
address STRbatChrAtcst
comment "String array lookup operation.";

command batcalc.substring( s:bat[:oid,:str], start:bat[:oid,:int], index:bat[:oid,:int]) :bat[:oid,:str]
address STRbatsubstring
comment "Substring extraction using [start,start+length]";

command batcalc.substring( s:bat[:oid,:str], start:int, index:int) :bat[:oid,:str]
address STRbatsubstringcst
comment "Substring extraction using [start,start+length]";

command batcalc.unicodeAt(s:bat[:oid,:str], index:bat[:oid,:int]) :bat[:oid,:int]
address STRbatWChrAt
comment "get a unicode character (as an int) from a string position.";

command batcalc.unicodeAt(s:bat[:oid,:str], index:int) :bat[:oid,:int]
address STRbatWChrAtcst
comment "get a unicode character (as an int) from a string position.";

command batcalc.substitute(s:bat[:oid,:str],src:str,dst:str,rep:bit):bat[:oid,:str]
address STRbatSubstitutecst
comment "Substitute first occurrence of 'src' by 'dst'. Iff repeated = true this is repeated while 'src' can be found in the result string. In order to prevent recursion and result strings of unlimited size, repeating is only done iff src is not a substring of dst.";

command batcalc.like(s:bat[:oid,:str],pat:str):bat[:oid,:oid]
address STRbatlike2
comment "Perform SQL like operation against a string bat";

command batcalc.like(s:bat[:oid,:str],pat:str,esc:str):bat[:oid,:oid]
address STRbatlike
comment "Perform SQL like operation against a string bat";

@+ Implementation
@c
#include "mal_config.h"
#include <gdk.h>
#include "ctype.h"
#include <string.h>
#include "mal_exception.h"
#include "str.h"

#ifdef HAVE_REGEX_H
#include <regex.h>
#endif
#ifdef HAVE_LANGINFO_H
#include <langinfo.h>
#endif
#ifdef HAVE_ICONV_H
#include <iconv.h>
#endif

#ifdef WIN32
#ifndef LIBBATSTR
#define batstr_export extern __declspec(dllimport)
#else
#define batstr_export extern __declspec(dllexport)
#endif
#else
#define batstr_export extern
#endif

/*
 * prepareOperand(X,Y,Z): fetch the BAT descriptor for the bat id *Y into X.
 * Throws a MAL exception tagged "batstr.<Z>" when the descriptor cannot be
 * obtained.
 */
#define prepareOperand(X,Y,Z) \
	if( (X= BATdescriptor(*Y)) == NULL ) \
		throw(MAL, "batstr." Z, "Cannot access descriptor");

/*
 * prepareOperand2(X,Y,A,B,Z): same as prepareOperand for two operands.
 * When the second descriptor cannot be obtained the reference on the first
 * one is released before the exception is thrown.
 */
#define prepareOperand2(X,Y,A,B,Z) \
	if( (X= BATdescriptor(*Y)) == NULL ) \
		throw(MAL, "batstr." Z, "Cannot access descriptor"); \
	if( (A= BATdescriptor(*B)) == NULL ){\
		BBPreleaseref(X->batCacheid); \
		throw(MAL, "batstr." Z, "Cannot access descriptor"); \
	}

/*
 * prepareResult(X,Y,T,Z): create the result BAT X with the head type of Y,
 * tail type T and room for BATcount(Y) entries.
 * The NULL check is done before X is touched in any way; on allocation
 * failure the reference on Y is released and an exception is thrown.
 * For a void-headed operand the sequence base of Y is copied to X.
 * The head sortedness of Y is copied; the tail is marked as not sorted.
 */
#define prepareResult(X,Y,T,Z) \
	X= BATnew(Y->htype,T,BATcount(Y)); \
	if( X == NULL){ \
		BBPreleaseref(Y->batCacheid); \
		throw(MAL, "batstr." Z, "no space available "); \
	} \
	if( Y->htype== TYPE_void) \
		BATseqbase(X, Y->hseqbase); \
	X->hsorted=Y->hsorted; \
	X->tsorted=0;

/*
 * prepareResult2(X,Y,A,T,Z): as prepareResult, but with two operands (Y, A)
 * whose references are both released when the allocation of X fails.
 */
#define prepareResult2(X,Y,A,T,Z) \
	X= BATnew(Y->htype,T,BATcount(Y)); \
	if( X == NULL){ \
		BBPreleaseref(Y->batCacheid); \
		BBPreleaseref(A->batCacheid); \
		throw(MAL, "batstr." Z, "no space available "); \
	} \
	if( Y->htype== TYPE_void) \
		BATseqbase(X, Y->hseqbase); \
	X->hsorted=Y->hsorted; \
	X->tsorted=0;

/*
 * finalizeResult(X,Y,Z): hand the result BAT Y to the caller through *X.
 * Unless bit 2 of batDirty is set, Y is switched to BAT_READ access first.
 * The result reference is kept and the reference on operand Z is released.
 */
#define finalizeResult(X,Y,Z) \
	if (!((Y)->batDirty&2)) (Y) = BATsetaccess((Y), BAT_READ); \
	*X = (Y)->batCacheid; \
	BBPkeepref(*(X));\
	BBPreleaseref(Z->batCacheid);

batstr_export str STRbatsubstringcst(int *ret, int *bid, int *start, int *length);
batstr_export str STRbatsubstring(int *ret, int *bid, int *start, int *length);

@= BATint
batstr_export str STRbat@1(int *ret, int *l);
/*
 * Map an int-valued string primitive over the tail of the bat *l.
 * ret receives the id of a new bat with the same heads and int tails.
 * A null pointer, an empty string or a nil string yields int_nil.
 * NOTE(review): mapping the empty string to nil looks questionable for a
 * length-like primitive; kept as is because callers may depend on it.
 * Returns MAL_SUCCEED, or throws when the operand cannot be accessed,
 * the result cannot be allocated or an insert fails.
 */
str STRbat@1(int *ret, int *l)
{
	BAT *bn, *b;
	BUN p,q;
	int xx;
	@3 x;
	int y, *yp = &y;

	prepareOperand(b,l,"@1");
	prepareResult(bn,b,TYPE_int,"@1");

	BATloopFast(b, p, q, xx) {
		ptr h = BUNhead(b,p);
		x= (@3) BUNtail(b,p);
		if(x== 0 || *x == 0 || strcmp(x,@3_nil)== 0)
			y= int_nil;
		else
			@2(yp,x);
		bunfastins(bn, h, yp);
	}
	finalizeResult(ret,bn,b);
	return MAL_SUCCEED;
bunins_failed:
	/* bunfastins jumps here on failure: drop operand and partial result */
	BBPreleaseref(b->batCacheid);
	BBPunfix(bn->batCacheid);
	throw(MAL, "batstr.@1", "bunins failed");
}
@c
@:BATint(Length,strLength,str)@
@:BATint(Bytes,strBytes,str)@

@= BATstr
batstr_export str STRbat@1(int *ret, int *l);
/*
 * Map a string-to-string primitive over the tail of the bat *l.
 * ret receives the id of a new bat with the same heads and the
 * transformed tails. A null pointer, an empty string or a nil string
 * yields the nil value of the result type.
 * NOTE(review): the value stored in y by the primitive is never released
 * here; if the primitive allocates its result this leaks once per row.
 * Confirm against str.h before adding a free.
 * Returns MAL_SUCCEED, or throws when the operand cannot be accessed,
 * the result cannot be allocated or an insert fails.
 */
str STRbat@1(int *ret, int *l)
{
	BAT *bn, *b;
	BUN p,q;
	int xx;
	@3 x;
	@4 y, *yp = &y;

	prepareOperand(b,l,"@1");
	prepareResult(bn,b,TYPE_@4,"@1");

	BATloopFast(b, p, q, xx) {
		ptr h = BUNhead(b,p);
		x= (@3) BUNtail(b,p);
		if(x== 0 || *x == 0 || strcmp(x,@3_nil)== 0)
			y = (@4)@4_nil;
		else
			@2(yp,x);
		bunfastins(bn, h, *yp);
	}
	finalizeResult(ret,bn,b);
	return MAL_SUCCEED;
bunins_failed:
	/* bunfastins jumps here on failure: drop operand and partial result */
	BBPreleaseref(b->batCacheid);
	BBPunfix(bn->batCacheid);
	throw(MAL, "batstr.@1", "bunins failed");
}
@c
@:BATstr(Lower,strLower,str,str)@
@:BATstr(Upper,strUpper,str,str)@
@:BATstr(Strip,strStrip,str,str)@
@:BATstr(Ltrim,strLtrim,str,str)@
@:BATstr(Rtrim,strRtrim,str,str)@
@-
@c
@= STRbatcstcmp
batstr_export str STRbat@2Cst(int *ret, int *l, str *cst);
/*
 * Compare every tail string of the bat *l with the constant *cst.
 * ret receives the id of a new bat with the same heads and bit tails.
 * A null pointer, an empty string or a nil string yields bit_nil.
 * NOTE(review): *cst itself is not checked for nil.
 * The tail of the result is left marked as not sorted (as set up by
 * prepareResult): a comparison against a constant over sorted input gives
 * a false/true/false pattern, so no order may be claimed. The third macro
 * argument is therefore no longer used.
 * Returns MAL_SUCCEED, or throws when the operand cannot be accessed,
 * the result cannot be allocated or an insert fails.
 */
str
STRbat@2Cst(int *ret, int *l, str *cst)
{
	BAT *bn, *b;
	BUN p, q;
	int xx;
	str x;
	bit y;

	prepareOperand(b, l, "@1");
	prepareResult(bn, b, TYPE_bit, "@1");

	BATloopFast(b, p, q, xx) {
		ptr h = BUNhead(b, p);

		x = (str) BUNtail(b, p);
		if (x == 0 || *x == 0 || strcmp(x, str_nil) == 0)
			y = bit_nil;
		else
			y = strcmp(x, *cst) @1 0;
		bunfastins(bn, h, &y);
	}
	finalizeResult(ret, bn, b);
	return MAL_SUCCEED;
bunins_failed:
	/* bunfastins jumps here on failure: drop operand and partial result */
	BBPreleaseref(b->batCacheid);
	BBPunfix(bn->batCacheid);
	throw(MAL, "batstr.@1", "bunins failed");
}
@c
@:STRbatcstcmp(==,Equal,GDK_SORTED_REV)@
@:STRbatcstcmp(!=,NotEqual,GDK_SORTED)@
@-
A general assumption in all cases is the bats are synchronized on their
head column. This is not checked and may be mis-used to deploy the
implementation for shifted window arithmetic as well.
@= chkSize
	if( BATcount(@1) != BATcount(@2) )
		throw(MAL, "batcalc.@3", "requires bats of identical size");
@c
@= STRbatcmp
batstr_export str STRbat@2(int *ret, int *l, int *r);
/*
 * Compare the tail strings of the bats *l and *r position by position.
 * ret receives the id of a new bat with the heads of *l and bit tails.
 * Both bats must have the same number of entries; head alignment is
 * assumed and not checked. A null tail pointer on either side yields FALSE.
 * NOTE(review): nil strings are compared like ordinary strings here,
 * whereas the constant variant yields bit_nil for them.
 * The tail of the result is left marked as not sorted (as set up by
 * prepareResult2): sorted inputs do not imply a sorted comparison result.
 * The third macro argument is therefore no longer used.
 * Returns MAL_SUCCEED, or throws when an operand cannot be accessed, the
 * sizes differ, the result cannot be allocated or an insert fails.
 */
str
STRbat@2(int *ret, int *l, int *r)
{
	BAT *bn, *left, *right;
	BUN o, p, q;
	int xx, yy;

	prepareOperand2(left, l, right, r, "@1");
	/* size check done inline so both operands are released on mismatch */
	if (BATcount(left) != BATcount(right)) {
		BBPreleaseref(left->batCacheid);
		BBPreleaseref(right->batCacheid);
		throw(MAL, "batcalc.CMDcompare@1", "requires bats of identical size");
	}
	prepareResult2(bn, left, right, TYPE_bit, "@1");

	o = BUNfirst(left);
	p = BUNfirst(right);
	q = BUNlast(right);
	xx = BUNsize(left);
	yy = BUNsize(right);

	/* walk both bats in lock step; the equal counts keep o within left */
	while (p < q) {
		bit y = TRUE;
		ptr h = BUNhead(left, o);
		ptr tl = BUNtail(left, o);
		ptr tr = BUNtail(right, p);

		if (tl == 0 || tr == 0 || !(strcmp(tl, tr) @1 0))
			y = FALSE;
		bunfastins(bn, h, &y);
		o += xx;
		p += yy;
	}
	BBPreleaseref(right->batCacheid);
	finalizeResult(ret, bn, left);
	return MAL_SUCCEED;
bunins_failed:
	/* *ret is not assigned yet on this path, so unfix the result via bn */
	BBPreleaseref(left->batCacheid);
	BBPreleaseref(right->batCacheid);
	BBPunfix(bn->batCacheid);
	throw(MAL, "batstr.@1", "bunins failed");
}
@c
@:STRbatcmp(==,Equal,GDK_SORTED_REV)@
@:STRbatcmp(!=,NotEqual,GDK_SORTED)@
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -