📄 xmerl_regexp.erl

📁 OTP是开放电信平台的简称
💻 ERL
📖 第 1 页 / 共 4 页
字号:
	{error,E} -> {error,E}
    end;
matches(S, {regexp,RE}) -> {match,matches_re(S, RE, 1)};
matches(S, {comp_regexp,RE}) -> {match,matches_comp(S, RE, 1)}.

%% matches_re(String, RegExp, Position) -> [{Start,Length}].
%%  Walk String and collect one {Start,Length} pair per match reported
%%  by re_apply/3. Position is the 1-based position of the head of
%%  String. The scan stops early when re_apply/3 answers never_match.

matches_re([_|Cs]=S0, RE, P0) ->
    case re_apply(S0, P0, RE) of
        {match,P0,S1,_Subs} ->              %0 length match
            %% The match ended where it started; record it and step one
            %% character forward so the scan always makes progress.
            [{P0,0}|matches_re(tl(S1), RE, P0+1)];
        {match,P1,S1,_Subs} ->
            [{P0,P1-P0}|matches_re(S1, RE, P1)];
        nomatch -> matches_re(Cs, RE, P0+1);
        never_match -> []
    end;
matches_re([], _RE, _P) -> [].

%% matches_comp(String, CompRegExp, Position) -> [{Start,Length}].
%%  Same walk as matches_re/3 but driven by comp_apply/3, which is
%%  matched here against {match,End,Rest} | nomatch only.

matches_comp([_|Cs]=S0, RE, P0) ->
    case comp_apply(S0, P0, RE) of
        {match,P0,S1} ->                    %0 length match
            [{P0,0}|matches_comp(tl(S1), RE, P0+1)];
        {match,P1,S1} ->
            [{P0,P1-P0}|matches_comp(S1, RE, P1)];
        nomatch -> matches_comp(Cs, RE, P0+1)
    end;
matches_comp([], _RE, _P) -> [].

%% sub(String, RegExp, Replace) -> {ok,RepString,RepCount} | {error,E}.
%%  Substitute the first match of the regular expression RegExp with
%%  the string Replace in String. Accept pre-parsed regular
%%  expressions. RepCount is 1 when a substitution was made, else 0
%%  (String is then returned unchanged).

sub(String, RegExp, Rep) when is_list(RegExp) ->
    %% is_list/1 replaces the obsolete list/1 guard test.
    case parse(RegExp) of
        {ok,RE} -> sub(String, RE, Rep);
        {error,E} -> {error,E}
    end;
sub(String, {regexp,RE}, Rep) ->
    case sub_re(String, 1, RE, [], Rep) of
        {yes,NewStr} -> {ok,NewStr,1};
        no -> {ok,String,0}
    end;
sub(String, {comp_regexp,RE}, Rep) ->
    case sub_comp(String, 1, RE, [], Rep) of
        {yes,NewStr} -> {ok,NewStr,1};
        no -> {ok,String,0}
    end.

%% sub_re(String, Position, Regexp, Before, Replacement) ->
%%      {yes,NewString} | no.
%% sub_comp(String, Position, Regexp, Before, Replacement) ->
%%      {yes,NewString} | no.
%% Step forward over String until a match is found saving stepped over
%% chars in Before.
%% Return reversed Before prepended to replacement
%% and rest of string.

sub_re([C|Cs]=S0, P0, RE, Bef, Rep) ->
    case re_apply(S0, P0, RE) of
        {match,P0,_S1,_} ->                 %Ignore 0 length match
            sub_re(Cs, P0+1, RE, [C|Bef], Rep);
        {match,P1,Rest,_Gps} ->
            %% substr/3 cuts the matched text (P1-P0 chars) out of S0 so
            %% that sub_repl/3 can insert it wherever Rep contains "&".
            {yes,reverse(Bef, sub_repl(Rep, substr(S0, 1, P1-P0), Rest))};
        nomatch -> sub_re(Cs, P0+1, RE, [C|Bef], Rep);
        never_match -> no                   %No need to go on
    end;
sub_re([], _P, _RE, _Bef, _Rep) -> no.

sub_comp([C|Cs]=S0, P0, RE, Bef, Rep) ->
    case comp_apply(S0, P0, RE) of
        {match,P0,_S1} ->                   %Ignore 0 length match
            sub_comp(Cs, P0+1, RE, [C|Bef], Rep);
        {match,P1,Rest} ->
            {yes,reverse(Bef, sub_repl(Rep, substr(S0, 1, P1-P0), Rest))};
        nomatch -> sub_comp(Cs, P0+1, RE, [C|Bef], Rep)
    end;
sub_comp([], _P, _RE, _Bef, _Rep) -> no.

%% sub_repl(Replacement, Matched, Rest) -> String.
%%  Expand Replacement in front of Rest: a bare "&" is replaced by the
%%  matched text, "\&" yields a literal "&", and every other character
%%  is copied as is.

sub_repl([$&|Rep], M, Rest) -> M ++ sub_repl(Rep, M, Rest);
sub_repl("\\&" ++ Rep, M, Rest) -> [$&|sub_repl(Rep, M, Rest)];
sub_repl([C|Rep], M, Rest) -> [C|sub_repl(Rep, M, Rest)];
sub_repl([], _M, Rest) -> Rest.

%%  gsub(String, RegExp, Replace) -> {ok,RepString,RepCount} | {error,E}.
%%  Substitute every match of the regular expression RegExp with the
%%  string Replace in String. Accept pre-parsed regular expressions.
%%  RepCount is the number of substitutions made; when it is 0 the
%%  original String is returned.

gsub(String, RegExp, Rep) when is_list(RegExp) ->
    %% is_list/1 replaces the obsolete list/1 guard test.
    case parse(RegExp) of
        {ok,RE} -> gsub(String, RE, Rep);
        {error,E} -> {error,E}
    end;
gsub(String, {regexp,RE}, Rep) ->
    case gsub_re(String, 1, RE, [], Rep) of
        {NewStr,N} -> {ok,NewStr,N};
        no -> {ok,String,0}                 %No substitutions
    end;
gsub(String, {comp_regexp,RE}, Rep) ->
    case gsub_comp(String, 1, RE, [], Rep) of
        {NewStr,N} -> {ok,NewStr,N};
        no -> {ok,String,0}                 %No substitutions
    end.

%% gsub_re(String, Position, Regexp, Before, Replacement) ->
%%      {NewString,Count} | no.
%% gsub_comp(String, Position, Regexp, Before, Replacement) ->
%%      {NewString,Count} | no.
%% Step forward over String until a match is found saving stepped over
%% chars in Before. Call recursively to do rest of string after
%% match.
%% Return reversed Before prepended to return from recursive
%% call.

gsub_re([], _Pos, _RE, _Skipped, _Rep) -> no;
gsub_re([Ch|Tail]=Str, Pos, RE, Skipped, Rep) ->
    case re_apply(Str, Pos, RE) of
        {match,Pos,_Rest,_Groups} ->
            %% Zero length match: treat it as no match and step on.
            gsub_re(Tail, Pos+1, RE, [Ch|Skipped], Rep);
        {match,End,Rest,_Groups} ->
            %% Substitute in the remainder first. 'no' means the
            %% remainder is kept untouched and contributes no count.
            {Done,Count} =
                case gsub_re(Rest, End, RE, [], Rep) of
                    {NewStr,N} -> {NewStr,N};
                    no -> {Rest,0}
                end,
            Matched = substr(Str, 1, End-Pos),
            {reverse(Skipped, sub_repl(Rep, Matched, Done)),Count+1};
        nomatch ->
            %% No match here, so keep the character and step forward.
            gsub_re(Tail, Pos+1, RE, [Ch|Skipped], Rep);
        never_match ->
            no                              %No need to go on
    end.

gsub_comp([], _Pos, _RE, _Skipped, _Rep) -> no;
gsub_comp([Ch|Tail]=Str, Pos, RE, Skipped, Rep) ->
    case comp_apply(Str, Pos, RE) of
        {match,Pos,_Rest} ->
            %% Zero length match: treat it as no match and step on.
            gsub_comp(Tail, Pos+1, RE, [Ch|Skipped], Rep);
        {match,End,Rest} ->
            %% Same scheme as gsub_re/5: remainder first, then this match.
            {Done,Count} =
                case gsub_comp(Rest, End, RE, [], Rep) of
                    {NewStr,N} -> {NewStr,N};
                    no -> {Rest,0}
                end,
            Matched = substr(Str, 1, End-Pos),
            {reverse(Skipped, sub_repl(Rep, Matched, Done)),Count+1};
        nomatch ->
            %% No match here, so keep the character and step forward.
            gsub_comp(Tail, Pos+1, RE, [Ch|Skipped], Rep)
    end.

%% split(String, RegExp) -> {ok,[SubString]} | {error,E}.
%%  Split a string into substrings where the RegExp describes the
%%  field separator.
%%  The RegExp " " is specially treated.

split(String, " ") ->                           %This is really special
    %% A single blank splits on runs of blanks/tabs; an empty first
    %% field (i.e. leading separator) is dropped from the result.
    {ok,{regexp,RE}} = parse("[ \t]+"),
    case split_apply_re(String, RE, true) of
        [[]|Ss] -> {ok,Ss};
        Ss -> {ok,Ss}
    end;
split(String, RegExp) when is_list(RegExp) ->
    %% is_list/1 replaces the obsolete list/1 guard test.
    case parse(RegExp) of
        {ok,{regexp,RE}} -> {ok,split_apply_re(String, RE, false)};
        {error,E} -> {error,E}
    end;
split(String, {regexp,RE}) -> {ok,split_apply_re(String, RE, false)};
split(String, {comp_regexp,RE}) -> {ok,split_apply_comp(String, RE, false)}.

%% split_apply_re(String, RegExp, Trim) -> [SubString].
%%  Sub accumulates (reversed) the characters of the current field.
%%  With Trim set to true an empty final field is left out.

split_apply_re(S, RE, Trim) -> split_apply_re(S, 1, RE, Trim, []).

split_apply_re([], _P, _RE, true, []) -> [];
split_apply_re([], _P, _RE, _T, Sub) -> [reverse(Sub)];
split_apply_re([C|Cs]=S, P0, RE, T, Sub) ->
    case re_apply(S, P0, RE) of
        {match,P0,_S1,_} ->                 %Ignore 0 length match
            split_apply_re(Cs, P0+1, RE, T, [C|Sub]);
        {match,P1,S1,_} ->
            [reverse(Sub)|split_apply_re(S1, P1, RE, T, [])];
        nomatch ->
            split_apply_re(Cs, P0+1, RE, T, [C|Sub]);
        never_match -> [reverse(Sub, S)]    %No need to go on
    end.

%% split_apply_comp(String, CompRegExp, Trim) -> [SubString].
%%  Same as split_apply_re/3 but driven by comp_apply/3. The trimming
%%  clause is disabled, so Trim is carried along but never inspected.

split_apply_comp(S, RE, Trim) -> split_apply_comp(S, 1, RE, Trim, []).

%%split_apply_comp([], _P, _RE, true, []) -> [];
split_apply_comp([], _P, _RE, _T, Sub) -> [reverse(Sub)];
split_apply_comp([C|Cs]=S, P0, RE, T, Sub) ->
    case comp_apply(S, P0, RE) of
        {match,P0,_S1} ->                   %Ignore 0 length match
            split_apply_comp(Cs, P0+1, RE, T, [C|Sub]);
        {match,P1,S1} ->
            [reverse(Sub)|split_apply_comp(S1, P1, RE, T, [])];
        nomatch ->
            split_apply_comp(Cs, P0+1, RE, T, [C|Sub])
    end.

%% sub_match(String, RegExp) ->
%%      {match,Start,Length,SubExprs} | nomatch | {error,E}.
%%  Find the longest match of RegExp in String.

sub_match(S, RegExp) when is_list(RegExp) ->
    %% is_list/1 replaces the obsolete list/1 guard test.
    case parse(RegExp) of
        {ok,RE} -> sub_match(S, RE);
        {error,E} -> {error,E}
    end;
sub_match(S, {regexp,RE}) ->
    %% The search starts with length -1, so a result whose length is
    %% still negative means that nothing matched at all.
    case sub_match_re(RE, S, 1, 0, -1, none) of
        {Start,Len,Subs} when Len >= 0 ->
            {match,Start,Len,Subs};
        {_Start,_Len,_Subs} -> nomatch
    end.

%% sub_match_re(RE, String, Pos, BestStart, BestLen, BestSubs) ->
%%      {Start,Len,Subs}.
%%  Repeatedly ask first_match_re/3 for a match, restarting one
%%  position after the start of the previous one, and keep the match
%%  with the greatest length (the earlier one wins on equal length).

sub_match_re(RE, S0, Pos0, Mst, Mlen, Msubs) ->
    case first_match_re(RE, S0, Pos0) of
        {St,Len,Subs} ->                    %Found a match
            Pos1 = St + 1,                  %Where to start next match
            S1 = lists:nthtail(Pos1-Pos0, S0),
            if Len > Mlen -> sub_match_re(RE, S1, Pos1, St, Len, Subs);
               true -> sub_match_re(RE, S1, Pos1, Mst, Mlen, Msubs)
            end;
        nomatch -> {Mst,Mlen,Msubs}
    end.

%% sub_first_match(String, RegExp) ->
%%       {match,Start,Length,SubExprs} | nomatch | {error,E}.
%%  Return the match that first_match_re/3 locates when started at
%%  position 1: Start and Length as well as the sub-expression matches.
%%  A RegExp string that fails to parse gives {error,E}, as promised
%%  above and as sub_match/2 does, instead of a badmatch crash.

sub_first_match(S, RegExp) when is_list(RegExp) ->
    case parse(RegExp) of
        {ok,RE} -> sub_first_match(S, RE);
        {error,E} -> {error,E}
    end;
sub_first_match(S, {regexp,RE}) ->
    case first_match_re(RE, S, 1) of
        {St,Len,Subs} -> {match,St,Len,Subs};
        nomatch -> nomatch
    end.

%% This is the regular expression grammar used. It is equivalent to the
%% one used in AWK, except that we allow ^ $ to be used anywhere and fail
%% in the matching.
%%
%% reg -> reg1 : '$1'.
%% reg1 -> reg1 "|" reg2 : {'or','$1','$2'}.
%% reg1 -> reg2 : '$1'.
%% reg2 -> reg2 reg3 : {concat,'$1','$2'}.
%% reg2 -> reg3 : '$1'.
%% reg3 -> reg3 "*" : {kclosure,'$1'}.
%% reg3 -> reg3 "+" : {pclosure,'$1'}.
%% reg3 -> reg3 "?" : {optional,'$1'}.
%% reg3 -> reg3 "{" [Min],[Max] "}" : {closure_range, Num, '$1'} see below
%% reg3 -> reg4 : '$1'.
%% reg4 -> "(" reg ")" : '$2'.
%% reg4 -> "\\" char : '$2'.
%% reg4 -> "^" : bos.
%% reg4 -> "$" : eos.
%% reg4 -> "." : char.
%% reg4 -> "[" class "]" : {char_class,char_class('$2')}
%% reg4 -> "[" "^" class "]" : {comp_class,char_class('$3')}
%% reg4 -> "\"" chars "\"" : char_string('$2')
%% reg4 -> char : '$1'.
%% reg4 -> empty : epsilon.
%%  The grammar of the current regular expressions.
%%  The actual parser
%%  is a recursive descent implementation of the grammar.
%%
%%  Every parsing function takes the remaining input and the current
%%  sub-expression counter and returns {Tree,Counter,RestOfInput}.

reg(Input, Count) -> reg1(Input, Count).

%% reg1 -> reg2 reg1'
%% reg1' -> "|" reg2
%% reg1' -> empty

reg1(Input, Count0) ->
    {Left,Count1,Rest} = reg2(Input, Count0),
    reg1p(Rest, Left, Count1).

reg1p([$||Input], Left, Count0) ->
    {Right,Count1,Rest} = reg2(Input, Count0),
    reg1p(Rest, {'or',Left,Right}, Count1);
reg1p(Input, Left, Count) -> {Left,Count,Input}.

%% reg2 -> reg3 reg2'
%% reg2' -> reg3
%% reg2' -> empty

reg2(Input, Count0) ->
    {Left,Count1,Rest} = reg3(Input, Count0),
    reg2p(Rest, Left, Count1).

reg2p([C|_]=Input, Left, Count0) when C /= $|, C /= $) ->
    {Right,Count1,Rest} = reg3(Input, Count0),
    reg2p(Rest, reg2p_merge(Left, Right), Count1);
reg2p(Input, Left, Count) -> {Left,Count,Input}.

%% reg2p_merge(Left, Right) -> Tree.
%%  Combine what has been parsed so far with the next item. A plain
%%  character (an integer) following a character or a literal is folded
%%  into a {literal,Chars} node; everything else becomes a concat node.

reg2p_merge({literal,Lit}, Ch) when is_integer(Ch) ->
    {literal,Lit ++ [Ch]};
reg2p_merge({concat,Head,Prev}, Ch) when is_integer(Prev), is_integer(Ch) ->
    {concat,Head,{literal,[Prev,Ch]}};
reg2p_merge({concat,Head,{literal,Lit}}, Ch) when is_integer(Ch) ->
    {concat,Head,{literal,Lit ++ [Ch]}};
reg2p_merge(Prev, Ch) when is_integer(Prev), is_integer(Ch) ->
    {literal,[Prev,Ch]};
reg2p_merge(Left, Right) ->
    {concat,Left,Right}.

%% reg3 -> reg4 reg3'
%% reg3' -> "*" reg3'
%% reg3' -> "+" reg3'
%% reg3' -> "?" reg3'
%% reg3' -> "{" [Min],[Max] "}" reg3'
%% reg3' -> empty

reg3(Input, Count0) ->
    {Item,Count1,Rest} = reg4(Input, Count0),
    reg3p(Rest, Item, Count1).

reg3p([$*|Rest], Item, Count) -> reg3p(Rest, {kclosure,Item}, Count);
reg3p([$+|Rest], Item, Count) -> reg3p(Rest, {pclosure,Item}, Count);
reg3p([$?|Rest], Item, Count) -> reg3p(Rest, {optional,Item}, Count);
reg3p([${|Range], Item, Count) ->               % $}
    case interval_range(Range) of
        {none,none,_Rest} ->
            parse_error({interval_range,[${|Range]});
        {Min,Max,[$}|Rest]} ->
            reg3p(Rest, {iclosure,Item,Min,Max}, Count);
        {_Min,_Max,_Rest} ->
            parse_error({unterminated,"{"})
    end;
reg3p(Input, Item, Count) -> {Item,Count,Input}.

%% reg4 parses a single item. Only its first clauses are visible in this
%% part of the file (the last one below ends with ";"), so they are kept
%% exactly as they were.

reg4([$(|S0], Sc0) ->
    Sc1 = Sc0+1,
    case reg(S0, Sc1) of
        {R,Sc2,[$)|S1]} -> {{subexpr,Sc1,R},Sc2,S1};
        {_R,_Sc,_S} -> parse_error({unterminated,"("})
    end;
reg4([$^|S], Sc) -> {bos,Sc,S};
reg4([$$|S], Sc) -> {eos,Sc,S};
reg4([$.|S], Sc) -> {{comp_class,"\n"},Sc,S};
reg4("[^" ++ S0, Sc) ->
    case char_class(S0) of
        {Cc,[$]|S1]} -> {{comp_class,Cc},Sc,S1};
        {_Cc,_S} -> parse_error({unterminated,"["})
    end;
reg4([$[|S0], Sc) ->
    case char_class(S0) of
        {Cc,[$]|S1]} -> {{char_class,Cc},Sc,S1};
        {_Cc,_S1} -> parse_error({unterminated,"["})
    end;
%reg4([$"|S0], Sc) ->
%    case char_string(S0) of
%	{St,[$"|S1]} -> {St,Sc,S1};
%	{St,S1} -> parse_error({unterminated,"\""})
%    end;
reg4([C0|S0], Sc) when
  is_integer(C0), C0 /= $*, C0 /= $+, C0 /= $?, C0 /= $], C0 /= $), C0 /= $} ->
    %% Handle \ quoted characters as well, at least those we see.
    {C1,S1} = char(C0, S0),
    {C1,Sc,S1};
reg4(S=[$)|_], Sc) -> {epsilon,Sc,S};
reg4([C|_S], _Sc) -> parse_error({illegal,[C]});
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -