📄 xmerl_regexp.erl
字号:
{error,E} -> {error,E} end;
matches(S, {regexp,RE}) -> {match,matches_re(S, RE, 1)};
matches(S, {comp_regexp,RE}) -> {match,matches_comp(S, RE, 1)}.

%% matches_re(String, Regexp, Position) -> [{Start,Length}].
%% matches_comp(String, CompRegexp, Position) -> [{Start,Length}].
%%  Collect the matches found while scanning String from left to
%%  right, starting at Position. After a zero length match one
%%  character is skipped so that the scan always moves forward.

matches_re([_|Cs]=S0, RE, P0) ->
    case re_apply(S0, P0, RE) of
        {match,P0,S1,_Subs} ->                  %0 length match
            [{P0,0}|matches_re(tl(S1), RE, P0+1)];
        {match,P1,S1,_Subs} ->
            [{P0,P1-P0}|matches_re(S1, RE, P1)];
        nomatch -> matches_re(Cs, RE, P0+1);
        never_match -> []                       %Stop scanning
    end;
matches_re([], _RE, _P) -> [].

matches_comp([_|Cs]=S0, RE, P0) ->
    case comp_apply(S0, P0, RE) of
        {match,P0,S1} ->                        %0 length match
            [{P0,0}|matches_comp(tl(S1), RE, P0+1)];
        {match,P1,S1} ->
            [{P0,P1-P0}|matches_comp(S1, RE, P1)];
        nomatch -> matches_comp(Cs, RE, P0+1)
    end;
matches_comp([], _RE, _P) -> [].

%% sub(String, RegExp, Replace) -> {ok,RepString,RepCount} | {error,E}.
%%  Substitute the first match of the regular expression RegExp with
%%  the string Replace in String. Accept pre-parsed regular
%%  expressions. RepCount is 1 when a substitution was made and 0
%%  when String is returned unchanged.

sub(String, RegExp, Rep) when is_list(RegExp) ->
    %% is_list/1 replaces the obsolete list/1 guard test.
    case parse(RegExp) of
        {ok,RE} -> sub(String, RE, Rep);
        {error,E} -> {error,E}
    end;
sub(String, {regexp,RE}, Rep) ->
    case sub_re(String, 1, RE, [], Rep) of
        {yes,NewStr} -> {ok,NewStr,1};
        no -> {ok,String,0}
    end;
sub(String, {comp_regexp,RE}, Rep) ->
    case sub_comp(String, 1, RE, [], Rep) of
        {yes,NewStr} -> {ok,NewStr,1};
        no -> {ok,String,0}
    end.

%% sub_re(String, Position, Regexp, Before, Replacement) ->
%%      {yes,NewString} | no.
%% sub_comp(String, Position, Regexp, Before, Replacement) ->
%%      {yes,NewString} | no.
%%  Step forward over String until a match is found saving stepped over
%%  chars in Before.
%%  Return reversed Before prepended to replacement
%%  and rest of string.

%% sub_re/5 gives {yes,NewString} for the first non-empty match and
%% no when the end of the string is reached or re_apply/3 reports
%% never_match. Zero length matches are stepped over.

sub_re([C|Cs]=S0, P0, RE, Bef, Rep) ->
    case re_apply(S0, P0, RE) of
        {match,P0,_S1,_} ->                     %Ignore 0 length match
            sub_re(Cs, P0+1, RE, [C|Bef], Rep);
        {match,P1,Rest,_Gps} ->
            %% The first P1-P0 characters of S0 are the matched text.
            {yes,reverse(Bef, sub_repl(Rep, substr(S0, 1, P1-P0), Rest))};
        nomatch -> sub_re(Cs, P0+1, RE, [C|Bef], Rep);
        never_match -> no                       %No need to go on
    end;
sub_re([], _P, _RE, _Bef, _Rep) -> no.

%% sub_comp/5 is the same loop for compiled regular expressions,
%% driven by comp_apply/3.

sub_comp([C|Cs]=S0, P0, RE, Bef, Rep) ->
    case comp_apply(S0, P0, RE) of
        {match,P0,_S1} ->                       %Ignore 0 length match
            sub_comp(Cs, P0+1, RE, [C|Bef], Rep);
        {match,P1,Rest} ->
            {yes,reverse(Bef, sub_repl(Rep, substr(S0, 1, P1-P0), Rest))};
        nomatch -> sub_comp(Cs, P0+1, RE, [C|Bef], Rep)
    end;
sub_comp([], _P, _RE, _Bef, _Rep) -> no.

%% sub_repl(Replacement, Match, Rest) -> String.
%%  Expand Replacement in front of Rest. A & in Replacement is
%%  replaced by Match, the two characters \& give a literal &, and
%%  every other character is copied unchanged.

sub_repl([$&|Rep], M, Rest) -> M ++ sub_repl(Rep, M, Rest);
sub_repl("\\&" ++ Rep, M, Rest) -> [$&|sub_repl(Rep, M, Rest)];
sub_repl([C|Rep], M, Rest) -> [C|sub_repl(Rep, M, Rest)];
sub_repl([], _M, Rest) -> Rest.

%% gsub(String, RegExp, Replace) -> {ok,RepString,RepCount} | {error,E}.
%%  Substitute every match of the regular expression RegExp with the
%%  string Replace in String. Accept pre-parsed regular expressions.
%%  RepCount is the number of substitutions made, 0 when String is
%%  returned unchanged.

gsub(String, RegExp, Rep) when is_list(RegExp) ->
    %% is_list/1 replaces the obsolete list/1 guard test.
    case parse(RegExp) of
        {ok,RE} -> gsub(String, RE, Rep);
        {error,E} -> {error,E}
    end;
gsub(String, {regexp,RE}, Rep) ->
    case gsub_re(String, 1, RE, [], Rep) of
        {NewStr,N} -> {ok,NewStr,N};
        no -> {ok,String,0}                     %No substitutions
    end;
gsub(String, {comp_regexp,RE}, Rep) ->
    case gsub_comp(String, 1, RE, [], Rep) of
        {NewStr,N} -> {ok,NewStr,N};
        no -> {ok,String,0}                     %No substitutions
    end.

%% gsub_re(String, Position, Regexp, Before, Replacement) ->
%%      {NewString,Count} | no.
%% gsub_comp(String, Position, Regexp, Before, Replacement) ->
%%      {NewString,Count} | no.
%%  Step forward over String until a match is found saving stepped over
%%  chars in Before. Call recursively to do rest of string after
%%  match.
%%  Return reversed Before prepended to return from recursive
%%  call.

%% gsub_re/5 replaces every non-empty match. The remainder of the
%% string is processed first; when it reports no, this match is the
%% only one from here on and the count starts at 1.

gsub_re([], _Pos, _RE, _Skipped, _Rep) -> no;
gsub_re([Ch|Tail]=Str, Pos, RE, Skipped, Rep) ->
    case re_apply(Str, Pos, RE) of
        {match,Pos,_Same,_} ->
            %% Zero length match: treat it like no match at all.
            gsub_re(Tail, Pos+1, RE, [Ch|Skipped], Rep);
        {match,End,After,_Groups} ->
            {Suffix,Count} =
                case gsub_re(After, End, RE, [], Rep) of
                    {Replaced,N} -> {Replaced,N+1}; %More substitutions
                    no -> {After,1}                 %This was the last one
                end,
            Matched = substr(Str, 1, End-Pos),
            {reverse(Skipped, sub_repl(Rep, Matched, Suffix)),Count};
        nomatch ->
            %% Nothing here, remember Ch and try the next position.
            gsub_re(Tail, Pos+1, RE, [Ch|Skipped], Rep);
        never_match ->
            no                                  %No need to go on
    end.

%% gsub_comp/5 is the same loop for compiled regular expressions,
%% driven by comp_apply/3.

gsub_comp([], _Pos, _RE, _Skipped, _Rep) -> no;
gsub_comp([Ch|Tail]=Str, Pos, RE, Skipped, Rep) ->
    case comp_apply(Str, Pos, RE) of
        {match,Pos,_Same} ->
            %% Zero length match: treat it like no match at all.
            gsub_comp(Tail, Pos+1, RE, [Ch|Skipped], Rep);
        {match,End,After} ->
            {Suffix,Count} =
                case gsub_comp(After, End, RE, [], Rep) of
                    {Replaced,N} -> {Replaced,N+1}; %More substitutions
                    no -> {After,1}                 %This was the last one
                end,
            Matched = substr(Str, 1, End-Pos),
            {reverse(Skipped, sub_repl(Rep, Matched, Suffix)),Count};
        nomatch ->
            %% Nothing here, remember Ch and try the next position.
            gsub_comp(Tail, Pos+1, RE, [Ch|Skipped], Rep)
    end.

%% split(String, RegExp) -> {ok,[SubString]} | {error,E}.
%%  Split a string into substrings where the RegExp describes the
%%  field separator.
%%  The RegExp " " is specially treated.

%% For the separator " " the string is split on runs of blanks and
%% tabs, and an empty first field is dropped.

split(String, " ") ->                           %This is really special
    {ok,{regexp,RE}} = parse("[ \t]+"),
    case split_apply_re(String, RE, true) of
        [[]|Ss] -> {ok,Ss};
        Ss -> {ok,Ss}
    end;
split(String, RegExp) when is_list(RegExp) ->
    %% is_list/1 replaces the obsolete list/1 guard test.
    case parse(RegExp) of
        {ok,{regexp,RE}} -> {ok,split_apply_re(String, RE, false)};
        {error,E} -> {error,E}
    end;
split(String, {regexp,RE}) -> {ok,split_apply_re(String, RE, false)};
split(String, {comp_regexp,RE}) -> {ok,split_apply_comp(String, RE, false)}.

%% split_apply_re(String, Regexp, Trim) -> [SubString].
%%  Walk over String collecting characters in Sub (reversed) until a
%%  non-empty match is found, then emit the collected field and start
%%  a new one after the match. With Trim set to true an empty last
%%  field is dropped.

split_apply_re(S, RE, Trim) -> split_apply_re(S, 1, RE, Trim, []).

split_apply_re([], _P, _RE, true, []) -> [];
split_apply_re([], _P, _RE, _T, Sub) -> [reverse(Sub)];
split_apply_re([C|Cs]=S, P0, RE, T, Sub) ->
    case re_apply(S, P0, RE) of
        {match,P0,_S1,_} ->                     %Ignore 0 length match
            split_apply_re(Cs, P0+1, RE, T, [C|Sub]);
        {match,P1,S1,_} ->
            [reverse(Sub)|split_apply_re(S1, P1, RE, T, [])];
        nomatch ->
            split_apply_re(Cs, P0+1, RE, T, [C|Sub]);
        never_match ->
            [reverse(Sub, S)]                   %No need to go on
    end.

%% split_apply_comp(String, CompRegexp, Trim) -> [SubString].
%%  Same walk for compiled regular expressions, driven by
%%  comp_apply/3.
%%  NOTE: the trimming clause that split_apply_re/5 has is disabled
%%  here, so the Trim flag is carried along but never inspected.

split_apply_comp(S, RE, Trim) -> split_apply_comp(S, 1, RE, Trim, []).

split_apply_comp([], _P, _RE, _T, Sub) -> [reverse(Sub)];
split_apply_comp([C|Cs]=S, P0, RE, T, Sub) ->
    case comp_apply(S, P0, RE) of
        {match,P0,_S1} ->                       %Ignore 0 length match
            split_apply_comp(Cs, P0+1, RE, T, [C|Sub]);
        {match,P1,S1} ->
            [reverse(Sub)|split_apply_comp(S1, P1, RE, T, [])];
        nomatch ->
            split_apply_comp(Cs, P0+1, RE, T, [C|Sub])
    end.

%% sub_match(String, RegExp) ->
%%      {match,Start,Length,SubExprs} | nomatch | {error,E}.
%%  Find the longest match of RegExp in String.

sub_match(S, RegExp) when is_list(RegExp) ->
    %% is_list/1 replaces the obsolete list/1 guard test.
    case parse(RegExp) of
        {ok,RE} -> sub_match(S, RE);
        {error,E} -> {error,E}
    end;
sub_match(S, {regexp,RE}) ->
    %% The search starts with length -1, so a negative length in the
    %% result means that nothing matched at all.
    case sub_match_re(RE, S, 1, 0, -1, none) of
        {Start,Len,Subs} when Len >= 0 -> {match,Start,Len,Subs};
        {_Start,_Len,_Subs} -> nomatch
    end.

%% sub_match_re(Regexp, String, Position, BestStart, BestLen, BestSubs) ->
%%      {Start,Length,SubExprs}.
%%  Repeatedly ask first_match_re/3 for the next match, restarting one
%%  character after the start of each match found, and keep the
%%  longest one. The comparison is strict, so of equally long matches
%%  the earliest is kept.

sub_match_re(RE, S0, Pos0, Mst, Mlen, Msubs) ->
    case first_match_re(RE, S0, Pos0) of
        {St,Len,Subs} ->                        %Found a match
            Pos1 = St + 1,                      %Where to start next match
            S1 = lists:nthtail(Pos1-Pos0, S0),
            if Len > Mlen -> sub_match_re(RE, S1, Pos1, St, Len, Subs);
               true -> sub_match_re(RE, S1, Pos1, Mst, Mlen, Msubs)
            end;
        nomatch -> {Mst,Mlen,Msubs}
    end.

%% sub_first_match(String, RegExp) ->
%%      {match,Start,Length,SubExprs} | nomatch | {error,E}.
%%  Find the first match of RegExp in String, as reported by
%%  first_match_re/3, return Start and Length as well as the
%%  sub-expression matches.

sub_first_match(S, RegExp) when is_list(RegExp) ->
    %% Report a parse error as {error,E}, as documented and as
    %% sub_match/2 does, instead of failing with badmatch.
    case parse(RegExp) of
        {ok,RE} -> sub_first_match(S, RE);
        {error,E} -> {error,E}
    end;
sub_first_match(S, {regexp,RE}) ->
    case first_match_re(RE, S, 1) of
        {St,Len,Subs} -> {match,St,Len,Subs};
        nomatch -> nomatch
    end.

%% This is the regular expression grammar used. It is equivalent to the
%% one used in AWK, except that we allow ^ $ to be used anywhere and fail
%% in the matching.
%%
%% reg -> reg1 : '$1'.
%% reg1 -> reg1 "|" reg2 : {'or','$1','$2'}.
%% reg1 -> reg2 : '$1'.
%% reg2 -> reg2 reg3 : {concat,'$1','$2'}.
%% reg2 -> reg3 : '$1'.
%% reg3 -> reg3 "*" : {kclosure,'$1'}.
%% reg3 -> reg3 "+" : {pclosure,'$1'}.
%% reg3 -> reg3 "?" : {optional,'$1'}.
%% reg3 -> reg3 "{" [Min],[Max] "}" : {closure_range, Num, '$1'} see below
%% reg3 -> reg4 : '$1'.
%% reg4 -> "(" reg ")" : '$2'.
%% reg4 -> "\\" char : '$2'.
%% reg4 -> "^" : bos.
%% reg4 -> "$" : eos.
%% reg4 -> "." : char.
%% reg4 -> "[" class "]" : {char_class,char_class('$2')}
%% reg4 -> "[" "^" class "]" : {comp_class,char_class('$3')}
%% reg4 -> "\"" chars "\"" : char_string('$2')
%% reg4 -> char : '$1'.
%% reg4 -> empty : epsilon.

%%  The grammar of the current regular expressions.
%%  The actual parser
%%  is a recursive descent implementation of the grammar.
%%
%%  Each parsing function takes the remaining input and the current
%%  sub-expression counter and returns {Node,Counter,RestOfInput}.

reg(S, Sc) -> reg1(S, Sc).

%% reg1 -> reg2 reg1'
%% reg1' -> "|" reg2
%% reg1' -> empty

reg1(Input, Count0) ->
    {Left,Count1,Rest} = reg2(Input, Count0),
    reg1p(Rest, Left, Count1).

%% Fold alternatives to the left: a|b|c gives {'or',{'or',a,b},c}.
reg1p([$| | Input], Left, Count0) ->
    {Right,Count1,Rest} = reg2(Input, Count0),
    reg1p(Rest, {'or',Left,Right}, Count1);
reg1p(Rest, Left, Count) -> {Left,Count,Rest}.

%% reg2 -> reg3 reg2'
%% reg2' -> reg3
%% reg2' -> empty

reg2(Input, Count0) ->
    {Left,Count1,Rest} = reg3(Input, Count0),
    reg2p(Rest, Left, Count1).

%% Concatenate the next item onto Left. When the new item is a plain
%% character it is merged with a preceding character or literal into
%% one {literal,String} node; everything else becomes {concat,L,R}.
reg2p([C|_]=Input, Left, Count0) when C /= $|, C /= $) ->
    {Right,Count1,Rest} = reg3(Input, Count0),
    Node =
        case Left of
            {literal,Lit} when is_integer(Right) ->
                {literal,Lit ++ [Right]};
            {concat,Head,Ch} when is_integer(Ch), is_integer(Right) ->
                {concat,Head,{literal,[Ch,Right]}};
            {concat,Head,{literal,Lit}} when is_integer(Right) ->
                {concat,Head,{literal,Lit ++ [Right]}};
            Ch when is_integer(Ch), is_integer(Right) ->
                {literal,[Ch,Right]};
            _ ->
                {concat,Left,Right}
        end,
    reg2p(Rest, Node, Count1);
reg2p(Rest, Left, Count) -> {Left,Count,Rest}.

%% reg3 -> reg4 reg3'
%% reg3' -> "*" reg3'
%% reg3' -> "+" reg3'
%% reg3' -> "?" reg3'
%% reg3' -> "{" [Min],[Max] "}" reg3'
%% reg3' -> empty

reg3(Input, Count0) ->
    {Item,Count1,Rest} = reg4(Input, Count0),
    reg3p(Rest, Item, Count1).

%% Wrap Item in one node per postfix operator that follows it. An
%% interval {Min,Max} is stored as {iclosure,Item,Min,Max}.
reg3p([$*|Rest], Item, Count) -> reg3p(Rest, {kclosure,Item}, Count);
reg3p([$+|Rest], Item, Count) -> reg3p(Rest, {pclosure,Item}, Count);
reg3p([$?|Rest], Item, Count) -> reg3p(Rest, {optional,Item}, Count);
reg3p([${|Range], Item, Count) ->               % $}
    case interval_range(Range) of
        {none,none,_Rest} ->
            parse_error({interval_range,[${|Range]});
        {Min,Max,[$}|Rest]} ->
            reg3p(Rest, {iclosure,Item,Min,Max}, Count);
        {_Min,_Max,_Rest} ->
            parse_error({unterminated,"{"})
    end;
reg3p(Rest, Item, Count) -> {Item,Count,Rest}.

%% reg4 parses a single item. Its clause list continues after the
%% last clause shown here.
reg4([$(|S0], Sc0) ->
    Sc1 = Sc0+1,
    case reg(S0, Sc1) of
        {R,Sc2,[$)|S1]} -> {{subexpr,Sc1,R},Sc2,S1};
        {_R,_Sc,_S} -> parse_error({unterminated,"("})
    end;
reg4([$^|S], Sc) -> {bos,Sc,S};
reg4([$$|S], Sc) -> {eos,Sc,S};
reg4([$.|S], Sc) -> {{comp_class,"\n"},Sc,S};
reg4("[^" ++ S0, Sc) ->
    case char_class(S0) of
        {Cc,[$]|S1]} -> {{comp_class,Cc},Sc,S1};
        {_Cc,_S} -> parse_error({unterminated,"["})
    end;
reg4([$[|S0], Sc) ->
    case char_class(S0) of
        {Cc,[$]|S1]} -> {{char_class,Cc},Sc,S1};
        {_Cc,_S1} -> parse_error({unterminated,"["})
    end;
%reg4([$"|S0], Sc) ->
%    case char_string(S0) of
%        {St,[$"|S1]} -> {St,Sc,S1};
%        {St,S1} -> parse_error({unterminated,"\""})
%    end;
reg4([C0|S0], Sc) when
      is_integer(C0), C0 /= $*, C0 /= $+, C0 /= $?, C0 /= $], C0 /= $), C0 /= $} ->
    %% Handle \ quoted characters as well, at least those we see.
    {C1,S1} = char(C0, S0),
    {C1,Sc,S1};
reg4(S=[$)|_], Sc) -> {epsilon,Sc,S};
reg4([C|_S], _Sc) -> parse_error({illegal,[C]});
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -