📄 xmerl_regexp.erl

📁 OTP是开放电信平台的简称
💻 ERL
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 34
    {NFA2,N2,E2};build_nfa({kclosure,RE}, N0, S, NFA0) ->    {NFA1,N1,E1} = build_nfa(RE, N0+1, N0, NFA0),    E = N1,    {[#nfa_state{no=S,edges=[{epsilon,N0},{epsilon,E}]},      #nfa_state{no=E1,edges=[{epsilon,N0},{epsilon,E}]}|NFA1],     N1+1,E};build_nfa({pclosure,RE}, N0, S, NFA0) ->    {NFA1,N1,E1} = build_nfa(RE, N0+1, N0, NFA0),    E = N1,    {[#nfa_state{no=S,edges=[{epsilon,N0}]},      #nfa_state{no=E1,edges=[{epsilon,N0},{epsilon,E}]}|NFA1],     N1+1,E};build_nfa({optional,RE}, N0, S, NFA0) ->    {NFA1,N1,E1} = build_nfa(RE, N0+1, N0, NFA0),    E = N1,    {[#nfa_state{no=S,edges=[{epsilon,N0},{epsilon,E}]},      #nfa_state{no=E1,edges=[{epsilon,E}]}|NFA1],     N1+1,E};build_nfa({iclosure,RE,I1,I2}, N, S, NFA) ->    Exp = expand_iclosure(RE, I1, I2),    build_nfa(Exp, N, S, NFA);build_nfa({char_class,Cc}, N, S, NFA) ->    {[#nfa_state{no=S,edges=[{nfa_char_class(Cc),N}]}|NFA],N+1,N};build_nfa({comp_class,Cc}, N, S, NFA) ->    {[#nfa_state{no=S,edges=[{nfa_comp_class(Cc),N}]}|NFA],N+1,N};build_nfa(epsilon, N, S, NFA) ->    {NFA,N,S};build_nfa({group,RE}, N, S, NFA) ->		%%% FIXME %%%%%%%    build_nfa(RE, N, S, NFA);build_nfa({subexpr,_N,RE}, N, S, NFA) ->	%%% FIXME %%%%%%%    build_nfa(RE, N, S, NFA);build_nfa(bos, N, S, NFA) ->    {[#nfa_state{no=S,edges=[{[bos],N}]}|NFA],N+1,N};build_nfa(eos, N, S, NFA) ->    {[#nfa_state{no=S,edges=[{[eos],N}]}|NFA],N+1,N};%%{[#nfa_state{no=S,edges=[{[eos],N}]}|NFA],N+1,N};build_nfa(C, N, S, NFA) when integer(C) ->    {[#nfa_state{no=S,edges=[{[{C,C}],N}]}|NFA],N+1,N}.nfa_char_class(Cc) ->    Crs = lists:foldl(fun({C1,C2}, Set) -> add_element({C1,C2}, Set);			 (C, Set) -> add_element({C,C}, Set) end, [], Cc),    %% io:fwrite("cc: ~p\n", [Crs]),    pack_crs(Crs).pack_crs([{C1,C2}=Cr,{C3,C4}|Crs]) when C1 =< C3, C2 >= C4 ->    %% C1      C2    %%   C3  C4    pack_crs([Cr|Crs]);pack_crs([{C1,C2},{C3,C4}|Crs]) when C2 >= C3, C2 < C4 ->    %% C1    C2    %%    C3   C4    pack_crs([{C1,C4}|Crs]);pack_crs([{C1,C2},{C3,C4}|Crs]) when C2 + 1 == C3 ->    %% C1   C2    %%        C3  C4    pack_crs([{C1,C4}|Crs]);pack_crs([Cr|Crs]) -> [Cr|pack_crs(Crs)];pack_crs([]) -> [].nfa_comp_class(Cc) ->    Crs = nfa_char_class(Cc),    %% io:fwrite("comp: ~p\n", [Crs]),    comp_crs(Crs, 0).comp_crs([{C1,C2}|Crs], Last) ->    [{Last,C1-1}|comp_crs(Crs, C2+1)];comp_crs([], Last) -> [{Last,maxchar}].%% build_dfa(NFA, NfaStartState) -> {DFA,DfaStartState}.%%  Build a DFA from an NFA using "subset construction". The major%%  difference from the book is that we keep the marked and unmarked%%  DFA states in seperate lists. New DFA states are added to the%%  unmarked list and states are marked by moving them to the marked%%  list. We assume that the NFA accepting state numbers are in%%  ascending order for the rules and use ordsets to keep this order.build_dfa(NFA0, Start) ->    %% We want NFA as sorted tuple for fast access, assume lowest state 1.    NFA1 = list_to_tuple(keysort(#nfa_state.no, NFA0)),    D = #dfa_state{no=0,nfa=eclosure([Start], NFA1),accept=no},    {build_dfa([D], 1, [], NFA1),0}.%% build_dfa([UnMarked], NextState, [Marked], NFA) -> DFA.%%  Traverse the unmarked states. Temporarily add the current unmarked%%  state to the marked list before calculating translation, this is%%  to avoid adding too many duplicate states. Add it properly to the%%  marked list afterwards with correct translations.build_dfa([U|Us0], N0, Ms, NFA) ->    {Ts,Us1,N1} = build_dfa(U#dfa_state.nfa, Us0, N0, [], [U|Ms], NFA),    M = U#dfa_state{trans=Ts,accept=accept(U#dfa_state.nfa, NFA)},    build_dfa(Us1, N1, [M|Ms], NFA);build_dfa([], _N, Ms, _NFA) -> Ms.%% build_dfa([NfaState], [Unmarked], NextState, [Transition], [Marked], NFA) ->%%	{Transitions,UnmarkedStates,NextState}.%%  Foreach NFA state set calculate the legal translations. N.B. must%%  search *BOTH* the unmarked and marked lists to check if DFA state%%  already exists. As the range of characters is potentially VERY%%  large we cannot explicitly test all characters. Instead we first%%  calculate the set of all disjoint character ranges which are%%  possible candidates to the set of NFA states.build_dfa(Set, Us, N, Ts, Ms, NFA) ->    %% List of all transition sets.    Crs0 = [Cr || S <- Set,		  {Crs,_St} <- (element(S, NFA))#nfa_state.edges,		  list(Crs),		  Cr <- Crs ],    Crs1 = lists:usort(Crs0),			%Must remove duplicates!    %% Build list of disjoint test ranges.    Test = disjoint_crs(Crs1),    %% io:fwrite("bd: ~p\n    ~p\n    ~p\n    ~p\n", [Set,Crs0,Crs1,Test]),    build_dfa(Test, Set, Us, N, Ts, Ms, NFA).%% disjoint_crs([CharRange]) -> [CharRange].%%  Take a sorted list of char ranges and make a sorted list of%%  disjoint char ranges. No new char range extends past an existing%%  char range.disjoint_crs([{_C1,C2}=Cr1,{C3,_C4}=Cr2|Crs]) when C2 < C3 ->    %% C1  C2    %%        C3  C4    [Cr1|disjoint_crs([Cr2|Crs])];disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 == C3 ->    %% C1     C2    %% C3       C4    [{C1,C2}|disjoint_crs(add_element({C2+1,C4}, Crs))];disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 < C3, C2 >= C3, C2 < C4 ->    %% C1     C2    %%    C3     C4    [{C1,C3-1}|disjoint_crs(union([{C3,C2},{C2+1,C4}], Crs))];disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 < C3, C2 == C4 ->    %% C1      C2    %%    C3   C4    [{C1,C3-1}|disjoint_crs(add_element({C3,C4}, Crs))];disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 < C3, C2 > C4 ->    %% C1        C2    %%    C3   C4    [{C1,C3-1}|disjoint_crs(union([{C3,C4},{C4+1,C2}], Crs))];disjoint_crs([Cr|Crs]) -> [Cr|disjoint_crs(Crs)];disjoint_crs([]) -> [].build_dfa([Cr|Crs], Set, Us, N, Ts, Ms, NFA) ->    case eclosure(move(Set, Cr, NFA), NFA) of	S when S /= [] ->	    case keysearch(S, #dfa_state.nfa, Us) of		{value,#dfa_state{no=T}} ->		    build_dfa(Crs, Set, Us, N, [{Cr,T}|Ts], Ms, NFA);		false ->		    case keysearch(S, #dfa_state.nfa, Ms) of			{value,#dfa_state{no=T}} ->			    build_dfa(Crs, Set, Us, N, [{Cr,T}|Ts], Ms, NFA);			false ->			    U = #dfa_state{no=N,nfa=S},			    build_dfa(Crs, Set, [U|Us], N+1, [{Cr,N}|Ts], Ms, NFA)		    end	    end;	[] ->	    build_dfa(Crs, Set, Us, N, Ts, Ms, NFA)    end;build_dfa([], _Set, Us, N, Ts, _Ms, _NFA) ->    {Ts,Us,N}.   %% eclosure([State], NFA) -> [State].%% move([State], Char, NFA) -> [State].%%  These are straight out of the book. As eclosure uses ordsets then%%  the generated state sets are in ascending order.eclosure(Sts, NFA) -> eclosure(Sts, NFA, []).eclosure([St|Sts], NFA, Ec) ->    #nfa_state{edges=Es} = element(St, NFA),    eclosure([ N || {epsilon,N} <- Es,		    not is_element(N, Ec) ] ++ Sts,	     NFA, add_element(St, Ec));eclosure([], _NFA, Ec) -> Ec.move(Sts, Cr, NFA) ->    [ St || N <- Sts,	    {Crs,St} <- (element(N, NFA))#nfa_state.edges,	    list(Crs),%% 	    begin%% 		io:fwrite("move1: ~p\n", [{Sts,Cr,Crs,in_crs(Cr,Crs)}]),%% 		true%% 	    end,	    in_crs(Cr, Crs) ].in_crs({C1,C2}, [{C3,C4}|_Crs]) when C1 >= C3, C2 =< C4 -> true;in_crs(Cr, [Cr|_Crs]) -> true;			%Catch bos and eos.in_crs(Cr, [_|Crs]) -> in_crs(Cr, Crs);in_crs(_Cr, []) -> false.%% accept([State], NFA) -> true | false.%%  Scan down the state list until we find an accepting state.accept([St|Sts], NFA) ->    case element(St, NFA) of	#nfa_state{accept={yes,A}} -> {yes,A};	#nfa_state{accept=no} -> accept(Sts, NFA)    end;accept([], _NFA) -> no.%% minimise_dfa(DFA, StartState, FirstState) -> {DFA,StartState}.%%  Minimise the DFA by removing equivalent states. We consider a%%  state if both the transitions and the their accept state is the%%  same.  First repeatedly run throught the DFA state list removing%%  equivalent states and updating remaining transitions with%%  remaining equivalent state numbers. When no more reductions are%%  possible then pack the remaining state numbers to get consecutive%%  states.minimise_dfa(DFA0, Start, N) ->    case min_dfa(DFA0) of	{DFA1,[]} ->				%No reduction!	    {DFA2,Rs} = pack_dfa(DFA1, N),	    {min_update(DFA2, Rs),min_new_state(Start, Rs)};	{DFA1,Rs} ->	    minimise_dfa(min_update(DFA1, Rs), min_new_state(Start, Rs), N)    end.min_dfa(DFA) -> min_dfa(DFA, [], []).min_dfa([D|DFA0], Rs0, MDFA) ->    {DFA1,Rs1} = min_delete(DFA0, D#dfa_state.trans, D#dfa_state.accept, 			    D#dfa_state.no, Rs0, []),    min_dfa(DFA1, Rs1, [D|MDFA]);min_dfa([], Rs, MDFA) -> {MDFA,Rs}.min_delete([#dfa_state{no=N,trans=T,accept=A}|DFA], T, A, NewN, Rs, MDFA) ->    min_delete(DFA, T, A, NewN, [{N,NewN}|Rs], MDFA);min_delete([D|DFA], T, A, NewN, Rs, MDFA) ->    min_delete(DFA, T, A, NewN, Rs, [D|MDFA]);min_delete([], _T, _A, _NewN, Rs, MDFA) -> {MDFA,Rs}.min_update(DFA, Rs) ->    [ D#dfa_state{trans=min_update_trans(D#dfa_state.trans, Rs)} || D <- DFA ].min_update_trans(Tr, Rs) ->    [ {C,min_new_state(S, Rs)} || {C,S} <- Tr ].min_new_state(Old, [{Old,New}|_Reds]) -> New;min_new_state(Old, [_R|Reds]) -> min_new_state(Old, Reds);min_new_state(Old, []) -> Old.pack_dfa(DFA, N) -> pack_dfa(DFA, N, [], []).pack_dfa([D|DFA], NewN, Rs, PDFA) ->    pack_dfa(DFA, NewN+1, [{D#dfa_state.no,NewN}|Rs],	     [D#dfa_state{no=NewN}|PDFA]);pack_dfa([], _NewN, Rs, PDFA) -> {PDFA,Rs}.%% comp_apply(String, StartPos, DFAReg) -> {match,RestPos,Rest} | nomatch.%% Apply the DFA of a regular expression to a string.  If%%  there is a match return the position of the remaining string and%%  the string if else return 'nomatch'.%%%%  StartPos should be the real start position as it is used to decide%%  if we are at the beginning of the string.comp_apply(Cs, P, {DFA,Start,_Fail}) ->    comp_apply(element(Start, DFA), Cs, P, DFA, nomatch).comp_apply(#c_state{spec=[]}=St, Cs, P, DFA, Accept) ->    comp_apply_tr(St, Cs, P, DFA, Accept);comp_apply(#c_state{spec=Sp}=St, Cs, P, DFA, Accept) ->    comp_apply_sp(St, Cs, P, DFA, Accept, Sp).comp_apply_tr(#c_state{trans=none,accept=A}, Cs, P, _DFA, Accept) ->    %% End state.    accept_value(A, Cs, P, Accept);comp_apply_tr(#c_state{trans=Tr,tmin=Tmin,smin=Smin,tmax=Tmax,smax=Smax,accept=A},	      [C|Cs]=Cs0, P, DFA, Accept) ->    %% Get the next state number to go to.    NextSt = if  C =< Tmin -> Smin;		%Below transition table		 C >= Tmax -> Smax;		%Above transition table		 true ->			%Otherwise use table 		     element(C - Tmin, Tr)	     end,    comp_apply(element(NextSt, DFA), Cs, P+1, DFA,	       accept_value(A, Cs0, P, Accept));comp_apply_tr(#c_state{trans=_Tr,accept=A}, [], P, _DFA, Accept) ->    accept_value(A, [], P, Accept).comp_apply_sp(_St, Cs, 1, DFA, Accept, [{bos,S}|_]) ->    comp_apply(element(S, DFA), Cs, 1, DFA, Accept);comp_apply_sp(_St, [$\n], P, DFA, Accept, [{eos,S}|_]) ->    comp_apply(element(S, DFA), [], P, DFA, Accept);comp_apply_sp(_St, [], P, DFA, Accept, [{eos,S}|_]) ->    comp_apply(element(S, DFA), [], P, DFA, Accept);comp_apply_sp(St, Cs, P, DFA, Accept, [_|Sp]) ->    comp_apply_sp(St, Cs, P, DFA, Accept, Sp);comp_apply_sp(St, Cs, P, DFA, Accept, []) ->    comp_apply_tr(St, Cs, P, DFA, Accept).    accept_value(true, Cs, P, _Accept) -> {match,P,Cs};accept_value(false, _Cs, _P, Accept) -> Accept.%% compile(RegExp) -> {ok,RE} | {error,E}.%%  Parse the regexp described in the string RegExp.compile(RegExp) ->    case make_dfa([{RegExp,yes}], 2) of	{ok,{DFA0,Start}} ->	    Fail = 1,	    DFA1 = [#dfa_state{no=Fail,accept=no,trans=[]}|DFA0],	    DFA = tuplelise_dfa(DFA1, 1),	    {ok,{comp_regexp,{DFA,Start,Fail}}};	{error,E} -> {error,E}    end.%% tuplelise_dfa(DFAstates, NoAcceptState) -> {{CompState},FirstState}.tuplelise_dfa(DFA0, NoAccept) ->    DFA1 = map(fun (#dfa_state{no=N,trans=Ts,accept=A}) ->		       {Tr,Tmin,Smin,Tmax,Smax,Sp} = build_trans(Ts, NoAccept),		       #c_state{no=N,trans=Tr,tmin=Tmin,smin=Smin,				tmax=Tmax,smax=Smax,				accept=fix_accept(A),spec=Sp}	       end, DFA0),    list_to_tuple(keysort(#dfa_state.no, DFA1)).build_trans(Ts0, NoAccept) ->    %% Split transitions into character ranges and specials.    {Ts1,Sp1} = foldl(fun ({{_,_},_}=T, {Ts,Sp}) -> {[T|Ts],Sp};			  ({_,_}=T, {Ts,Sp}) -> {Ts,[T|Sp]}		      end, {[],[]}, Ts0),    if Ts1 == [] ->	    {none,none,none,none,none,Sp1};       true ->	    %% Have transitions, convert to tuple.	    Ts2 = keysort(1, Ts1),	    {Tmin,Smin,Ts3} = min_trans(Ts2, NoAccept),	    %% io:fwrite("exptr: ~p\n", [{Ts3,Tmin}]),	    {Trans,Tmax,Smax} = expand_trans(Ts3, Tmin, NoAccept),	    {list_to_tuple(Trans),Tmin,Smin,Tmax,Smax,Sp1}    end.   min_trans([{{0,C2},S}|Crs], _Def) -> {C2,S,Crs};min_trans([{{C1,_C2},_S}|_]=Crs, Def) -> {C1-1,Def,Crs}.expand_trans([{{C1,maxchar},S}], Last, Def) ->    Trs = duplicate(C1-(Last+1), Def),    {Trs,C1,S};expand_trans([{{C1,C2},S}], Last, Def) ->    Trs = duplicate(C1-(Last+1), Def) ++ duplicate(C2-C1+1, S),    {Trs,C2+1,Def};expand_trans([{{C1,C2},S}|Crs], Last, Def) ->    {Trs0,Tmax,Smax} = expand_trans(Crs, C2, Def),    Trs1 = duplicate(C1-(Last+1), Def) ++ duplicate(C2-C1+1, S) ++ Trs0,    {Trs1,Tmax,Smax}.fix_accept({yes,_}) -> true;fix_accept(no) -> false.
上一页 1 2 34
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -