edoc_scanner.erl

来自「OTP是开放电信平台的简称」· ERL 代码 · 共 359 行

ERL
359
字号
%% ``The contents of this file are subject to the Erlang Public License,%% Version 1.1, (the "License"); you may not use this file except in%% compliance with the License. You should have received a copy of the%% Erlang Public License along with this software. If not, it can be%% retrieved via the world wide web at http://www.erlang.org/.%% %% Software distributed under the License is distributed on an "AS IS"%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See%% the License for the specific language governing rights and%% limitations under the License.%% %% The Initial Developer of the Original Code is Ericsson Utvecklings%% AB. Portions created by Ericsson are Copyright 1999, Ericsson%% Utvecklings AB. All Rights Reserved.''%%%% $Id$%%%% @private%% @copyright Richard Carlsson 2001-2003. Portions created by Ericsson%% are Copyright 1999, Ericsson Utvecklings AB. All Rights Reserved.%% @author Richard Carlsson <richardc@csd.uu.se>%% @see edoc%% @end %% @doc Tokeniser for EDoc. Based on the Erlang standard library module%% {@link //stdlib/erl_scan}.-module(edoc_scanner).%% NOTE: the interface to this module is ancient and should be updated.%% Please do not regard these exported functions as stable. Their%% behaviour is described in the documentation of the module `erl_scan'.%%%% Since there are no `full stop' tokens in EDoc specifications, the%% `tokens' function *always* returns `{more, Continuation}' unless an%% error occurs.-export([string/1,string/2,format_error/1]).-import(lists, [reverse/1]).string(Cs) -> string(Cs, 1).string(Cs, StartPos) ->    case scan(Cs, StartPos) of	{ok,Toks} -> {ok,Toks,StartPos};	{error,E} -> {error,E,StartPos}    end.%% format_error(Error)%%  Return a string describing the error.format_error({string,Quote,Head}) ->    ["unterminated string starting with " ++ io_lib:write_string(Head,Quote)];format_error({illegal,Type}) -> io_lib:fwrite("illegal ~w", [Type]);format_error(char) -> "unterminated character";format_error(scan) -> "premature end";format_error({base,Base}) -> io_lib:fwrite("illegal base '~w'", [Base]);format_error(float) -> "bad float";format_error(Other) -> io_lib:write(Other).%% Reserved words, not atoms:reserved('where') -> true;reserved(_) -> false.%% scan(CharList, StartPos)%%  This takes a list of characters and tries to tokenise them.%%%%  The token list is built in reverse order (in a stack) to save appending%%  and then reversed when all the tokens have been collected. Most tokens%%  are built in the same way.%%%%  Returns:%%	{ok,[Tok]}%%	{error,{ErrorPos,edoc_scanner,What}}scan(Cs, Pos) ->    scan1(Cs, [], Pos).%% scan1(Characters, TokenStack, Position)%%  Scan a list of characters into tokens.scan1([$\n|Cs], Toks, Pos) ->            	        % Newline    scan1(Cs, Toks, Pos+1);scan1([C|Cs], Toks, Pos) when C >= 0, C =< $  -> 	% Skip blanks    scan1(Cs, Toks, Pos);scan1([C|Cs], Toks, Pos) when C >= $a, C =< $z ->	% Unquoted atom    scan_atom(C, Cs, Toks, Pos);scan1([C|Cs], Toks, Pos) when C >= $0, C =< $9 ->	% Numbers    scan_number(C, Cs, Toks, Pos);scan1([$-,C| Cs], Toks, Pos) when C >= $0, C =< $9 ->	% Signed numbers    scan_signed_number($-, C, Cs, Toks, Pos);scan1([$+,C| Cs], Toks, Pos) when C >= $0, C =< $9 ->	% Signed numbers    scan_signed_number($+, C, Cs, Toks, Pos);scan1([C|Cs], Toks, Pos) when C >= $A, C =< $Z ->	% Variables    scan_variable(C, Cs, Toks, Pos);scan1([$_|Cs], Toks, Pos) ->				% Variables    scan_variable($_, Cs, Toks, Pos);scan1([$$|Cs], Toks, Pos) ->			% Character constant    case scan_char_const(Cs, Toks, Pos) of	{ok, Result} ->	    {ok, Result};	{error, truncated_char} ->	    scan_error(char, Pos);	{error, illegal_character} ->	    scan_error({illegal, char}, Pos)    end;scan1([$'|Cs0], Toks, Pos) ->				% Quoted atom    case scan_string(Cs0, $', Pos) of	{S,Cs1,Pos1} ->	    case catch list_to_atom(S) of		A when is_atom(A) ->		    scan1(Cs1, [{atom,Pos,A}|Toks], Pos1);		_Error -> scan_error({illegal,atom}, Pos)	    end;	{error, premature_end} ->	    scan_error({string,$',Cs0}, Pos);	{error, truncated_char} ->	    scan_error(char, Pos);	{error, illegal_character} ->	    scan_error({illegal, atom}, Pos)    end;scan1([$"|Cs0], Toks, Pos) ->				% String    case scan_string(Cs0, $", Pos) of	{S,Cs1,Pos1} ->	    case Toks of		[{string, Pos0, S0} | Toks1] ->		    scan1(Cs1, [{string, Pos0, S0 ++ S} | Toks1],			  Pos1);		_ ->		    scan1(Cs1, [{string,Pos,S}|Toks], Pos1)	    end;	{error, premature_end} ->	    scan_error({string,$",Cs0}, Pos);	{error, truncated_char} ->	    scan_error(char, Pos);	{error, illegal_character} ->	    scan_error({illegal, string}, Pos)    end;%% Punctuation characters and operators, first recognise multiples.scan1([$-,$>|Cs], Toks, Pos) ->    scan1(Cs, [{'->',Pos}|Toks], Pos);scan1([$:,$:|Cs], Toks, Pos) ->    scan1(Cs, [{'::',Pos}|Toks], Pos);scan1([$/,$/|Cs], Toks, Pos) ->    scan1(Cs, [{'//',Pos}|Toks], Pos);scan1([C|Cs], Toks, Pos) ->    % Punctuation character    P = list_to_atom([C]),    scan1(Cs, [{P,Pos}|Toks], Pos);scan1([], Toks0, _Pos) ->    Toks = reverse(Toks0),    {ok,Toks}.%% Note that `_' is not accepted as a variable token.scan_variable(C, Cs, Toks, Pos) ->    {Wcs,Cs1} = scan_name(Cs, []),    W = [C|reverse(Wcs)],    case W of	"_" ->	    scan_error({illegal,token}, Pos);	_ ->	    case catch list_to_atom(W) of		A when is_atom(A) ->		    scan1(Cs1, [{var,Pos,A}|Toks], Pos);		_ ->		    scan_error({illegal,variable}, Pos)	    end    end.scan_atom(C, Cs, Toks, Pos) ->    {Wcs,Cs1} = scan_name(Cs, []),    W = [C|reverse(Wcs)],    case catch list_to_atom(W) of	A when is_atom(A) ->	    case reserved(A) of		true ->		    scan1(Cs1, [{A,Pos}|Toks], Pos);		false ->		    scan1(Cs1, [{atom,Pos,A}|Toks], Pos)	    end;	_ ->	    scan_error({illegal,token}, Pos)    end.%% scan_name(Cs) -> lists:splitwith(fun (C) -> name_char(C) end, Cs).scan_name([C|Cs], Ncs) ->    case name_char(C) of	true ->	    scan_name(Cs, [C|Ncs]);	false ->	    {Ncs,[C|Cs]}		% Must rebuild here, sigh!    end;scan_name([], Ncs) ->    {Ncs,[]}.name_char(C) when C >= $a, C =< $z -> true;name_char(C) when C >= $\337, C =< $\377, C /= $\367 -> true;name_char(C) when C >= $A, C =< $Z -> true;name_char(C) when C >= $\300, C =< $\336, C /= $\327 -> true;name_char(C) when C >= $0, C =< $9 -> true;name_char($_) -> true;name_char($@) -> true;name_char(_) -> false.%% scan_string(CharList, QuoteChar, Pos) ->%%	{StringChars,RestChars, NewPos}scan_string(Cs, Quote, Pos) ->    scan_string(Cs, [], Quote, Pos).scan_string([Quote|Cs], Scs, Quote, Pos) ->    {reverse(Scs),Cs,Pos};scan_string([], _Scs, _Quote, _Pos) ->    {error, premature_end};scan_string(Cs0, Scs, Quote, Pos) ->    case scan_char(Cs0, Pos) of	{C,Cs,Pos1} ->	    %% Only build the string here	    scan_string(Cs, [C|Scs], Quote, Pos1);	Error ->	    Error    end.%% Note that space characters are not allowedscan_char_const([$\040 | _Cs0], _Toks, _Pos) ->    {error, illegal_character};scan_char_const(Cs0, Toks, Pos) ->    case scan_char(Cs0, Pos) of	{C,Cs,Pos1} ->	    scan1(Cs, [{char,Pos,C}|Toks], Pos1);	Error ->	    Error    end.%% {Character,RestChars,NewPos} = scan_char(Chars, Pos)%% Read a single character from a string or character constant. The%% pre-scan phase has checked for errors here.%% Note that control characters are not allowed.scan_char([$\\|Cs], Pos) ->    scan_escape(Cs, Pos);scan_char([C | _Cs], _Pos) when C =< 16#1f ->    {error, illegal_character};scan_char([C|Cs], Pos) ->    {C,Cs,Pos};scan_char([], _Pos) ->    {error, truncated_char}.%% The following conforms to Standard Erlang escape sequences.scan_escape([O1, O2, O3 | Cs], Pos) when        % \<1-3> octal digits  O1 >= $0, O1 =< $3, O2 >= $0, O2 =< $7, O3 >= $0, O3 =< $7 ->    Val = (O1*8 + O2)*8 + O3 - 73*$0,    {Val,Cs,Pos};scan_escape([O1, O2 | Cs], Pos) when  O1 >= $0, O1 =< $7, O2 >= $0, O2 =< $7 ->    Val = (O1*8 + O2) - 9*$0,    {Val,Cs,Pos};scan_escape([O1 | Cs], Pos) when  O1 >= $0, O1 =< $7 ->    {O1 - $0,Cs,Pos};scan_escape([$^, C | Cs], Pos) ->    % \^X -> CTL-X    if C >= $\100, C =< $\137 ->	    {C - $\100,Cs,Pos};       true -> {error, illegal_control_character}    end;scan_escape([C | Cs], Pos) ->    case escape_char(C) of	C1 when C1 > $\000 -> {C1,Cs,Pos};	_ -> {error, undefined_escape_sequence}    end;scan_escape([], _Pos) ->    {error, truncated_char}.%% Note that we return $\000 for undefined escapes.escape_char($b) -> $\010;		% \b = BSescape_char($d) -> $\177;		% \d = DELescape_char($e) -> $\033;		% \e = ESCescape_char($f) -> $\014;		% \f = FFescape_char($n) -> $\012;		% \n = LFescape_char($r) -> $\015;		% \r = CRescape_char($s) -> $\040;		% \s = SPCescape_char($t) -> $\011;		% \t = HTescape_char($v) -> $\013;		% \v = VTescape_char($\\) -> $\134;		% \\ = \escape_char($') -> $\047;		% \' = 'escape_char($") -> $\042;		% \" = "escape_char(_C) -> $\000.%% scan_number(Char, CharList, TokenStack, Pos)%%  We handle sign and radix notation:%%    [+-]<digits>		- the digits in base [+-]10%%    [+-]<digits>.<digits>%%    [+-]<digits>.<digits>E+-<digits>%%    [+-]<digits>#<digits>	- the digits read in base [+-]B%%%%  Except for explicitly based integers we build a list of all the%%  characters and then use list_to_integer/1 or list_to_float/1 to%%  generate the value.%%  SPos == Start position%%  CPos == Current positionscan_number(C, Cs0, Toks, Pos) ->    {Ncs,Cs,Pos1} = scan_integer(Cs0, [C], Pos),    scan_after_int(Cs, Ncs, Toks, Pos, Pos1).scan_signed_number(S, C, Cs0, Toks, Pos) ->    {Ncs,Cs,Pos1} = scan_integer(Cs0, [C, S], Pos),    scan_after_int(Cs, Ncs, Toks, Pos, Pos1).scan_integer([C|Cs], Stack, Pos) when C >= $0, C =< $9 ->    scan_integer(Cs, [C|Stack], Pos);scan_integer(Cs, Stack, Pos) ->    {Stack,Cs,Pos}.scan_after_int([$.,C|Cs0], Ncs0, Toks, SPos, CPos) when C >= $0, C =< $9 ->    {Ncs,Cs,CPos1} = scan_integer(Cs0, [C,$.|Ncs0], CPos),    scan_after_fraction(Cs, Ncs, Toks, SPos, CPos1);	scan_after_int(Cs, Ncs, Toks, SPos, CPos) ->    N = list_to_integer(reverse(Ncs)),    scan1(Cs, [{integer,SPos,N}|Toks], CPos).scan_after_fraction([$E|Cs], Ncs, Toks, SPos, CPos) ->    scan_exponent(Cs, [$E|Ncs], Toks, SPos, CPos);scan_after_fraction([$e|Cs], Ncs, Toks, SPos, CPos) ->    scan_exponent(Cs, [$e|Ncs], Toks, SPos, CPos);scan_after_fraction(Cs, Ncs, Toks, SPos, CPos) ->    case catch list_to_float(reverse(Ncs)) of	N when is_float(N) ->	    scan1(Cs, [{float,SPos,N}|Toks], CPos);	_Error -> scan_error({illegal,float}, SPos)    end.%% scan_exponent(CharList, NumberCharStack, TokenStack, StartPos, CurPos)%%  Generate an error here if E{+|-} not followed by any digits.scan_exponent([$+|Cs], Ncs, Toks, SPos, CPos) ->    scan_exponent1(Cs, [$+|Ncs], Toks, SPos, CPos);scan_exponent([$-|Cs], Ncs, Toks, SPos, CPos) ->    scan_exponent1(Cs, [$-|Ncs], Toks, SPos, CPos);scan_exponent(Cs, Ncs, Toks, SPos, CPos) ->    scan_exponent1(Cs, Ncs, Toks, SPos, CPos).scan_exponent1([C|Cs0], Ncs0, Toks, SPos, CPos) when C >= $0, C =< $9 ->    {Ncs,Cs,CPos1} = scan_integer(Cs0, [C|Ncs0], CPos),    case catch list_to_float(reverse(Ncs)) of	N when is_float(N) ->	    scan1(Cs, [{float,SPos,N}|Toks], CPos1);	_Error -> scan_error({illegal,float}, SPos)    end;scan_exponent1(_, _, _, _, CPos) ->    scan_error(float, CPos).scan_error(In, Pos) ->    {error,{Pos,edoc_scanner,In}}.

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?