⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lexer.c

📁 我搜集到的一个java常用类库的源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
static void MapStr(char *str, uint code){    uint i;    while (*str)    {        i = (uint)(*str++);        lexmap[i] |= code;    }}void InitMap(void){    MapStr("\r\n\f", newline|white);    MapStr(" \t", white);    MapStr("-.:_", namechar);    MapStr("0123456789", digit|namechar);    MapStr("abcdefghijklmnopqrstuvwxyz", lowercase|letter|namechar);    MapStr("ABCDEFGHIJKLMNOPQRSTUVWXYZ", uppercase|letter|namechar);}/* parser for ASP within start tags Some people use ASP for to customize attributes Tidy isn't really well suited to dealing with ASP This is a workaround for attributes, but won't deal with the case where the ASP is used to tailor the attribute value. Here is an example of a work around for using ASP in attribute values:  href="<%=rsSchool.Fields("ID").Value%>" where the ASP that generates the attribute value is masked from Tidy by the quotemarks.*/Node *ParseAsp(Lexer *lexer){    uint c;    Node *asp = null;    lexer->txtstart = lexer->lexsize;    for (;;)    {        c = ReadChar(lexer->in);        AddCharToLexer(lexer, c);        if (c != '%')            continue;        c = ReadChar(lexer->in);        AddCharToLexer(lexer, c);        if (c == '>')            break;    }    lexer->lexsize -= 2;    lexer->txtend = lexer->lexsize;    if (lexer->txtend > lexer->txtstart)        asp = AspToken(lexer);    lexer->txtstart = lexer->txtend;    return asp;}    /* PHP is like ASP but is based upon XML processing instructions, e.g. <?php ... ?>*/Node *ParsePhp(Lexer *lexer){    uint c;    Node *php = null;    lexer->txtstart = lexer->lexsize;    for (;;)    {        c = ReadChar(lexer->in);        AddCharToLexer(lexer, c);        if (c != '?')            continue;        c = ReadChar(lexer->in);        AddCharToLexer(lexer, c);        if (c == '>')            break;    }    lexer->lexsize -= 2;    lexer->txtend = lexer->lexsize;    if (lexer->txtend > lexer->txtstart)        php = PhpToken(lexer);    lexer->txtstart = lexer->txtend;    return php;}   /* consumes the '>' terminating start tags */char  *ParseAttribute(Lexer *lexer, Bool *isempty, Node **asp, Node **php){    int map, start, len = 0;    char *attr;    uint c;    *asp = null;  /* clear asp pointer */    *php = null;  /* clear php pointer */ /* skip white space before the attribute */    for (;;)    {        c = ReadChar(lexer->in);        if (c == '/')        {            c = ReadChar(lexer->in);            if (c == '>')            {                *isempty = yes;                return null;            }            UngetChar(c, lexer->in);            c = '/';            break;        }        if (c == '>')            return null;        if (c =='<')        {            c = ReadChar(lexer->in);            if (c == '%')            {                *asp = ParseAsp(lexer);                return null;            }            else if (c == '?')            {                *php = ParsePhp(lexer);                return null;            }            UngetChar(c, lexer->in);            ReportAttrError(lexer, lexer->token, null, UNEXPECTED_GT);            return null;        }        if (c == '"' || c == '\'')        {            ReportAttrError(lexer, lexer->token, null, UNEXPECTED_QUOTEMARK);            continue;        }        if (c == EndOfStream)        {            ReportAttrError(lexer, lexer->token, null, UNEXPECTED_END_OF_FILE);            UngetChar(c, lexer->in);            return null;        }        map = MAP(c);        if ((map & white) == 0)           break;    }    start = lexer->lexsize;    for (;;)    {     /* but push back '=' for parseValue() */        if (c == '=' || c == '>')        {            UngetChar(c, lexer->in);            break;        }        if (c == '<' || c == EndOfStream)        {            UngetChar(c, lexer->in);            break;        }        map = MAP(c);        if ((map & white) != 0)            break;     /* what should be done about non-namechar characters? */     /* currently these are incorporated into the attr name */        if (!XmlTags && (map & uppercase) != 0)            c += (uint)('a' - 'A');        ++len;        AddCharToLexer(lexer, c);        c = ReadChar(lexer->in);    }    attr = (len > 0 ? wstrndup(lexer->lexbuf+start, len) : null);    lexer->lexsize = start;    return attr;}/* invoked when < is seen in place of attribute value but terminates on whitespace if not ASP, PHP or Tango this routine recognizes ' and " quoted strings*/int ParseServerInstruction(Lexer *lexer){    int c, map, delim = '"';    Bool isrule = no;    c = ReadChar(lexer->in);    AddCharToLexer(lexer, c);    /* check for ASP, PHP or Tango */    if (c == '%' || c == '?' || c == '@')        isrule = yes;    for (;;)    {        c = ReadChar(lexer->in);        if (c == EndOfStream)            break;        if (c == '>')        {            if (isrule)                AddCharToLexer(lexer, c);            else                UngetChar(c, lexer->in);            break;        }        /* if not recognized as ASP, PHP or Tango */        /* then also finish value on whitespace */        if (!isrule)        {            map = MAP(c);            if ((map & white) != 0)                break;        }        AddCharToLexer(lexer, c);        if (c == '"')        {            do            {                c = ReadChar(lexer->in);                AddCharToLexer(lexer, c);            }            while (c != '"');            delim = '\'';            continue;        }        if (c == '\'')        {            do            {                c = ReadChar(lexer->in);                AddCharToLexer(lexer, c);            }            while (c != '\'');        }    }    return delim;}/* values start with "=" or " = " etc. *//* doesn't consume the ">" at end of start tag */char *ParseValue(Lexer *lexer, char *name, Bool foldCase, Bool *isempty, int *pdelim){    int len = 0, start, map;    Bool seen_gt = no;    uint c, lastc, delim, quotewarning;    char *value;    delim = (char) 0;    *pdelim = '"'; /* skip white space before the '=' */    for (;;)    {        c = ReadChar(lexer->in);        if (c == EndOfStream)        {            UngetChar(c, lexer->in);            break;        }        map = MAP(c);        if ((map & white) == 0)           break;    }/*  c should be '=' if there is a value  other legal possibilities are white  space, '/' and '>'*/    if (c != '=')    {        UngetChar(c, lexer->in);        return null;    } /* skip white space after '=' */    for (;;)    {        c = ReadChar(lexer->in);        if (c == EndOfStream)        {            UngetChar(c, lexer->in);            break;        }        map = MAP(c);        if ((map & white) == 0)           break;    } /* check for quote marks */    if (c == '"' || c == '\'')        delim = c;    else if (c == '<')    {        start = lexer->lexsize;        AddCharToLexer(lexer, c);        *pdelim = ParseServerInstruction(lexer);        len = lexer->lexsize - start;        lexer->lexsize = start;        return (len > 0 ? wstrndup(lexer->lexbuf+start, len) : null);    }    else        UngetChar(c, lexer->in); /*   and read the value string   check for quote mark if needed */    quotewarning = 0;    start = lexer->lexsize;    c = '\0';    for (;;)    {        lastc = c;  /* track last character */        c = ReadChar(lexer->in);        if (c == EndOfStream)        {            ReportAttrError(lexer, lexer->token, null, UNEXPECTED_END_OF_FILE);            UngetChar(c, lexer->in);            break;        }        if (delim == (char)0)        {            if (c == '>')            {                UngetChar(c, lexer->in);                break;            }            if (c == '<')            {                /* UngetChar(c, lexer->in); */                ReportAttrError(lexer, lexer->token, null, UNEXPECTED_GT);                /* break; */            }            /*             For cases like <br clear=all/> need to avoid treating /> as             part of the attribute value, however care is needed to avoid             so treating <a href=http://www.acme.com/> in this way, which             would map the <a> tag to <a href="http://www.acme.com"/>            */            if (c == '/')            {                /* peek ahead in case of /> */                c = ReadChar(lexer->in);                if (c == '>' && !IsUrl(name))                {                    *isempty = yes;                    UngetChar(c, lexer->in);                    break;                }                /* unget peeked char */                UngetChar(c, lexer->in);                c = '/';            }        }        else  /* delim is '\'' or '"' */        {            if (c == delim)                break;            /* treat CRLF, CR and LF as single line break */            if (c == '\r')            {                if ((c = ReadChar(lexer->in)) != '\n')                    UngetChar(c, lexer->in);                c = '\n';            }            if (c == '\n' || c == '<' || c == '>')                ++quotewarning;            if (c == '>')                seen_gt = yes;        }        if (c == '&')        {            AddCharToLexer(lexer, c);            ParseEntity(lexer, null);            continue;        }        /*         kludge for JavaScript attribute values         with line continuations in string literals        */        if (c == '\\')        {            c = ReadChar(lexer->in);            if (c != '\n')            {                UngetChar(c, lexer->in);                c = '\\';            }        }        map = MAP(c);        if (map & white)        {            if (delim == (char)0)                break;            c = ' ';            if (lastc == ' ')                continue;        }        else if (foldCase && (map & uppercase) != 0)            c += (uint)('a' - 'A');        AddCharToLexer(lexer, c);    }    if (quotewarning > 10 && seen_gt)    {        /*           there is almost certainly a missing trailling quote mark           as we have see too many newlines, < or > characters.           an exception is made for Javascript attributes and the           javascript URL scheme which may legitimately include < and >        */        if (!IsScript(name) &&            !(IsUrl(name) && wstrncmp(lexer->lexbuf+start, "javascript:", 11) == 0))                ReportError(lexer, null, null, SUSPECTED_MISSING_QUOTE);     }    len = lexer->lexsize - start;    lexer->lexsize = start;    if (len > 0 || del

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -