⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 push.re

📁 a little DFA compiler.
💻 RE
字号:
/*
 *  A push-model scanner example for re2c -f
 *  Written Mon Apr 11 2005 by mgix@mgix.com
 *  This file is in the public domain.
 *
 *  The data source calls PushScanner::push() with successive batches of
 *  input; the scanner prints every token it recognizes and returns to the
 *  caller whenever it runs out of buffered input.
 *
 */

// ----------------------------------------------------------------------

#include <fcntl.h>
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

#if defined(WIN32)

    typedef signed char     int8_t;
    typedef signed short    int16_t;
    typedef signed int      int32_t;

    typedef unsigned char   uint8_t;
    typedef unsigned short  uint16_t;
    typedef unsigned int    uint32_t;

#else

    #include <stdint.h>
    #include <unistd.h>

    #ifndef O_BINARY
        #define O_BINARY 0
    #endif

#endif

// ----------------------------------------------------------------------

/*
 * X-macro listing every token kind exactly once. It is expanded twice
 * below with different definitions of TOK(): once to build the table of
 * printable names and once to build the Token enum, which keeps the two
 * in the same order.
 */
#define TOKENS              \
                            \
    TOK(kEOF)               \
    TOK(kEOL)               \
    TOK(kUnknown)           \
    TOK(kIdentifier)        \
    TOK(kDecimalConstant)   \
                            \
    TOK(kEqual)             \
    TOK(kLeftParen)         \
    TOK(kRightParen)        \
    TOK(kMinus)             \
    TOK(kPlus)              \
    TOK(kStar)              \
    TOK(kSlash)             \
                            \
    TOK(kIf)                \
    TOK(kFor)               \
    TOK(kElse)              \
    TOK(kGoto)              \
    TOK(kBreak)             \
    TOK(kWhile)             \
    TOK(kReturn)

// ----------------------------------------------------------------------

// Printable token names, indexed by PushScanner::Token value.
static const char *tokenNames[] =
{
    #define TOK(x) #x,
        TOKENS
    #undef TOK
};

// ----------------------------------------------------------------------

/*
 * Push-model scanner: input is handed over in batches through push(),
 * accumulated in a heap buffer owned by the scanner, and every recognized
 * token is reported through send().
 */
class PushScanner
{
public:

    // Token kinds, in the same order as tokenNames[].
    enum Token
    {
        #define TOK(x) x,
            TOKENS
        #undef TOK
    };

private:

    bool        eof;        // set once a batch shorter than maxFill was pushed
    int32_t     state;      // value exchanged through YYGETSTATE / YYSETSTATE

    uint8_t     *limit;     // one past the last valid byte in buffer
    uint8_t     *start;     // first byte of the token being scanned
    uint8_t     *cursor;    // YYCURSOR
    uint8_t     *marker;    // YYMARKER
    uint8_t     *buffer;    // heap storage, grown with realloc()
    uint8_t     *bufferEnd; // one past the end of the allocation

    // Not referenced by the hand-written code; presumably used by the
    // re2c-generated scanner, which has to keep them across push() calls.
    uint8_t     yych;
    uint32_t    yyaccept;

    // The scanner owns `buffer`, so copying it would free the storage
    // twice. Declared but never defined to forbid copies.
    PushScanner(const PushScanner &);
    PushScanner &operator=(const PushScanner &);

public:

    // ----------------------------------------------------------------------
    // Starts with no buffer and state -1.
    PushScanner()
    {
        limit = 0;
        start = 0;
        state = -1;
        cursor = 0;
        marker = 0;
        buffer = 0;
        eof = false;
        bufferEnd = 0;
        yych = 0;
        yyaccept = 0;
    }

    // ----------------------------------------------------------------------
    // Releases the input buffer allocated by push().
    ~PushScanner()
    {
        free(buffer);
    }

    // ----------------------------------------------------------------------
    /*
     * Reports one token on stdout: its numeric kind and name, followed
     * (for anything but kEOF) by the token text, i.e. the bytes between
     * start and cursor, framed by "---->" and "<----".
     */
    void send(
        Token token
    )
    {
        size_t tokenSize = cursor-start;
        const char *tokenName = tokenNames[token];
        printf(
            "scanner is pushing out a token of type %d (%s)",
            (int)token,
            tokenName
        );

        if(token==kEOF) putchar('\n');
        else
        {
            // Pad the name to 20 columns so the token texts line up.
            size_t tokenNameSize = strlen(tokenName);
            size_t padSize = 20-(20<tokenNameSize ? 20 : tokenNameSize);
            for(size_t i=0; i<padSize; ++i) putchar(' ');
            printf(" : ---->");

            fwrite(
                start,
                tokenSize,
                1,
                stdout
            );

            printf("<----\n");
        }
    }

    // ----------------------------------------------------------------------
    /*
     * Feeds one batch of input to the scanner and scans as far as the
     * buffered data allows.
     *
     *  input       bytes to append; ignored when inputSize is not positive
     *  inputSize   number of bytes in input; a value smaller than maxFill
     *              (including 0 and negative values) marks the end of the
     *              input
     *
     * Returns 1 once the eof rule has matched and kEOF was sent, 0 when
     * the scanner stopped because it needs more input. Exits the process
     * if the buffer cannot be grown.
     */
    uint32_t push(
        const void  *input,
        ssize_t     inputSize
    )
    {
        printf(
            "scanner is receiving a new data batch of length %ld\n"
            "scanner continues with saved state = %d\n",
            (long)inputSize,
            (int)state
        );

        /*
         * Data source is signaling end of file when batch size
         * is less than maxFill. This is slightly annoying because
         * maxFill is a value that can only be known after re2c does
         * its thing. Practically though, maxFill is never bigger than
         * the longest keyword, so given our grammar, 32 is a safe bet.
         *
         * A short final batch may still carry real data, so it is
         * appended first and only then followed by zero padding; the
         * first zero byte is what the eof rule matches.
         */
        const ssize_t maxFill = 32;
        const size_t eofPadSize = 64;
        size_t dataSize = (0<inputSize && input!=0) ? (size_t)inputSize : 0;
        size_t padSize = 0;
        if(inputSize<maxFill)
        {
            eof = true;
            padSize = eofPadSize;
        }

        /*
         * When we get here, we have a partially
         * consumed buffer which is in the following state:
         *                                                                last valid char        last valid buffer spot
         *                                                                v                      v
         * +-------------------+-------------+---------------+-------------+----------------------+
         * ^                   ^             ^               ^             ^                      ^
         * buffer              start         marker          cursor        limit                  bufferEnd
         *
         * We need to stretch the buffer and concatenate the new chunk of input to it
         *
         */
        size_t used = limit-buffer;
        size_t needed = used+dataSize+padSize;
        size_t allocated = bufferEnd-buffer;
        if(allocated<needed)
        {
            // realloc() may move the storage, so remember every pointer
            // as an offset and rebuild it from the new base address.
            size_t limitOffset = limit-buffer;
            size_t startOffset = start-buffer;
            size_t markerOffset = marker-buffer;
            size_t cursorOffset = cursor-buffer;

            uint8_t *grown = (uint8_t*)realloc(buffer, needed);
            if(grown==0)
            {
                fprintf(
                    stderr,
                    "scanner could not grow its buffer to %lu bytes\n",
                    (unsigned long)needed
                );
                exit(1);
            }

            buffer = grown;
            bufferEnd = needed+buffer;

            marker = markerOffset + buffer;
            cursor = cursorOffset + buffer;
            start = buffer + startOffset;
            limit = limitOffset + buffer;
        }
        if(0<dataSize)
        {
            memcpy(limit, input, dataSize);
            limit += dataSize;
        }
        if(0<padSize)
        {
            memset(limit, 0, padSize);
            limit += padSize;
        }

        // The scanner starts here
        #define YYLIMIT         limit
        #define YYCURSOR        cursor
        #define YYMARKER        marker
        #define YYCTYPE         uint8_t

        // SKIP drops the current lexeme and scans the next one; SEND
        // reports it first. YYFILL leaves the scanner instead of
        // refilling in place: more data only arrives with the next push().
        #define SKIP(x)         { start = cursor; goto yy0; }
        #define SEND(x)         { send(x); SKIP();          }
        #define YYFILL(n)       { goto fill;                }

        #define YYGETSTATE()    state
        #define YYSETSTATE(x)   { state = (x);  }

    start:

        /*!re2c

            eol = "\n";
            eof = "\000";
            digit = [0-9];
            integer = digit+;
            alpha = [A-Za-z_];
            any = [\000-\0377];
            space = [ \h\t\v\f\r];

            "if"                    { SEND(kIf);             }
            "for"                   { SEND(kFor);            }
            "else"                  { SEND(kElse);           }
            "goto"                  { SEND(kGoto);           }
            "break"                 { SEND(kBreak);          }
            "while"                 { SEND(kWhile);          }
            "return"                { SEND(kReturn);         }
            alpha (alpha|digit)*    { SEND(kIdentifier);     }
            integer                 { SEND(kDecimalConstant);}

            "="                     { SEND(kEqual);          }
            "("                     { SEND(kLeftParen);      }
            ")"                     { SEND(kRightParen);     }
            "-"                     { SEND(kMinus);          }
            "+"                     { SEND(kPlus);           }
            "*"                     { SEND(kStar);           }
            "/"                     { SEND(kSlash);          }

            eol                     { SKIP();                }
            space                   { SKIP();                }
            eof                     { send(kEOF); return 1;  }
            any                     { SEND(kUnknown);        }

        */

    fill:
        ssize_t unfinishedSize = cursor-start;
        printf(
            "scanner needs a refill. Exiting for now with:\n"
            "    saved fill state = %d\n"
            "    unfinished token size = %ld\n",
            (int)state,
            (long)unfinishedSize
        );

        if(0<unfinishedSize && start<limit)
        {
            printf("    unfinished token is :");
            fwrite(start, 1, cursor-start, stdout);
            putchar('\n');
        }
        putchar('\n');

        /*
         * Once we get here, we can get rid of
         * everything before start and after limit.
         */

        // At end of input no further batch will arrive, so go back into
        // the scanner instead of returning to the caller.
        if(eof==true) goto start;

        // Slide the unconsumed tail down to the front of the buffer and
        // shift every pointer by the same distance.
        if(buffer<start)
        {
            size_t startOffset = start-buffer;
            memmove(buffer, start, limit-start);
            marker -= startOffset;
            cursor -= startOffset;
            limit -= startOffset;
            start -= startOffset;
        }
        return 0;
    }
};

// ----------------------------------------------------------------------

/*
 * Tokenizes the file named by the first command line argument, or file
 * descriptor 0 when no argument is given. Returns 0 on success and 1 if
 * reading the input failed; exits with status 1 if the file cannot be
 * opened.
 */
int main(
    int     argc,
    char    **argv
)
{
    // Parse cmd line
    int input = 0;
    if(1<argc)
    {
        input = open(argv[1], O_RDONLY | O_BINARY);
        if(input<0)
        {
            fprintf(
                stderr,
                "could not open file %s\n",
                argv[1]
            );
            exit(1);
        }
    }

    /*
     * Tokenize input file by pushing batches
     * of data one by one into the scanner.
     */
    const size_t batchSize = 256;
    uint8_t buffer[batchSize];
    PushScanner scanner;
    int status = 0;
    bool sawEof = false;
    while(1)
    {
        ssize_t n = read(input, buffer, batchSize);
        if(n<0)
        {
            // Checked before any comparison with batchSize: converted
            // to size_t, -1 would never compare smaller than it.
            perror("could not read input");
            status = 1;
            break;
        }

        // push() returns 1 once the eof rule has matched. That is the
        // case after a batch shorter than maxFill, and also when the
        // input itself contains a NUL byte.
        sawEof = (scanner.push(buffer, n)!=0);
        if(sawEof || (size_t)n<batchSize) break;
    }

    // Signal the end of the input, unless the scanner already sent kEOF.
    if(!sawEof) scanner.push(0, -1);
    close(input);

    // Done
    return status;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -