⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tokenizer.c

📁 COP8 CPU的一个解释型BASIC源码
💻 C
字号:
////////////////////////////////////////////////////////////////
//
//                S C R I P T H E R M
//             A SCRIPTABLE THERMOMETER
// 
// entry of the National Semiconductor COP8FLASH Design Contest
//        submitted by Alberto Ricci Bitti (C) 2001
//              a.riccibitti@ra.nettuno.it
//
//--------------------------------------------------------------
//       FOR A BETTER VIEW SET TAB SIZE=4, INDENT SIZE=4
//--------------------------------------------------------------
// FILE   : tokenizer.c
// PURPOSE: reads ASCII strings from program memory
//          and converts them into 'tokens', each token 
//          corresponding to a language element (e.g.
//          a statement like PRINT, a number, a string...)
//
////////////////////////////////////////////////////////////////


#include <stdio.h>
#include <stdlib.h>
#include "ascii.h"
#include "memory.h"
#include "errors.h"
#include "script.h"
#include "language.h"
#include "tokenizer.h"


// code of the token most recently returned by get_token()
unsigned char   token;
// program position where that token starts (after leading
// whitespace); unget_token() rewinds 'prog' to this address
ram_pointer_t   token_address;

// characters that end a name (used by is_delimiter)
#define DELIMITERS  "@ :;.,+-<>/*=()&|~^\t\n\r"
// characters skipped between tokens (used by is_white)
#define WHITESPACES " \n\r\t"


// membership test: returns 1 if character c occurs in the
// zero-terminated string s, 0 otherwise (adapted to work with flash).
// The terminator itself takes part in the comparison, so c == '\0'
// always yields 1; callers filter out that case beforehand.
static char in_str(const unsigned char __cptr *s, unsigned char c)
{
	register char current;

	for (;;)
	{
		current = *s;
		s++;
		if (current == c)
			return 1;
		if (current == '\0')
			return 0;	// reached end of string without a match
	}
}



//returns non-zero when c terminates a name: the end marker \0,
//any already-compiled token code (bit 7 set, i.e. c >= 0x80),
//or one of the characters listed in DELIMITERS
static unsigned char is_delimiter(char c)
{
	if (c == 0 || (c & 0x80))
		return 1;
	return in_str(DELIMITERS, c);
}



//returns non-zero when c is one of the WHITESPACES characters
static unsigned char is_white(char c)
{
	// \0 must be rejected up front: in_str() would match it
	// against the string terminator and report it as whitespace
	if (c == 0)
		return 0;
	return in_str(WHITESPACES, c);
}



//returns 1 when c is an already-encoded token: either \0
//or a code with bit 7 set; plain 7-bit ASCII text yields 0
static unsigned char is_token(char c)
{
	if (c == 0)
		return 1;
	return (c & 0x80) ? 1 : 0;
}



//returns 1 for the ASCII decimal digits '0'..'9', 0 otherwise
unsigned char is_digit(unsigned char c)
{
	// after subtracting '0', anything below '0' wraps to a large
	// unsigned value, so a single comparison covers both bounds
	return (unsigned char)(c - '0') <= 9;
}



//returns 1 for the ASCII letters 'a'..'z' and 'A'..'Z', 0 otherwise
unsigned char is_alpha(unsigned char c)
{
	// setting bit 5 maps 'A'..'Z' onto 'a'..'z' and maps no other
	// character into that range, so one range check is enough
	unsigned char folded = c | 0x20;

	return folded >= 'a' && folded <= 'z';
}



//returns 1 for printable ASCII (space 0x20 through '~' 0x7E),
//0 for control characters, DEL and anything with bit 7 set
unsigned char is_print(unsigned char c)
{
	if (c < 0x20)
		return 0;
	if (c > 0x7E)
		return 0;
	return 1;
}



//matches the program text starting at token_address against the
//keyword of dictionary entry i.
//returns the keyword length when every keyword character matches,
//0 on the first mismatch (and also for an empty keyword)
unsigned char check_token(unsigned char i)
{
	unsigned char length = 0;

	for (;;)
	{
		unsigned char expected = dictionary[i].command[length];

		if (expected == 0)
			break;		// whole keyword matched
		if (peek_flash( token_address + length ) != expected)
			return 0;
		length++;
	}
	return length;
}



//returns the current token and advances program pointer
//
// Reads the program text at 'prog' through peek_flash(), classifies
// the next lexical element and leaves 'prog' just past it.
// Side effects: 'token_address' is set to the start of the element
// (after leading whitespace) and 'token' to the returned code.
// The checks are tried in this order: precompiled token, quoted
// string, number, dictionary keyword, variable/function name.
// Anything else is reported through syntax_error(SYNTAX_ERROR).
unsigned char get_token(void)
{	unsigned char i;
	unsigned char n;	// length of the matched dictionary keyword
    // skip spaces
	while(is_white(peek_flash(prog))) 
		prog++;
    // record token position
	token_address = prog;
	// if already decoded, return token
	// (a \0 byte or a code with bit 7 set is consumed as a single byte)
	if (is_token(peek_flash(prog)))
	    return token = peek_flash(prog++);
    // check for strings
	if(peek_flash(prog) == APEX)
	{
		// advance to the closing APEX; every character before it must
		// differ from END_OF_LINE
		// NOTE(review): ensure() presumably raises the given error when
		// its condition is false -- confirm in errors.h
		while(peek_flash(++prog) != APEX)
		{
			ensure(peek_flash(prog) != END_OF_LINE, SYNTAX_ERROR); //unbalanced quotes
		};
		prog++;		// step over the closing APEX
		return token = QUOTE;
	};
	// check for immediate number
	// (token_address equals prog here; only decimal digits are consumed)
	if (is_digit(peek_flash(token_address)))
	{
		while( is_digit(peek_flash(prog)) ) 
			prog++;
		return	token = NUMBER;
	};
    //look up token dictionary
	// entries are tried in index order and the first keyword that
	// matches the text at token_address wins
	for (i = 0 ; i < DICTIONARY_SIZE ; i++) 
	{	
		if ( n = check_token(i) ) 
		{
			prog += n;	// skip the keyword text
			return(token = dictionary[i].token);
		};
    };
	//not a keyword: can be a variable or a function name
	//(must start with a letter; letters and digits may follow)
	if (is_alpha(peek_flash(token_address)))
	{
		while( is_alpha(peek_flash(prog)) || is_digit(peek_flash(prog)) ) 
			prog++;
		// a name directly followed by an opening parenthesis is a
		// function; the parenthesis itself is not consumed here
		// NOTE(review): PAR_OPEN / PAR_VOID look like the precompiled
		// token codes for "(" and "()" -- confirm in language.h
		if (peek_flash(prog) == '(' || peek_flash(prog) == PAR_OPEN || peek_flash(prog) == PAR_VOID) 	
			return token = FUNCTION_NAME;
		else
			return token = VARIABLE_NAME;
	};
	
	//none of above: must be an error
	syntax_error(SYNTAX_ERROR);
	return(0); //just to avoid a warning
}



//cancels the last get_token() by rewinding the program pointer to
//the start of that token, so the next get_token() reads it again.
//Cannot be repeated: token_address only remembers the most recent
//token position. The global 'token' is left unchanged.
void unget_token(void)
{
	prog = token_address;
}



//checks if two names match (e.g. variables or functions).
//'reference' and 'to_check' are the addresses of the two names;
//each name runs up to the first delimiter (see is_delimiter).
//returns non-zero only when both names have the same characters
//and end at the same length
unsigned char same_token(ram_pointer_t reference, ram_pointer_t  to_check)
{
	for ( ; !is_delimiter(peek_flash(reference)); reference++, to_check++)
	{
		if (peek_flash(reference) != peek_flash(to_check))
			return 0;
	}
	// the reference name is exhausted: the names are equal only if
	// the candidate ends here too
	return is_delimiter( peek_flash(to_check) );
}


/////////////////////////////////////////////////////////////////////////
// precompiles the program, replacing keyword strings with token codes.
// the function is executed from RAM only
// (and in-place, as the resulting program is shorter).
// Quoted strings, numbers, variable names and function names are kept
// as text; every other token is written as its one-byte code.
// After compilation, tokens cannot be confused with ASCII strings as
// they start from 0x80.
// The pass starts at PROGRAM_START and stops after END_FILE is written.
/////////////////////////////////////////////////////////////////////////

void zip_tokens(void)
{
    ram_pointer_t out;      // write position of the compacted program

    prog = PROGRAM_START;
    out = prog;
    for (;;)
    {
        get_token();
        if (token != QUOTE && token != FUNCTION_NAME && token != VARIABLE_NAME && token != NUMBER)
        {
            //keywords and operators shrink to their token code
            poke_ram(out++, token);
        }
        else
        {
            //literal text: copy it unchanged, from the token start
            //up to the current program position
            unsigned char copied;

            while (token_address != prog)
            {
                copied = peek_flash(token_address);
                token_address++;
                poke_ram(out, copied);
                out++;
            }
            //keep one whitespace character if the text is followed by one
            copied = peek_flash(prog);
            if (is_white(copied))
                poke_ram(out++, copied);
        }
        if (token == END_FILE)
            break;
    }
}


⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -