📄 temp.txt
字号:
1582 void getSingleOperator(char c, Token &token); 1583 void keywordLookup(Token &token); 1584 1585 1586 static Token ReservedWords[MAXRESERVED] = { 1587 {IF, "if"}, 1588 {THEN, "then"}, 1589 {ELSE, "else"}, 1590 {WHILE, "while"}, 1591 {DO, "do"}, 1592 {BEGIN, "begin"}, 1593 {END, "end"} 1594 }; 1595 1596 1597 Lexer::Lexer(char *filename){ 1598 buf = new char[TOKENBUFSIZE]; 1599 FILE *fp = fopen(filename, "r"); 1600 index = 0; 1601 if(fp==NULL){ 1602 src = NULL; 1603 printf("\n\n********************************************\n"); 1604 printf("* FATAL ERROR! LEXER COULD NOT OPEN FILE!!!\n"); 1605 printf("* %s : No such file.\n", filename); 1606 printf("********************************************\n\n"); 1607 length = 0; 1608 }else{ 1609 int i = 0; 1610 while(fgetc(fp)!=EOF){ 1611 i++; 1612 } 1613 length = i; 1614 src = new char[i+1]; 1615 1616 rewind(fp); 1617 i = 0; 1618 while(src[i] = fgetc(fp)){ 1619 if(src[i] == EOF){ 1620 src[i] = '\0'; 1621 break; 1622 } 1623 i++; 1624 } 1625 1626 fclose(fp); 1627 } 1628 } 1629 1630 Lexer::~Lexer(){ 1631 delete[] src; 1632 } 1633 1634 void Lexer::reset(){ 1635 index = 0; 1636 } 1637 1638 bool Lexer::isFinished(){ 1639 return (index == length - 1); 1640 } 1641 1642 bool Lexer::isReady(){ 1643 return (src != NULL); 1644 } 1645 1646 char* Lexer::getSrc(){ 1647 return src; 1648 } 1649 1650 int Lexer::getIndex(){ 1651 return index; 1652 } 1653 1654 1655 /***=======================================================****/ 1656 1657 1658 Token Lexer::nextToken(){ 1659 Token token; 1660 ScannerState state = START; 1661 int bufindex = 0; 1662 bool next = true; // index++ 1663 char c; 1664 1665 if(index==length-1){ 1666 token.type = ERROR; 1667 token.name = "NO CHAR LEFT."; 1668 return token; 1669 } 1670 1671 c = src[index]; 1672 while(c==' ' || c=='\n' || c=='\r' || c=='\t'){ 1673 index ++; 1674 c = src[index]; 1675 } 1676 1677 // get started 1678 if((c>='a' && c<='z') || (c>='A' && c<='Z')){ 1679 state = INID; 1680 token.type = ID; 1681 buf[bufindex++] = c; 1682 }else if(c>='0' && c<='9'){ 1683 state = INNUM; 1684 token.type = NUM; 1685 buf[bufindex++] = c; 1686 }else if(c=='='){ 1687 state = INEQ; 1688 }else if(c=='<'){ 1689 state = INLE; 1690 }else if(c=='>'){ 1691 state = INGE; 1692 }else{ 1693 state = DONE; 1694 getSingleOperator(c, token); 1695 } 1696 index ++; 1697 1698 while(state!=DONE){ 1699 c = src[index]; 1700 switch(state){ 1701 case INEQ: 1702 if(c=='='){ 1703 token.type = EQ; 1704 token.name = "=="; 1705 }else{ 1706 token.type = ASSIGN; 1707 token.name = "="; 1708 next = false; 1709 } 1710 state = DONE; 1711 break; 1712 case INLE: 1713 if(c=='='){ 1714 token.type = LE; 1715 token.name = "<="; 1716 }else{ 1717 token.type = LT; 1718 token.name = "<"; 1719 next = false; 1720 } 1721 state = DONE; 1722 break; 1723 case INGE: 1724 if(c=='='){ 1725 token.type = GE; 1726 token.name = ">="; 1727 }else{ 1728 token.type = GT; 1729 token.name = ">"; 1730 next = false; 1731 } 1732 state = DONE; 1733 break; 1734 case INID: 1735 if((c>='a' && c<='z') || (c>='A' && c<='Z') || (c>='0' && c<='9')){ 1736 buf[bufindex++] = c; 1737 }else{ 1738 state = DONE; 1739 next = false; 1740 } 1741 break; 1742 case INNUM: 1743 if(c>='0' && c<='9'){ 1744 buf[bufindex++] = c; 1745 }else{ 1746 state = DONE; 1747 next = false; 1748 } 1749 break; 1750 default: 1751 state = DONE; 1752 token.type = ERROR; 1753 token.name = "ERROR!"; 1754 printf("Error! Because no state is define! This should never happen! \ 1755 Current character is: %c\n", src[index]); 1756 break; 1757 }// end scanner state 1758 index ++; 1759 }// end while 1760 1761 if(next==false){ 1762 index --; 1763 } 1764 1765 if(bufindex == TOKENBUFSIZE - 1){ 1766 // OUT OF BUFFER! It should never happen. 1767 token.type = ERROR; 1768 token.name = "OUT OF BUFFER!"; 1769 } 1770 1771 if(bufindex!=0){ 1772 buf[bufindex] = '\0'; 1773 token.name = new char[bufindex]; 1774 strcpy(token.name, buf); 1775 if(token.type==ID){ // looking for reserved word and set the right type 1776 keywordLookup(token); 1777 } 1778 } 1779 1780 return token; 1781 } 1782 1783 1784 // looking for reserved word and set the right type 1785 void keywordLookup(Token &token){ 1786 for(int i=0;i<MAXRESERVED;i++){ 1787 if(strcmp(token.name, ReservedWords[i].name)==0){ 1788 token.type = ReservedWords[i].type; 1789 if(token.type == BEGIN) 1790 token.name = "{"; 1791 if(token.type == END) 1792 token.name = "}"; 1793 break; 1794 } 1795 } 1796 } 1797 1798 1799 void getSingleOperator(char c, Token &token){ 1800 switch(c){ 1801 case '{': 1802 token.type = BEGIN; 1803 token.name = "{"; 1804 break; 1805 case '}': 1806 token.type = END; 1807 token.name = "}"; 1808 break; 1809 case '+': 1810 token.type = PLUS; 1811 token.name = "+"; 1812 break; 1813 case '*': 1814 token.type = MUL; 1815 token.name = "*"; 1816 break; 1817 case '#': 1818 case '\0': // end of file 1819 token.type = LEXER_DONE; 1820 token.name = "FINISH"; 1821 break; 1822 case '&': 1823 token.type = AND; 1824 token.name = "&"; 1825 break; 1826 case '|': 1827 token.type = OR; 1828 token.name = "|"; 1829 break; 1830 case ';': 1831 token.type = SEMI; 1832 token.name = ";"; 1833 break; 1834 default: 1835 token.type = ERROR; 1836 token.name = "ERROR! Unknown character."; 1837 printf("Error occured when state=START! Current character is: %c\n", c); 1838 break; 1839 } 1840 } 1841 1842 1843 1844 /************************************************** 1845 * file: main.cpp 1846 * author: wuzuyang 1847 * describe: for test 1848 **************************************************/ 1849 1850 #include <stdio.h> 1851 #include "Parser.h" 1852 1853 int main(int argc, char* argv[]){ 1854 char *filename = "test.txt"; 1855 char *fileout = ""; 1856 FILE *fp = NULL; 1857 1858 if(argc > 1){ 1859 filename = argv[1]; 1860 } 1861 1862 if(argc > 2){ 1863 fileout = argv[2]; 1864 fp = fopen(fileout, "w"); 1865 } 1866 1867 Parser parser = Parser(filename); 1868 SyntaxTree *tree = parser.parse(); 1869 1870 if(tree!=NULL){ 1871 printf("\n************= Tree =****************:\n"); 1872 tree->display(); 1873 if(fp != NULL) tree->display(fp); 1874 }else{ 1875 parser.printError(); 1876 } 1877 1878 if(fp != NULL) fclose(fp); 1879 1880 printf("\n"); 1881 1882 return 0; 1883 } 1884 1885 1886 1887 /************************************************** 1888 * file: Parser.cpp 1889 * date: 2006-04-12 1890 * author: wuzuyang 1891 * describe: parser 1892 **************************************************/ 1893 1894 #define TRACE_DEBUG 1895 1896 #ifdef TRACE_DEBUG 1897 #define TRACE(FUNCTION) printf("%-16s token = %s\n", FUNCTION, currentToken.name); 1898 #else 1899 #define TRACE(FUNCTION) 1900 #endif 1901 1902 1903 #include "Parser.h" 1904 #include <stdio.h> 1905 #include <stdlib.h> 1906 1907 Parser::Parser(char* sourcefile){ 1908 lexer = new Lexer(sourcefile); 1909 if(lexer->isReady()) 1910 currentToken = lexer->nextToken(); 1911 } 1912 1913 Parser::~Parser(){ 1914 delete lexer; 1915 } 1916 1917 1918 Token Parser::nextToken(){ 1919 currentToken = lexer->nextToken(); 1920 return currentToken; 1921 } 1922 1923 void Parser::printError(const char *error){ // private method 1924 printf("%s when token = '%s'\n", error, currentToken.name); 1925 } 1926 1927 void Parser::printError(){ // public method 1928 if(lexer->isReady()) 1929 printf("\n*** ERROR BEFORE: ***\n%s", lexer->getSrc()+lexer->getIndex()); 1930 else 1931 printf("Lexer is not ready!\n"); 1932 } 1933 1934 /*==================================================================*/ 1935 1936 SyntaxTree* Parser::parse(){ 1937 SyntaxTree* tree = NULL; 1938 if(lexer->isReady()){ 1939 tree = Statement(); 1940 } 1941 return tree; 1942 } 1943 1944 1945 SyntaxTree* Parser::Statement(){ 1946 TRACE("in Statement();"); 1947 1948 SyntaxTree *tree = NULL; 1949 1950 switch(currentToken.type){ 1951 case LEXER_DONE: 1952 tree = new SyntaxTree(LEXER_DONE); 1953 break; 1954 case ID: 1955 this->nextToken(); 1956 tree = Assign(); 1957 if(tree != NULL){ 1958 tree->addLeft(ID); 1959 } 1960 break; 1961 case WHILE: 1962 this->nextToken(); 1963 tree = While(); 1964 break; 1965 case BEGIN: 1966 this->nextToken(); 1967 tree = Block(); 1968 if(currentToken.type != END){ 1969 tree = NULL; 1970 this->printError("ERROR! begin without end"); 1971 } 1972 //this->nextToken(); 1973 this->currentToken.type = SEMI; 1974 break; 1975 case IF: 1976 this->nextToken(); 1977 tree = Condition(); 1978 break; 1979 default: 1980 tree = NULL; 1981 break; 1982 } 1983 1984 TRACE("out Statement();"); 1985 return tree; 1986 } 1987 1988 1989 SyntaxTree* Parser::Assign(){ 1990 TRACE("in Assign();"); 1991 1992 SyntaxTree *tree = NULL; 1993 1994 if(currentToken.type == ASSIGN){ 1995 this->nextToken(); 1996 SyntaxTree *temptree = Expression(); 1997 1998 if(temptree != NULL){ 1999 tree = new SyntaxTree(ASSIGN); 2000 tree->addRight(temptree); 2001 } 2002 }else{ 2003 this->printError("ERROR! Assignment statement expects '=';"); 2004 return NULL; 2005 } 2006 2007 TRACE("out Assign();"); 2008 return tree; 2009 } 2010 2011 2012 SyntaxTree* Parser::Expression(){ 2013 TRACE("in Expression();"); 2014 2015 SyntaxTree *temp = NULL; 2016 SyntaxTree *tree = T(); 2017 2018 if(tree == NULL){ 2019 return NULL; 2020 } 2021 2022 while(currentToken.type == PLUS){ 2023 temp = new SyntaxTree(PLUS); 2024 temp->addLeft(tree); 2025 tree = temp; 2026 this->nextToken(); 2027 2028 temp = T(); 2029 if(temp != NULL){ 2030 tree->addRight(temp); 2031 }else{ 2032 this->printError("ERROR! Behind '+';"); 2033 return NULL; 2034 } 2035 } 2036 2037 return tree; 2038 } 2039 2040 2041 SyntaxTree* Parser::T(){ 2042 SyntaxTree *temp = NULL; 2043 SyntaxTree *tree = F(); 2044 2045 if(tree == NULL){ 2046 return NULL; 2047 } 2048 2049 while(currentToken.type == MUL){ 2050 temp = new SyntaxTree(MUL); 2051 temp->addLeft(tree); 2052 tree = temp; 2053 this->nextToken(); 2054 2055 temp = F(); 2056 if(temp != NULL){ 2057 tree->addRight(temp); 2058 }else{ 2059 this->printError("ERROR in T();"); 2060 return NULL; 2061 } 2062 } 2063 2064 return tree; 2065 } 2066 2067 2068 SyntaxTree* Parser::F(){ 2069 SyntaxTree *tree = NULL; 2070 2071 if(currentToken.type == ID){ 2072 tree = new SyntaxTree(ID); 2073 }else if(currentToken.type == NUM){ 2074 tree = new SyntaxTree(NUM, atoi(currentToken.name)); 2075 }else{ 2076 this->printError("ERROR! in F();"); 2077 } 2078 2079 this->nextToken(); 2080 2081 return tree; 2082 } 2083 2084 /*==================================================================*/ 2085 2086 SyntaxTree* Parser::Boolean(){ 2087 TRACE("in Boolean();"); 2088 2089 SyntaxTree *temp = NULL; 2090 SyntaxTree *tree = T2(); 2091 2092 if(tree == NULL){ 2093 return NULL; 2094 } 2095 2096 while(currentToken.type == OR){ 2097 temp = new SyntaxTree(OR); 2098 temp->addLeft(tree); 2099 tree = temp; 2100 this->nextToken(); 2101 2102 temp = T2(); 2103 if(temp != NULL){ 2104 tree->addRight(temp); 2105 }else{ 2106 this->printError("ERROR! behind OR;"); 2107 return NULL; 2108 }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -