📄 lexconst.cpp

📁 lex语法分析
💻 CPP
字号:
// LexConst.cpp: implementation of the CLexConst class.
//
//////////////////////////////////////////////////////////////////////

#include "stdafx.h"
#include "lex.h"
#include "LexConst.h"

#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif

//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////


// Table of the part-of-speech tag strings known to CTagConst.
// Every entry is exactly three characters long and the entries are listed in
// ascending byte order. CTagConst::GetTagNo binary-searches this array, so
// that ordering must be preserved whenever a tag is added or removed.
// GetTagNo reports a tag's number as its index in this table plus one.
// NOTE(review): the entries look like the BNC C5 (CLAWS5) tagset -- confirm.
string CTagConst::m_strTags[TAGS_NUM] = {
	"AJ0",
	"AJC",
	"AJS",
	"AT0",
	"AV0",
	"AVP",
	"AVQ",
	"CJC",
	"CJS",
	"CJT",
	"CRD",
	"DPS",
	"DT0",
	"DTQ",
	"EX0",
	"ITJ",
	"NN0",
	"NN1",
	"NN2",
	"NP0",
	"ORD",
	"PNI",
	"PNP",
	"PNQ",
	"PNX",
	"POS",
	"PRF",
	"PRP",
	"PUL",
	"PUN",
	"PUQ",
	"PUR",
	"TO0",
	"UNC",
	"VBB",
	"VBD",
	"VBG",
	"VBI",
	"VBN",
	"VBZ",
	"VDB",
	"VDD",
	"VDG",
	"VDI",
	"VDN",
	"VDZ",
	"VHB",
	"VHD",
	"VHG",
	"VHI",
	"VHN",
	"VHZ",
	"VM0",
	"VVB",
	"VVD",
	"VVG",
	"VVI",
	"VVN",
	"VVZ",
	"XX0",
	"ZZ0"
};
// Maps a tag string to its tag number.
//
// Only the first three characters of strTag take part in the lookup; any
// longer input is truncated first. The truncated string is then looked up in
// m_strTags with a binary search, which requires m_strTags to be sorted in
// ascending order.
//
// Returns the 1-based position of the tag in m_strTags when it is found, or
// CTagConst::tagUNKNOWN when it is not.
unsigned int CTagConst::GetTagNo(string strTag)
{
	// Keep only the three significant characters.
	if(strTag.length() > 3){
		strTag.erase(3);
	}

	// Binary search over the half-open range [low, high).
	// The bounds are unsigned, so a closed range with "high = mid - 1" would
	// wrap around to UINT_MAX when mid is 0 (i.e. when strTag sorts before
	// the first table entry) and the next probe would read far outside
	// m_strTags. With an exclusive upper bound no subtraction is needed.
	unsigned int low  = 0;
	unsigned int high = TAGS_NUM;
	while(low < high)
	{
		const unsigned int mid = low + (high - low) / 2;
		const int nCmp = strTag.compare(m_strTags[mid]);
		if(nCmp > 0)
		{
			low = mid + 1;
		}
		else if(nCmp < 0)
		{
			high = mid;
		}
		else
		{
			// Tags are numbered starting from 1 so that the numbering stays
			// consistent with ClearCell in CLexBTree.
			return mid + 1;
		}
	}
	return CTagConst::tagUNKNOWN;
}
// Returns TAGS_NUM, the number of entries in the m_strTags table.
int CTagConst::GetTagsNum()
{
	return TAGS_NUM;
}
// Returns SMALLSET_TAGS_NUM, the tag count of the small tag set.
int CTagConst::GetSmallSetTagsNum()
{
	return SMALLSET_TAGS_NUM;
}

// Table of fused word forms and the pieces each one is split into.
// Each row holds four strings: the fused form as written, followed by up to
// three pieces which, concatenated in order, spell the fused form again.
// Rows that split into only two pieces leave the last string empty.
// NOTE(review): the rows are almost, but not entirely, in ascending byte
// order: "cannae" precedes "can't", and "t'other" appears a second time after
// "twould". If any lookup on this table relies on sorted order, verify it
// still finds those rows. The duplicate cannot simply be dropped without also
// changing the declared array size (67).
CFusedConst::SFusedWord CFusedConst::aryFusedWord[67] = {
	{ "'d've",          "'d",          "'ve",          ""},
	{ "'tis",           "'t",          "is",           ""},
	{ "'twas",          "'t",          "was",          ""},
	{ "'twere",         "'t",          "were",         ""},
	{ "'twould",        "'t",          "would",        ""},
	{ "I'd've",         "I",           "'d",           "'ve"},
	{ "ain't",          "ai",          "n't",          ""},
	{ "aint",           "ai",          "nt",           ""},
	{ "aintcha",        "ai",          "nt",           "cha"},
	{ "an'all",         "an'",         "all",          ""},
	{ "arent",          "are",         "nt",           ""},
	{ "c'mon",          "c'm",         "on",           ""},
	{ "cannae",         "can",         "nae",          ""},
	// NOTE(review): "can't" sorts before "cannae" in byte order.
	{ "can't",          "ca",          "n't",          ""},
	{ "cannot",         "can",         "not",          ""},
	{ "couldnt",        "could",       "nt",           ""},
	{ "d'ya",           "d'",          "ya",           ""},
	{ "d'you",          "d'",          "you",          ""},
	{ "didnt",          "did",         "nt",           ""},
	{ "doesnt",         "does",        "nt",           ""},
	{ "dont",           "do",          "nt",           ""},
	{ "dunnit",         "dun",         "n",            "it"},
	{ "dunno",          "du",          "n",            "no"},
	{ "geroff",         "ger",         "off",          ""},
	{ "gimme",          "gim",         "me",           ""},
	{ "gonna",          "gon",         "na",           ""},
	{ "gorra",          "gor",         "ra",           ""},
	{ "gotta",          "got",         "ta",           ""},
	{ "hadnt",          "had",         "nt",           ""},
	{ "hasnt",          "has",         "nt",           ""},
	{ "havent",         "have",        "nt",           ""},
	{ "he'd've",        "he",          "'d",           "'ve"},
	{ "hes",            "he",          "s",            ""},
	{ "i'd've",         "i",           "'d",           "'ve"},
	{ "innit",          "in",          "n",            "it"},
	{ "isnt",           "is",          "nt",           ""},
	{ "it'd've",        "it",          "'d",           "'ve"},
	{ "lorra",          "lor",         "ra",           ""},
	{ "m'lud",          "m'",          "lud",          ""},
	{ "ought'a",        "ough",        "t",            "'a"},
	{ "oughta",         "ought",       "a",            ""},
	{ "shan't",         "sha",         "n't",          ""},
	{ "she'd've",       "she",         "'d",           "'ve"},
	{ "shes",           "she",         "s",            ""},
	{ "shouldn't've",   "should",      "n't",          "'ve"},
	{ "shouldnt",       "should",      "nt",           ""},
	{ "t'other",        "t'",          "other",        ""},
	{ "thats",          "that",        "s",            ""},
	{ "theres",         "there",       "s",            ""},
	{ "they'd've",      "they",        "'d",           "'ve"},
	{ "theyve",         "they",        "ve",           ""},
	{ "tis",            "t",           "is",           ""},
	{ "twas",           "t",           "was",          ""},
	{ "twere",          "t",           "were",         ""},
	{ "twould",         "t",           "would",        ""},
	// NOTE(review): duplicate of the earlier "t'other" row, and out of order here.
	{ "t'other",        "t'",          "other",        ""},
	{ "wanna",          "wan",         "na",           ""},
	{ "wannit",         "wann",        "it",           ""},
	{ "wasnae",         "was",         "nae",          ""},
	{ "wasnt",          "was",         "nt",           ""},
	{ "we'd've",        "we",          "'d",           "'ve"},
	{ "werent",         "were",        "nt",           ""},
	{ "weve",           "we",          "ve",           ""},
	{ "won't",          "wo",          "n't",          ""},
	{ "wotta",          "wott",        "a",            ""},
	{ "wouldnt",        "would",       "nt",           ""},
	{ "you'd've",       "you",         "'d",           "'ve"}
};
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -