📄 index.py

📁 libxml,在UNIX/LINUX下非常重要的一个库,为XML相关应用提供方便.目前上载的是最新版本,若要取得最新版本,请参考里面的readme.
💻 PY
📖 第 1 页 / 共 3 页
字号:
    if name == None:        return -1    if id == None:        return -1    c = DB.cursor()    try:	ret = c.execute("""INSERT INTO wordsArchive (name, id, relevance) VALUES ('%s', '%d', '%d')""" %                    (name, id, relevance))    except:        try:	    ret = c.execute("""UPDATE wordsArchive SET relevance='%d' where name='%s' and ID='%d'""" %                    (relevance, name, id))        except:	    print "Update word archive (%s, %d, %d) failed command" % (name, id, relevance)	    print """UPDATE wordsArchive SET relevance='%d' where name='%s' and ID='%d'""" % (relevance, name, id)	    print sys.exc_type, sys.exc_value	    return -1	         return ret##########################################################################									##                  Word dictionnary and analysis routines		##									############################################################################ top 100 english word without the one len < 3 + own set#dropWords = {    'the':0, 'this':0, 'can':0, 'man':0, 'had':0, 'him':0, 'only':0,    'and':0, 'not':0, 'been':0, 'other':0, 'even':0, 'are':0, 'was':0,    'new':0, 'most':0, 'but':0, 'when':0, 'some':0, 'made':0, 'from':0,    'who':0, 'could':0, 'after':0, 'that':0, 'will':0, 'time':0, 'also':0,    'have':0, 'more':0, 'these':0, 'did':0, 'was':0, 'two':0, 'many':0,    'they':0, 'may':0, 'before':0, 'for':0, 'which':0, 'out':0, 'then':0,    'must':0, 'one':0, 'through':0, 'with':0, 'you':0, 'said':0,    'first':0, 'back':0, 'were':0, 'what':0, 'any':0, 'years':0, 'his':0,    'her':0, 'where':0, 'all':0, 'its':0, 'now':0, 'much':0, 'she':0,    'about':0, 'such':0, 'your':0, 'there':0, 'into':0, 'like':0, 'may':0,    'would':0, 'than':0, 'our':0, 'well':0, 'their':0, 'them':0, 'over':0,    'down':0,    'net':0, 'www':0, 'bad':0, 'Okay':0, 'bin':0, 'cur':0,}wordsDict = {}wordsDictHTML = {}wordsDictArchive = {}def cleanupWordsString(str):    str = string.replace(str, ".", " ")    str = string.replace(str, "!", " ")    str = string.replace(str, "?", " ")    str = string.replace(str, ",", " ")    str = string.replace(str, "'", " ")    str = string.replace(str, '"', " ")    str = string.replace(str, ";", " ")    str = string.replace(str, "(", " ")    str = string.replace(str, ")", " ")    str = string.replace(str, "{", " ")    str = string.replace(str, "}", " ")    str = string.replace(str, "<", " ")    str = string.replace(str, ">", " ")    str = string.replace(str, "=", " ")    str = string.replace(str, "/", " ")    str = string.replace(str, "*", " ")    str = string.replace(str, ":", " ")    str = string.replace(str, "#", " ")    str = string.replace(str, "\\", " ")    str = string.replace(str, "\n", " ")    str = string.replace(str, "\r", " ")    str = string.replace(str, "\xc2", " ")    str = string.replace(str, "\xa0", " ")    return str    def cleanupDescrString(str):    str = string.replace(str, "'", " ")    str = string.replace(str, "\n", " ")    str = string.replace(str, "\r", " ")    str = string.replace(str, "\xc2", " ")    str = string.replace(str, "\xa0", " ")    l = string.split(str)    str = string.join(str)    return strdef splitIdentifier(str):    ret = []    while str != "":        cur = string.lower(str[0])	str = str[1:]	if ((cur < 'a') or (cur > 'z')):	    continue	while (str != "") and (str[0] >= 'A') and (str[0] <= 'Z'):	    cur = cur + string.lower(str[0])	    str = str[1:]	while (str != "") and (str[0] >= 'a') and (str[0] <= 'z'):	    cur = cur + str[0]	    str = str[1:]	while (str != "") and (str[0] >= '0') and (str[0] <= '9'):	    str = str[1:]	ret.append(cur)    return retdef addWord(word, module, symbol, relevance):    global wordsDict    if word == None or len(word) < 3:        return -1    if module == None or symbol == None:        return -1    if dropWords.has_key(word):        return 0    if ord(word[0]) > 0x80:        return 0    if wordsDict.has_key(word):        d = wordsDict[word]	if d == None:	    return 0	if len(d) > 500:	    wordsDict[word] = None	    return 0	try:	    relevance = relevance + d[(module, symbol)]	except:	    pass    else:        wordsDict[word] = {}    wordsDict[word][(module, symbol)] = relevance    return relevance    def addString(str, module, symbol, relevance):    if str == None or len(str) < 3:        return -1    ret = 0    str = cleanupWordsString(str)    l = string.split(str)    for word in l:	if len(word) > 2:	    ret = ret + addWord(word, module, symbol, 5)    return retdef addWordHTML(word, resource, id, section, relevance):    global wordsDictHTML    if word == None or len(word) < 3:        return -1    if resource == None or section == None:        return -1    if dropWords.has_key(word):        return 0    if ord(word[0]) > 0x80:        return 0    section = cleanupDescrString(section)    if wordsDictHTML.has_key(word):        d = wordsDictHTML[word]	if d == None:	    print "skipped %s" % (word)	    return 0	try:	    (r,i,s) = d[resource]	    if i != None:	        id = i	    if s != None:	        section = s	    relevance = relevance + r	except:	    pass    else:        wordsDictHTML[word] = {}    d = wordsDictHTML[word];    d[resource] = (relevance, id, section)    return relevance    def addStringHTML(str, resource, id, section, relevance):    if str == None or len(str) < 3:        return -1    ret = 0    str = cleanupWordsString(str)    l = string.split(str)    for word in l:	if len(word) > 2:	    try:		r = addWordHTML(word, resource, id, section, relevance)		if r < 0:		    print "addWordHTML failed: %s %s" % (word, resource)		ret = ret + r	    except:		print "addWordHTML failed: %s %s %d" % (word, resource, relevance)		print sys.exc_type, sys.exc_value    return retdef addWordArchive(word, id, relevance):    global wordsDictArchive    if word == None or len(word) < 3:        return -1    if id == None or id == -1:        return -1    if dropWords.has_key(word):        return 0    if ord(word[0]) > 0x80:        return 0    if wordsDictArchive.has_key(word):        d = wordsDictArchive[word]	if d == None:	    print "skipped %s" % (word)	    return 0	try:	    r = d[id]	    relevance = relevance + r	except:	    pass    else:        wordsDictArchive[word] = {}    d = wordsDictArchive[word];    d[id] = relevance    return relevance    def addStringArchive(str, id, relevance):    if str == None or len(str) < 3:        return -1    ret = 0    str = cleanupWordsString(str)    l = string.split(str)    for word in l:        i = len(word)	if i > 2:	    try:		r = addWordArchive(word, id, relevance)		if r < 0:		    print "addWordArchive failed: %s %s" % (word, id)		else:		    ret = ret + r	    except:		print "addWordArchive failed: %s %s %d" % (word, id, relevance)		print sys.exc_type, sys.exc_value    return ret##########################################################################									##                  XML API description analysis				##									##########################################################################def loadAPI(filename):    doc = libxml2.parseFile(filename)    print "loaded %s" % (filename)    return docdef foundExport(file, symbol):    if file == None:        return 0    if symbol == None:        return 0    addFunction(symbol, file)    l = splitIdentifier(symbol)    for word in l:	addWord(word, file, symbol, 10)    return 1     def analyzeAPIFile(top):    count = 0    name = top.prop("name")    cur = top.children    while cur != None:        if cur.type == 'text':	    cur = cur.next	    continue	if cur.name == "exports":	    count = count + foundExport(name, cur.prop("symbol"))	else:	    print "unexpected element %s in API doc <file name='%s'>" % (name)        cur = cur.next    return countdef analyzeAPIFiles(top):    count = 0    cur = top.children            while cur != None:        if cur.type == 'text':	    cur = cur.next	    continue	if cur.name == "file":	    count = count + analyzeAPIFile(cur)	else:	    print "unexpected element %s in API doc <files>" % (cur.name)        cur = cur.next    return countdef analyzeAPIEnum(top):    file = top.prop("file")    if file == None:        return 0    symbol = top.prop("name")    if symbol == None:        return 0    addEnum(symbol, file)    l = splitIdentifier(symbol)    for word in l:	addWord(word, file, symbol, 10)    return 1def analyzeAPIConst(top):    file = top.prop("file")    if file == None:        return 0    symbol = top.prop("name")    if symbol == None:        return 0    addConst(symbol, file)    l = splitIdentifier(symbol)    for word in l:	addWord(word, file, symbol, 10)    return 1def analyzeAPIType(top):    file = top.prop("file")    if file == None:        return 0    symbol = top.prop("name")    if symbol == None:        return 0    addType(symbol, file)    l = splitIdentifier(symbol)    for word in l:	addWord(word, file, symbol, 10)    return 1def analyzeAPIFunctype(top):    file = top.prop("file")    if file == None:        return 0    symbol = top.prop("name")    if symbol == None:        return 0    addFunctype(symbol, file)    l = splitIdentifier(symbol)    for word in l:	addWord(word, file, symbol, 10)    return 1def analyzeAPIStruct(top):    file = top.prop("file")    if file == None:        return 0    symbol = top.prop("name")    if symbol == None:        return 0    addStruct(symbol, file)    l = splitIdentifier(symbol)    for word in l:	addWord(word, file, symbol, 10)    info = top.prop("info")    if info != None:	info = string.replace(info, "'", " ")	info = string.strip(info)	l = string.split(info)	for word in l:	    if len(word) > 2:		addWord(word, file, symbol, 5)    return 1def analyzeAPIMacro(top):    file = top.prop("file")    if file == None:        return 0    symbol = top.prop("name")    if symbol == None:        return 0    symbol = string.replace(symbol, "'", " ")    symbol = string.strip(symbol)    info = None    cur = top.children    while cur != None:        if cur.type == 'text':	    cur = cur.next	    continue	if cur.name == "info":	    info = cur.content	    break        cur = cur.next    l = splitIdentifier(symbol)    for word in l:	addWord(word, file, symbol, 10)    if info == None:	addMacro(symbol, file)        print "Macro %s description has no <info>" % (symbol)        return 0    info = string.replace(info, "'", " ")    info = string.strip(info)    addMacro(symbol, file, info)    l = string.split(info)    for word in l:	if len(word) > 2:	    addWord(word, file, symbol, 5)    return 1def analyzeAPIFunction(top):
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -