📄 buildhtb.py

📁 Emdros is a text database middleware-layer aimed at storage and retrieval of "text plus information about that text".
💻 PY
📖 第 1 页 / 共 2 页
字号:
上一页 12
        \\normalfont\\ttfamily}%        \\item[]}        {\\end{list}}"""        print >> f, "\\maketitle\n"        print >> f, "\\tableofcontents\n"        for id in hierarchy.docline:            if id != 1000:                print >>f, "%s{%s}" % (self.getSection(hierarchy.levels[id]),hierarchy.getEntry(id).getTitle().replace("&quot;","\""))                pagestring = self.pages[id]                pagestring = self.expandTagsLaTeX(pagestring, id, hierarchy)                print >>f, pagestring        print >> f, "\n\\end{document}"        f.close()    def expandTags(self, pagestring, id, hierarchy, extension):        page_anchor_re = re.compile(r'<page_anchor[ \t\n]+ID=\"([0-9]+)">')        page_anchor_end_re = re.compile(r'</page_anchor>')        index_tag_re = re.compile(r'<index[ \t\n]+term="[^"]+">')        tmpstr = page_anchor_end_re.sub("</A>", pagestring)        # Take care of page_ref        page_ref_re = re.compile(r'<page_ref[ \t\n]+ID=\"([0-9]+)"[ \t\n]*[/]?>')        ref_list = re.split(r'(<page_ref[ \t\n]+ID=\"([0-9]+)"[ \t\n]*[/]?>)', tmpstr)        tmpstr = ref_list[0]        if len(ref_list) == 1:            pass        else:            for index in range(1, len(ref_list), 3):                ref_id = int(ref_list[index+1])                ref_title = hierarchy.getEntry(int(ref_id)).getTitle()                tmpstr = tmpstr + r'<A HREF="%d%s">%s</A>' % (ref_id, extension, ref_title)                if index + 2 < len(ref_list):                    tmpstr = tmpstr + ref_list[index+2]                            if extension is None:            h2_re = re.compile(r'<[Hh]2>')            h2_re_end = re.compile(r'</[Hh]2>')            tmpstr = page_anchor_re.sub(r'<A HREF="#\1">', tmpstr)            level = hierarchy.levels[id] + 1            tmpstr = h2_re.sub("<h%d>" % level, tmpstr)            tmpstr = h2_re_end.sub("</h%d>" % level, tmpstr)        else:            tmpstr = page_anchor_re.sub(r'<A HREF="\1%s">' % extension, tmpstr)        tmpstr = 
index_tag_re.sub("", tmpstr)        tmpstr = tmpstr.replace("{PREBACKSLASH}", "\\")        tmpstr = tmpstr.replace("{PREUNDERSCORE}", "_")        tmpstr = tmpstr.replace("{PREHASH}", "#")        tmpstr = tmpstr.replace("{OPENBRACE}", "{")        tmpstr = tmpstr.replace("{PREOPENBRACE}", "{")        tmpstr = tmpstr.replace("{CLOSEBRACE}", "}")        tmpstr = tmpstr.replace("{PRECLOSEBRACE}", "}")        tmpstr = tmpstr.replace("{PRELT}", "&lt;")        tmpstr = tmpstr.replace("{PREGT}", "&gt;")	tmpstr = tmpstr.replace("{PREAMP}", "&amp;")	tmpstr = tmpstr.replace("{PREDOLLAR}","$")	tmpstr = tmpstr.replace("{PREPIPE}", "|")        return tmpstr    def expandTagsLaTeX(self, pagestring, id, hierarchy):        # To strip...        to_strip_re = re.compile(r'(<index[ \t\n]+term="[^"]+">)|(<A[^>]+>)|(</[Aa]>)|(<page_anchor[ \t\n]+ID=\"([0-9]+)">)|(</page_anchor>)')        tmpstr = to_strip_re.sub("", pagestring)        # Take care of page_ref        page_ref_re = re.compile(r'<page_ref[ \t\n]+ID=\"([0-9]+)"[ \t\n]*[/]?>')        ref_list = re.split(r'(<page_ref[ \t\n]+ID=\"([0-9]+)"[ \t\n]*[/]?>)', tmpstr)        tmpstr = ref_list[0]        if len(ref_list) == 1:            pass        else:            for index in range(1, len(ref_list), 3):                ref_id = int(ref_list[index+1])                ref_title = hierarchy.getEntry(int(ref_id)).getTitle()                tmpstr = tmpstr + "%s" % ref_title                if index + 2 < len(ref_list):                    tmpstr = tmpstr + ref_list[index+2]        # Simple replacements        tmpstr = tmpstr.replace("&quot;", "\"")	tmpstr = tmpstr.replace("$","{DOLLAR}")	tmpstr = tmpstr.replace("\\","$\\backslash$")	tmpstr = tmpstr.replace("_","\\_")	tmpstr = tmpstr.replace("&nbsp;", "\\ ")	tmpstr = tmpstr.replace("&lt;", "$<$")	tmpstr = tmpstr.replace("&gt;", "$>$")	tmpstr = tmpstr.replace("&amp;", "&")	tmpstr = tmpstr.replace("&","\&")	tmpstr = tmpstr.replace("{PREBACKSLASH}", "\\")	tmpstr = 
tmpstr.replace("{PREUNDERSCORE}","_")	tmpstr = tmpstr.replace("#", "\\#")	tmpstr = tmpstr.replace("{DOLLAR}", "\\$")	tmpstr = tmpstr.replace("{PREAMP}", "&")	tmpstr = tmpstr.replace("{PREDOLLAR}","$")	tmpstr = tmpstr.replace("^", "\\^")        tmpstr = tmpstr.replace("{OPENBRACE}", "\\{")        tmpstr = tmpstr.replace("{CLOSEBRACE}", "\\}")	tmpstr = tmpstr.replace("|", "$|$")	tmpstr = tmpstr.replace("{PREPIPE}", "|")        tmpstr = tmpstr.replace("忙", "{\\ae}").replace("酶", "{\\o}").replace("氓", "{\\aa}")        tmpstr = tmpstr.replace("脝", "{\\AE}").replace("脴", "{\\O}").replace("脜", "{\\AA}")        tmpstr = tmpstr.replace("茅", "{\\'e}").replace("脡", "{\\'E}")        tmpstr = tmpstr.replace("脿", "{\\`a}")        # IMG        p_img_re = re.compile(r'<[Pp]><[Ii][Mm][Gg][^>]+[Ss][Rr][Cc]="([^"]+)"[^>]*></[Pp]>')        tmpstr = p_img_re.sub(r'\n\\bigskip\n\\includegraphics[scale=0.5]{\1}\n\\bigskip\n', tmpstr)        img_re = re.compile(r'<[Ii][Mm][Gg][^>]+[Ss][Rr][Cc]="([^"]+)"[^>]*>')        tmpstr = img_re.sub(r'\\includegraphics[scale=0.5]{\1}', tmpstr)                # H2        h2_re = re.compile(r'<[Hh]2>')        h2_re_end = re.compile(r'</[Hh]2>')        level = hierarchy.levels[id] + 1        tmpstr = h2_re.sub("%s{" % self.getSection(level), tmpstr)        tmpstr = h2_re_end.sub("}\n", tmpstr)        # H3        h3_re = re.compile(r'<[Hh]3>')        h3_re_end = re.compile(r'</[Hh]3>')        level = hierarchy.levels[id] + 2        tmpstr = h3_re.sub("%s{" % self.getSection(level), tmpstr)        tmpstr = h3_re_end.sub("}\n\n", tmpstr)        # BR        br_re = re.compile(r'<[Bb][Rr]>')        tmpstr = br_re.sub("\\\\newline\n", tmpstr)        # PRE        preMQLExample_re = re.compile(r'<[Pp][Rr][Ee][ \n\t]+class="MQLExample"[^>]*>([^<]+)</[Pp][Rr][Ee]>')        tmpstr = preMQLExample_re.sub(r'\\begin{verbatim}\n\1\n\\end{verbatim}', tmpstr)        preCODE_re = re.compile(r'<[Pp][Rr][Ee][ \n\t]+class="code"[^>]*>([^<]+)</[Pp][Rr][Ee]><!-- 
widthincm[^:]*:[^0-9]*([0-9]+)[^-]*-->')        tmpstr = preCODE_re.sub(r'\\begin{verbatim}\n\1\n\\end{verbatim}', tmpstr)        preINTERFACE_re = re.compile(r'<[Pp][Rr][Ee][ \n\t]+class="interface"[^>]*>([^<]+)</[Pp][Rr][Ee]><!-- widthincm[^:]*:[^0-9]*([0-9]+)[^-]*-->')        tmpstr = preINTERFACE_re.sub(r'\\begin{verbatim}\n\1\n\\end{verbatim}', tmpstr)        pre_re = re.compile(r'<[Pp][Rr][Ee][^>]*>([^<]+)</[Pp][Rr][Ee]>[^<]*<!-- widthincm[^:]*:[^0-9]*([0-9]+)[^-]*-->')        tmpstr = pre_re.sub(r'\\begin{minipage}{\2cm}\\begin{verbatim}\n\1\n\\end{verbatim}\\end{minipage}', tmpstr)                # UL / OL / LI        ul_re = re.compile(r'<[Uu][Ll]>')        ul_re_end = re.compile(r'</[Uu][Ll]>')        tmpstr = ul_re.sub(r'\\begin{itemize}', tmpstr)        tmpstr = ul_re_end.sub(r'\\end{itemize}', tmpstr)        ol_re = re.compile(r'<[Oo][Ll]>')        ol_re_end = re.compile(r'</[Oo][Ll]>')        tmpstr = ol_re.sub(r'\\begin{enumerate}', tmpstr)        tmpstr = ol_re_end.sub(r'\\end{enumerate}', tmpstr)        il_re = re.compile(r'<[Ll][Ii]>')        tmpstr = il_re.sub(r'\n\\item ', tmpstr)        # STRONG        strong_re = re.compile(r'<[Ss][Tt][Rr][Oo][Nn][Gg]>')        strong_re_end = re.compile(r'</[Ss][Tt][Rr][Oo][Nn][Gg]>')        tmpstr = strong_re.sub(r'{\\bf ', tmpstr)        tmpstr = strong_re_end.sub(r'}', tmpstr)        # B        b_re = re.compile(r'<[Bb]>')        b_re_end = re.compile(r'</[Bb]>')        tmpstr = b_re.sub(r'{\\bf ', tmpstr)        tmpstr = b_re_end.sub(r'}', tmpstr)        # TABLE / TR / TD        table_re = re.compile(r'<[Tt][Aa][Bb][Ll][Ee][^>]*>[^<]*<!-- columns[^:]*:[^0-9]*([0-9]+)[^-]*-->')        table_re_end = re.compile(r'</[Tt][Aa][Bb][Ll][Ee]>')        tr_td_re = re.compile(r'<[Tt][Rr]>[^<]*<[Tt][DdHh]>')        td_td_re_end = re.compile(r'[ \n]*</[Tt][DdHh]>[^<]*<[Tt][DdHh]>')        td_tr_re_end = re.compile(r'</[Tt][DdHh]>[^<]*</[Tt][Rr]>')        tmpstr = 
table_re.sub(r'\n\\bigskip\n\\begin{tabular}{*{\1}{|l}|}\n\\hline', tmpstr)        tmpstr = table_re_end.sub(r'\n\\end{tabular}\n\\bigskip\n', tmpstr)        tmpstr = tr_td_re.sub(r'\\begin{minipage}[t]{4cm}', tmpstr)        tmpstr = td_td_re_end.sub(r'\\end{minipage} & \\begin{minipage}[t]{4cm}', tmpstr)        tmpstr = td_tr_re_end.sub(r'\\end{minipage}\\\\\\hline', tmpstr)        # P align="center"        # P        p_re = re.compile(r'<[Pp]>')        p_re_end = re.compile(r'</[Pp]>')	p_center_re_remove = re.compile(r'<[Pp][ \t\n]+align="center"[ \t\n]*>')        tmpstr = p_re_end.sub(r'\n', tmpstr)        tmpstr = p_center_re_remove.sub(r'\n', tmpstr)        tmpstr = p_re.sub(r'\n\n', tmpstr)        # EM        em_re = re.compile(r'<[Ee][Mm]>')        em_re_end = re.compile(r'</[Ee][Mm]>')        tmpstr = em_re.sub(r'{\\em ', tmpstr)        tmpstr = em_re_end.sub(r'}', tmpstr)        # BIG        big_re = re.compile(r'<[Bb][Ii][Gg]>')        big_re_end = re.compile(r'</[Bb][Ii][Gg]>')        tmpstr = big_re.sub("{\\Large ", tmpstr)        tmpstr = big_re_end.sub("}", tmpstr)        # CENTER        center_re = re.compile(r'<[Cc][Ee][Nn][Tt][Ee][Rr]>')        center_re_end = re.compile(r'</[Cc][Ee][Nn][Tt][Ee][Rr]>')        tmpstr = center_re.sub(r'\n\\begin{center}\n', tmpstr)        tmpstr = center_re_end.sub(r'\n\\end{center}\n', tmpstr)        tmpstr = tmpstr.replace("{PREHASH}", "#").replace("{PRELT}","<").replace("{PREGT}",">").replace("{PREOPENBRACE}","{").replace("{PRECLOSEBRACE}","}")        return tmpstr    def writeHHC(self, bookname, hierarchy, extension):        f = open(bookname + ".hhc", "w")        top_level_ids = hierarchy.getImmediateChildren(1000)        self.writeHHCList(f, top_level_ids, hierarchy, extension)        f.close()    def writeHHCList(self, f, idlist, hierarchy, extension):        if len(idlist) > 0:            print >>f, "<ul>"            for id in idlist:                print >>f, "  <li><object type=\"text/sitemap\">"            
    print >>f, "          <param name=\"Name\" value=\"%s\">" % hierarchy.getEntry(id).getTitle()                print >>f, "          <param name=\"ID\" value=%d>" % id                print >>f, "          <param name=\"Local\" value=\"%d%s\">" % (id, str(extension))                print >>f, "      </object>"                id_children = hierarchy.getImmediateChildren(id)                self.writeHHCList(f, id_children, hierarchy, extension)            print >>f, "</ul>"    def writeHHK(self, bookname, hierarchy, extension):        self.produceIndex(hierarchy)        f = open(bookname + ".hhk", "w")        topics = self.myindex.keys()        topics.sort()        if len(topics) > 0:            print >>f, "<ul>"            for topic in topics:                idlist = self.myindex[topic]                idlist.sort()                for id in idlist:                    print >>f, "  <li><object type=\"text/sitemap\">"                    print >>f, "      <param name=\"Name\" value=\"%s\">" % topic                    print >>f, "      <param name=\"Local\" value=\"%d%s\">" % (id, extension)                    print >>f, "      </object>"                                print >>f, "</ul>"        f.close()    def produceIndex(self, hierarchy):        index_tag_re = re.compile(r'<index[ \t\n]+term="([^"]+)">')        for pageid in self.pages.keys():            title = hierarchy.getEntry(pageid).getTitle()            if title not in self.myindex.keys():                self.myindex[title] = []            self.myindex[title].append(pageid)            pagestring = self.pages[pageid]            indexterms = index_tag_re.findall(pagestring)            if len(indexterms) > 0:                for topic in indexterms:                    if topic not in self.myindex.keys():                        self.myindex[topic] = []                    self.myindex[topic].append(pageid)                    def doit(bookname, booktitle, contents_filename, files_filename, purpose):    # Read 
"files.txt"    f = open(files_filename)    hierarchy = Hierarchy()    hierarchy.readFromFile(f)    f.close()    # Read "Content.txt"    f = open(contents_filename)    pages = Pages()    pages.readFromFile(f)    f.close()    if purpose == "htb":        pages.writeHTMLPages(hierarchy, ".htm", False)        pages.writeHHC(bookname, hierarchy, ".htm")        pages.writeHHK(bookname, hierarchy, ".htm")    elif purpose == "single":        pages.writeOneBigHTMLPage(bookname, booktitle, hierarchy)    elif purpose == "latex":        pages.writeOneBigLaTeXPage(bookname, booktitle, hierarchy)    elif purpose == "web":        pages.writeHTMLPages(hierarchy, ".html", True)    else:        raise Exception("Error: Unknown purpose '%s'" % purpose)# Do htb by defaultpurpose = "htb"if len(sys.argv) >= 2:    arg1 = sys.argv[1]    if arg1[0:2] == "--":        purpose = arg1[2:]    else:	print "Unknown switch: %s" % arg1	sys.exit(1)        doit(PDF_PREFIX, DOC_TITLE, "Content.txt", "files.txt", purpose)
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -