📄 buildhtb.py
字号:
\\normalfont\\ttfamily}% \\item[]} {\\end{list}}""" print >> f, "\\maketitle\n" print >> f, "\\tableofcontents\n" for id in hierarchy.docline: if id != 1000: print >>f, "%s{%s}" % (self.getSection(hierarchy.levels[id]),hierarchy.getEntry(id).getTitle().replace(""","\"")) pagestring = self.pages[id] pagestring = self.expandTagsLaTeX(pagestring, id, hierarchy) print >>f, pagestring print >> f, "\n\\end{document}" f.close() def expandTags(self, pagestring, id, hierarchy, extension): page_anchor_re = re.compile(r'<page_anchor[ \t\n]+ID=\"([0-9]+)">') page_anchor_end_re = re.compile(r'</page_anchor>') index_tag_re = re.compile(r'<index[ \t\n]+term="[^"]+">') tmpstr = page_anchor_end_re.sub("</A>", pagestring) # Take care of page_ref page_ref_re = re.compile(r'<page_ref[ \t\n]+ID=\"([0-9]+)"[ \t\n]*[/]?>') ref_list = re.split(r'(<page_ref[ \t\n]+ID=\"([0-9]+)"[ \t\n]*[/]?>)', tmpstr) tmpstr = ref_list[0] if len(ref_list) == 1: pass else: for index in range(1, len(ref_list), 3): ref_id = int(ref_list[index+1]) ref_title = hierarchy.getEntry(int(ref_id)).getTitle() tmpstr = tmpstr + r'<A HREF="%d%s">%s</A>' % (ref_id, extension, ref_title) if index + 2 < len(ref_list): tmpstr = tmpstr + ref_list[index+2] if extension is None: h2_re = re.compile(r'<[Hh]2>') h2_re_end = re.compile(r'</[Hh]2>') tmpstr = page_anchor_re.sub(r'<A HREF="#\1">', tmpstr) level = hierarchy.levels[id] + 1 tmpstr = h2_re.sub("<h%d>" % level, tmpstr) tmpstr = h2_re_end.sub("</h%d>" % level, tmpstr) else: tmpstr = page_anchor_re.sub(r'<A HREF="\1%s">' % extension, tmpstr) tmpstr = index_tag_re.sub("", tmpstr) tmpstr = tmpstr.replace("{PREBACKSLASH}", "\\") tmpstr = tmpstr.replace("{PREUNDERSCORE}", "_") tmpstr = tmpstr.replace("{PREHASH}", "#") tmpstr = tmpstr.replace("{OPENBRACE}", "{") tmpstr = tmpstr.replace("{PREOPENBRACE}", "{") tmpstr = tmpstr.replace("{CLOSEBRACE}", "}") tmpstr = tmpstr.replace("{PRECLOSEBRACE}", "}") tmpstr = tmpstr.replace("{PRELT}", "<") tmpstr = tmpstr.replace("{PREGT}", ">") tmpstr = tmpstr.replace("{PREAMP}", "&") tmpstr = tmpstr.replace("{PREDOLLAR}","$") tmpstr = tmpstr.replace("{PREPIPE}", "|") return tmpstr def expandTagsLaTeX(self, pagestring, id, hierarchy): # To strip... to_strip_re = re.compile(r'(<index[ \t\n]+term="[^"]+">)|(<A[^>]+>)|(</[Aa]>)|(<page_anchor[ \t\n]+ID=\"([0-9]+)">)|(</page_anchor>)') tmpstr = to_strip_re.sub("", pagestring) # Take care of page_ref page_ref_re = re.compile(r'<page_ref[ \t\n]+ID=\"([0-9]+)"[ \t\n]*[/]?>') ref_list = re.split(r'(<page_ref[ \t\n]+ID=\"([0-9]+)"[ \t\n]*[/]?>)', tmpstr) tmpstr = ref_list[0] if len(ref_list) == 1: pass else: for index in range(1, len(ref_list), 3): ref_id = int(ref_list[index+1]) ref_title = hierarchy.getEntry(int(ref_id)).getTitle() tmpstr = tmpstr + "%s" % ref_title if index + 2 < len(ref_list): tmpstr = tmpstr + ref_list[index+2] # Simple replacements tmpstr = tmpstr.replace(""", "\"") tmpstr = tmpstr.replace("$","{DOLLAR}") tmpstr = tmpstr.replace("\\","$\\backslash$") tmpstr = tmpstr.replace("_","\\_") tmpstr = tmpstr.replace(" ", "\\ ") tmpstr = tmpstr.replace("<", "$<$") tmpstr = tmpstr.replace(">", "$>$") tmpstr = tmpstr.replace("&", "&") tmpstr = tmpstr.replace("&","\&") tmpstr = tmpstr.replace("{PREBACKSLASH}", "\\") tmpstr = tmpstr.replace("{PREUNDERSCORE}","_") tmpstr = tmpstr.replace("#", "\\#") tmpstr = tmpstr.replace("{DOLLAR}", "\\$") tmpstr = tmpstr.replace("{PREAMP}", "&") tmpstr = tmpstr.replace("{PREDOLLAR}","$") tmpstr = tmpstr.replace("^", "\\^") tmpstr = tmpstr.replace("{OPENBRACE}", "\\{") tmpstr = tmpstr.replace("{CLOSEBRACE}", "\\}") tmpstr = tmpstr.replace("|", "$|$") tmpstr = tmpstr.replace("{PREPIPE}", "|") tmpstr = tmpstr.replace("忙", "{\\ae}").replace("酶", "{\\o}").replace("氓", "{\\aa}") tmpstr = tmpstr.replace("脝", "{\\AE}").replace("脴", "{\\O}").replace("脜", "{\\AA}") tmpstr = tmpstr.replace("茅", "{\\'e}").replace("脡", "{\\'E}") tmpstr = tmpstr.replace("脿", "{\\`a}") # IMG p_img_re = re.compile(r'<[Pp]><[Ii][Mm][Gg][^>]+[Ss][Rr][Cc]="([^"]+)"[^>]*></[Pp]>') tmpstr = p_img_re.sub(r'\n\\bigskip\n\\includegraphics[scale=0.5]{\1}\n\\bigskip\n', tmpstr) img_re = re.compile(r'<[Ii][Mm][Gg][^>]+[Ss][Rr][Cc]="([^"]+)"[^>]*>') tmpstr = img_re.sub(r'\\includegraphics[scale=0.5]{\1}', tmpstr) # H2 h2_re = re.compile(r'<[Hh]2>') h2_re_end = re.compile(r'</[Hh]2>') level = hierarchy.levels[id] + 1 tmpstr = h2_re.sub("%s{" % self.getSection(level), tmpstr) tmpstr = h2_re_end.sub("}\n", tmpstr) # H3 h3_re = re.compile(r'<[Hh]3>') h3_re_end = re.compile(r'</[Hh]3>') level = hierarchy.levels[id] + 2 tmpstr = h3_re.sub("%s{" % self.getSection(level), tmpstr) tmpstr = h3_re_end.sub("}\n\n", tmpstr) # BR br_re = re.compile(r'<[Bb][Rr]>') tmpstr = br_re.sub("\\\\newline\n", tmpstr) # PRE preMQLExample_re = re.compile(r'<[Pp][Rr][Ee][ \n\t]+class="MQLExample"[^>]*>([^<]+)</[Pp][Rr][Ee]>') tmpstr = preMQLExample_re.sub(r'\\begin{verbatim}\n\1\n\\end{verbatim}', tmpstr) preCODE_re = re.compile(r'<[Pp][Rr][Ee][ \n\t]+class="code"[^>]*>([^<]+)</[Pp][Rr][Ee]><!-- widthincm[^:]*:[^0-9]*([0-9]+)[^-]*-->') tmpstr = preCODE_re.sub(r'\\begin{verbatim}\n\1\n\\end{verbatim}', tmpstr) preINTERFACE_re = re.compile(r'<[Pp][Rr][Ee][ \n\t]+class="interface"[^>]*>([^<]+)</[Pp][Rr][Ee]><!-- widthincm[^:]*:[^0-9]*([0-9]+)[^-]*-->') tmpstr = preINTERFACE_re.sub(r'\\begin{verbatim}\n\1\n\\end{verbatim}', tmpstr) pre_re = re.compile(r'<[Pp][Rr][Ee][^>]*>([^<]+)</[Pp][Rr][Ee]>[^<]*<!-- widthincm[^:]*:[^0-9]*([0-9]+)[^-]*-->') tmpstr = pre_re.sub(r'\\begin{minipage}{\2cm}\\begin{verbatim}\n\1\n\\end{verbatim}\\end{minipage}', tmpstr) # UL / OL / LI ul_re = re.compile(r'<[Uu][Ll]>') ul_re_end = re.compile(r'</[Uu][Ll]>') tmpstr = ul_re.sub(r'\\begin{itemize}', tmpstr) tmpstr = ul_re_end.sub(r'\\end{itemize}', tmpstr) ol_re = re.compile(r'<[Oo][Ll]>') ol_re_end = re.compile(r'</[Oo][Ll]>') tmpstr = ol_re.sub(r'\\begin{enumerate}', tmpstr) tmpstr = ol_re_end.sub(r'\\end{enumerate}', tmpstr) il_re = re.compile(r'<[Ll][Ii]>') tmpstr = il_re.sub(r'\n\\item ', tmpstr) # STRONG strong_re = re.compile(r'<[Ss][Tt][Rr][Oo][Nn][Gg]>') strong_re_end = re.compile(r'</[Ss][Tt][Rr][Oo][Nn][Gg]>') tmpstr = strong_re.sub(r'{\\bf ', tmpstr) tmpstr = strong_re_end.sub(r'}', tmpstr) # B b_re = re.compile(r'<[Bb]>') b_re_end = re.compile(r'</[Bb]>') tmpstr = b_re.sub(r'{\\bf ', tmpstr) tmpstr = b_re_end.sub(r'}', tmpstr) # TABLE / TR / TD table_re = re.compile(r'<[Tt][Aa][Bb][Ll][Ee][^>]*>[^<]*<!-- columns[^:]*:[^0-9]*([0-9]+)[^-]*-->') table_re_end = re.compile(r'</[Tt][Aa][Bb][Ll][Ee]>') tr_td_re = re.compile(r'<[Tt][Rr]>[^<]*<[Tt][DdHh]>') td_td_re_end = re.compile(r'[ \n]*</[Tt][DdHh]>[^<]*<[Tt][DdHh]>') td_tr_re_end = re.compile(r'</[Tt][DdHh]>[^<]*</[Tt][Rr]>') tmpstr = table_re.sub(r'\n\\bigskip\n\\begin{tabular}{*{\1}{|l}|}\n\\hline', tmpstr) tmpstr = table_re_end.sub(r'\n\\end{tabular}\n\\bigskip\n', tmpstr) tmpstr = tr_td_re.sub(r'\\begin{minipage}[t]{4cm}', tmpstr) tmpstr = td_td_re_end.sub(r'\\end{minipage} & \\begin{minipage}[t]{4cm}', tmpstr) tmpstr = td_tr_re_end.sub(r'\\end{minipage}\\\\\\hline', tmpstr) # P align="center" # P p_re = re.compile(r'<[Pp]>') p_re_end = re.compile(r'</[Pp]>') p_center_re_remove = re.compile(r'<[Pp][ \t\n]+align="center"[ \t\n]*>') tmpstr = p_re_end.sub(r'\n', tmpstr) tmpstr = p_center_re_remove.sub(r'\n', tmpstr) tmpstr = p_re.sub(r'\n\n', tmpstr) # EM em_re = re.compile(r'<[Ee][Mm]>') em_re_end = re.compile(r'</[Ee][Mm]>') tmpstr = em_re.sub(r'{\\em ', tmpstr) tmpstr = em_re_end.sub(r'}', tmpstr) # BIG big_re = re.compile(r'<[Bb][Ii][Gg]>') big_re_end = re.compile(r'</[Bb][Ii][Gg]>') tmpstr = big_re.sub("{\\Large ", tmpstr) tmpstr = big_re_end.sub("}", tmpstr) # CENTER center_re = re.compile(r'<[Cc][Ee][Nn][Tt][Ee][Rr]>') center_re_end = re.compile(r'</[Cc][Ee][Nn][Tt][Ee][Rr]>') tmpstr = center_re.sub(r'\n\\begin{center}\n', tmpstr) tmpstr = center_re_end.sub(r'\n\\end{center}\n', tmpstr) tmpstr = tmpstr.replace("{PREHASH}", "#").replace("{PRELT}","<").replace("{PREGT}",">").replace("{PREOPENBRACE}","{").replace("{PRECLOSEBRACE}","}") return tmpstr def writeHHC(self, bookname, hierarchy, extension): f = open(bookname + ".hhc", "w") top_level_ids = hierarchy.getImmediateChildren(1000) self.writeHHCList(f, top_level_ids, hierarchy, extension) f.close() def writeHHCList(self, f, idlist, hierarchy, extension): if len(idlist) > 0: print >>f, "<ul>" for id in idlist: print >>f, " <li><object type=\"text/sitemap\">" print >>f, " <param name=\"Name\" value=\"%s\">" % hierarchy.getEntry(id).getTitle() print >>f, " <param name=\"ID\" value=%d>" % id print >>f, " <param name=\"Local\" value=\"%d%s\">" % (id, str(extension)) print >>f, " </object>" id_children = hierarchy.getImmediateChildren(id) self.writeHHCList(f, id_children, hierarchy, extension) print >>f, "</ul>" def writeHHK(self, bookname, hierarchy, extension): self.produceIndex(hierarchy) f = open(bookname + ".hhk", "w") topics = self.myindex.keys() topics.sort() if len(topics) > 0: print >>f, "<ul>" for topic in topics: idlist = self.myindex[topic] idlist.sort() for id in idlist: print >>f, " <li><object type=\"text/sitemap\">" print >>f, " <param name=\"Name\" value=\"%s\">" % topic print >>f, " <param name=\"Local\" value=\"%d%s\">" % (id, extension) print >>f, " </object>" print >>f, "</ul>" f.close() def produceIndex(self, hierarchy): index_tag_re = re.compile(r'<index[ \t\n]+term="([^"]+)">') for pageid in self.pages.keys(): title = hierarchy.getEntry(pageid).getTitle() if title not in self.myindex.keys(): self.myindex[title] = [] self.myindex[title].append(pageid) pagestring = self.pages[pageid] indexterms = index_tag_re.findall(pagestring) if len(indexterms) > 0: for topic in indexterms: if topic not in self.myindex.keys(): self.myindex[topic] = [] self.myindex[topic].append(pageid) def doit(bookname, booktitle, contents_filename, files_filename, purpose): # Read "files.txt" f = open(files_filename) hierarchy = Hierarchy() hierarchy.readFromFile(f) f.close() # Read "Content.txt" f = open(contents_filename) pages = Pages() pages.readFromFile(f) f.close() if purpose == "htb": pages.writeHTMLPages(hierarchy, ".htm", False) pages.writeHHC(bookname, hierarchy, ".htm") pages.writeHHK(bookname, hierarchy, ".htm") elif purpose == "single": pages.writeOneBigHTMLPage(bookname, booktitle, hierarchy) elif purpose == "latex": pages.writeOneBigLaTeXPage(bookname, booktitle, hierarchy) elif purpose == "web": pages.writeHTMLPages(hierarchy, ".html", True) else: raise Exception("Error: Unknown purpose '%s'" % purpose)# Do htb by defaultpurpose = "htb"if len(sys.argv) >= 2: arg1 = sys.argv[1] if arg1[0:2] == "--": purpose = arg1[2:] else: print "Unknown switch: %s" % arg1 sys.exit(1) doit(PDF_PREFIX, DOC_TITLE, "Content.txt", "files.txt", purpose)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -