bluecloth.rb
来自「用ruby on rails写的一个博客程序,还不错..ruby on rail」· RB 代码 · 共 1,145 行 · 第 1/2 页
RB
1,145 行
#!/usr/bin/ruby# # Bluecloth is a Ruby implementation of Markdown, a text-to-HTML conversion# tool.# # == Synopsis# # doc = BlueCloth::new "# ## Test document #### Just a simple test.# "## puts doc.to_html# # == Authors# # * Michael Granger <ged@FaerieMUD.org># # == Contributors## * Martin Chase <stillflame@FaerieMUD.org> - Peer review, helpful suggestions# * Florian Gross <flgr@ccan.de> - Filter options, suggestions## == Copyright## Original version:# Copyright (c) 2003-2004 John Gruber# <http://daringfireball.net/> # All rights reserved.## Ruby port:# Copyright (c) 2004 The FaerieMUD Consortium.# # BlueCloth is free software; you can redistribute it and/or modify it under the# terms of the GNU General Public License as published by the Free Software# Foundation; either version 2 of the License, or (at your option) any later# version.# # BlueCloth is distributed in the hope that it will be useful, but WITHOUT ANY# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR# A PARTICULAR PURPOSE. See the GNU General Public License for more details.# # == To-do## * Refactor some of the larger uglier methods that have to do their own# brute-force scanning because of lack of Perl features in Ruby's Regexp# class. Alternately, could add a dependency on 'pcre' and use most Perl# regexps.## * Put the StringScanner in the render state for thread-safety.## == Version## $Id: bluecloth.rb 69 2004-08-25 05:27:15Z ged $# require 'digest/md5'require 'logger'require 'strscan'### BlueCloth is a Ruby implementation of Markdown, a text-to-HTML conversion### tool.class BlueCloth < String ### Exception class for formatting errors. class FormatError < RuntimeError ### Create a new FormatError with the given source +str+ and an optional ### message about the +specific+ error. 
def initialize( str, specific=nil ) if specific msg = "Bad markdown format near %p: %s" % [ str, specific ] else msg = "Bad markdown format near %p" % str end super( msg ) end end # Release Version Version = '0.0.3' # SVN Revision SvnRev = %q$Rev: 69 $ # SVN Id tag SvnId = %q$Id: bluecloth.rb 69 2004-08-25 05:27:15Z ged $ # SVN URL SvnUrl = %q$URL: svn+ssh://svn.faeriemud.org/usr/local/svn/BlueCloth/trunk/lib/bluecloth.rb $ # Rendering state struct. Keeps track of URLs, titles, and HTML blocks # midway through a render. I prefer this to the globals of the Perl version # because globals make me break out in hives. Or something. RenderState = Struct::new( "RenderState", :urls, :titles, :html_blocks, :log ) # Tab width for #detab! if none is specified TabWidth = 4 # The tag-closing string -- set to '>' for HTML EmptyElementSuffix = "/>"; # Table of MD5 sums for escaped characters EscapeTable = {} '\\`*_{}[]()#.!'.split(//).each {|char| hash = Digest::MD5::hexdigest( char ) EscapeTable[ char ] = { :md5 => hash, :md5re => Regexp::new( hash ), :re => Regexp::new( '\\\\' + Regexp::escape(char) ), } } ################################################################# ### I N S T A N C E M E T H O D S ################################################################# ### Create a new BlueCloth string. def initialize( content="", *restrictions ) @log = Logger::new( $deferr ) @log.level = $DEBUG ? Logger::DEBUG : ($VERBOSE ? Logger::INFO : Logger::WARN) @scanner = nil # Add any restrictions, and set the line-folding attribute to reflect # what happens by default. @filter_html = nil @filter_styles = nil restrictions.flatten.each {|r| __send__("#{r}=", true) } @fold_lines = true super( content ) @log.debug "String is: %p" % self end ###### public ###### # Filters for controlling what gets output for untrusted input. (But really, # you're filtering bad stuff out of untrusted input at submission-time via # untainting, aren't you?) 
attr_accessor :filter_html, :filter_styles

# RedCloth-compatibility accessor. Line-folding is part of Markdown syntax,
# so this isn't used by anything.
attr_accessor :fold_lines


### Render Markdown-formatted text in this string object as HTML and return
### it. The parameter is for compatibility with RedCloth, and is currently
### unused, though that may change in the future.
def to_html( lite=false )

  # Create a StringScanner we can reuse for various lexing tasks
  @scanner = StringScanner::new( '' )

  # Make a structure to carry around stuff that gets placeholdered out of
  # the source.
  rs = RenderState::new( {}, {}, {} )

  # Make a copy of the string with normalized line endings, tabs turned to
  # spaces, and a couple of guaranteed newlines at the end
  text = self.gsub( /\r\n?/, "\n" ).detab
  text += "\n\n"
  @log.debug "Normalized line-endings: %p" % text

  # Filter HTML if we're asked to do so: turn the angle brackets into
  # entities so that no markup from the source reaches the output as a tag.
  if self.filter_html
    text.gsub!( "<", "&lt;" )
    text.gsub!( ">", "&gt;" )
    @log.debug "Filtered HTML: %p" % text
  end

  # Simplify blank lines
  text.gsub!( /^ +$/, '' )
  @log.debug "Tabs -> spaces/blank lines stripped: %p" % text

  # Replace HTML blocks with placeholders
  text = hide_html_blocks( text, rs )
  @log.debug "Hid HTML blocks: %p" % text
  @log.debug "Render state: %p" % rs

  # Strip link definitions, store in render state
  text = strip_link_definitions( text, rs )
  @log.debug "Stripped link definitions: %p" % text
  @log.debug "Render state: %p" % rs

  # Escape meta-characters
  text = escape_special_chars( text )
  @log.debug "Escaped special characters: %p" % text

  # Transform block-level constructs
  text = apply_block_transforms( text, rs )
  @log.debug "After block-level transforms: %p" % text

  # Now swap back in all the escaped characters
  text = unescape_special_chars( text )
  @log.debug "After unescaping special characters: %p" % text

  return text
end


### Convert tabs in +str+ to spaces.
def detab( tabwidth=TabWidth )
  copy = self.dup
  copy.detab!( tabwidth )
  return copy
end


### Convert tabs to spaces in place, padding each tab out to the next multiple
### of +tabwidth+ columns, and return self. The string is split on newlines
### and rejoined, so trailing newlines are not preserved.
def detab!( tabwidth=TabWidth )
  newstr = self.split( /\n/ ).map {|line|
    line.gsub( /(.*?)\t/ ) do
      $1 + ' ' * (tabwidth - $1.length % tabwidth)
    end
  }.join("\n")
  self.replace( newstr )
end


#######
#private
#######

### Do block-level transforms on a copy of +str+ using the specified render
### state +rs+ and return the results.
def apply_block_transforms( str, rs )
  # Port: This was called '_runBlockGamut' in the original

  @log.debug "Applying block transforms to:\n %p" % str
  text = transform_headers( str, rs )
  text = transform_hrules( text, rs )
  text = transform_lists( text, rs )
  text = transform_code_blocks( text, rs )
  text = transform_block_quotes( text, rs )
  text = transform_auto_links( text, rs )
  text = hide_html_blocks( text, rs )

  text = form_paragraphs( text, rs )

  @log.debug "Done with block transforms:\n %p" % text
  return text
end


### Apply Markdown span transforms to a copy of the specified +str+ with the
### given render state +rs+ and return it.
def apply_span_transforms( str, rs )
  @log.debug "Applying span transforms to:\n %p" % str

  str = transform_code_spans( str, rs )
  str = encode_html( str )
  str = transform_images( str, rs )
  str = transform_anchors( str, rs )
  str = transform_italic_and_bold( str, rs )

  # Hard breaks
  str.gsub!( / {2,}\n/, "<br#{EmptyElementSuffix}\n" )

  @log.debug "Done with span transforms:\n %p" % str
  return str
end


# The list of tags which are considered block-level constructs and an
# alternation pattern suitable for use in regexps made from the list
StrictBlockTags = %w[ p div h[1-6] blockquote pre table dl ol ul script noscript
  form fieldset iframe math ins del ]
StrictTagPattern = StrictBlockTags.join('|')

LooseBlockTags = StrictBlockTags - %w[ins del]
LooseTagPattern = LooseBlockTags.join('|')

# Nested blocks:
#   <div>
#     <div>
#     tags for inner block must be indented.
#     </div>
#   </div>
StrictBlockRegex = %r{
    ^                         # Start of line
    <(#{StrictTagPattern})    # Start tag: \1
    \b                        # word break
    (.*\n)*?                  # Any number of lines, minimal match
    </\1>                     # Matching end tag
    [ ]*                      # trailing spaces
    $                         # End of line or document
  }ix

# More-liberal block-matching
LooseBlockRegex = %r{
    ^                         # Start of line
    <(#{LooseTagPattern})     # start tag: \1
    \b                        # word break
    (.*\n)*?                  # Any number of lines, minimal match
    .*</\1>                   # Anything + Matching end tag
    [ ]*                      # trailing spaces
    $                         # End of line or document
  }ix

# Special case for <hr />.
HruleBlockRegex = %r{
    (                         # $1
      \A\n?                   # Start of doc + optional \n
      |                       # or
      .*\n\n                  # anything + blank line
    )
    (                         # save in $2
      [ ]*                    # Any spaces
      <hr                     # Tag open
      \b                      # Word break
      ([^<>])*?               # Attributes
      /?>                     # Tag close
      $                       # followed by a blank line or end of document
    )
  }ix

### Replace all blocks of HTML in +str+ that start in the left margin with
### tokens. Each token is the MD5 hex digest of the block it replaces, set
### off by blank lines; the original block is stored under that digest in
### +rs.html_blocks+. Returns the tokenized copy; +str+ is not modified.
def hide_html_blocks( str, rs )
  @log.debug "Hiding HTML blocks in %p" % str

  # Tokenizer proc to pass to gsub
  tokenize = lambda {|match|
    key = Digest::MD5::hexdigest( match )
    rs.html_blocks[ key ] = match
    @log.debug "Replacing %p with %p" % [ match, key ]
    "\n\n#{key}\n\n"
  }

  rval = str.dup

  @log.debug "Finding blocks with the strict regex..."
  rval.gsub!( StrictBlockRegex, &tokenize )

  @log.debug "Finding blocks with the loose regex..."
  rval.gsub!( LooseBlockRegex, &tokenize )

  @log.debug "Finding hrules..."
  rval.gsub!( HruleBlockRegex ) {|match|
    # Only the <hr> tag itself ($2) is hidden; the text in front of it ($1)
    # goes back into the result untouched.
    leading, hrule = $1, $2
    leading + tokenize[ hrule ]
  }

  return rval
end


# Link defs are in the form: ^[id]: url "optional title"
LinkRegex = %r{
    ^[ ]*\[(.+)\]:            # id = $1
      [ ]*
      \n?                     # maybe *one* newline
      [ ]*
    <?(\S+?)>?                # url = $2
      [ ]*
      \n?                     # maybe one newline
      [ ]*
    (?:
      # Titles are delimited by "quotes" or (parens).
      ["(]
      (.+?)                   # title = $3
      [")]                    # Matching ) or "
      [ ]*
    )?                        # title is optional
    (?:\n+|\Z)
  }x

### Strip link definitions from +str+, storing them in the given RenderState
### +rs+.
def strip_link_definitions( str, rs )
  str.gsub( LinkRegex ) {|match|
    id, url, title = $1, $2, $3

    # Ids are stored lower-cased; the URL goes through encode_html first.
    rs.urls[ id.downcase ] = encode_html( url )
    unless title.nil?
      # Double quotes in the title are stored as entities.
      rs.titles[ id.downcase ] = title.gsub( /"/, "&quot;" )
    end
    ""
  }
end


### Escape special characters in the given +str+ and return the result as a
### new string. Raises a TypeError if tokenize_html yields a token type other
### than :tag or :text.
def escape_special_chars( str )
  @log.debug " Escaping special characters"
  text = ''

  # The original Markdown source has something called '$tags_to_skip'
  # declared here, but it's never used, so I don't define it.

  tokenize_html( str ) {|token, chunk|
    @log.debug " Adding %p token %p" % [ token, chunk ]
    case token

    # Within tags, encode * and _
    when :tag
      text += chunk.
        gsub( /\*/, EscapeTable['*'][:md5] ).
        gsub( /_/, EscapeTable['_'][:md5] )

    # Encode backslashed stuff in regular text
    when :text
      text += encode_backslash_escapes( chunk )
    else
      raise TypeError, "Unknown token type %p" % token
    end
  }

  @log.debug " Text with escapes is now: %p" % text
  return text
end


### Swap the MD5 placeholders in the given +str+ back to the special
### characters they stand for and return it. Note that +str+ itself is
### modified in place.
def unescape_special_chars( str )
  EscapeTable.each {|char, hash|
    @log.debug "Unescaping escaped %p with %p" % [ char, hash[:md5re] ]
    str.gsub!( hash[:md5re], char )
  }

  return str
end


### Return a copy of the given +str+ with any backslashed special character
### in it replaced with MD5 placeholders.
def encode_backslash_escapes( str )
  # Make a copy with any double-escaped backslashes encoded
  text = str.gsub( /\\\\/, EscapeTable['\\'][:md5] )

  EscapeTable.each_pair {|char, esc|
    # The backslash itself was already handled above.
    next if char == '\\'
    text.gsub!( esc[:re], esc[:md5] )
  }

  return text
end


### Transform any Markdown-style horizontal rules in a copy of the specified
### +str+ and return it.
def transform_hrules( str, rs )
  @log.debug " Transforming horizontal rules"
  # A rule is a line made of three or more '-', '*' or '_' characters, each
  # optionally padded with a single space.
  str.gsub( /^( ?[\-\*_] ?){3,}$/, "\n<hr#{EmptyElementSuffix}\n" )
end


# Patterns to match and transform lists
ListMarkerOl = %r{\d+\.}
ListMarkerUl = %r{[*+-]}
ListMarkerAny = Regexp::union( ListMarkerOl, ListMarkerUl )

ListRegexp = %r{
    (?:
      ^[ ]{0,#{TabWidth - 1}}   # Indent < tab width
      (#{ListMarkerAny})        # unordered or ordered ($1)
      [ ]+                      # At least one space
    )
    (?m:.+?)                    # item content (include newlines)
    (?:
        \z                      # Either EOF
      |                         #  or
        \n{2,}                  # Blank line...
        (?=\S)                  # ...followed by non-space
        (?![ ]*                 # ...but not another item
          (#{ListMarkerAny})
         [ ]+)
    )
  }x

### Transform Markdown-style lists in a copy of the specified +str+ and
### return it.
def transform_lists( str, rs )
  @log.debug " Transforming lists at %p" % (str[0,100] + '...')

  str.gsub( ListRegexp ) {|list|
    @log.debug " Found list %p" % list
    bullet = $1

    # The first marker decides the list type: a bullet character makes it
    # unordered, anything else (i.e. a number) ordered.
    list_type = (ListMarkerUl.match(bullet) ? "ul" : "ol")
    list.gsub!( /\n{2,}/, "\n\n\n" )

    %{<%s>\n%s</%s>\n} % [
      list_type,
      transform_list_items( list, rs ),
      list_type,
    ]
  }
end


# Pattern for transforming list items
ListItemRegexp = %r{
    (\n)?                       # leading line = $1
    (^[ ]*)                     # leading whitespace = $2
    (#{ListMarkerAny}) [ ]+     # list marker = $3
    ((?m:.+?)                   # list item text = $4
    (\n{1,2}))
    (?= \n* (\z | \2 (#{ListMarkerAny}) [ ]+))
  }x

### Transform list items in a copy of the given +str+ and return it.
def transform_list_items( str, rs ) @log.debug " Transforming list items" # Trim trailing blank lines str = str.sub( /\n{2,}\z/, "\n" ) str.gsub( ListItemRegexp ) {|line| @log.debug " Found item line %p" % line leading_line, item = $1, $4 if leading_line or /\n{2,}/.match( item ) @log.debug " Found leading line or item has a blank" item = apply_block_transforms( outdent(item), rs ) else # Recursion for sub-lists @log.debug " Recursing for sublist" item = transform_lists( outdent(item), rs ).chomp item = apply_span_transforms( item, rs ) end %{<li>%s</li>\n} % item } end # Pattern for matching codeblocks CodeBlockRegexp = %r{ (?:\n\n|\A) ( # $1 = the code block (?: (?:[ ]{#{TabWidth}} | \t) # a tab or tab-width of spaces .*\n+ )+ ) (^[ ]{0,#{TabWidth - 1}}\S|\Z) # Lookahead for non-space at # line-start, or end of doc }x ### Transform Markdown-style codeblocks in a copy of the specified +str+ and ### return it. def transform_code_blocks( str, rs ) @log.debug " Transforming code blocks" str.gsub( CodeBlockRegexp ) {|block| codeblock = $1 remainder = $2 # Generate the codeblock %{\n\n<pre><code>%s\n</code></pre>\n\n%s} % [ encode_code( outdent(codeblock), rs ).rstrip, remainder ] } end # Pattern for matching Markdown blockquote blocks BlockQuoteRegexp = %r{ (?: ^[ ]*>[ ]? # '>' at the start of a line .+\n # rest of the first line
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?