bluecloth.rb
来自「用ruby on rails写的一个博客程序,还不错..ruby on rail」· RB 代码 · 共 1,145 行 · 第 1/2 页
RB
1,145 行
(?:.+\n)* # subsequent consecutive lines \n* # blanks )+ }x PreChunk = %r{ ( ^ \s* <pre> .+? </pre> ) }xm ### Transform Markdown-style blockquotes in a copy of the specified +str+ ### and return it. def transform_block_quotes( str, rs ) @log.debug " Transforming block quotes" str.gsub( BlockQuoteRegexp ) {|quote| @log.debug "Making blockquote from %p" % quote quote.gsub!( /^ *> ?/, '' ) # Trim one level of quoting quote.gsub!( /^ +$/, '' ) # Trim whitespace-only lines indent = " " * TabWidth quoted = %{<blockquote>\n%s\n</blockquote>\n\n} % apply_block_transforms( quote, rs ). gsub( /^/, indent ). gsub( PreChunk ) {|m| m.gsub(/^#{indent}/o, '') } @log.debug "Blockquoted chunk is: %p" % quoted quoted } end AutoAnchorURLRegexp = /<((https?|ftp):[^'">\s]+)>/ AutoAnchorEmailRegexp = %r{ < ( [-.\w]+ \@ [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+ ) > }xi ### Transform URLs in a copy of the specified +str+ into links and return ### it. def transform_auto_links( str, rs ) @log.debug " Transforming auto-links" str.gsub( AutoAnchorURLRegexp, %{<a href="\\1">\\1</a>}). gsub( AutoAnchorEmailRegexp ) {|addr| encode_email_address( unescape_special_chars($1) ) } end # Encoder functions to turn characters of an email address into encoded # entities. Encoders = [ lambda {|char| "&#%03d;" % char}, lambda {|char| "&#x%X;" % char}, lambda {|char| char.chr }, ] ### Transform a copy of the given email +addr+ into an escaped version safer ### for posting publicly. def encode_email_address( addr ) rval = '' ("mailto:" + addr).each_byte {|b| case b when ?: rval += ":" when ?@ rval += Encoders[ rand(2) ][ b ] else r = rand(100) rval += ( r > 90 ? Encoders[2][ b ] : r < 45 ? Encoders[1][ b ] : Encoders[0][ b ] ) end } return %{<a href="%s">%s</a>} % [ rval, rval.sub(/.+?:/, '') ] end # Regex for matching Setext-style headers SetextHeaderRegexp = %r{ (.+) # The title text ($1) \n ([\-=])+ # Match a line of = or -. Save only one in $2. [ ]*\n+ }x # Regexp for matching ATX-style headers AtxHeaderRegexp = %r{ ^(\#{1,6}) # $1 = string of #'s [ ]* (.+?) # $2 = Header text [ ]* \#* # optional closing #'s (not counted) \n+ }x ### Apply Markdown header transforms to a copy of the given +str+ amd render ### state +rs+ and return the result. def transform_headers( str, rs ) @log.debug " Transforming headers" # Setext-style headers: # Header 1 # ======== # # Header 2 # -------- # str. gsub( SetextHeaderRegexp ) {|m| @log.debug "Found setext-style header" title, hdrchar = $1, $2 title = apply_span_transforms( title, rs ) case hdrchar when '=' %[<h1>#{title}</h1>\n\n] when '-' %[<h2>#{title}</h2>\n\n] else title end }. gsub( AtxHeaderRegexp ) {|m| @log.debug "Found ATX-style header" hdrchars, title = $1, $2 title = apply_span_transforms( title, rs ) level = hdrchars.length %{<h%d>%s</h%d>\n\n} % [ level, title, level ] } end ### Wrap all remaining paragraph-looking text in a copy of +str+ inside <p> ### tags and return it. def form_paragraphs( str, rs ) @log.debug " Forming paragraphs" grafs = str. sub( /\A\n+/, '' ). sub( /\n+\z/, '' ). split( /\n{2,}/ ) rval = grafs.collect {|graf| # Unhashify HTML blocks if this is a placeholder if rs.html_blocks.key?( graf ) rs.html_blocks[ graf ] # Otherwise, wrap in <p> tags else apply_span_transforms(graf, rs). sub( /^[ ]*/, '<p>' ) + '</p>' end }.join( "\n\n" ) @log.debug " Formed paragraphs: %p" % rval return rval end # Pattern to match the linkid part of an anchor tag for reference-style # links. RefLinkIdRegex = %r{ [ ]? # Optional leading space (?:\n[ ]*)? # Optional newline + spaces \[ (.*?) # Id = $1 \] }x InlineLinkRegex = %r{ \( # Literal paren [ ]* # Zero or more spaces <?(.+?)>? # URI = $1 [ ]* # Zero or more spaces (?: # ([\"\']) # Opening quote char = $2 (.*?) # Title = $3 \2 # Matching quote char )? # Title is optional \) }x ### Apply Markdown anchor transforms to a copy of the specified +str+ with ### the given render state +rs+ and return it. def transform_anchors( str, rs ) @log.debug " Transforming anchors" @scanner.string = str.dup text = '' # Scan the whole string until @scanner.empty? if @scanner.scan( /\[/ ) link = ''; linkid = '' depth = 1 startpos = @scanner.pos @log.debug " Found a bracket-open at %d" % startpos # Scan the rest of the tag, allowing unlimited nested []s. If # the scanner runs out of text before the opening bracket is # closed, append the text and return (wasn't a valid anchor). while depth.nonzero? linktext = @scanner.scan_until( /\]|\[/ ) if linktext @log.debug " Found a bracket at depth %d: %p" % [ depth, linktext ] link += linktext # Decrement depth for each closing bracket depth += ( linktext[-1, 1] == ']' ? -1 : 1 ) @log.debug " Depth is now #{depth}" # If there's no more brackets, it must not be an anchor, so # just abort. else @log.debug " Missing closing brace, assuming non-link." link += @scanner.rest @scanner.terminate return text + '[' + link end end link.slice!( -1 ) # Trim final ']' @log.debug " Found leading link %p" % link # Look for a reference-style second part if @scanner.scan( RefLinkIdRegex ) linkid = @scanner[1] linkid = link.dup if linkid.empty? linkid.downcase! @log.debug " Found a linkid: %p" % linkid # If there's a matching link in the link table, build an # anchor tag for it. if rs.urls.key?( linkid ) @log.debug " Found link key in the link table: %p" % rs.urls[linkid] url = escape_md( rs.urls[linkid] ) text += %{<a href="#{url}"} if rs.titles.key?(linkid) text += %{ title="%s"} % escape_md( rs.titles[linkid] ) end text += %{>#{link}</a>} # If the link referred to doesn't exist, just append the raw # source to the result else @log.debug " Linkid %p not found in link table" % linkid @log.debug " Appending original string instead: " @log.debug "%p" % @scanner.string[ startpos-1 .. @scanner.pos-1 ] text += @scanner.string[ startpos-1 .. @scanner.pos-1 ] end # ...or for an inline style second part elsif @scanner.scan( InlineLinkRegex ) url = @scanner[1] title = @scanner[3] @log.debug " Found an inline link to %p" % url text += %{<a href="%s"} % escape_md( url ) if title title.gsub!( /"/, """ ) text += %{ title="%s"} % escape_md( title ) end text += %{>#{link}</a>} # No linkid part: just append the first part as-is. else @log.debug "No linkid, so no anchor. Appending literal text." text += @scanner.string[ startpos-1 .. @scanner.pos-1 ] end # if linkid # Plain text else @log.debug " Scanning to the next link from %p" % @scanner.rest text += @scanner.scan( /[^\[]+/ ) end end # until @scanner.empty? return text end # Pattern to match strong emphasis in Markdown text BoldRegexp = %r{ (\*\*|__) (\S|\S.+?\S) \1 }x # Pattern to match normal emphasis in Markdown text ItalicRegexp = %r{ (\*|_) (\S|\S.+?\S) \1 }x ### Transform italic- and bold-encoded text in a copy of the specified +str+ ### and return it. def transform_italic_and_bold( str, rs ) @log.debug " Transforming italic and bold" str. gsub( BoldRegexp, %{<strong>\\2</strong>} ). gsub( ItalicRegexp, %{<em>\\2</em>} ) end ### Transform backticked spans into <code> spans. def transform_code_spans( str, rs ) @log.debug " Transforming code spans" # Set up the string scanner and just return the string unless there's at # least one backtick. @scanner.string = str.dup unless @scanner.exist?( /`/ ) @scanner.terminate @log.debug "No backticks found for code span in %p" % str return str end @log.debug "Transforming code spans in %p" % str # Build the transformed text anew text = '' # Scan to the end of the string until @scanner.empty? # Scan up to an opening backtick if pre = @scanner.scan_until( /.?(?=`)/m ) text += pre @log.debug "Found backtick at %d after '...%s'" % [ @scanner.pos, text[-10, 10] ] # Make a pattern to find the end of the span opener = @scanner.scan( /`+/ ) len = opener.length closer = Regexp::new( opener ) @log.debug "Scanning for end of code span with %p" % closer # Scan until the end of the closing backtick sequence. Chop the # backticks off the resultant string, strip leading and trailing # whitespace, and encode any enitites contained in it. codespan = @scanner.scan_until( closer ) or raise FormatError::new( @scanner.rest[0,20], "No %p found before end" % opener ) @log.debug "Found close of code span at %d: %p" % [ @scanner.pos - len, codespan ] codespan.slice!( -len, len ) text += "<code>%s</code>" % encode_code( codespan.strip, rs ) # If there's no more backticks, just append the rest of the string # and move the scan pointer to the end else text += @scanner.rest @scanner.terminate end end return text end # Next, handle inline images:  # Don't forget: encode * and _ InlineImageRegexp = %r{ ( # Whole match = $1 !\[ (.*?) \] # alt text = $2 \([ ]* <?(\S+?)>? # source url = $3 [ ]* (?: # (["']) # quote char = $4 (.*?) # title = $5 \4 # matching quote [ ]* )? # title is optional \) ) }xs #" # Reference-style images ReferenceImageRegexp = %r{ ( # Whole match = $1 !\[ (.*?) \] # Alt text = $2 [ ]? # Optional space (?:\n[ ]*)? # One optional newline + spaces \[ (.*?) \] # id = $3 ) }xs ### Turn image markup into image tags. def transform_images( str, rs ) @log.debug " Transforming images" % str # Handle reference-style labeled images: ![alt text][id] str. gsub( ReferenceImageRegexp ) {|match| whole, alt, linkid = $1, $2, $3.downcase @log.debug "Matched %p" % match res = nil alt.gsub!( /"/, '"' ) # for shortcut links like ![this][]. linkid = alt.downcase if linkid.empty? if rs.urls.key?( linkid ) url = escape_md( rs.urls[linkid] ) @log.debug "Found url '%s' for linkid '%s' " % [ url, linkid ] # Build the tag result = %{<img src="%s" alt="%s"} % [ url, alt ] if rs.titles.key?( linkid ) result += %{ title="%s"} % escape_md( rs.titles[linkid] ) end result += EmptyElementSuffix else result = whole end @log.debug "Replacing %p with %p" % [ match, result ] result }. # Inline image style gsub( InlineImageRegexp ) {|match| @log.debug "Found inline image %p" % match whole, alt, title = $1, $2, $5 url = escape_md( $3 ) alt.gsub!( /"/, '"' ) # Build the tag result = %{<img src="%s" alt="%s"} % [ url, alt ] unless title.nil? title.gsub!( /"/, '"' ) result += %{ title="%s"} % escape_md( title ) end result += EmptyElementSuffix @log.debug "Replacing %p with %p" % [ match, result ] result } end # Regexp to match special characters in a code block CodeEscapeRegexp = %r{( \* | _ | \{ | \} | \[ | \] | \\ )}x ### Escape any characters special to HTML and encode any characters special ### to Markdown in a copy of the given +str+ and return it. def encode_code( str, rs ) str.gsub( %r{&}, '&' ). gsub( %r{<}, '<' ). gsub( %r{>}, '>' ). gsub( CodeEscapeRegexp ) {|match| EscapeTable[match][:md5]} end ################################################################# ### U T I L I T Y F U N C T I O N S ################################################################# ### Escape any markdown characters in a copy of the given +str+ and return ### it. def escape_md( str ) str. gsub( /\*/, EscapeTable['*'][:md5] ). gsub( /_/, EscapeTable['_'][:md5] ) end # Matching constructs for tokenizing X/HTML HTMLCommentRegexp = %r{ <! ( -- .*? -- \s* )+ > }mx XMLProcInstRegexp = %r{ <\? .*? \?> }mx MetaTag = Regexp::union( HTMLCommentRegexp, XMLProcInstRegexp ) HTMLTagOpenRegexp = %r{ < [a-z/!$] [^<>]* }imx HTMLTagCloseRegexp = %r{ > }x HTMLTagPart = Regexp::union( HTMLTagOpenRegexp, HTMLTagCloseRegexp ) ### Break the HTML source in +str+ into a series of tokens and return ### them. The tokens are just 2-element Array tuples with a type and the ### actual content. If this function is called with a block, the type and ### text parts of each token will be yielded to it one at a time as they are ### extracted. def tokenize_html( str ) depth = 0 tokens = [] @scanner.string = str.dup type, token = nil, nil until @scanner.empty? @log.debug "Scanning from %p" % @scanner.rest # Match comments and PIs without nesting if (( token = @scanner.scan(MetaTag) )) type = :tag # Do nested matching for HTML tags elsif (( token = @scanner.scan(HTMLTagOpenRegexp) )) tagstart = @scanner.pos @log.debug " Found the start of a plain tag at %d" % tagstart # Start the token with the opening angle depth = 1 type = :tag # Scan the rest of the tag, allowing unlimited nested <>s. If # the scanner runs out of text before the tag is closed, raise # an error. while depth.nonzero? # Scan either an opener or a closer chunk = @scanner.scan( HTMLTagPart ) or raise "Malformed tag at character %d: %p" % [ tagstart, token + @scanner.rest ] @log.debug " Found another part of the tag at depth %d: %p" % [ depth, chunk ] token += chunk # If the last character of the token so far is a closing # angle bracket, decrement the depth. Otherwise increment # it for a nested tag. depth += ( token[-1, 1] == '>' ? -1 : 1 ) @log.debug " Depth is now #{depth}" end # Match text segments else @log.debug " Looking for a chunk of text" type = :text # Scan forward, always matching at least one character to move # the pointer beyond any non-tag '<'. token = @scanner.scan_until( /[^<]+/m ) end @log.debug " type: %p, token: %p" % [ type, token ] # If a block is given, feed it one token at a time. Add the token to # the token list to be returned regardless. if block_given? yield( type, token ) end tokens << [ type, token ] end return tokens end ### Return a copy of +str+ with angle brackets and ampersands HTML-encoded. def encode_html( str ) str.gsub( /&(?!#?[x]?(?:[0-9a-f]+|\w+);)/i, "&" ). gsub( %r{<(?![a-z/?\$!])}i, "<" ) end ### Return one level of line-leading tabs or spaces from a copy of +str+ and ### return it. def outdent( str ) str.gsub( /^(\t|[ ]{1,#{TabWidth}})/, '') end end # class BlueCloth
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?