bluecloth.rb

来自「用ruby on rails写的一个博客程序,还不错..ruby on rail」· RB 代码 · 共 1,145 行 · 第 1/2 页

RB
1,145
字号
			(?:.+\n)*		# subsequent consecutive lines			\n*				# blanks		  )+	  }x	PreChunk = %r{ ( ^ \s* <pre> .+? </pre> ) }xm	### Transform Markdown-style blockquotes in a copy of the specified +str+	### and return it.	def transform_block_quotes( str, rs )		@log.debug " Transforming block quotes"		str.gsub( BlockQuoteRegexp ) {|quote|			@log.debug "Making blockquote from %p" % quote			quote.gsub!( /^ *> ?/, '' ) # Trim one level of quoting 			quote.gsub!( /^ +$/, '' )	# Trim whitespace-only lines			indent = " " * TabWidth			quoted = %{<blockquote>\n%s\n</blockquote>\n\n} %				apply_block_transforms( quote, rs ).				gsub( /^/, indent ).				gsub( PreChunk ) {|m| m.gsub(/^#{indent}/o, '') }			@log.debug "Blockquoted chunk is: %p" % quoted			quoted		}	end	AutoAnchorURLRegexp = /<((https?|ftp):[^'">\s]+)>/	AutoAnchorEmailRegexp = %r{		<		(			[-.\w]+			\@			[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+		)		>	  }xi	### Transform URLs in a copy of the specified +str+ into links and return	### it.	def transform_auto_links( str, rs )		@log.debug " Transforming auto-links"		str.gsub( AutoAnchorURLRegexp, %{<a href="\\1">\\1</a>}).			gsub( AutoAnchorEmailRegexp ) {|addr|			encode_email_address( unescape_special_chars($1) )		}	end	# Encoder functions to turn characters of an email address into encoded	# entities.	Encoders = [		lambda {|char| "&#%03d;" % char},		lambda {|char| "&#x%X;" % char},		lambda {|char| char.chr },	]	### Transform a copy of the given email +addr+ into an escaped version safer	### for posting publicly.	def encode_email_address( addr )		rval = ''		("mailto:" + addr).each_byte {|b|			case b			when ?:				rval += ":"			when ?@				rval += Encoders[ rand(2) ][ b ]			else				r = rand(100)				rval += (					r > 90 ? Encoders[2][ b ] :					r < 45 ? Encoders[1][ b ] :							 Encoders[0][ b ]				)			end		}		return %{<a href="%s">%s</a>} % [ rval, rval.sub(/.+?:/, '') ]	end	# Regex for matching Setext-style headers	SetextHeaderRegexp = %r{		(.+)			# The title text ($1)		\n		([\-=])+		# Match a line of = or -. Save only one in $2.		[ ]*\n+	   }x	# Regexp for matching ATX-style headers	AtxHeaderRegexp = %r{		^(\#{1,6})	# $1 = string of #'s		[ ]*		(.+?)		# $2 = Header text		[ ]*		\#*			# optional closing #'s (not counted)		\n+	  }x	### Apply Markdown header transforms to a copy of the given +str+ amd render	### state +rs+ and return the result.	def transform_headers( str, rs )		@log.debug " Transforming headers"		# Setext-style headers:		#	  Header 1		#	  ========		#  		#	  Header 2		#	  --------		#		str.			gsub( SetextHeaderRegexp ) {|m|				@log.debug "Found setext-style header"				title, hdrchar = $1, $2				title = apply_span_transforms( title, rs )				case hdrchar				when '='					%[<h1>#{title}</h1>\n\n]				when '-'					%[<h2>#{title}</h2>\n\n]				else					title				end			}.			gsub( AtxHeaderRegexp ) {|m|				@log.debug "Found ATX-style header"				hdrchars, title = $1, $2				title = apply_span_transforms( title, rs )				level = hdrchars.length				%{<h%d>%s</h%d>\n\n} % [ level, title, level ]			}	end	### Wrap all remaining paragraph-looking text in a copy of +str+ inside <p>	### tags and return it.	def form_paragraphs( str, rs )		@log.debug " Forming paragraphs"		grafs = str.			sub( /\A\n+/, '' ).			sub( /\n+\z/, '' ).			split( /\n{2,}/ )		rval = grafs.collect {|graf|			# Unhashify HTML blocks if this is a placeholder			if rs.html_blocks.key?( graf )				rs.html_blocks[ graf ]			# Otherwise, wrap in <p> tags			else				apply_span_transforms(graf, rs).					sub( /^[ ]*/, '<p>' ) + '</p>'			end		}.join( "\n\n" )		@log.debug " Formed paragraphs: %p" % rval		return rval	end	# Pattern to match the linkid part of an anchor tag for reference-style	# links.	RefLinkIdRegex = %r{		[ ]?					# Optional leading space		(?:\n[ ]*)?				# Optional newline + spaces		\[			(.*?)				# Id = $1		\]	  }x	InlineLinkRegex = %r{		\(						# Literal paren			[ ]*				# Zero or more spaces			<?(.+?)>?			# URI = $1			[ ]*				# Zero or more spaces			(?:					# 				([\"\'])		# Opening quote char = $2				(.*?)			# Title = $3				\2				# Matching quote char			)?					# Title is optional		\)	  }x	### Apply Markdown anchor transforms to a copy of the specified +str+ with	### the given render state +rs+ and return it.	def transform_anchors( str, rs )		@log.debug " Transforming anchors"		@scanner.string = str.dup		text = ''		# Scan the whole string		until @scanner.empty?					if @scanner.scan( /\[/ )				link = ''; linkid = ''				depth = 1				startpos = @scanner.pos				@log.debug " Found a bracket-open at %d" % startpos				# Scan the rest of the tag, allowing unlimited nested []s. If				# the scanner runs out of text before the opening bracket is				# closed, append the text and return (wasn't a valid anchor).				while depth.nonzero?					linktext = @scanner.scan_until( /\]|\[/ )					if linktext						@log.debug "  Found a bracket at depth %d: %p" % [ depth, linktext ]						link += linktext						# Decrement depth for each closing bracket						depth += ( linktext[-1, 1] == ']' ? -1 : 1 )						@log.debug "  Depth is now #{depth}"					# If there's no more brackets, it must not be an anchor, so					# just abort.					else						@log.debug "  Missing closing brace, assuming non-link."						link += @scanner.rest						@scanner.terminate						return text + '[' + link					end				end				link.slice!( -1 ) # Trim final ']'				@log.debug " Found leading link %p" % link				# Look for a reference-style second part				if @scanner.scan( RefLinkIdRegex )					linkid = @scanner[1]					linkid = link.dup if linkid.empty?					linkid.downcase!					@log.debug "  Found a linkid: %p" % linkid					# If there's a matching link in the link table, build an					# anchor tag for it.					if rs.urls.key?( linkid )						@log.debug "   Found link key in the link table: %p" % rs.urls[linkid]						url = escape_md( rs.urls[linkid] )						text += %{<a href="#{url}"}						if rs.titles.key?(linkid)							text += %{ title="%s"} % escape_md( rs.titles[linkid] )						end						text += %{>#{link}</a>}					# If the link referred to doesn't exist, just append the raw					# source to the result					else						@log.debug "  Linkid %p not found in link table" % linkid						@log.debug "  Appending original string instead: "						@log.debug "%p" % @scanner.string[ startpos-1 .. @scanner.pos-1 ]						text += @scanner.string[ startpos-1 .. @scanner.pos-1 ]					end				# ...or for an inline style second part				elsif @scanner.scan( InlineLinkRegex )					url = @scanner[1]					title = @scanner[3]					@log.debug "  Found an inline link to %p" % url					text += %{<a href="%s"} % escape_md( url )					if title						title.gsub!( /"/, "&quot;" )						text += %{ title="%s"} % escape_md( title )					end					text += %{>#{link}</a>}				# No linkid part: just append the first part as-is.				else					@log.debug "No linkid, so no anchor. Appending literal text."					text += @scanner.string[ startpos-1 .. @scanner.pos-1 ]				end # if linkid			# Plain text			else				@log.debug " Scanning to the next link from %p" % @scanner.rest				text += @scanner.scan( /[^\[]+/ )			end		end # until @scanner.empty?		return text	end	# Pattern to match strong emphasis in Markdown text	BoldRegexp = %r{ (\*\*|__) (\S|\S.+?\S) \1 }x	# Pattern to match normal emphasis in Markdown text	ItalicRegexp = %r{ (\*|_) (\S|\S.+?\S) \1 }x	### Transform italic- and bold-encoded text in a copy of the specified +str+	### and return it.	def transform_italic_and_bold( str, rs )		@log.debug " Transforming italic and bold"		str.			gsub( BoldRegexp, %{<strong>\\2</strong>} ).			gsub( ItalicRegexp, %{<em>\\2</em>} )	end		### Transform backticked spans into <code> spans.	def transform_code_spans( str, rs )		@log.debug " Transforming code spans"		# Set up the string scanner and just return the string unless there's at		# least one backtick.		@scanner.string = str.dup		unless @scanner.exist?( /`/ )			@scanner.terminate			@log.debug "No backticks found for code span in %p" % str			return str		end		@log.debug "Transforming code spans in %p" % str		# Build the transformed text anew		text = ''		# Scan to the end of the string		until @scanner.empty?			# Scan up to an opening backtick			if pre = @scanner.scan_until( /.?(?=`)/m )				text += pre				@log.debug "Found backtick at %d after '...%s'" % [ @scanner.pos, text[-10, 10] ]				# Make a pattern to find the end of the span				opener = @scanner.scan( /`+/ )				len = opener.length				closer = Regexp::new( opener )				@log.debug "Scanning for end of code span with %p" % closer				# Scan until the end of the closing backtick sequence. Chop the				# backticks off the resultant string, strip leading and trailing				# whitespace, and encode any enitites contained in it.				codespan = @scanner.scan_until( closer ) or					raise FormatError::new( @scanner.rest[0,20],						"No %p found before end" % opener )				@log.debug "Found close of code span at %d: %p" % [ @scanner.pos - len, codespan ]				codespan.slice!( -len, len )				text += "<code>%s</code>" %					encode_code( codespan.strip, rs )			# If there's no more backticks, just append the rest of the string			# and move the scan pointer to the end			else				text += @scanner.rest				@scanner.terminate			end		end		return text	end	# Next, handle inline images:  ![alt text](url "optional title")	# Don't forget: encode * and _	InlineImageRegexp = %r{		(					# Whole match = $1			!\[ (.*?) \]	# alt text = $2		  \([ ]*			<?(\S+?)>?		# source url = $3		    [ ]*			(?:				# 			  (["'])		# quote char = $4			  (.*?)			# title = $5			  \4			# matching quote			  [ ]*			)?				# title is optional		  \)		)	  }xs #"	# Reference-style images	ReferenceImageRegexp = %r{		(					# Whole match = $1			!\[ (.*?) \]	# Alt text = $2			[ ]?			# Optional space			(?:\n[ ]*)?		# One optional newline + spaces			\[ (.*?) \]		# id = $3		)	  }xs	### Turn image markup into image tags.	def transform_images( str, rs )		@log.debug " Transforming images" % str		# Handle reference-style labeled images: ![alt text][id]		str.			gsub( ReferenceImageRegexp ) {|match|				whole, alt, linkid = $1, $2, $3.downcase				@log.debug "Matched %p" % match				res = nil				alt.gsub!( /"/, '&quot;' )				# for shortcut links like ![this][].				linkid = alt.downcase if linkid.empty?				if rs.urls.key?( linkid )					url = escape_md( rs.urls[linkid] )					@log.debug "Found url '%s' for linkid '%s' " % [ url, linkid ]					# Build the tag					result = %{<img src="%s" alt="%s"} % [ url, alt ]					if rs.titles.key?( linkid )						result += %{ title="%s"} % escape_md( rs.titles[linkid] )					end					result += EmptyElementSuffix				else					result = whole				end				@log.debug "Replacing %p with %p" % [ match, result ]				result			}.			# Inline image style			gsub( InlineImageRegexp ) {|match|				@log.debug "Found inline image %p" % match				whole, alt, title = $1, $2, $5				url = escape_md( $3 )				alt.gsub!( /"/, '&quot;' )				# Build the tag				result = %{<img src="%s" alt="%s"} % [ url, alt ]				unless title.nil?					title.gsub!( /"/, '&quot;' )					result += %{ title="%s"} % escape_md( title )				end				result += EmptyElementSuffix				@log.debug "Replacing %p with %p" % [ match, result ]				result			}	end	# Regexp to match special characters in a code block	CodeEscapeRegexp = %r{( \* | _ | \{ | \} | \[ | \] | \\ )}x	### Escape any characters special to HTML and encode any characters special	### to Markdown in a copy of the given +str+ and return it.	def encode_code( str, rs )		str.gsub( %r{&}, '&amp;' ).			gsub( %r{<}, '&lt;' ).			gsub( %r{>}, '&gt;' ).			gsub( CodeEscapeRegexp ) {|match| EscapeTable[match][:md5]}	end					#################################################################	###	U T I L I T Y   F U N C T I O N S	#################################################################	### Escape any markdown characters in a copy of the given +str+ and return	### it.	def escape_md( str )		str.			gsub( /\*/, EscapeTable['*'][:md5] ).			gsub( /_/,  EscapeTable['_'][:md5] )	end	# Matching constructs for tokenizing X/HTML	HTMLCommentRegexp  = %r{ <! ( -- .*? -- \s* )+ > }mx	XMLProcInstRegexp  = %r{ <\? .*? \?> }mx	MetaTag = Regexp::union( HTMLCommentRegexp, XMLProcInstRegexp )	HTMLTagOpenRegexp  = %r{ < [a-z/!$] [^<>]* }imx	HTMLTagCloseRegexp = %r{ > }x	HTMLTagPart = Regexp::union( HTMLTagOpenRegexp, HTMLTagCloseRegexp )	### Break the HTML source in +str+ into a series of tokens and return	### them. The tokens are just 2-element Array tuples with a type and the	### actual content. If this function is called with a block, the type and	### text parts of each token will be yielded to it one at a time as they are	### extracted.	def tokenize_html( str )		depth = 0		tokens = []		@scanner.string = str.dup		type, token = nil, nil		until @scanner.empty?			@log.debug "Scanning from %p" % @scanner.rest			# Match comments and PIs without nesting			if (( token = @scanner.scan(MetaTag) ))				type = :tag			# Do nested matching for HTML tags			elsif (( token = @scanner.scan(HTMLTagOpenRegexp) ))				tagstart = @scanner.pos				@log.debug " Found the start of a plain tag at %d" % tagstart				# Start the token with the opening angle				depth = 1				type = :tag				# Scan the rest of the tag, allowing unlimited nested <>s. If				# the scanner runs out of text before the tag is closed, raise				# an error.				while depth.nonzero?					# Scan either an opener or a closer					chunk = @scanner.scan( HTMLTagPart ) or						raise "Malformed tag at character %d: %p" % 							[ tagstart, token + @scanner.rest ]											@log.debug "  Found another part of the tag at depth %d: %p" % [ depth, chunk ]					token += chunk					# If the last character of the token so far is a closing					# angle bracket, decrement the depth. Otherwise increment					# it for a nested tag.					depth += ( token[-1, 1] == '>' ? -1 : 1 )					@log.debug "  Depth is now #{depth}"				end			# Match text segments			else				@log.debug " Looking for a chunk of text"				type = :text				# Scan forward, always matching at least one character to move				# the pointer beyond any non-tag '<'.				token = @scanner.scan_until( /[^<]+/m )			end			@log.debug " type: %p, token: %p" % [ type, token ]			# If a block is given, feed it one token at a time. Add the token to			# the token list to be returned regardless.			if block_given?				yield( type, token )			end			tokens << [ type, token ]		end		return tokens	end	### Return a copy of +str+ with angle brackets and ampersands HTML-encoded.	def encode_html( str )		str.gsub( /&(?!#?[x]?(?:[0-9a-f]+|\w+);)/i, "&amp;" ).			gsub( %r{<(?![a-z/?\$!])}i, "&lt;" )	end		### Return one level of line-leading tabs or spaces from a copy of +str+ and	### return it.	def outdent( str )		str.gsub( /^(\t|[ ]{1,#{TabWidth}})/, '')	end	end # class BlueCloth

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?