bluecloth.rb

来自「用ruby on rails写的一个博客程序,还不错..ruby on rail」· RB 代码 · 共 1,145 行 · 第 1/2 页

RB
1,145
字号
#!/usr/bin/ruby# # Bluecloth is a Ruby implementation of Markdown, a text-to-HTML conversion# tool.# # == Synopsis# #   doc = BlueCloth::new "#     ## Test document ####     Just a simple test.#   "##   puts doc.to_html# # == Authors# # * Michael Granger <ged@FaerieMUD.org># # == Contributors## * Martin Chase <stillflame@FaerieMUD.org> - Peer review, helpful suggestions# * Florian Gross <flgr@ccan.de> - Filter options, suggestions## == Copyright## Original version:#   Copyright (c) 2003-2004 John Gruber#   <http://daringfireball.net/>  #   All rights reserved.## Ruby port:#   Copyright (c) 2004 The FaerieMUD Consortium.# # BlueCloth is free software; you can redistribute it and/or modify it under the# terms of the GNU General Public License as published by the Free Software# Foundation; either version 2 of the License, or (at your option) any later# version.# # BlueCloth is distributed in the hope that it will be useful, but WITHOUT ANY# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.# # == To-do## * Refactor some of the larger uglier methods that have to do their own#   brute-force scanning because of lack of Perl features in Ruby's Regexp#   class. Alternately, could add a dependency on 'pcre' and use most Perl#   regexps.## * Put the StringScanner in the render state for thread-safety.## == Version##  $Id: bluecloth.rb 69 2004-08-25 05:27:15Z ged $# require 'digest/md5'require 'logger'require 'strscan'### BlueCloth is a Ruby implementation of Markdown, a text-to-HTML conversion### tool.class BlueCloth < String	### Exception class for formatting errors.	class FormatError < RuntimeError		### Create a new FormatError with the given source +str+ and an optional		### message about the +specific+ error.		
def initialize( str, specific=nil )			if specific				msg = "Bad markdown format near %p: %s" % [ str, specific ]			else				msg = "Bad markdown format near %p" % str			end			super( msg )		end	end	# Release Version	Version = '0.0.3'	# SVN Revision	SvnRev = %q$Rev: 69 $	# SVN Id tag	SvnId = %q$Id: bluecloth.rb 69 2004-08-25 05:27:15Z ged $	# SVN URL	SvnUrl = %q$URL: svn+ssh://svn.faeriemud.org/usr/local/svn/BlueCloth/trunk/lib/bluecloth.rb $	# Rendering state struct. Keeps track of URLs, titles, and HTML blocks	# midway through a render. I prefer this to the globals of the Perl version	# because globals make me break out in hives. Or something.	RenderState = Struct::new( "RenderState", :urls, :titles, :html_blocks, :log )	# Tab width for #detab! if none is specified	TabWidth = 4	# The tag-closing string -- set to '>' for HTML	EmptyElementSuffix = "/>";	# Table of MD5 sums for escaped characters	EscapeTable = {}	'\\`*_{}[]()#.!'.split(//).each {|char|		hash = Digest::MD5::hexdigest( char )		EscapeTable[ char ] = { 			:md5 => hash,			:md5re => Regexp::new( hash ),			:re  => Regexp::new( '\\\\' + Regexp::escape(char) ),		}	}	#################################################################	###	I N S T A N C E   M E T H O D S	#################################################################	### Create a new BlueCloth string.	def initialize( content="", *restrictions )		@log = Logger::new( $deferr )		@log.level = $DEBUG ?			Logger::DEBUG :			($VERBOSE ? Logger::INFO : Logger::WARN)		@scanner = nil		# Add any restrictions, and set the line-folding attribute to reflect		# what happens by default.		@filter_html = nil		@filter_styles = nil		restrictions.flatten.each {|r| __send__("#{r}=", true) }		@fold_lines = true		super( content )		@log.debug "String is: %p" % self	end	######	public	######	# Filters for controlling what gets output for untrusted input. (But really,	# you're filtering bad stuff out of untrusted input at submission-time via	# untainting, aren't you?)	
attr_accessor :filter_html, :filter_styles

# RedCloth-compatibility accessor. Line-folding is part of Markdown syntax,
# so this isn't used by anything.
attr_accessor :fold_lines


### Render Markdown-formatted text in this string object as HTML and return
### it. The +lite+ parameter is for compatibility with RedCloth, and is
### currently unused, though that may change in the future.
def to_html( lite=false )

	# Create a StringScanner we can reuse for various lexing tasks
	@scanner = StringScanner::new( '' )

	# Make a structure to carry around stuff that gets placeholdered out of
	# the source.
	rs = RenderState::new( {}, {}, {} )

	# Make a copy of the string with normalized line endings, tabs turned to
	# spaces, and a couple of guaranteed newlines at the end.
	#
	# String#gsub returns a plain String rather than an instance of the
	# receiver's subclass on Ruby >= 3.0, and a plain String has no #detab,
	# so the normalized text is loaded into a duplicate of self first.
	normalized = self.dup.replace( self.gsub(/\r\n?/, "\n") )
	text = normalized.detab
	text += "\n\n"
	@log.debug "Normalized line-endings: %p" % text

	# Filter HTML if we're asked to do so
	if self.filter_html
		text.gsub!( "<", "&lt;" )
		text.gsub!( ">", "&gt;" )
		@log.debug "Filtered HTML: %p" % text
	end

	# Simplify blank lines
	text.gsub!( /^ +$/, '' )
	@log.debug "Tabs -> spaces/blank lines stripped: %p" % text

	# Replace HTML blocks with placeholders
	text = hide_html_blocks( text, rs )
	@log.debug "Hid HTML blocks: %p" % text
	@log.debug "Render state: %p" % rs

	# Strip link definitions, store in render state
	text = strip_link_definitions( text, rs )
	@log.debug "Stripped link definitions: %p" % text
	@log.debug "Render state: %p" % rs

	# Escape meta-characters
	text = escape_special_chars( text )
	@log.debug "Escaped special characters: %p" % text

	# Transform block-level constructs
	text = apply_block_transforms( text, rs )
	@log.debug "After block-level transforms: %p" % text

	# Now swap back in all the escaped characters
	text = unescape_special_chars( text )
	@log.debug "After unescaping special characters: %p" % text

	return text
end


### Convert tabs in +str+ to spaces.
def detab( tabwidth=TabWidth )
	# Work on a duplicate so the receiver is left untouched.
	expanded = self.dup
	expanded.detab!( tabwidth )
	expanded
end


### Expand every tab in the receiver to spaces, padding to the next multiple
### of +tabwidth+ columns, and return the receiver. The text is rebuilt from
### its lines with String#split, so trailing newlines are not kept.
def detab!( tabwidth=TabWidth )
	lines = self.split( /\n/ ).map do |line|
		line.gsub( /(.*?)\t/ ) do
			lead = $1
			lead + ' ' * (tabwidth - lead.length % tabwidth)
		end
	end

	self.replace( lines.join("\n") )
end


#######
#private
#######

### Do block-level transforms on a copy of +str+ using the specified render
### state +rs+ and return the results.
def apply_block_transforms( str, rs )
	# Port: This was called '_runBlockGamut' in the original

	@log.debug "Applying block transforms to:\n  %p" % str

	# Each step takes the text produced by the previous one plus the render
	# state; the order of the steps is significant.
	steps = [
		:transform_headers,
		:transform_hrules,
		:transform_lists,
		:transform_code_blocks,
		:transform_block_quotes,
		:transform_auto_links,
		:hide_html_blocks,
		:form_paragraphs,
	]
	text = steps.inject( str ) {|current, step| __send__(step, current, rs) }

	@log.debug "Done with block transforms:\n  %p" % text
	return text
end


### Apply Markdown span transforms to a copy of the specified +str+ with the
### given render state +rs+ and return it.
def apply_span_transforms( str, rs )
	@log.debug "Applying span transforms to:\n  %p" % str

	text = transform_code_spans( str, rs )
	text = encode_html( text )
	text = transform_images( text, rs )
	text = transform_anchors( text, rs )
	text = transform_italic_and_bold( text, rs )

	# Hard breaks: two or more spaces before a newline become a <br> element
	text.gsub!( / {2,}\n/, "<br#{EmptyElementSuffix}\n" )

	@log.debug "Done with span transforms:\n  %p" % text
	text
end


# The list of tags which are considered block-level constructs and an
# alternation pattern suitable for use in regexps made from the list
StrictBlockTags = %w[ p div h[1-6] blockquote pre table dl ol ul script noscript
	form fieldset iframe math ins del ]
StrictTagPattern = StrictBlockTags.join('|')

LooseBlockTags = StrictBlockTags - %w[ins del]
LooseTagPattern = LooseBlockTags.join('|')

# Nested blocks:
# 	<div>
# 		<div>
# 		tags for inner block must be indented.
# 		</div>
# 	</div>
StrictBlockRegex = %r{
	^						# Start of line
	<(#{StrictTagPattern})	# Start tag: \2
	\b						# word break
	(.*\n)*?				# Any number of lines, minimal match
	</\1>					# Matching end tag
	[ ]*					# trailing spaces
	$						# End of line or document
  }ix

# More-liberal block-matching
LooseBlockRegex = %r{
	^						# Start of line
	<(#{LooseTagPattern})	# start tag: \2
	\b						# word break
	(.*\n)*?				# Any number of lines, minimal match
	.*</\1>					# Anything + Matching end tag
	[ ]*					# trailing spaces
	$						# End of line or document
  }ix

# Special case for <hr />.
HruleBlockRegex = %r{
	(						# $1
		\A\n?				# Start of doc + optional \n
		|					# or
		.*\n\n				# anything + blank line
	)
	(						# save in $2
		[ ]*				# Any spaces
		<hr					# Tag open
		\b					# Word break
		([^<>])*?			# Attributes
		/?>					# Tag close
		$					# followed by a blank line or end of document
	)
  }ix

### Replace all blocks of HTML in +str+ that start in the left margin with
### tokens.
def hide_html_blocks( str, rs )
	@log.debug "Hiding HTML blocks in %p" % str

	# Stores a matched block in the render state under its MD5 digest and
	# returns the placeholder text that takes its place in the document.
	stash = lambda do |html|
		digest = Digest::MD5::hexdigest( html )
		rs.html_blocks[ digest ] = html
		@log.debug "Replacing %p with %p" % [ html, digest ]
		"\n\n#{digest}\n\n"
	end

	hidden = str.dup

	@log.debug "Finding blocks with the strict regex..."
	hidden.gsub!( StrictBlockRegex, &stash )

	@log.debug "Finding blocks with the loose regex..."
	hidden.gsub!( LooseBlockRegex, &stash )

	@log.debug "Finding hrules..."
	# Only the <hr> tag itself ($2) is hidden; the text before it ($1) stays.
	hidden.gsub!( HruleBlockRegex ) { $1 + stash.call($2) }

	hidden
end


# Link defs are in the form: ^[id]: url "optional title"
LinkRegex = %r{
	^[ ]*\[(.+)\]:		# id = $1
	  [ ]*
	  \n?				# maybe *one* newline
	  [ ]*
	<?(\S+?)>?				# url = $2
	  [ ]*
	  \n?				# maybe one newline
	  [ ]*
	(?:
		# Titles are delimited by "quotes" or (parens).
		["(]
		(.+?)			# title = $3
		[")]			# Matching ) or "
		[ ]*
	)?	# title is optional
	(?:\n+|\Z)
  }x

### Remove link definitions from a copy of +str+ and return it. The URL and
### optional title of each definition are recorded in the RenderState +rs+
### under the lowercased link id.
def strip_link_definitions( str, rs )
	str.gsub( LinkRegex ) do
		id, url, title = $1, $2, $3
		key = id.downcase

		rs.urls[ key ] = encode_html( url )
		rs.titles[ key ] = title.gsub( /"/, "&quot;" ) unless title.nil?

		""
	end
end


### Return a copy of +str+ in which '*' and '_' inside HTML tags, and
### backslash-escaped characters in plain text, are replaced with their MD5
### placeholders from EscapeTable.
def escape_special_chars( str )
	@log.debug "  Escaping special characters"
	text = ''

	# The original Markdown source has something called '$tags_to_skip'
	# declared here, but it's never used, so I don't define it.

	tokenize_html( str ) do |token, chunk|
		@log.debug "   Adding %p token %p" % [ token, chunk ]

		case token

		# Within tags, encode * and _
		when :tag
			text << chunk.
				gsub( /\*/, EscapeTable['*'][:md5] ).
				gsub( /_/, EscapeTable['_'][:md5] )

		# Encode backslashed stuff in regular text
		when :text
			text << encode_backslash_escapes( chunk )

		else
			raise TypeError, "Unknown token type %p" % token
		end
	end

	@log.debug "  Text with escapes is now: %p" % text
	text
end


### Swap the MD5 placeholders in +str+ back to the characters they stand
### for. The substitution is done on +str+ itself, which is also returned.
def unescape_special_chars( str )
	EscapeTable.each_pair do |char, entry|
		@log.debug "Unescaping escaped %p with %p" % [ char, entry[:md5re] ]
		str.gsub!( entry[:md5re], char )
	end

	str
end


### Return a copy of the given +str+ with any backslashed special character
### in it replaced with MD5 placeholders.
def encode_backslash_escapes( str )
	# Double backslashes are encoded first so that the second backslash of a
	# pair is not taken as the start of another escape.
	text = str.gsub( /\\\\/, EscapeTable['\\'][:md5] )

	EscapeTable.each_pair do |char, entry|
		text.gsub!( entry[:re], entry[:md5] ) unless char == '\\'
	end

	text
end


### Transform any Markdown-style horizontal rules in a copy of the specified
### +str+ and return it.
def transform_hrules( str, rs )
	@log.debug " Transforming horizontal rules"
	rule = "\n<hr#{EmptyElementSuffix}\n"
	str.gsub( /^( ?[\-\*_] ?){3,}$/, rule )
end


# Patterns to match and transform lists
ListMarkerOl = %r{\d+\.}
ListMarkerUl = %r{[*+-]}
ListMarkerAny = Regexp::union( ListMarkerOl, ListMarkerUl )

ListRegexp = %r{
	  (?:
		^[ ]{0,#{TabWidth - 1}}		# Indent < tab width
		(#{ListMarkerAny})			# unordered or ordered ($1)
		[ ]+						# At least one space
	  )
	  (?m:.+?)						# item content (include newlines)
	  (?:
		  \z						# Either EOF
		|							#  or
		  \n{2,}					# Blank line...
		  (?=\S)					# ...followed by non-space
		  (?![ ]*					# ...but not another item
			(#{ListMarkerAny})
		   [ ]+)
	  )
  }x

### Transform Markdown-style lists in a copy of the specified +str+ and
### return it.
def transform_lists( str, rs )
	@log.debug " Transforming lists at %p" % (str[0,100] + '...')

	str.gsub( ListRegexp ) do |list|
		@log.debug "  Found list %p" % list

		# Grab the first item's marker before any other match resets $1
		marker = $1
		tag = (marker =~ ListMarkerUl) ? "ul" : "ol"

		# Widen every run of blank lines in the list to exactly two
		list.gsub!( /\n{2,}/, "\n\n\n" )

		items = transform_list_items( list, rs )
		%{<%s>\n%s</%s>\n} % [ tag, items, tag ]
	end
end


# Pattern for transforming list items
ListItemRegexp = %r{
	(\n)?							# leading line = $1
	(^[ ]*)							# leading whitespace = $2
	(#{ListMarkerAny}) [ ]+			# list marker = $3
	((?m:.+?)						# list item text   = $4
	(\n{1,2}))
	(?= \n* (\z | \2 (#{ListMarkerAny}) [ ]+))
  }x

### Transform list items in a copy of the given +str+ and return it. Items
### preceded by a blank line, or containing one, get block-level transforms;
### all others are checked for sub-lists and then given span transforms.
def transform_list_items( str, rs )
	@log.debug " Transforming list items"

	# Trim trailing blank lines
	trimmed = str.sub( /\n{2,}\z/, "\n" )

	trimmed.gsub( ListItemRegexp ) do |line|
		@log.debug "  Found item line %p" % line
		leading_line, item = $1, $4

		rendered =
			if leading_line or /\n{2,}/.match( item )
				@log.debug "   Found leading line or item has a blank"
				apply_block_transforms( outdent(item), rs )
			else
				# Recursion for sub-lists
				@log.debug "   Recursing for sublist"
				sublisted = transform_lists( outdent(item), rs ).chomp
				apply_span_transforms( sublisted, rs )
			end

		%{<li>%s</li>\n} % rendered
	end
end


# Pattern for matching codeblocks
CodeBlockRegexp = %r{
	(?:\n\n|\A)
	(									# $1 = the code block
	  (?:
		(?:[ ]{#{TabWidth}} | \t)		# a tab or tab-width of spaces
		.*\n+
	  )+
	)
	(^[ ]{0,#{TabWidth - 1}}\S|\Z)		# Lookahead for non-space at
										# line-start, or end of doc
  }x

### Transform Markdown-style codeblocks in a copy of the specified +str+ and
### return it.
def transform_code_blocks( str, rs )		@log.debug " Transforming code blocks"		str.gsub( CodeBlockRegexp ) {|block|			codeblock = $1			remainder = $2			# Generate the codeblock			%{\n\n<pre><code>%s\n</code></pre>\n\n%s} %				[ encode_code( outdent(codeblock), rs ).rstrip, remainder ]		}	end	# Pattern for matching Markdown blockquote blocks	BlockQuoteRegexp = %r{		  (?:			^[ ]*>[ ]?		# '>' at the start of a line			  .+\n			# rest of the first line

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?