⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regular-expressions.zc

📁 实现树形结构
💻 ZC
📖 第 1 页 / 共 2 页
字号:
//[of]:description
//[c]Tests if a string matches with a regular expression.
//[cf]
//[of]:imports
import "base/types"
import "base/memory-allocator"
import "collection/collection"
import "collection/vector"
import "text/string-buffer"
import "text/string"
//[cf]
//[of]:definitions
//[c]
typedef char map = [256] byte
equ max groups = 32
//[c]
public struct regex

	private 
		nodes: local vector
		states: local vector
		rules: local vector

		groups: int
		group mask: dword
		group starts: [max groups] string
		group stops: [max groups] string

		initial: state
		final: state

		match beginning: bool
		match ending: bool

		firsts: char map

		// for compilation only
		is case sensitive: bool

end
//[cf]
//[of]:error codes
public enum regex code
	regex ok
	regex missing cpar
	regex empty
	regex invalid range
end
//[c]
//[cf]
//[c]
//[of]:initialize - release
//[of]:initialize
//[c]
public func initialize (m: regex)

	initialize (states (m), 256)
	initialize (nodes (m), 256)
	initialize (rules (m), 256)
	groups (m) = 0
	match beginning (m) = false
	match ending (m) = false

end
//[cf]
//[of]:release
//[c]
public func release (m: regex)

	delete all (m)
	release (states (m))
	release (nodes (m))
	release (rules (m))

end
//[cf]
//[cf]
//[of]:compiling
//[of]:compile (source,case sensitive)
//[c]Compile a regular expression
//[c]
public func compile (m: regex, s: string, is case sensitive: bool)

	is case sensitive (m) = is case sensitive

	delete all (m)
	remove all (nodes (m))
	remove all (states (m))
	remove all (rules (m))
	groups (m) = 0
	group mask (m) = 1
	match beginning (m) = false
	match ending (m) = false
	
	def p = s
	if p[] == $^
		match beginning (m) = true
		p += 1
	end

	def parser: local regex parser
	source (parser) = p

	// 'firsts' is cleared before returning so an compile
	// error will make fail any search without an extra
	// test. So the object is valid and the match function
	// can be used even if the compilation fails.
	initialize (firsts (m))
	
	def node = new or node (m)
	def code = get union (m, parser, node)
	if code <> regex ok
		return code
	end

	def s1 = new state (m)
	def s2 = new state (m)
	expand (m, node, s1, s2)
	initial (m) = s1
	final (m) = s2

	// compute firsts
	scan (m, s1)

	return code

end
//[c]
//[c]Structures:
//[c]
struct regex parser
	source: string
	node: node
end
//[c]
//[c]Subfunctions:
//[of]:scan
//[c]Scan all chars accessibles from this state: all epsilon transitions
//[c]are followed recursively.
//[c]
func scan (m: regex, s: state) : void
	
	if mark (s)
		return
	end
	mark (s) = true

	each (rules (s)) ? r
		equ rule = r: rule
		if epsilon (rule)
			scan (m, state (rule))
		else
			def i = 0
			def p = chars (rule)
			def q = firsts (m)
			while i < 256
				q [i] |= p [i]
				i += 1
			end
		end			
	end
	
end
//[cf]
//[of]:expand
//[c]
func expand (m: regex, node: node, s1: state, s2: state)  : void

	switch (type (node))

	case nt rules
		equ rn = node : rule node
		add rule (s1, s2, rule (rn))

	case nt or
		equ on = node : or node

		def mask = group mask (m)
		group start (s1) |= mask
		group stop (s2) |= mask
		group mask (m) <<= 1
		groups (m) += 1
		
		each (sequences (on)) ? ns
			equ nodes = ns : node
		
			def ss1 = s1
			each (nodes) ? n
				equ subnode = n : node
				def ss2 = is last (subnode) -> s2, new state (m)
				expand (m, subnode, ss1, ss2)
				ss1 = ss2
			end
		end

	case nt zero or many
		equ rn = node : rep node

		def t1 = new state (m)
		def t2 = new state (m)
		expand (m, node (rn), t1, t2)
	
		add epsilon (m, s1, s2)
		add epsilon (m, s1, t1)
		add epsilon (m, t2, s2)
		add epsilon (m, t2, t1)
		
	case nt one or many
		equ rn = node : rep node

		def t1 = new state (m)
		def t2 = new state (m)
		expand (m, node (rn), t1, t2)
	
		add epsilon (m, s1, t1)
		add epsilon (m, t2, s2)
		add epsilon (m, t2, t1)
		
	case nt zero or one
		equ rn = node : rep node
		expand (m, node (rn), s1, s2)
		add epsilon (m, s1, s2)
		
	end

end
//[cf]
//[of]:get union
func get union (m: regex, parser: regex parser, or node: or node) : regex code

	def sequences = sequences (or node)

	repeat
		def nodes: local collection
		def code = get sequence (m, parser, nodes)
		if code <> regex ok
			return code
		end
		def node = first (nodes)
		if is nil (node)
			return regex empty
		end
		add (sequences, node)
		
		def p = source (parser)
		if p[] <> $|
			break
		end
		source (parser) = p + 1
	end
		
	return regex ok

end
//[cf]
//[of]:get sequence
//[c]
func get sequence (m: regex, parser: regex parser, nodes: collection) : regex code

	initialize (nodes)

	repeat
		def code = get node (m, parser)
		if code <> regex ok
			return code
		end
		def node = node (parser)
		if is nil (node)
			break
		end
		add (nodes, node)
	end		

	return regex ok

end
//[cf]
//[of]:get node
func get node (m: regex, parser: regex parser)
	
	def p = source (parser)
	def node = nil : node
	def c = p[]
	switch c
//[of]:	case nul char, ), |
	case nul char, $), $|
		// empty
//[cf]
//[of]:	case $
	case $$
		p += 1
		if is nul (p[])
			match ending (m) = true
		else
			node = new char node (m, c)
		end
//[cf]
//[of]:	case (
	case $(
		def or node = new or node (m)
		source (parser) = p+1
		def code = get union (m, parser, or node)
		if code <> regex ok
			return code
		end
		p = source (parser)
		if p[] <> $)
			return regex missing cpar
		end
		
		node = or node
		p += 1 
//[cf]
//[of]:	case [
	case $[
		def rule = new char rule (m)
		def rule node = new rule node (m, rule)
		p += 1
		p = get ranges (m, rule, p)
		if is nil (p) || p[] <> $]
			source (parser) = p
			return regex invalid range
		end
		node = rule node
		p += 1
//[cf]
//[of]:	case \\
	case $\
		p += 1
		c = get escape char (p[])
		node = new char node (m, c)
		p += 1

//[cf]
//[of]:	case .
	case $.
		node = new rule node (m, new any rule (m))
		p += 1
//[cf]
//[of]:	else
	else
		node = new char node (m, c)
		p += 1
//[cf]
	end

	// Check repeaters
	if not nil (node)
		c = p[]
		if c == $*
			p += 1
			node = new repeat node (m, node, nt zero or many)
		elsif c == $+
			p += 1
			node = new repeat node (m, node, nt one or many)
		elsif c == $?
			p += 1
			node = new repeat node (m, node, nt zero or one)
		end
	end

	source (parser) = p
	node (parser) = node
	
	return regex ok

end
//[c]
//[of]:get ranges
func get ranges (m: regex, rule: rule, s: string)

	def p = s
	
	def invert = false
	if p[] == $^
		invert = true
		p += 1
	end
	
	repeat
		def c = p []
		if is nul (c) || c == $]
			break
		end
		p += 1
		def d = p []
		if c == $\ && d <> nul char
			c = get escape char (d)
			p += 1
			d = p []
		end
		
		if d <> $- || p [1] == $]
			set (rule, c, is case sensitive (m))
		else
			p += 1
			d = p []
			if d == $\ && p [1] <> nul char
				d = get escape char (d)
				p += 1
			end
			if d <> nul char
				set (rule, c, d, is case sensitive (m))
				p += 1
			end
		end
	end

	if invert
		invert (rule)
	end

	return p

end
//[cf]
//[of]:new char node
//[c]
func new char node (m: regex, c: char)
	return new rule node (m, new char rule (m, c))
end
//[cf]
//[cf]
//[cf]
//[cf]
//[of]:matching
//[of]:match (s)
//[c]Returns the end position or nil if no match
//[c]
public func match (m: regex, string: string)

	// quick test
	if firsts (m) [string[0]:byte:int] == 0:byte
		return nil
	end

	// test without group (faster)
	if is nil (test without group (m, string))
		return nil
	end

	// test with groups (slower)	
	return test with groups (m, string)

end
//[c]
equ stack size = 1024
//[c]
public func test without group (m: regex, string: string)

	def stack: [stack size] local scan
	def top = stack + stack size
	def sp = top

	def final = final (m)
	def s = initial (m)
	def p = string
	repeat
		if s == final
			return p
		end
		def c = p[]
		each (rules (s)) ? r
			equ rule = r : rule
			if epsilon (rule)
				if sp == stack
					return nil // stack overflow
				end
				sp -= 1
				s (sp []) = state (rule)
				p (sp []) = p
			elsif match (rule, c)
				if sp == stack
					return nil // stack overflow
				end
				sp -= 1
				s (sp []) = state (rule)
				p (sp []) = p + 1
			end
		end
		
		// pop next
		if sp == top
			break
		end
		
		s = s (sp [])
		p = p (sp [])
		sp += 1
	end
	
	return nil

end
//[c]

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -