⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regular-expressions.zc

📁 实现树形结构
💻 ZC
📖 第 1 页 / 共 2 页
字号:
//[c]Tests the string against the expression, recording group boundaries
//[c]
//[c]m      - the expression; supplies the initial and final states
//[c]string - the text to test; scanning starts at this position
//[c]
//[c]Returns the position reached in the string when the final state is
//[c]entered, or nil when no path reaches it or when the scan stack is full.
//[c]
//[c]Group boundaries are written as states are visited and are not undone
//[c]when a path is abandoned.
//[c]
public func test with groups (m: regex, string: string)

	// Clear the boundaries left by a previous run
	reset groups (m)

	// Stack of pending (state, position) pairs. It grows downward:
	// sp == top means empty, sp == stack means full.
	def stack: [stack size] local scan
	def top = stack + stack size
	def sp = top

	def final = final (m)
	def s = initial (m)
	def p = string
	repeat
		// Record the groups that open or close at the current state
		if group start (s) <> 0
			set group start (m, s, p)
		end
		if group stop (s) <> 0
			set group stop (m, s, p)
		end
		// Reaching the final state ends the scan: p is the end of the match
		if s == final
			return p
		end
		
		// Current char, tested against every non-epsilon rule below
		def c = p[]

		// Push every state reachable from s in one step
		each (rules (s)) ? r
			equ rule = r : rule
			if epsilon (rule)
				// Epsilon rule: move to the target state, position unchanged
				if sp == stack
					return nil // stack overflow
				end
				sp -= 1
				s (sp []) = state (rule)
				p (sp []) = p
			elsif match (rule, c)
				// Char rule accepting c: move to the target state, next char
				if sp == stack
					return nil // stack overflow
				end
				sp -= 1
				s (sp []) = state (rule)
				p (sp []) = p + 1
			end
		end
		
		// pop next
		// An empty stack means every path has been explored without success
		if sp == top
			break
		end
		
		s = s (sp [])
		p = p (sp [])
		sp += 1
	end
	
	return nil

end
//[c]
//[c]Structures:
//[c]
//[c]An entry of the scan stack used by test with groups: a state still
//[c]to be explored (s) and the string position to explore it from (p).
//[c]
struct scan
	s: state
	p: string
end
//[c]
//[c]Subfunctions:
//[of]:set group start
//[c]Stores p as the start position of every group flagged in the state
//[c]
//[c]group start (s) is read as a bit mask: when bit i is set, entry i of
//[c]group starts (m) receives p.
//[c]
func set group start (m: regex, s: state, p: string)

	def bits = group start (s)
	def slot = 0
	while bits <> 0
		// The lowest bit of bits always belongs to the current slot
		if (bits & 1) <> 0
			group starts (m) [slot] = p
		end
		bits >>= 1
		slot += 1
	end

end
//[cf]
//[of]:set group stop
//[c]Stores p as the stop position of every group flagged in the state
//[c]
//[c]group stop (s) is read as a bit mask: when bit i is set, entry i of
//[c]group stops (m) receives p.
//[c]
func set group stop (m: regex, s: state, p: string)

	def bits = group stop (s)
	def slot = 0
	while bits <> 0
		// The lowest bit of bits always belongs to the current slot
		if (bits & 1) <> 0
			group stops (m) [slot] = p
		end
		bits >>= 1
		slot += 1
	end

end
//[cf]
//[of]:reset groups
//[c]
//[c]Sets the start and the stop of every group to nil
//[c]
//[c]Entries 0 to number of groups (m) - 1 are cleared.
//[c]
func reset groups (m: regex)

	def total = number of groups (m)
	def k = 0
	while k < total
		group stops (m) [k] = nil
		group starts (m) [k] = nil
		k += 1
	end

end
//[cf]
//[cf]
//[cf]
//[of]:accessing
//[of]:number of groups
//[c]Returns the number of groups
//[c]
//[c]The value is read from the groups attribute of the regex.
//[c]The number of groups is known after compilation.
//[c]
public func number of groups (m: regex)
	return groups (m)
end
//[cf]
//[of]:append (index, s)
//[c]Appends the content of the i-th group to the string buffer
//[c]
//[c]m     - the regex holding the recorded group boundaries
//[c]index - the group index, from 0 to number of groups (m) - 1
//[c]s     - the string buffer receiving the text of the group
//[c]
//[c]Nothing is appended when the index is out of range or when one of
//[c]the boundaries of the group is nil.
//[c]
//[c]The method can be invoked after the first call to match().
//[c]This method is valid as long as the source string is not modified.
//[c]
public func append (m: regex, index: int, s: string buffer)

	// reset groups only maintains the entries below number of groups (m);
	// any other index would read a slot that is never initialized.
	if index >= 0 && index < number of groups (m)
		def start = group starts (m) [index] : string
		def stop = group stops (m) [index] : string
		if not nil (start) && not nil (stop)
			append (s, start, stop - start)
		end
	end

end
//[cf]
//[of]:size (index)
//[c]Returns the size of the i-th group
//[c]
//[c]m     - the regex holding the recorded group boundaries
//[c]index - the group index, from 0 to number of groups (m) - 1
//[c]
//[c]Returns the distance between the start and the stop of the group,
//[c]or 0 when the index is out of range or when one of the boundaries
//[c]of the group is nil.
//[c]
public func size (m: regex, index: int)

	// reset groups only maintains the entries below number of groups (m);
	// any other index would read a slot that is never initialized.
	if index >= 0 && index < number of groups (m)
		def start = group starts (m) [index] : string
		def stop = group stops (m) [index] : string
		if not nil (start) && not nil (stop)
			return stop - start
		end
	end

	return 0

end
//[cf]
//[cf]
//[of]:testing
//[of]:must match beginning
//[c]Returns true if the expression must start at the beginning of the line
//[c]
//[c]This is not handled by this component; it is the responsibility
//[c]of the caller to handle begin and end matches, because the end
//[c]of line can be \n as well as a nul char, and the beginning of the
//[c]line should be optimized by the caller (avoid scanning the whole line).
//[c]
public equ must match beginning (m: regex) = match beginning (m)
//[cf]
//[of]:must match ending
//[c]Returns true if the expression must end at the end of the line
//[c]
//[c]This is not handled by this component; it is the responsibility
//[c]of the caller to handle begin and end matches, because the end
//[c]of line can be \n as well as a nul char, and the beginning of the
//[c]line should be optimized by the caller (avoid scanning the whole line).
//[c]
public equ must match ending (m: regex) = match ending (m)
//[cf]
//[of]:is empty
//[c]Returns true if the regular expression matches an empty string
//[c]
//[c]The expression is run against empty string with match (); a result
//[c]that is not nil is reported as true.
//[c]
public func is empty (m: regex)
	return not nil (match (m, empty string))
end
//[cf]
//[cf]
//[c]
//[of]:private
//[of]:regex
//[of]:delete all
//[c]Deletes all states, nodes and rules
//[c]
//[c]Every element of states (m), nodes (m) and rules (m) is passed to
//[c]the delete function of its type, in that order.
//[c]
func delete all (m: regex)

	// States
	each (states (m)) ? st
		delete (st : state)
	end

	// Nodes
	each (nodes (m)) ? nd
		delete (nd : node)
	end

	// Rules
	each (rules (m)) ? rl
		delete (rl : rule)
	end

end
//[cf]
//[cf]
//[of]:state
//[of]:definition
//[c]A state of the automaton
//[c]
//[c]rules       - the rules leaving this state (filled by add rule and
//[c]              add epsilon)
//[c]group start - bit mask: bit i is set when group i starts at this state
//[c]group stop  - bit mask: bit i is set when group i stops at this state
//[c]mark        - flag set to false on creation; its use is not visible
//[c]              in this part of the file
//[c]
struct state
	rules: local collection
	group start: int
	group stop: int
	mark: bool
end	
//[cf]
//[c]
//[of]:new state 
//[c]
//[c]Creates a state and registers it in the regex
//[c]
//[c]The new state has no rule, no group flag and is not marked.
//[c]It is added to states (r) and returned.
//[c]
func new state (r: regex)
	def fresh = allocate memory (sizeof local state): state

	// Plain attributes first, then the rule collection
	mark (fresh) = false
	group stop (fresh) = 0
	group start (fresh) = 0
	initialize (rules (fresh))

	add (states (r), fresh)
	return fresh
end
//[cf]
//[of]:delete
//[c]
//[c]Frees the memory of the state
//[c]
//[c]Only the state itself is freed; the rules it references are not
//[c]freed here.
//[c]
func delete (s: state)
	free memory (s)
end
//[cf]
//[c]
//[of]:add rule (s1, s2, rule)
//[c]Connects s1 to s2 with the given rule
//[c]
//[c]s2 becomes the target state of the rule and the rule is added to
//[c]the rules of s1.
//[c]
func add rule (s1: state, s2: state, rule: rule)
	state (rule) = s2
	add (rules (s1), rule)
end
//[cf]
//[of]:add epsilon (s1, s2)
//[c]Connects s1 to s2 with a new epsilon rule
//[c]
//[c]The rule is created with new epsilon rule (m), then attached to s1
//[c]with s2 as target state.
//[c]
func add epsilon (m: regex, s1: state, s2: state)
	add rule (s1, s2, new epsilon rule (m))
end
//[cf]
//[cf]
//[of]:rule
//[of]:definition
//[c]
//[c]A transition between two states
//[c]
//[c]epsilon - true when the rule is followed without consuming a char
//[c]chars   - byte map indexed by char code; a non-zero entry means the
//[c]          char is accepted (see match); only initialized for rules
//[c]          created through new char rule
//[c]state   - the target state of the rule
//[c]
struct rule: local element
	epsilon: bool
	chars: char map
	state: state
end
//[cf]
//[c]
//[of]:new rule
//[c]
//[c]Allocates a rule and registers it in rules (m)
//[c]
//[c]No attribute of the rule is initialized here; this is left to the
//[c]caller.
//[c]
func new rule (m: regex)
	def fresh = allocate memory (sizeof local rule): rule
	add (rules (m), fresh)
	return fresh
end
//[cf]
//[of]:new epsilon rule
//[c]
//[c]Creates a rule flagged as epsilon
//[c]
//[c]The char map of the rule is left untouched.
//[c]
func new epsilon rule (m: regex)

	def eps = new rule (m)
	epsilon (eps) = true
	return eps

end
//[cf]
//[of]:new char rule
//[c]
//[c]Creates a non-epsilon rule with a cleared char map
//[c]
//[c]The returned rule accepts no char until entries are set.
//[c]
func new char rule (m: regex)

	def fresh = new rule (m)
	initialize (chars (fresh))
	epsilon (fresh) = false
	return fresh

end
//[cf]
//[of]:new any rule
//[c]
//[c]Creates a char rule whose cleared char map has been inverted
//[c]
//[c]As invert leaves entry 0 alone, the rule accepts every char code
//[c]from 1 to 255.
//[c]
func new any rule (m: regex)

	def any = new char rule (m)
	invert (any)
	return any

end
//[cf]
//[of]:new char rule (c)
//[c]
//[c]Creates a char rule accepting the char c
//[c]
//[c]The case sensitivity is taken from is case sensitive (m).
//[c]
func new char rule (m: regex, c: char)

	def single = new char rule (m)
	set (single, c, is case sensitive (m))
	return single

end
//[cf]
//[of]:delete
//[c]
//[c]Frees the memory of the rule
//[c]
//[c]The rule is not removed from any collection here.
//[c]
func delete (rule: rule)
	free memory (rule)
end
//[cf]
//[c]
//[of]:set (c)
//[c]
//[c]Makes the rule accept the char c
//[c]
//[c]When is case sensitive is false and c is an ASCII letter, the same
//[c]letter in the other case is accepted too.
//[c]
func set (r: rule, c: char, is case sensitive: bool)

	if ~ is case sensitive
		if c >= $A && c <= $Z
			// Upper case letter: also accept its lower case form
			chars (r) [(c - $A + $a):byte:int] = 1:byte
		elsif c >= $a && c <= $z
			// Lower case letter: also accept its upper case form
			chars (r) [(c - $a + $A):byte:int] = 1:byte
		end
	end

	// The char itself is always accepted
	chars (r) [c:byte:int] = 1:byte

end
//[cf]
//[of]:set (c1, c2)
//[c]
//[c]Makes the rule accept every char from c1 to c2, both included
//[c]
//[c]r                 - the rule to update
//[c]c1                - the first char of the range
//[c]c2                - the last char of the range
//[c]is case sensitive - passed to set (r, c, is case sensitive) for
//[c]                    each char of the range
//[c]
//[c]Nothing is set when c1 is greater than c2.
//[c]
func set (r: rule, c1: char, c2: char, is case sensitive: bool)

	def c = c1
	while c <= c2
		set (r, c, is case sensitive)
		// Leave before incrementing past c2: when c2 is the largest
		// char value, c would wrap around and c <= c2 would stay true
		// forever.
		if c == c2
			break
		end
		c += \1
	end

end
//[cf]
//[of]:invert
//[c]
//[c]Inverts the char map of the rule
//[c]
//[c]Entries 1 to 255 are replaced by 1 minus their value; entry 0 is
//[c]left unchanged.
//[c]
func invert (r: rule)

	def k = 255
	while k <> 0
		chars (r) [k] = 1:byte - chars (r) [k]
		k -= 1
	end

end
//[cf]
//[c]
//[of]:match (c)
//[c]
//[c]Returns true if the rule accepts the char c, i.e. if the entry of
//[c]the char map at the code of c is not zero
//[c]
equ match (r: rule, c: char) = chars (r) [c:byte:int] <> 0:byte
//[cf]
//[cf]
//[c]
//[of]:node
//[of]:definition
//[c]Base structure of rule node, or node and rep node
//[c]
//[c]type - tells which kind of node this is (see node type)
//[c]
struct node : local element
	type: node type
end
//[cf]
//[of]:constants
//[c]The kinds of node
//[c]
//[c]nt rules is the type given to rule nodes and nt or the type given
//[c]to or nodes. The three remaining values are presumably the types
//[c]passed to new repeat node (repetition operators) - to be confirmed
//[c]against the callers.
//[c]
enum node type
	nt rules
	nt or
	nt zero or many
	nt one or many
	nt zero or one
end
//[cf]
//[of]:delete
//[c]Frees the memory of the node
//[c]
//[c]An or node owns a vector of sequences, which is released first.
//[c]
func delete (m: node)
	// Only or nodes carry an additional resource
	if type (m) == nt or
		release (sequences (m : or node))
	end
	
	free memory (m)
end
//[cf]
//[cf]
//[of]:rule node
//[of]:definition
//[c]A node holding a single rule
//[c]
//[c]rule - the rule attached to the node
//[c]
struct rule node : local node
	rule: rule
end
//[cf]
//[of]:create
//[c]Creates a node of type nt rules holding the given rule
//[c]
//[c]The node is registered in nodes (m) and returned.
//[c]
func new rule node (m: regex, rule: rule)

	def fresh = allocate memory (sizeof local rule node) : rule node
	rule (fresh) = rule
	type (fresh) = nt rules

	add (nodes (m), fresh)
	return fresh

end
//[cf]
//[cf]
//[of]:or node
//[of]:definition
//[c]A node holding a vector of sequences
//[c]
//[c]sequences - vector initialized by new or node and released when
//[c]            the node is deleted
//[c]
struct or node : local node
	sequences: local vector
end
//[cf]
//[of]:create
//[c]Creates a node of type nt or with an initialized sequence vector
//[c]
//[c]The node is registered in nodes (m) and returned.
//[c]
func new or node (m: regex)

	def fresh = allocate memory (sizeof local or node) : or node
	initialize (sequences (fresh))
	type (fresh) = nt or

	add (nodes (m), fresh)
	return fresh

end
//[cf]
//[cf]
//[of]:rep node
//[of]:definition
//[c]A node wrapping another node
//[c]
//[c]node - the wrapped node; the type of the rep node is chosen by the
//[c]       caller of new repeat node
//[c]
struct rep node : local node
	node: node
end
//[cf]
//[of]:create
//[c]Creates a rep node of the given type wrapping the given node
//[c]
//[c]The node is registered in nodes (m) and returned.
//[c]
func new repeat node (m: regex, node: node, type: node type)

	def fresh = allocate memory (sizeof local rep node) : rep node
	node (fresh) = node
	type (fresh) = type

	add (nodes (m), fresh)
	return fresh

end
//[cf]
//[cf]
//[c]
//[of]:utility functions
//[of]:initialize (char map)
//[c]Clears a char map: the 256 entries are set to 0
//[c]
func initialize (m: []byte)

	def cells = m
	def left = 256
	// Walk the map from the last entry down to the first one
	while left <> 0
		left -= 1
		cells [left] = 0:byte
	end

end
//[cf]
//[of]:get escape char (c)
//[c]Returns the char denoted by the escape sequence \x
//[c]
//[c]The letters a, b, f, n, r, t and v give the matching control char.
//[c]A nul char gives a backslash, and any other char gives itself.
//[c]
func get escape char (x: char)
	
	def result: char

	switch x
	case $t
		result = \t
	case $n
		result = \n
	case $r
		result = \r
	case $f
		result = \f
	case $v
		result = \v
	case $a
		result = \a
	case $b
		result = \b
	case nul char
		result = $\
	else
		result = x
	end

	return result

end
//[cf]
//[cf]
//[cf]

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -