old_str_util.py

来自「Urwid is a Python library for making tex」· Python 代码 · 共 342 行
342 行
#!/usr/bin/python
#
# Urwid unicode character processing tables
#    Copyright (C) 2004-2006  Ian Ward
#
#    This library is free software; you can redistribute it and/or
#    modify it under the terms of the GNU Lesser General Public
#    License as published by the Free Software Foundation; either
#    version 2.1 of the License, or (at your option) any later version.
#
#    This library is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#    Lesser General Public License for more details.
#
#    You should have received a copy of the GNU Lesser General Public
#    License along with this library; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# Urwid web site: http://excess.org/urwid/

import re

# Matches text made only of printable ASCII (space through tilde).
SAFE_ASCII_RE = re.compile("^[ -~]*$")
# The same pattern compiled from a byte string, so byte-string text can be
# matched on interpreters where byte strings and unicode strings are
# distinct types (on Python 2 this is the very same pattern).
_SAFE_ASCII_BYTES_RE = re.compile("^[ -~]*$".encode("ascii"))

# Concrete text types, resolved at import time so the same checks work
# whether the plain "" literal is a byte string (Python 2) or not.
_UNICODE_TYPE = type(u"")
_BYTES_TYPE = type("".encode("ascii"))

# One of 'utf8', 'narrow', 'wide' once set_byte_encoding() has been called.
_byte_encoding = None

# GENERATED DATA
# generated from
# http://www.unicode.org/Public/4.0-Update/EastAsianWidth-4.0.0.txt
# Each entry is (last ordinal of range, screen width); ranges are ascending.

widths = [
    (126, 1),
    (159, 0),
    (687, 1),
    (710, 0),
    (711, 1),
    (727, 0),
    (733, 1),
    (879, 0),
    (1154, 1),
    (1161, 0),
    (4347, 1),
    (4447, 2),
    (7467, 1),
    (7521, 0),
    (8369, 1),
    (8426, 0),
    (9000, 1),
    (9002, 2),
    (11021, 1),
    (12350, 2),
    (12351, 1),
    (12438, 2),
    (12442, 0),
    (19893, 2),
    (19967, 1),
    (55203, 2),
    (63743, 1),
    (64106, 2),
    (65039, 1),
    (65059, 0),
    (65131, 2),
    (65279, 1),
    (65376, 2),
    (65500, 1),
    (65510, 2),
    (120831, 1),
    (262141, 2),
    (1114109, 1),
]


# ACCESSOR FUNCTIONS

def _byte_ord(text, pos):
    """Return the integer value of the item at text[pos].

    Indexing a byte string yields a 1-character string on Python 2 and an
    int on Python 3; both are normalised to an int here.
    """
    c = text[pos]
    if isinstance(c, int):
        return c
    return ord(c)


def _is_safe_ascii(text):
    """Return True if text matches the printable-ASCII pattern."""
    if isinstance(text, _UNICODE_TYPE):
        return SAFE_ASCII_RE.match(text) is not None
    return _SAFE_ASCII_BYTES_RE.match(text) is not None


def get_width(o):
    """Return the screen column width for unicode ordinal o."""
    # Shift Out / Shift In take no columns.
    if o == 0xe or o == 0xf:
        return 0
    # widths is ordered by ascending upper bound: first range that
    # contains o wins.
    for num, wid in widths:
        if o <= num:
            return wid
    return 1


def decode_one(text, pos):
    """Return (ordinal at pos, next position) for UTF-8 encoded text.

    If the bytes at pos are not a valid, minimally-encoded sequence of
    2 to 4 bytes, or the text ends before the sequence is complete,
    (ord("?"), pos+1) is returned instead.
    """
    b1 = _byte_ord(text, pos)
    if not b1 & 0x80:
        # plain 7-bit character
        return b1, pos + 1
    error = ord("?"), pos + 1
    remaining = len(text) - pos

    if remaining < 2:
        return error
    if b1 & 0xe0 == 0xc0:
        b2 = _byte_ord(text, pos + 1)
        if b2 & 0xc0 != 0x80:
            return error
        o = ((b1 & 0x1f) << 6) | (b2 & 0x3f)
        if o < 0x80:
            # overlong encoding
            return error
        return o, pos + 2

    if remaining < 3:
        return error
    if b1 & 0xf0 == 0xe0:
        b2 = _byte_ord(text, pos + 1)
        if b2 & 0xc0 != 0x80:
            return error
        b3 = _byte_ord(text, pos + 2)
        if b3 & 0xc0 != 0x80:
            return error
        o = ((b1 & 0x0f) << 12) | ((b2 & 0x3f) << 6) | (b3 & 0x3f)
        if o < 0x800:
            # overlong encoding
            return error
        return o, pos + 3

    if remaining < 4:
        return error
    if b1 & 0xf8 == 0xf0:
        b2 = _byte_ord(text, pos + 1)
        if b2 & 0xc0 != 0x80:
            return error
        b3 = _byte_ord(text, pos + 2)
        if b3 & 0xc0 != 0x80:
            return error
        # the fourth byte lives at pos+3 (previously pos+2 was read twice)
        b4 = _byte_ord(text, pos + 3)
        if b4 & 0xc0 != 0x80:
            return error
        o = (((b1 & 0x07) << 18) | ((b2 & 0x3f) << 12)
             | ((b3 & 0x3f) << 6) | (b4 & 0x3f))
        if o < 0x10000:
            # overlong encoding
            return error
        return o, pos + 4

    return error


def decode_one_right(text, pos):
    """
    Return (ordinal at pos, next position) for UTF-8 encoded text.
    pos is assumed to be on the trailing byte of a utf-8 sequence.

    The "next position" is the offset just before the start of the
    decoded sequence.  (ord("?"), pos-1) is returned when no leading byte
    is found within the 4 bytes ending at pos, or when the start of the
    text is reached first.
    """
    error = ord("?"), pos - 1
    p = pos
    while p >= 0:
        if _byte_ord(text, p) & 0xc0 != 0x80:
            # found a non-continuation byte: decode forwards from it
            o, _next = decode_one(text, p)
            return o, p - 1
        p -= 1
        # a sequence is at most 4 bytes long, so stop once 4 continuation
        # bytes have been skipped
        if p == pos - 4:
            return error
    # ran off the start of the text without finding a leading byte
    return error


def set_byte_encoding(enc):
    """Set how byte strings are interpreted: 'utf8', 'narrow' or 'wide'.

    'utf8' decodes byte strings as UTF-8, 'wide' treats them as a
    double-byte encoding (see within_double_byte) and 'narrow' treats
    every byte as one screen column.
    """
    assert enc in ('utf8', 'narrow', 'wide')
    global _byte_encoding
    _byte_encoding = enc


def get_byte_encoding():
    """Return the encoding set by set_byte_encoding(), or None if unset."""
    return _byte_encoding


def calc_text_pos(text, start_offs, end_offs, pref_col):
    """
    Calculate the closest position to the screen column pref_col in text
    where start_offs is the offset into text assumed to be screen column 0
    and end_offs is the end of the range to search.

    Returns (position, actual_col).
    """
    assert start_offs <= end_offs, repr((start_offs, end_offs))
    is_bytes = isinstance(text, _BYTES_TYPE)
    utfs = is_bytes and _byte_encoding == "utf8"
    if utfs or isinstance(text, _UNICODE_TYPE):
        i = start_offs
        sc = 0
        while i < end_offs:
            # o is the ordinal at i, n the offset of the next character
            if utfs:
                o, n = decode_one(text, i)
            else:
                o = ord(text[i])
                n = i + 1
            w = get_width(o)
            if w + sc > pref_col:
                # this character would pass pref_col: stop before it
                return i, sc
            i = n
            sc += w
        return i, sc

    assert is_bytes, repr(text)
    # "wide" and "narrow"
    i = start_offs + pref_col
    if i >= end_offs:
        return end_offs, end_offs - start_offs
    if _byte_encoding == "wide":
        # never land on the second half of a double-byte character
        if within_double_byte(text, start_offs, i) == 2:
            i -= 1
    return i, i - start_offs


def calc_width(text, start_offs, end_offs):
    """
    Return the screen column width of text between start_offs and end_offs.
    """
    assert start_offs <= end_offs, repr((start_offs, end_offs))
    utfs = isinstance(text, _BYTES_TYPE) and _byte_encoding == "utf8"
    # Text that is entirely printable ASCII takes the fast path below.
    if (utfs or isinstance(text, _UNICODE_TYPE)) and not _is_safe_ascii(text):
        i = start_offs
        sc = 0
        while i < end_offs:
            # o is the ordinal at i, n the offset of the next character
            if utfs:
                o, n = decode_one(text, i)
            else:
                o = ord(text[i])
                n = i + 1
            sc += get_width(o)
            i = n
        return sc
    # "wide" and "narrow": one column per item
    return end_offs - start_offs


def is_wide_char(text, offs):
    """
    Test if the character at offs within text is wide.
    """
    if isinstance(text, _UNICODE_TYPE):
        return get_width(ord(text[offs])) == 2
    assert isinstance(text, _BYTES_TYPE)
    if _byte_encoding == "utf8":
        o, _next = decode_one(text, offs)
        return get_width(o) == 2
    if _byte_encoding == "wide":
        return within_double_byte(text, offs, offs) == 1
    return False


def move_prev_char(text, start_offs, end_offs):
    """
    Return the position of the character before end_offs.

    The result is never less than start_offs.
    """
    assert start_offs < end_offs
    if isinstance(text, _UNICODE_TYPE):
        return end_offs - 1
    assert isinstance(text, _BYTES_TYPE)
    if _byte_encoding == "utf8":
        o = end_offs - 1
        # skip back over continuation bytes, but stay inside the range
        # (mirrors the o < end_offs bound in move_next_char)
        while o > start_offs and _byte_ord(text, o) & 0xc0 == 0x80:
            o -= 1
        return o
    if _byte_encoding == "wide" and within_double_byte(
            text, start_offs, end_offs - 1) == 2:
        return end_offs - 2
    return end_offs - 1


def move_next_char(text, start_offs, end_offs):
    """
    Return the position of the character after start_offs.
    """
    assert start_offs < end_offs
    if isinstance(text, _UNICODE_TYPE):
        return start_offs + 1
    assert isinstance(text, _BYTES_TYPE)
    if _byte_encoding == "utf8":
        o = start_offs + 1
        # skip forward over continuation bytes
        while o < end_offs and _byte_ord(text, o) & 0xc0 == 0x80:
            o += 1
        return o
    if _byte_encoding == "wide" and within_double_byte(
            text, start_offs, start_offs) == 1:
        return start_offs + 2
    return start_offs + 1


def within_double_byte(str, line_start, pos):
    """Return whether pos is within a double-byte encoded character.

    str -- string in question
    line_start -- offset of beginning of line (< pos)
    pos -- offset in question

    Return values:
    0 -- not within dbe char, or double_byte_encoding == False
    1 -- pos is on the 1st half of a dbe char
    2 -- pos is on the 2nd half of a dbe char
    """
    v = _byte_ord(str, pos)

    if v >= 0x40 and v < 0x7f:
        # might be second half of big5, uhc or gbk encoding
        if pos == line_start:
            return 0
        if _byte_ord(str, pos - 1) >= 0x81:
            if within_double_byte(str, line_start, pos - 1) == 1:
                return 2
        return 0

    if v < 0x80:
        return 0

    # Count the run of high bytes ending at pos; its parity tells which
    # half of a pair pos falls on.
    i = pos - 1
    while i >= line_start:
        if _byte_ord(str, i) < 0x80:
            break
        i -= 1

    if (pos - i) & 1:
        return 1
    return 2


# TABLE GENERATION CODE

def _build_width_table(lines):
    """Return a list of (last ordinal, width) ranges parsed from lines.

    Each data line is split as "<hex>[..<hex>];<wid> # <description>".
    Width is 0 when the first word of the description is COMBINING,
    MODIFIER or <control>, 2 when wid is W or F, and 1 otherwise.
    Consecutive entries of equal width are merged into one range.
    """
    table = []
    last = None
    for line in lines:
        if line[:1] == "#":
            continue
        line = line.strip()
        if not line:
            # nothing to parse on a blank line
            continue
        code, rest = line.split(";", 1)
        wid, rest = rest.split(" # ", 1)
        word1 = rest.split(" ", 1)[0]
        if "." in code:
            # "first..last" range: keep the upper bound
            code = code.split("..")[1]
        num = int(code, 16)

        if word1 in ("COMBINING", "MODIFIER", "<control>"):
            width = 0
        elif wid in ("W", "F"):
            width = 2
        else:
            width = 1

        if last is None:
            table.append((0, width))
            last = width

        if last == width:
            # extend the current range
            table[-1] = (num, width)
        else:
            table.append((num, width))
            last = width
    return table


def process_east_asian_width():
    """Read EastAsianWidth data on stdin and write a widths table to stdout."""
    import sys

    out = _build_width_table(sys.stdin.readlines())

    write = sys.stdout.write
    write("widths = [\n")
    for o in out[1:]:  # treat control characters same as ascii
        write("\t" + repr(o) + ",\n")
    write("]\n")


if __name__ == "__main__":
    process_east_asian_width()
old_str_util.py - 源码说明

本页面展示了「Urwid is a Python library for making text console applications. It has many features including fluid」中的 old_str_util.py 源码文件，采用 Python 编程语言编写，共 342 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与applications相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?