htmlentitydefs.py
来自「mallet是自然语言处理、机器学习领域的一个开源项目。」· Python 代码 · 共 258 行 · 第 1/2 页
PY
258 行
"""HTML character entity references."""entitydefs = { 'AElig': '\306', # latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1 'Aacute': '\301', # latin capital letter A with acute, U+00C1 ISOlat1 'Acirc': '\302', # latin capital letter A with circumflex, U+00C2 ISOlat1 'Agrave': '\300', # latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1 'Alpha': 'Α', # greek capital letter alpha, U+0391 'Aring': '\305', # latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1 'Atilde': '\303', # latin capital letter A with tilde, U+00C3 ISOlat1 'Auml': '\304', # latin capital letter A with diaeresis, U+00C4 ISOlat1 'Beta': 'Β', # greek capital letter beta, U+0392 'Ccedil': '\307', # latin capital letter C with cedilla, U+00C7 ISOlat1 'Chi': 'Χ', # greek capital letter chi, U+03A7 'Dagger': '‡', # double dagger, U+2021 ISOpub 'Delta': 'Δ', # greek capital letter delta, U+0394 ISOgrk3 'ETH': '\320', # latin capital letter ETH, U+00D0 ISOlat1 'Eacute': '\311', # latin capital letter E with acute, U+00C9 ISOlat1 'Ecirc': '\312', # latin capital letter E with circumflex, U+00CA ISOlat1 'Egrave': '\310', # latin capital letter E with grave, U+00C8 ISOlat1 'Epsilon': 'Ε', # greek capital letter epsilon, U+0395 'Eta': 'Η', # greek capital letter eta, U+0397 'Euml': '\313', # latin capital letter E with diaeresis, U+00CB ISOlat1 'Gamma': 'Γ', # greek capital letter gamma, U+0393 ISOgrk3 'Iacute': '\315', # latin capital letter I with acute, U+00CD ISOlat1 'Icirc': '\316', # latin capital letter I with circumflex, U+00CE ISOlat1 'Igrave': '\314', # latin capital letter I with grave, U+00CC ISOlat1 'Iota': 'Ι', # greek capital letter iota, U+0399 'Iuml': '\317', # latin capital letter I with diaeresis, U+00CF ISOlat1 'Kappa': 'Κ', # greek capital letter kappa, U+039A 'Lambda': 'Λ', # greek capital letter lambda, U+039B ISOgrk3 'Mu': 'Μ', # greek capital letter mu, U+039C 'Ntilde': '\321', # latin capital letter N 
with tilde, U+00D1 ISOlat1 'Nu': 'Ν', # greek capital letter nu, U+039D 'OElig': 'Œ', # latin capital ligature OE, U+0152 ISOlat2 'Oacute': '\323', # latin capital letter O with acute, U+00D3 ISOlat1 'Ocirc': '\324', # latin capital letter O with circumflex, U+00D4 ISOlat1 'Ograve': '\322', # latin capital letter O with grave, U+00D2 ISOlat1 'Omega': 'Ω', # greek capital letter omega, U+03A9 ISOgrk3 'Omicron': 'Ο', # greek capital letter omicron, U+039F 'Oslash': '\330', # latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1 'Otilde': '\325', # latin capital letter O with tilde, U+00D5 ISOlat1 'Ouml': '\326', # latin capital letter O with diaeresis, U+00D6 ISOlat1 'Phi': 'Φ', # greek capital letter phi, U+03A6 ISOgrk3 'Pi': 'Π', # greek capital letter pi, U+03A0 ISOgrk3 'Prime': '″', # double prime = seconds = inches, U+2033 ISOtech 'Psi': 'Ψ', # greek capital letter psi, U+03A8 ISOgrk3 'Rho': 'Ρ', # greek capital letter rho, U+03A1 'Scaron': 'Š', # latin capital letter S with caron, U+0160 ISOlat2 'Sigma': 'Σ', # greek capital letter sigma, U+03A3 ISOgrk3 'THORN': '\336', # latin capital letter THORN, U+00DE ISOlat1 'Tau': 'Τ', # greek capital letter tau, U+03A4 'Theta': 'Θ', # greek capital letter theta, U+0398 ISOgrk3 'Uacute': '\332', # latin capital letter U with acute, U+00DA ISOlat1 'Ucirc': '\333', # latin capital letter U with circumflex, U+00DB ISOlat1 'Ugrave': '\331', # latin capital letter U with grave, U+00D9 ISOlat1 'Upsilon': 'Υ', # greek capital letter upsilon, U+03A5 ISOgrk3 'Uuml': '\334', # latin capital letter U with diaeresis, U+00DC ISOlat1 'Xi': 'Ξ', # greek capital letter xi, U+039E ISOgrk3 'Yacute': '\335', # latin capital letter Y with acute, U+00DD ISOlat1 'Yuml': 'Ÿ', # latin capital letter Y with diaeresis, U+0178 ISOlat2 'Zeta': 'Ζ', # greek capital letter zeta, U+0396 'aacute': '\341', # latin small letter a with acute, U+00E1 ISOlat1 'acirc': '\342', # latin small letter a with circumflex, U+00E2 ISOlat1 
'acute': '\264', # acute accent = spacing acute, U+00B4 ISOdia 'aelig': '\346', # latin small letter ae = latin small ligature ae, U+00E6 ISOlat1 'agrave': '\340', # latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1 'alefsym': 'ℵ', # alef symbol = first transfinite cardinal, U+2135 NEW 'alpha': 'α', # greek small letter alpha, U+03B1 ISOgrk3 'amp': '\46', # ampersand, U+0026 ISOnum 'and': '∧', # logical and = wedge, U+2227 ISOtech 'ang': '∠', # angle, U+2220 ISOamso 'aring': '\345', # latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1 'asymp': '≈', # almost equal to = asymptotic to, U+2248 ISOamsr 'atilde': '\343', # latin small letter a with tilde, U+00E3 ISOlat1 'auml': '\344', # latin small letter a with diaeresis, U+00E4 ISOlat1 'bdquo': '„', # double low-9 quotation mark, U+201E NEW 'beta': 'β', # greek small letter beta, U+03B2 ISOgrk3 'brvbar': '\246', # broken bar = broken vertical bar, U+00A6 ISOnum 'bull': '•', # bullet = black small circle, U+2022 ISOpub 'cap': '∩', # intersection = cap, U+2229 ISOtech 'ccedil': '\347', # latin small letter c with cedilla, U+00E7 ISOlat1 'cedil': '\270', # cedilla = spacing cedilla, U+00B8 ISOdia 'cent': '\242', # cent sign, U+00A2 ISOnum 'chi': 'χ', # greek small letter chi, U+03C7 ISOgrk3 'circ': 'ˆ', # modifier letter circumflex accent, U+02C6 ISOpub 'clubs': '♣', # black club suit = shamrock, U+2663 ISOpub 'cong': '≅', # approximately equal to, U+2245 ISOtech 'copy': '\251', # copyright sign, U+00A9 ISOnum 'crarr': '↵', # downwards arrow with corner leftwards = carriage return, U+21B5 NEW 'cup': '∪', # union = cup, U+222A ISOtech 'curren': '\244', # currency sign, U+00A4 ISOnum 'dArr': '⇓', # downwards double arrow, U+21D3 ISOamsa 'dagger': '†', # dagger, U+2020 ISOpub 'darr': '↓', # downwards arrow, U+2193 ISOnum 'deg': '\260', # degree sign, U+00B0 ISOnum 'delta': 'δ', # greek small letter delta, U+03B4 ISOgrk3 'diams': '♦', # black diamond suit, U+2666 
ISOpub 'divide': '\367', # division sign, U+00F7 ISOnum 'eacute': '\351', # latin small letter e with acute, U+00E9 ISOlat1 'ecirc': '\352', # latin small letter e with circumflex, U+00EA ISOlat1 'egrave': '\350', # latin small letter e with grave, U+00E8 ISOlat1 'empty': '∅', # empty set = null set = diameter, U+2205 ISOamso 'emsp': ' ', # em space, U+2003 ISOpub 'ensp': ' ', # en space, U+2002 ISOpub 'epsilon': 'ε', # greek small letter epsilon, U+03B5 ISOgrk3 'equiv': '≡', # identical to, U+2261 ISOtech 'eta': 'η', # greek small letter eta, U+03B7 ISOgrk3 'eth': '\360', # latin small letter eth, U+00F0 ISOlat1 'euml': '\353', # latin small letter e with diaeresis, U+00EB ISOlat1 'euro': '€', # euro sign, U+20AC NEW 'exist': '∃', # there exists, U+2203 ISOtech 'fnof': 'ƒ', # latin small f with hook = function = florin, U+0192 ISOtech 'forall': '∀', # for all, U+2200 ISOtech 'frac12': '\275', # vulgar fraction one half = fraction one half, U+00BD ISOnum 'frac14': '\274', # vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum 'frac34': '\276', # vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum 'frasl': '⁄', # fraction slash, U+2044 NEW 'gamma': 'γ', # greek small letter gamma, U+03B3 ISOgrk3 'ge': '≥', # greater-than or equal to, U+2265 ISOtech 'gt': '\76', # greater-than sign, U+003E ISOnum 'hArr': '⇔', # left right double arrow, U+21D4 ISOamsa 'harr': '↔', # left right arrow, U+2194 ISOamsa 'hearts': '♥', # black heart suit = valentine, U+2665 ISOpub 'hellip': '…', # horizontal ellipsis = three dot leader, U+2026 ISOpub 'iacute': '\355', # latin small letter i with acute, U+00ED ISOlat1 'icirc': '\356', # latin small letter i with circumflex, U+00EE ISOlat1 'iexcl': '\241', # inverted exclamation mark, U+00A1 ISOnum 'igrave': '\354', # latin small letter i with grave, U+00EC ISOlat1
⌨️ 快捷键说明
复制代码:Ctrl + C
搜索代码:Ctrl + F
全屏模式:F11
增大字号:Ctrl + =
减小字号:Ctrl + -
显示快捷键:?