category.cs

来自「没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没」· CS 代码 · 共 638 行 · 第 1/2 页

CS
638
字号
//
// assembly:	System
// namespace:	System.Text.RegularExpressions
// file:	category.cs
//
// author:	Dan Lewis (dlewis@gmx.co.uk)
// 		(c) 2002

using System;
using System.Globalization;

namespace System.Text.RegularExpressions {

	// Character classes a single character can be tested against.
	// Member order defines the underlying ushort values, so new members
	// must not be inserted between existing ones.
	enum Category : ushort {
		None,

		// canonical classes

		Any,			// any character except newline		.
		AnySingleline,		// any character			. (s option)
		Word,			// any word character			\w
		Digit,			// any digit character			\d
		WhiteSpace,		// any whitespace character		\s

		// ECMAScript classes

		EcmaAny,
		EcmaAnySingleline,
		EcmaWord,		// [a-zA-Z_0-9]
		EcmaDigit,		// [0-9]
		EcmaWhiteSpace,		// [ \f\n\r\t\v]

		// unicode categories

		UnicodeL,		// Letter
		UnicodeM,		// Mark
		UnicodeN,		// Number
		UnicodeZ,		// Separator
		UnicodeP,		// Punctuation
		UnicodeS,		// Symbol
		UnicodeC,		// Other

		UnicodeLu,		// UppercaseLetter
		UnicodeLl,		// LowercaseLetter
		UnicodeLt,		// TitlecaseLetter
		UnicodeLm,		// ModifierLetter
		UnicodeLo,		// OtherLetter
		UnicodeMn,		// NonspacingMark
		UnicodeMe,		// EnclosingMark
		UnicodeMc,		// SpacingMark
		UnicodeNd,		// DecimalNumber
		UnicodeNl,		// LetterNumber
		UnicodeNo,		// OtherNumber
		UnicodeZs,		// SpaceSeparator
		UnicodeZl,		// LineSeparator
		UnicodeZp,		// ParagraphSeparator
		UnicodePd,		// DashPunctuation
		UnicodePs,		// OpenPunctuation
		UnicodePi,		// InitialPunctuation
		UnicodePe,		// ClosePunctuation
		UnicodePf,		// FinalPunctuation
		UnicodePc,		// ConnectorPunctuation
		UnicodePo,		// OtherPunctuation
		UnicodeSm,		// MathSymbol
		UnicodeSc,		// CurrencySymbol
		UnicodeSk,		// ModifierSymbol
		UnicodeSo,		// OtherSymbol
		UnicodeCc,		// Control
		UnicodeCf,		// Format
		UnicodeCo,		// PrivateUse
		UnicodeCs,		// Surrogate
		UnicodeCn,		// Unassigned

		// unicode block ranges

		// notes: the categories marked with a star are valid unicode block ranges,
		// but don't seem to be accepted by the MS parser using the \p{...} format.
		// any ideas?

		UnicodeBasicLatin,
		UnicodeLatin1Supplement,			// *
		UnicodeLatinExtendedA,				// *
		UnicodeLatinExtendedB,				// *
		UnicodeIPAExtensions,
		UnicodeSpacingModifierLetters,
		UnicodeCombiningDiacriticalMarks,
		UnicodeGreek,
		UnicodeCyrillic,
		UnicodeArmenian,
		UnicodeHebrew,
		UnicodeArabic,
		UnicodeSyriac,
		UnicodeThaana,
		UnicodeDevanagari,
		UnicodeBengali,
		UnicodeGurmukhi,
		UnicodeGujarati,
		UnicodeOriya,
		UnicodeTamil,
		UnicodeTelugu,
		UnicodeKannada,
		UnicodeMalayalam,
		UnicodeSinhala,
		UnicodeThai,
		UnicodeLao,
		UnicodeTibetan,
		UnicodeMyanmar,
		UnicodeGeorgian,
		UnicodeHangulJamo,
		UnicodeEthiopic,
		UnicodeCherokee,
		UnicodeUnifiedCanadianAboriginalSyllabics,
		UnicodeOgham,
		UnicodeRunic,
		UnicodeKhmer,
		UnicodeMongolian,
		UnicodeLatinExtendedAdditional,
		UnicodeGreekExtended,
		UnicodeGeneralPunctuation,
		UnicodeSuperscriptsandSubscripts,
		UnicodeCurrencySymbols,
		UnicodeCombiningMarksforSymbols,
		UnicodeLetterlikeSymbols,
		UnicodeNumberForms,
		UnicodeArrows,
		UnicodeMathematicalOperators,
		UnicodeMiscellaneousTechnical,
		UnicodeControlPictures,
		UnicodeOpticalCharacterRecognition,
		UnicodeEnclosedAlphanumerics,
		UnicodeBoxDrawing,
		UnicodeBlockElements,
		UnicodeGeometricShapes,
		UnicodeMiscellaneousSymbols,
		UnicodeDingbats,
		UnicodeBraillePatterns,
		UnicodeCJKRadicalsSupplement,
		UnicodeKangxiRadicals,
		UnicodeIdeographicDescriptionCharacters,
		UnicodeCJKSymbolsandPunctuation,
		UnicodeHiragana,
		UnicodeKatakana,
		UnicodeBopomofo,
		UnicodeHangulCompatibilityJamo,
		UnicodeKanbun,
		UnicodeBopomofoExtended,
		UnicodeEnclosedCJKLettersandMonths,
		UnicodeCJKCompatibility,
		UnicodeCJKUnifiedIdeographsExtensionA,
		UnicodeCJKUnifiedIdeographs,
		UnicodeYiSyllables,
		UnicodeYiRadicals,
		UnicodeHangulSyllables,
		UnicodeHighSurrogates,
		UnicodeHighPrivateUseSurrogates,
		UnicodeLowSurrogates,
		UnicodePrivateUse,
		UnicodeCJKCompatibilityIdeographs,
		UnicodeAlphabeticPresentationForms,
		UnicodeArabicPresentationFormsA,		// *
		UnicodeCombiningHalfMarks,
		UnicodeCJKCompatibilityForms,
		UnicodeSmallFormVariants,
		UnicodeArabicPresentationFormsB,		// *
		UnicodeSpecials,
		UnicodeHalfwidthandFullwidthForms,

		UnicodeOldItalic,
		UnicodeGothic,
		UnicodeDeseret,
		UnicodeByzantineMusicalSymbols,
		UnicodeMusicalSymbols,
		UnicodeMathematicalAlphanumericSymbols,
		UnicodeCJKUnifiedIdeographsExtensionB,
		UnicodeCJKCompatibilityIdeographsSupplement,
		UnicodeTags
	}

	// Helpers for resolving Category values by name and for testing
	// characters against them.
	class CategoryUtils {

		// Resolves a unicode category or block name to its Category member.
		//
		// An optional leading "Is" is removed, "Unicode" is prepended, and the
		// result is looked up (case-sensitively) among the Category member names.
		// Returns Category.None when the name is null or matches no member.
		public static Category CategoryFromName (string name) {
			// A null name used to escape as a NullReferenceException, and a
			// comma-separated list would be OR-ed together by Enum.Parse into
			// an unrelated value; neither can name a single category.
			if (name == null || name.IndexOf (',') >= 0)
				return Category.None;

			try {
				// remove prefix from block range; the test is ordinal so that
				// it agrees with the fixed two-character strip below
				if (name.StartsWith ("Is", StringComparison.Ordinal))
					name = name.Substring (2);

				return (Category)Enum.Parse (typeof (Category), "Unicode" + name);
			}
			catch (ArgumentException) {
				// Enum.Parse found no member with that name
				return Category.None;
			}
		}

		// Returns true when the character c belongs to the category cat.
		public static bool IsCategory (Category cat, char c) {
			switch (cat) {
			case Category.None:
				return false;

			case Category.Any:
				return c != '\n';

			case Category.AnySingleline:
				return true;

			case Category.Word:
				return
					Char.IsLetterOrDigit (c) ||
					IsCategory (UnicodeCategory.ConnectorPunctuation, c);

			case Category.Digit:
				return Char.IsDigit (c);

			case Category.WhiteSpace:
				return Char.IsWhiteSpace (c);

			// ECMA categories

			case Category.EcmaAny:
				return c != '\n';

			case Category.EcmaAnySingleline:
				return true;

			case Category.EcmaWord:
				return
					'a' <= c && c <= 'z' ||
					'A' <= c && c <= 'Z' ||
					'0' <= c && c <= '9' ||
					'_' == c;

			case Category.EcmaDigit:
				// upper bound must be the character '9'; comparing against the
				// integer 9 made this range empty, so no digit ever matched
				return
					'0' <= c && c <= '9';

			case Category.EcmaWhiteSpace:
				return
					c == ' '  ||
					c == '\f' ||
					c == '\n' ||
					c == '\r' ||
					c == '\t' ||
					c == '\v';

			// Unicode categories...

			// letter

			case Category.UnicodeLu: return IsCategory (UnicodeCategory.UppercaseLetter, c);
			case Category.UnicodeLl: return IsCategory (UnicodeCategory.LowercaseLetter, c);
			case Category.UnicodeLt: return IsCategory (UnicodeCategory.TitlecaseLetter, c);
			case Category.UnicodeLm: return IsCategory (UnicodeCategory.ModifierLetter, c);
			case Category.UnicodeLo: return IsCategory (UnicodeCategory.OtherLetter, c);

			// mark

			case Category.UnicodeMn: return IsCategory (UnicodeCategory.NonSpacingMark, c);
			case Category.UnicodeMe: return IsCategory (UnicodeCategory.EnclosingMark, c);
			case Category.UnicodeMc: return IsCategory (UnicodeCategory.SpacingCombiningMark, c);

			// number

			case Category.UnicodeNd: return IsCategory (UnicodeCategory.DecimalDigitNumber, c);
			case Category.UnicodeNl: return IsCategory (UnicodeCategory.LetterNumber, c);
			case Category.UnicodeNo: return IsCategory (UnicodeCategory.OtherNumber, c);

			// separator

			case Category.UnicodeZs: return IsCategory (UnicodeCategory.SpaceSeparator, c);
			case Category.UnicodeZl: return IsCategory (UnicodeCategory.LineSeparator, c);
			case Category.UnicodeZp: return IsCategory (UnicodeCategory.ParagraphSeparator, c);

			// punctuation

			case Category.UnicodePd: return IsCategory (UnicodeCategory.DashPunctuation, c);
			case Category.UnicodePs: return IsCategory (UnicodeCategory.OpenPunctuation, c);
			case Category.UnicodePi: return IsCategory (UnicodeCategory.InitialQuotePunctuation, c);
			case Category.UnicodePe: return IsCategory (UnicodeCategory.ClosePunctuation, c);
			case Category.UnicodePf: return IsCategory (UnicodeCategory.FinalQuotePunctuation, c);
			case Category.UnicodePc: return IsCategory (UnicodeCategory.ConnectorPunctuation, c);
			case Category.UnicodePo: return IsCategory (UnicodeCategory.OtherPunctuation, c);

			// symbol

			case Category.UnicodeSm: return IsCategory (UnicodeCategory.MathSymbol, c);
			case Category.UnicodeSc: return IsCategory (UnicodeCategory.CurrencySymbol, c);
			case Category.UnicodeSk: return IsCategory (UnicodeCategory.ModifierSymbol, c);
			case Category.UnicodeSo: return IsCategory (UnicodeCategory.OtherSymbol, c);

			// other

			case Category.UnicodeCc: return IsCategory (UnicodeCategory.Control, c);
			case Category.UnicodeCf: return IsCategory (UnicodeCategory.Format, c);
			case Category.UnicodeCo: return IsCategory (UnicodeCategory.PrivateUse, c);
			case Category.UnicodeCs: return IsCategory (UnicodeCategory.Surrogate, c);
			case Category.UnicodeCn: return IsCategory (UnicodeCategory.OtherNotAssigned, c);

			case Category.UnicodeL:	// letter
				return
					IsCategory (UnicodeCategory.UppercaseLetter, c) ||
					IsCategory (UnicodeCategory.LowercaseLetter, c) ||
					IsCategory (UnicodeCategory.TitlecaseLetter, c) ||
					IsCategory (UnicodeCategory.ModifierLetter, c) ||
					IsCategory (UnicodeCategory.OtherLetter, c);

			case Category.UnicodeM:	// mark
				return
					IsCategory (UnicodeCategory.NonSpacingMark, c) ||
					IsCategory (UnicodeCategory.EnclosingMark, c) ||
					IsCategory (UnicodeCategory.SpacingCombiningMark, c);

			case Category.UnicodeN:	// number
				return
					IsCategory (UnicodeCategory.DecimalDigitNumber, c) ||
					IsCategory (UnicodeCategory.LetterNumber, c) ||
					IsCategory (UnicodeCategory.OtherNumber, c);

			case Category.UnicodeZ:	// separator
				return
					IsCategory (UnicodeCategory.SpaceSeparator, c) ||
					IsCategory (UnicodeCategory.LineSeparator, c) ||

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?