⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 identifyencoding.cs

📁 一)需求 很多情况下我们需要知道字节流的编码
💻 CS
📖 第 1 页 / 共 5 页
字号:
		#region ENCTWProbability.....
		/// <summary>
		/// Estimates how likely the given bytes are text in the CNS 11643 (Taiwan) encoding.
		/// </summary>
		/// <param name="rawtext">The <see cref="sbyte"/> array to examine.</param>
		/// <returns>
		/// A likelihood score intended to lie between 0 and 100: half is derived from the
		/// share of high-bit lead bytes that start an in-range sequence, half from the
		/// frequency table weights of the matched two-byte characters.
		/// </returns>
		internal virtual int ENCTWProbability(sbyte[] rawtext)
		{
			// Counters start at 1 (totals) so the ratios at the end never divide by zero.
			int highBitLeads = 1;
			int matchedChars = 1;
			long freqSum = 0;
			long freqCeiling = 1;

			// Range delimiters as signed bytes.
			// NOTE(review): Identity is defined elsewhere; presumably it returns its argument unchanged — confirm.
			sbyte shiftLead = (sbyte) Identity(0x8E);
			sbyte rangeLow = (sbyte) Identity(0xA1);
			sbyte planeHigh = (sbyte) Identity(0xB0);
			sbyte rangeHigh = (sbyte) Identity(0xFE);

			int length = rawtext.Length;
			int pos = 0;

			// The final byte is never used as a lead byte, so rawtext[pos + 1] is always readable.
			while (pos < length - 1)
			{
				sbyte lead = rawtext[pos];

				// Non-negative bytes are plain ASCII and are ignored.
				if (lead < 0)
				{
					highBitLeads++;
					sbyte second = rawtext[pos + 1];

					bool isFourByteSequence =
						pos + 3 < length
						&& lead == shiftLead
						&& rangeLow <= second && second <= planeHigh
						&& rangeLow <= rawtext[pos + 2] && rawtext[pos + 2] <= rangeHigh
						&& rangeLow <= rawtext[pos + 3] && rawtext[pos + 3] <= rangeHigh;

					if (isFourByteSequence)
					{
						// Planes 1 - 16: all infrequent characters, so no frequency contribution.
						matchedChars++;
						pos += 3;
					}
					else if (rangeLow <= lead && lead <= rangeHigh && rangeLow <= second && second <= rangeHigh)
					{
						matchedChars++;
						freqCeiling += 500;

						// Adding 256 maps the negative signed byte back to 0xA1..0xFE before rebasing to 0.
						int row = lead + 256 - 0xA1;
						int column = second + 256 - 0xA1;

						if (EUC_TWFreq[row][column] != 0)
						{
							freqSum += EUC_TWFreq[row][column];
						}
						else if (35 <= row && row <= 92)
						{
							// No table entry: rows 35..92 receive a flat default weight.
							freqSum += 150;
						}

						// Skip the trail byte of the two-byte character.
						pos++;
					}
				}

				pos++;
			}

			float rangeScore = 50*((float) matchedChars/(float) highBitLeads);
			float freqScore = 50*((float) freqSum/(float) freqCeiling);

			return (int) (rangeScore + freqScore);
		}

		#endregion

		#region ISO2022CNProbability.....
		/// <summary>
		/// Estimates how likely the given bytes are ISO-2022-CN encoded text.
		/// </summary>
		/// <remarks>
		/// Scans for the escape sequences <c>ESC $ ) A</c> and <c>ESC $ ) G</c> and scores the
		/// bytes that follow each one up to the next ESC byte (or the end of the input).
		/// </remarks>
		/// <param name="rawtext">The <see cref="sbyte"/> array to examine.</param>
		/// <returns>
		/// A likelihood score intended to lie between 0 and 100: half is derived from the
		/// share of scanned positions that start an in-range byte pair, half from the
		/// frequency table weights of those pairs.
		/// </returns>
		internal virtual int ISO2022CNProbability(sbyte[] rawtext)
		{
			const sbyte esc = 0x1B;

			int i, rawtextlen;
			// Counters start at 1 (totals) so the ratios at the end never divide by zero.
			int dbchars = 1, isochars = 1;
			long isofreq = 0, totalfreq = 1;
			float rangeval, freqval;
			int row, column;

			// Check to see if characters fit into acceptable ranges
			// and have expected frequency of use

			rawtextlen = rawtext.Length;
			for (i = 0; i < rawtextlen - 1; i++)
			{
				if (rawtext[i] == esc && i + 3 < rawtextlen)
				{
					if (rawtext[i + 1] == 0x24 && rawtext[i + 2] == 0x29 && rawtext[i + 3] == 0x41)
					{
						// GB Escape  $ ) A
						i += 4;

						// Stop at the next ESC or at the end of the input; without the bounds
						// check, input lacking a closing ESC ran off the end of the array.
						while (i < rawtextlen && rawtext[i] != esc)
						{
							dbchars++;
							if (i + 1 < rawtextlen
								&& 0x21 <= rawtext[i] && rawtext[i] <= 0x77
								&& 0x21 <= rawtext[i + 1] && rawtext[i + 1] <= 0x77)
							{
								isochars++;
								row = rawtext[i] - 0x21;
								column = rawtext[i + 1] - 0x21;
								totalfreq += 500;
								if (GBFreq[row][column] != 0)
								{
									isofreq += GBFreq[row][column];
								}
								else if (15 <= row && row < 55)
								{
									// No table entry: rows 15..54 receive a flat default weight.
									isofreq += 200;
								}
								// Skip the second byte of the pair.
								i++;
							}
							i++;
						}
					}
					else if (rawtext[i + 1] == 0x24 && rawtext[i + 2] == 0x29 && rawtext[i + 3] == 0x47)
					{
						// CNS Escape $ ) G
						i += 4;

						// Same bounded scan as above, using the CNS ranges and table.
						while (i < rawtextlen && rawtext[i] != esc)
						{
							dbchars++;
							if (i + 1 < rawtextlen
								&& 0x21 <= rawtext[i] && rawtext[i] <= 0x7E
								&& 0x21 <= rawtext[i + 1] && rawtext[i + 1] <= 0x7E)
							{
								isochars++;
								totalfreq += 500;
								row = rawtext[i] - 0x21;
								column = rawtext[i + 1] - 0x21;
								if (EUC_TWFreq[row][column] != 0)
								{
									isofreq += EUC_TWFreq[row][column];
								}
								else if (35 <= row && row <= 92)
								{
									// No table entry: rows 35..92 receive a flat default weight.
									isofreq += 150;
								}
								// Skip the second byte of the pair.
								i++;
							}
							i++;
						}
					}

					// i may now equal rawtextlen, so check the bounds before reading rawtext[i].
					if (i + 2 < rawtextlen && rawtext[i] == esc && rawtext[i + 1] == 0x28 && rawtext[i + 2] == 0x42)
					{
						// ASCII:  ESC ( B
						i += 2;
					}
				}
			}

			rangeval = 50*((float) isochars/(float) dbchars);
			freqval = 50*((float) isofreq/(float) totalfreq);

			return (int) (rangeval + freqval);
		}

		#endregion

		#region UTF8Probability.....
		/// <summary>
		/// Estimates how likely the given bytes are UTF-8 encoded text.
		/// </summary>
		/// <remarks>
		/// Counts the non-ASCII bytes that belong to well-formed two-, three- or four-byte
		/// sequences and expresses them as a percentage of all non-ASCII bytes.
		/// </remarks>
		/// <param name="rawtext">The <see cref="sbyte"/> array to examine.</param>
		/// <returns>
		/// The percentage (0 to 100) when it is above 98, or above 95 with more than 30
		/// well-formed bytes; otherwise 0. Input with no non-ASCII bytes also yields 0.
		/// </returns>
		internal virtual int UTF8Probability(sbyte[] rawtext)
		{
			int score;
			int i, rawtextlen;
			int goodbytes = 0, asciibytes = 0;

			// Maybe also use UTF8 Byte Order Mark:  EF BB BF

			// Bytes are signed, so the ranges below are the two's-complement views of:
			//   0xC0..0xDF = -64..-33  (two-byte lead)
			//   0xE0..0xEF = -32..-17  (three-byte lead)
			//   0xF0..0xF4 = -16..-12  (four-byte lead)
			//   0x80..0xBF = -128..-65 (continuation; the lower bound is the sbyte minimum)
			rawtextlen = rawtext.Length;
			for (i = 0; i < rawtextlen; i++)
			{
				if (rawtext[i] >= 0)
				{
					// One byte
					asciibytes++;
					// Ignore ASCII, can throw off count
				}
				else if (-64 <= rawtext[i] && rawtext[i] <= -33
					&& i + 1 < rawtextlen
					&& rawtext[i + 1] <= -65)
				{
					goodbytes += 2;
					i++;
				}
				else if (-32 <= rawtext[i] && rawtext[i] <= -17
					&& i + 2 < rawtextlen
					&& rawtext[i + 1] <= -65
					&& rawtext[i + 2] <= -65)
				{
					goodbytes += 3;
					i += 2;
				}
				else if (-16 <= rawtext[i] && rawtext[i] <= -12
					&& i + 3 < rawtextlen
					&& rawtext[i + 1] <= -65
					&& rawtext[i + 2] <= -65
					&& rawtext[i + 3] <= -65)
				{
					// Four-byte sequence (supplementary-plane characters); previously these
					// were counted as malformed and dragged valid UTF-8 down to a score of 0.
					goodbytes += 4;
					i += 3;
				}
			}

			if (asciibytes == rawtextlen)
			{
				// Nothing but ASCII (or empty input): no evidence for UTF-8 specifically.
				return 0;
			}

			score = (int) (100*((float) goodbytes/(float) (rawtextlen - asciibytes)));

			// If not above 98, reduce to zero to prevent coincidental matches
			// Allows for some (few) bad formed sequences
			if (score > 98)
			{
				return score;
			}
			else if (score > 95 && goodbytes > 30)
			{
				return score;
			}
			else
			{
				return 0;
			}
		}

		#endregion

		#region UnicodeProbability.....
		/// <summary>
		/// Estimates how likely the given bytes are Unicode text by checking whether the
		/// first two bytes are FE FF or FF FE.
		/// </summary>
		/// <param name="rawtext">The <see cref="sbyte"/> array to examine.</param>
		/// <returns>
		/// 100 when the input starts with FE FF or FF FE; otherwise 0, including when the
		/// input is shorter than two bytes.
		/// </returns>
		internal virtual int UnicodeProbability(sbyte[] rawtext)
		{
			// Fewer than two bytes cannot hold the two-byte marker; reading rawtext[0] and
			// rawtext[1] unconditionally would throw on such input.
			if (rawtext.Length < 2)
			{
				return 0;
			}

			// NOTE(review): Identity is defined elsewhere; presumably it returns its argument unchanged — confirm.
			sbyte markFE = (sbyte) Identity(0xFE);
			sbyte markFF = (sbyte) Identity(0xFF);

			if ((markFE == rawtext[0] && markFF == rawtext[1]) || (markFF == rawtext[0] && markFE == rawtext[1]))
			{
				return 100;
			}

			return 0;
		}

		#endregion

		#region ASCIIProbability.....
		/// <summary>
		/// Estimates how likely the given bytes are plain ASCII text.
		/// </summary>
		/// <param name="rawtext">The <see cref="sbyte"/> array to examine.</param>
		/// <returns>
		/// 70 minus 5 for every byte that has its high bit set or equals ESC (0x1B).
		/// The result is not clamped, so it becomes negative once more than 14 such bytes occur.
		/// </returns>
		internal virtual int ASCIIProbability(sbyte[] rawtext)
		{
			const int startingScore = 70;
			const int penalty = 5;

			int remaining = startingScore;

			foreach (sbyte current in rawtext)
			{
				// A negative signed byte has its high bit set; ESC is used by ISO 2022.
				bool countsAgainstAscii = current < 0 || current == 0x1B;
				if (countsAgainstAscii)
				{
					remaining -= penalty;
				}
			}

			return remaining;
		}

		#endregion
		#endregion

		#region Initialize_Frequencies.....
		/// <summary>
		/// 初始化必要的条件
		/// </summary>
		internal virtual void Initialize_Frequencies() 
		{
			int i;
			if(GBFreq[0] == null)
			{
				for (i = 0; i < 94; i++) 
				{
					GBFreq[i] = new int[94];	
				}

				#region GBFreq[20][35] = 599;
				GBFreq[49][26] = 598;
				GBFreq[41][38] = 597;
				GBFreq[17][26] = 596;
				GBFreq[32][42] = 595;
				GBFreq[39][42] = 594;
				GBFreq[45][49] = 593;
				GBFreq[51][57] = 592;
				GBFreq[50][47] = 591;
				GBFreq[42][90] = 590;
				GBFreq[52][65] = 589;
				GBFreq[53][47] = 588;
				GBFreq[19][82] = 587;
				GBFreq[31][19] = 586;
				GBFreq[40][46] = 585;
				GBFreq[24][89] = 584;
				GBFreq[23][85] = 583;
				GBFreq[20][28] = 582;
				GBFreq[42][20] = 581;
				GBFreq[34][38] = 580;
				GBFreq[45][9] = 579;
				GBFreq[54][50] = 578;
				GBFreq[25][44] = 577;
				GBFreq[35][66] = 576;
				GBFreq[20][55] = 575;
				GBFreq[18][85] = 574;
				GBFreq[20][31] = 573;
				GBFreq[49][17] = 572;
				GBFreq[41][16] = 571;
				GBFreq[35][73] = 570;
				GBFreq[20][34] = 569;
				GBFreq[29][44] = 568;
				GBFreq[35][38] = 567;
				GBFreq[49][9] = 566;
				GBFreq[46][33] = 565;
				GBFreq[49][51] = 564;
				GBFreq[40][89] = 563;
				GBFreq[26][64] = 562;
				GBFreq[54][51] = 561;
				GBFreq[54][36] = 560;
				GBFreq[39][4] = 559;
				GBFreq[53][13] = 558;
				GBFreq[24][92] = 557;
				GBFreq[27][49] = 556;
				GBFreq[48][6] = 555;
				GBFreq[21][51] = 554;
				GBFreq[30][40] = 553;
				GBFreq[42][92] = 552;
				GBFreq[31][78] = 551;
				GBFreq[25][82] = 550;
				GBFreq[47][0] = 549;
				GBFreq[34][19] = 548;
				GBFreq[47][35] = 547;
				GBFreq[21][63] = 546;
				GBFreq[43][75] = 545;
				GBFreq[21][87] = 544;
				GBFreq[35][59] = 543;
				GBFreq[25][34] = 542;
				GBFreq[21][27] = 541;
				GBFreq[39][26] = 540;
				GBFreq[34][26] = 539;
				GBFreq[39][52] = 538;
				GBFreq[50][57] = 537;
				GBFreq[37][79] = 536;
				GBFreq[26][24] = 535;
				GBFreq[22][1] = 534;
				GBFreq[18][40] = 533;
				GBFreq[41][33] = 532;
				GBFreq[53][26] = 531;
				GBFreq[54][86] = 530;
				GBFreq[20][16] = 529;
				GBFreq[46][74] = 528;
				GBFreq[30][19] = 527;
				GBFreq[45][35] = 526;
				GBFreq[45][61] = 525;
				GBFreq[30][9] = 524;
				GBFreq[41][53] = 523;
				GBFreq[41][13] = 522;
				GBFreq[50][34] = 521;
				GBFreq[53][86] = 520;
				GBFreq[47][47] = 519;
				GBFreq[22][28] = 518;
				GBFreq[50][53] = 517;
				GBFreq[39][70] = 516;
				GBFreq[38][15] = 515;
				GBFreq[42][88] = 514;
				GBFreq[16][29] = 513;
				GBFreq[27][90] = 512;
				GBFreq[29][12] = 511;
				GBFreq[44][22] = 510;
				GBFreq[34][69] = 509;
				GBFreq[24][10] = 508;
				GBFreq[44][11] = 507;
				GBFreq[39][92] = 506;
				GBFreq[49][48] = 505;
				GBFreq[31][46] = 504;
				GBFreq[19][50] = 503;
				GBFreq[21][14] = 502;
				GBFreq[32][28] = 501;
				GBFreq[18][3] = 500;
				GBFreq[53][9] = 499;
				GBFreq[34][80] = 498;
				GBFreq[48][88] = 497;
				GBFreq[46][53] = 496;
				GBFreq[22][53] = 495;
				GBFreq[28][10] = 494;
				GBFreq[44][65] = 493;
				GBFreq[20][10] = 492;
				GBFreq[40][76] = 491;
				GBFreq[47][8] = 490;
				GBFreq[50][74] = 489;
				GBFreq[23][62] = 488;
				GBFreq[49][65] = 487;
				GBFreq[28][87] = 486;
				GBFreq[15][48] = 485;
				GBFreq[22][7] = 484;
				GBFreq[19][42] = 483;
				GBFreq[41][20] = 482;
				GBFreq[26][55] = 481;
				GBFreq[21][93] = 480;
				GBFreq[31][76] = 479;
				GBFreq[34][31] = 478;
				GBFreq[20][66] = 477;
				GBFreq[51][33] = 476;
				GBFreq[34][86] = 475;
				GBFreq[37][67] = 474;
				GBFreq[53][53] = 473;
				GBFreq[40][88] = 472;
				GBFreq[39][10] = 471;
				GBFreq[24][3] = 470;
				GBFreq[27][25] = 469;
				GBFreq[26][15] = 468;
				GBFreq[21][88] = 467;
				GBFreq[52][62] = 466;
				GBFreq[46][81] = 465;
				GBFreq[38][72] = 464;
				GBFreq[17][30] = 463;
				GBFreq[52][92] = 462;
				GBFreq[34][90] = 461;
				GBFreq[21][7] = 460;
				GBFreq[36][13] = 459;
				GBFreq[45][41] = 458;
				GBFreq[32][5] = 457;
				GBFreq[26][89] = 456;
				GBFreq[23][87] = 455;
				GBFreq[20][39] = 454;
				GBFreq[27][23] = 453;
				GBFreq[25][59] = 452;
				GBFreq[49][20] = 451;
				GBFreq[54][77] = 450;
				GBFreq[27][67] = 449;
				GBFreq[47][33] = 448;
				GBFreq[41][17] = 447;
				GBFreq[19][81] = 446;
				GBFreq[16][66] = 445;
				GBFreq[45][26] = 444;
				GBFreq[49][81] = 443;
				GBFreq[53][55] = 442;
				GBFreq[16][26] = 441;
				GBFreq[54][62] = 440;
				GBFreq[20][70] = 439;
				GBFreq[42][35] = 438;
				GBFreq[20][57] = 437;
				GBFreq[34][36] = 436;
				GBFreq[46][63] = 435;
				GBFreq[19][45] = 434;
				GBFreq[21][10] = 433;
				GBFreq[52][93] = 432;
				GBFreq[25][2] = 431;
				GBFreq[30][57] = 430;
				GBFreq[41][24] = 429;
				GBFreq[28][43] = 428;
				GBFreq[45][86] = 427;
				GBFreq[51][56] = 426;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -