// File: form1.cs
// (stray "font size:" label, apparently from the web page this listing was copied from)
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.IO;
using Office;
using Word;
using System.Text.RegularExpressions;
namespace ParseWord
{
/// <summary>
/// Main window of the tool: the user picks a Word document, its text is pulled
/// out through Word automation, segmented by <see cref="WordDeal"/> and shown
/// in the result text box.
/// </summary>
public partial class Form1 : Form
{
    /// <summary>Relative path of the word list that is passed to <see cref="WordDeal.ReadWordList"/>.</summary>
    private const string WordListFileName = @"word_freq_list.txt";

    /// <summary>Longest candidate (in characters) tried when matching against the word list.</summary>
    private const int MaxWordLength = 4;

    /// <summary>Creates the form and its designer-generated controls.</summary>
    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Shows a file picker restricted to *.doc files and copies the chosen
    /// path into the path text box.
    /// </summary>
    private void buttonBrowse_Click(object sender, EventArgs e)
    {
        using (OpenFileDialog ofd = new OpenFileDialog())
        {
            ofd.Filter = "Word Documents(*.doc)|*.doc";

            // Only accept the path when the user confirmed the dialog;
            // cancelling must not wipe a path that was already entered.
            if (ofd.ShowDialog() == DialogResult.OK)
            {
                textBoxWordPathName.Text = ofd.FileName;
            }
        }
    }

    /// <summary>
    /// Opens the selected document in Word, copies its whole text through the
    /// clipboard, segments it and displays the result.
    /// </summary>
    /// <remarks>
    /// The text is transferred via the system clipboard, so whatever the user
    /// had on the clipboard is overwritten by this operation.
    /// </remarks>
    private void buttonParse_Click(object sender, EventArgs e)
    {
        string path = textBoxWordPathName.Text;
        if (string.IsNullOrEmpty(path) || !System.IO.File.Exists(path))
        {
            // Fail early with a readable message instead of a COM error from Word.
            MessageBox.Show("Please select an existing Word document first.");
            return;
        }

        object file = path;
        object nullobj = System.Reflection.Missing.Value;
        string filecontent;

        Word.ApplicationClass wordApp = new ApplicationClass();
        try
        {
            Word.Document doc = wordApp.Documents.Open(
                ref file, ref nullobj, ref nullobj,
                ref nullobj, ref nullobj, ref nullobj,
                ref nullobj, ref nullobj, ref nullobj,
                ref nullobj, ref nullobj, ref nullobj);
            try
            {
                doc.ActiveWindow.Selection.WholeStory();
                doc.ActiveWindow.Selection.Copy();

                // The clipboard may be empty or hold no plain-text format
                // (for example an empty document); treat that as "no text".
                IDataObject data = Clipboard.GetDataObject();
                object text = (data == null) ? null : data.GetData(DataFormats.Text);
                filecontent = (text == null) ? string.Empty : text.ToString();
            }
            finally
            {
                doc.Close(ref nullobj, ref nullobj, ref nullobj);
            }
        }
        finally
        {
            // Always shut Word down, otherwise a hidden WINWORD process is
            // left running whenever anything above throws.
            wordApp.Quit(ref nullobj, ref nullobj, ref nullobj);
        }

        textBoxFile.Text = FileParse(filecontent);
    }

    /// <summary>
    /// Segments <paramref name="fc"/> using the word list file and returns the
    /// text with the separating spaces inserted.
    /// </summary>
    /// <param name="fc">Raw document text.</param>
    /// <returns>The segmented text produced by <see cref="WordDeal.WordsParse"/>.</returns>
    private string FileParse(string fc)
    {
        WordDeal worddeal = new WordDeal(fc);
        worddeal.ReadWordList(WordListFileName);
        worddeal.WordsParse(MaxWordLength);
        return worddeal.FILECONTENT;
    }
}
/// <summary>
/// Dictionary-based text segmenter: walks the text from left to right, tries
/// the longest candidate first and inserts a space after every dictionary
/// word and after every single character that matches nothing.
/// Runs consisting only of ASCII letters, digits and whitespace are left as-is.
/// </summary>
public class WordDeal
{
    /// <summary>Matches text made up solely of ASCII letters, digits and whitespace.</summary>
    private static readonly Regex AlphaNumericPattern = new Regex(@"^[A-Za-z0-9\s]+$");

    private string FileContent;

    // Used as a set (the value is ignored) so that lookups are hash-based
    // instead of a linear scan over the whole word list.
    private readonly Dictionary<string, bool> WordSet;

    /// <summary>The current text; contains the inserted spaces after <see cref="WordsParse"/> ran.</summary>
    public string FILECONTENT
    {
        get
        {
            return FileContent;
        }
    }

    /// <summary>Creates a segmenter for the given text with an empty word list.</summary>
    /// <param name="filecontent">Text to segment; null is treated as empty text.</param>
    public WordDeal(string filecontent)
    {
        FileContent = (filecontent == null) ? string.Empty : filecontent;
        WordSet = new Dictionary<string, bool>();
    }

    /// <summary>
    /// Loads the word list. The file is read as GB18030 text; on every line the
    /// second space-separated field is taken as the word. Lines with fewer than
    /// two fields (for example blank lines) are skipped.
    /// </summary>
    /// <param name="filename">Path of the word list file.</param>
    public void ReadWordList(string filename)
    {
        Encoding ecode = Encoding.GetEncoding("GB18030");
        char[] seperator = new char[] { ' ' };

        // Read-only access is enough and also works for write-protected files;
        // the using blocks close the file even when reading fails midway.
        using (FileStream afile = new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read))
        using (StreamReader sr = new StreamReader(afile, ecode))
        {
            string strline;
            while ((strline = sr.ReadLine()) != null)
            {
                string[] splitline = strline.Split(seperator, StringSplitOptions.RemoveEmptyEntries);
                if (splitline.Length < 2)
                {
                    continue;
                }
                WordSet[splitline[1]] = true;
            }
        }
    }

    /// <summary>Returns <paramref name="length"/> characters of the text starting at the zero-based <paramref name="index"/>.</summary>
    public string ExtractWords(int index, int length)
    {
        return FileContent.Substring(index, length);
    }

    /// <summary>
    /// Inserts a single space into the text at zero-based index <paramref name="pos"/>,
    /// i.e. directly after the first <paramref name="pos"/> characters.
    /// </summary>
    public void InsertBackSpace(int pos)
    {
        FileContent = FileContent.Insert(pos, " ");
    }

    /// <summary>Tells whether <paramref name="WordSegment"/> is contained in the loaded word list.</summary>
    /// <returns>true when the exact string was loaded; false otherwise (including for null).</returns>
    public bool isExist(string WordSegment)
    {
        return WordSegment != null && WordSet.ContainsKey(WordSegment);
    }

    /// <summary>
    /// Tells whether <paramref name="WordSegment"/> consists only of ASCII letters,
    /// digits and whitespace.
    /// </summary>
    public bool isWord(string WordSegment)
    {
        return AlphaNumericPattern.IsMatch(WordSegment);
    }

    /// <summary>
    /// Segments the text in place. At every position the longest candidate of at
    /// most <paramref name="ParseLen"/> characters is tried first and shortened
    /// one character at a time until it is a dictionary word (a space is added
    /// after it), an alphanumeric/whitespace run (copied unchanged), or a single
    /// unmatched character (a space is added after it).
    /// </summary>
    /// <param name="ParseLen">Maximum candidate length in characters; must be at least 1.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="ParseLen"/> is less than 1.</exception>
    public void WordsParse(int ParseLen)
    {
        if (ParseLen < 1)
        {
            // A non-positive length can never consume a character, so the scan
            // could not make progress.
            throw new ArgumentOutOfRangeException("ParseLen", "ParseLen must be at least 1.");
        }

        string source = FileContent;
        // Build the result separately instead of re-allocating the whole
        // string for every inserted space.
        StringBuilder result = new StringBuilder(source.Length * 2);

        int i = 0;
        while (i < source.Length)
        {
            int len = Math.Min(ParseLen, source.Length - i);

            // Fallback when nothing matches: one unknown character followed by a space.
            int taken = 1;
            bool appendSpace = true;

            for (int j = len; j > 0; j--)
            {
                string candidate = source.Substring(i, j);
                if (isExist(candidate))
                {
                    taken = j;
                    appendSpace = true;
                    break;
                }
                if (isWord(candidate))
                {
                    taken = j;
                    appendSpace = false;
                    break;
                }
            }

            result.Append(source, i, taken);
            if (appendSpace)
            {
                result.Append(' ');
            }

            // Advance by exactly what was consumed; the character that follows
            // an alphanumeric run must still be examined.
            i += taken;
        }

        FileContent = result.ToString();
    }
}
}
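// Keyboard shortcuts (this text appears to come from the web code viewer the
// listing was copied from; it is not part of the program):
//   Copy code:            Ctrl + C
//   Search code:          Ctrl + F
//   Full-screen mode:     F11
//   Toggle theme:         Ctrl + Shift + D
//   Show shortcuts:       ?
//   Increase font size:   Ctrl + =
//   Decrease font size:   Ctrl + -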