📄 unit1.~pas
字号:
unit Unit1;
interface
uses
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, StdCtrls, ExtCtrls, ComCtrls,strutils, DB, ADODB, ToolWin;
type
TForm1 = class(TForm)
Panel1: TPanel;
Splitter1: TSplitter;
StatusBar1: TStatusBar;
Memo1: TMemo;
Memo2: TMemo;
ADOConnection1: TADOConnection;
ADOTable1: TADOTable;
ToolBar1: TToolBar;
btnReverse: TToolButton;
btnNaive: TToolButton;
procedure FormCreate(Sender: TObject);
procedure FormDestroy(Sender: TObject);
procedure btnReverseClick(Sender: TObject);
procedure btnNaiveClick(Sender: TObject);
private
{ Private declarations }
public
{ Public declarations }
end;
procedure FindAll(Path,FileExt:String;Recursive:boolean;Const FoundResult:TStrings);
procedure FindSubDir(Path:string;Recursive:boolean;Const FoundResult:TStrings);
procedure RenameFileExt(FileName,FileExt:string);
function GetWordParents(strWord,strDoc:string;ClassIndex:integer):double;
function GetWordRelProb(strWord,strDoc:string):double;
function GetFileNameWithExt(FileName,FileExt:string):string;
function InText(const ASubText,AText:string):integer;//求子串位置,忽略大小写
function getHtmlText(filename:string):string;//取网页文本
function DelRepeatWords(words:string):string;
var
Form1: TForm1;
slFileNames:TStringList;
DocsClass:array[0..600] of integer;//文档所属类别
implementation
{$R *.dfm}
function InText(const ASubText,AText:string):integer;//求子串位置,忽略大小写
begin
Result:=AnsiPos(AnsiUppercase(ASubText), AnsiUppercase(AText));
end;
//遍历文件夹中的子文件夹名
procedure FindSubDir(Path:string;Recursive:boolean;Const FoundResult:TStrings);
var
sr:TSearchRec;
fr:Integer;
begin
if copy(path,length(path),1)<>'\' then
path:=path+'\';
fr:=FindFirst(Path+'*.*',faDirectory,sr);
while fr=0 do
begin
if (sr.Attr=faDirectory)and(sr.Name<>'.')and(sr.Name<>'..') then
begin
if Recursive then
FindSubDir(path+sr.Name,Recursive,FoundResult);
FoundResult.Add(path+sr.Name);
end;
fr:=FindNext(sr);
end;
FindClose(sr);
end;
//遍历文件夹中的文件名
procedure FindAll(Path,FileExt:String;Recursive:boolean;Const FoundResult:TStrings);
var
sr:TSearchRec;
fr:Integer;
begin
if copy(path,length(path),1)<>'\' then
path:=path+'\';
if Recursive then
fr:=FindFirst(Path+'*.*',faAnyFile,sr)
else
fr:=FindFirst(Path+FileExt,faAnyFile,sr);
while fr=0 do
begin
if (sr.Attr=faDirectory)and(sr.Name<>'.')and(sr.Name<>'..') then
if Recursive then
FindAll(path+sr.Name,FileExt,Recursive,FoundResult);
if FileExt='*.*' then
FoundResult.Add(path+sr.Name)
else
begin
if AnsiCompareText(ExtractFileExt(FileExt),ExtractFileExt(sr.Name))=0 then
FoundResult.Add(path+sr.Name);
end;
fr:=FindNext(sr);
end;
FindClose(sr);
end;
//改文件扩展名(FileExt新扩展名必须包括.如".htm")
procedure RenameFileExt(FileName,FileExt:string);
var s,t:string;
begin
if Not FileExists(FileName) then exit;
s:=extractfileName(fileName);
t:=extractfileExt(s);
s:=copy(s,1,length(s)-length(t));
s:=s+FileExt;
RenameFile(FileName,ExtractFilepath(FileName)+s);
end;
//获取词强关联
function GetWordParents(strWord,strDoc:string;ClassIndex:integer):double;
var i,j,k:integer;
aDoc:string;
Count1,Count2,Count3:integer;
WordIndex1,WordIndex2:integer;
begin
result:=1.0;
if form1.ADOTable1.Active then
form1.ADOTable1.Active:=false;
form1.ADOTable1.TableName:='WordsTable';
form1.ADOTable1.Active:=true;
if form1.ADOTable1.IsEmpty then
begin
form1.ADOTable1.Active:=false;
result:=0.0;
exit;
end;
form1.ADOTable1.First;
Count1:=0;
Count2:=0;
k:=1;
while not form1.ADOTable1.Eof do
begin
if AnsiCompareText(form1.ADOTable1.FieldByName('Words').AsString,strWord)<=0 then
begin
form1.ADOTable1.Next;
inc(k);
continue;
end;
aDoc:=form1.ADOTable1.FieldByName('Docs').AsString;
if ansicompareText(aDoc[1],strDoc[1])=0 then
break;
Count3:=0;
for i:=1 to length(strDoc) do
begin
if strDoc[i]=',' then continue;
for j:=1 to length(ADoc) do
begin
if aDoc[j]=',' then continue;
if strDoc[i]=aDoc[j] then
begin
inc(Count3);
break;
end;
end;
end;
if (count3>Count1) then
begin
Count1:=Count3;
WordIndex1:=K;
end;
if (count3>Count2) then
begin
Count2:=Count3;
WordIndex2:=K;
end;
form1.ADOTable1.Next;
inc(K);
end;
form1.ADOTable1.Active:=false;
end;
function GetWordRelProb(strWord,strDoc:string):double;
var i,j,k:integer;
aDoc:string;
Count1,Count2,Count3:integer;
WordIndex1,WordIndex2:integer;
begin
result:=0.0001;
if form1.ADOTable1.Active then
form1.ADOTable1.Active:=false;
form1.ADOTable1.TableName:='WordsTable';
form1.ADOTable1.Active:=true;
if form1.ADOTable1.IsEmpty then
begin
form1.ADOTable1.Active:=false;
result:=0.0001;
exit;
end;
form1.ADOTable1.First;
Count1:=0;
Count2:=0;
k:=1;
while not form1.ADOTable1.Eof do
begin
if form1.ADOTable1.FieldByName('Words').AsString=strWord then
begin
form1.ADOTable1.Next;
inc(k);
continue;
end;
aDoc:=form1.ADOTable1.FieldByName('Docs').AsString;
if ansicompareText(aDoc[1],strDoc[1])>=0 then
break;
Count3:=0;
for i:=1 to length(strDoc) do
begin
if strDoc[i]=',' then continue;
for j:=1 to length(ADoc) do
begin
if aDoc[j]=',' then continue;
if strDoc[i]=aDoc[j] then inc(Count3);
end;
end;
if (count3>Count1) then
begin
Count1:=Count3;
WordIndex1:=K;
end;
if (count3>Count2) then
begin
Count2:=Count3;
WordIndex2:=K;
end;
form1.ADOTable1.Next;
inc(K);
end;
form1.ADOTable1.Active:=false;
end;
//计算改了扩展名的文件名
function GetFileNameWithExt(FileName,FileExt:string):string;
var s,t:string;
begin
if Not FileExists(FileName) then exit;
s:=extractfileName(fileName);
t:=extractfileExt(s);
s:=copy(s,1,length(s)-length(t));
s:=extractfilepath(FileName)+s+FileExt;
result:=s;
end;
//取网页文本
function getHtmlText(filename:string):string;
var sl:TstringList;
doctext,newtext:string;
i:integer;
enterBracket:boolean;
begin
sl:=TStringList.Create;
sl.LoadFromFile(filename);
doctext:=sl.Text;
enterBracket:=false;
for i:=1 to length(doctext) do
begin
if doctext[i]='<' then enterBracket:=true;
if not enterBracket then newtext:=newtext+doctext[i];
if doctext[i]='>' then
begin
newtext:=newtext+' ';
enterBracket:=false;
end;
end;
Result:=newtext;
sl.Free;
end;
procedure TForm1.FormCreate(Sender: TObject);
var s:string;
begin
// if not directoryexists(extractfilepath(application.ExeName)+'Data') then
// begin
// showmessage('没有训练集目录Data');
// application.Terminate;
// end;
Randomize();
self.WindowState:=wsMaximized;
slFileNames:=TStringList.Create;
s:='Provider=Microsoft.Jet.OLEDB.4.0;Data Source='+
extractfilepath(application.ExeName)+'DataBase.mdb;Persist Security Info=False';
self.ADOConnection1.ConnectionString:=s;
self.ADOConnection1.Connected:=true;
self.ADOTable1.TableName:='DocsClass';
self.ADOTable1.Active:=true;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -