⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 unit1.~pas

📁 整个实验是在Windows环境下使用delphi完成的。选取了600篇文档
💻 ~PAS
📖 第 1 页 / 共 2 页
字号:
unit Unit1;

interface

uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, StdCtrls, ExtCtrls, ComCtrls,strutils, DB, ADODB, ToolWin;

type
    TForm1 = class(TForm)
    Panel1: TPanel;
    Splitter1: TSplitter;
    StatusBar1: TStatusBar;
    Memo1: TMemo;
    Memo2: TMemo;
    ADOConnection1: TADOConnection;
    ADOTable1: TADOTable;
    ToolBar1: TToolBar;
    btnReverse: TToolButton;
    btnNaive: TToolButton;
    procedure FormCreate(Sender: TObject);
    procedure FormDestroy(Sender: TObject);
    procedure btnReverseClick(Sender: TObject);
    procedure btnNaiveClick(Sender: TObject);
  private
    { Private declarations }
  public
    { Public declarations }
  end;
  procedure FindAll(Path,FileExt:String;Recursive:boolean;Const FoundResult:TStrings);
  procedure FindSubDir(Path:string;Recursive:boolean;Const FoundResult:TStrings);
  procedure RenameFileExt(FileName,FileExt:string);
  function GetWordParents(strWord,strDoc:string;ClassIndex:integer):double;
  function GetWordRelProb(strWord,strDoc:string):double;
  function GetFileNameWithExt(FileName,FileExt:string):string;
  function InText(const ASubText,AText:string):integer;//求子串位置,忽略大小写
  function getHtmlText(filename:string):string;//取网页文本
  function DelRepeatWords(words:string):string;

var
  Form1: TForm1;
  slFileNames:TStringList;
  DocsClass:array[0..600] of integer;//文档所属类别

implementation

{$R *.dfm}

function InText(const ASubText,AText:string):integer;//求子串位置,忽略大小写
begin
  Result:=AnsiPos(AnsiUppercase(ASubText), AnsiUppercase(AText));
end;

//遍历文件夹中的子文件夹名
procedure FindSubDir(Path:string;Recursive:boolean;Const FoundResult:TStrings);
var
  sr:TSearchRec;
  fr:Integer;
begin
  if copy(path,length(path),1)<>'\' then
    path:=path+'\';
  fr:=FindFirst(Path+'*.*',faDirectory,sr);
  while fr=0 do
  begin
    if (sr.Attr=faDirectory)and(sr.Name<>'.')and(sr.Name<>'..') then
    begin
      if Recursive then
        FindSubDir(path+sr.Name,Recursive,FoundResult);
      FoundResult.Add(path+sr.Name);
    end;
    fr:=FindNext(sr);
  end;
  FindClose(sr);
end;

//遍历文件夹中的文件名
procedure FindAll(Path,FileExt:String;Recursive:boolean;Const FoundResult:TStrings);
var
  sr:TSearchRec;
  fr:Integer;
begin
  if copy(path,length(path),1)<>'\' then
    path:=path+'\';
  if Recursive then
    fr:=FindFirst(Path+'*.*',faAnyFile,sr)
  else
    fr:=FindFirst(Path+FileExt,faAnyFile,sr);
  while fr=0 do
  begin
    if (sr.Attr=faDirectory)and(sr.Name<>'.')and(sr.Name<>'..') then
      if Recursive then
        FindAll(path+sr.Name,FileExt,Recursive,FoundResult);
    if FileExt='*.*' then
      FoundResult.Add(path+sr.Name)
    else
    begin
      if AnsiCompareText(ExtractFileExt(FileExt),ExtractFileExt(sr.Name))=0 then
        FoundResult.Add(path+sr.Name);
    end;
    fr:=FindNext(sr);
  end;
  FindClose(sr);
end;

//改文件扩展名(FileExt新扩展名必须包括.如".htm")
procedure RenameFileExt(FileName,FileExt:string);
var s,t:string;
begin
    if Not FileExists(FileName) then exit;
    s:=extractfileName(fileName);
    t:=extractfileExt(s);
    s:=copy(s,1,length(s)-length(t));
    s:=s+FileExt;
    RenameFile(FileName,ExtractFilepath(FileName)+s);
end;

//获取词强关联
function GetWordParents(strWord,strDoc:string;ClassIndex:integer):double;
var i,j,k:integer;
    aDoc:string;
    Count1,Count2,Count3:integer;
    WordIndex1,WordIndex2:integer;
begin
    result:=1.0;
    if form1.ADOTable1.Active then
        form1.ADOTable1.Active:=false;
    form1.ADOTable1.TableName:='WordsTable';
    form1.ADOTable1.Active:=true;
    if form1.ADOTable1.IsEmpty then
    begin
        form1.ADOTable1.Active:=false;
        result:=0.0;
        exit;
    end;
    form1.ADOTable1.First;
    Count1:=0;
    Count2:=0;
    k:=1;
    while not form1.ADOTable1.Eof do
    begin
        if AnsiCompareText(form1.ADOTable1.FieldByName('Words').AsString,strWord)<=0 then
        begin
            form1.ADOTable1.Next;
            inc(k);
            continue;
        end;
        aDoc:=form1.ADOTable1.FieldByName('Docs').AsString;
        if ansicompareText(aDoc[1],strDoc[1])=0 then
            break;
        Count3:=0;
        for i:=1 to length(strDoc) do
        begin
            if strDoc[i]=',' then continue;
            for j:=1 to length(ADoc) do
            begin
                if aDoc[j]=',' then continue;
                if strDoc[i]=aDoc[j] then
                begin
                    inc(Count3);
                    break;
                end;
            end;
        end;
        if (count3>Count1) then
        begin
            Count1:=Count3;
            WordIndex1:=K;
        end;
        if (count3>Count2) then
        begin
            Count2:=Count3;
            WordIndex2:=K;
        end;
        form1.ADOTable1.Next;
        inc(K);
    end;
    form1.ADOTable1.Active:=false;
end;

function GetWordRelProb(strWord,strDoc:string):double;
var i,j,k:integer;
    aDoc:string;
    Count1,Count2,Count3:integer;
    WordIndex1,WordIndex2:integer;
begin
    result:=0.0001;
    if form1.ADOTable1.Active then
        form1.ADOTable1.Active:=false;
    form1.ADOTable1.TableName:='WordsTable';
    form1.ADOTable1.Active:=true;
    if form1.ADOTable1.IsEmpty then
    begin
        form1.ADOTable1.Active:=false;
        result:=0.0001;
        exit;
    end;
    form1.ADOTable1.First;
    Count1:=0;
    Count2:=0;
    k:=1;
    while not form1.ADOTable1.Eof do
    begin
        if form1.ADOTable1.FieldByName('Words').AsString=strWord then
        begin
            form1.ADOTable1.Next;
            inc(k);
            continue;
        end;
        aDoc:=form1.ADOTable1.FieldByName('Docs').AsString;
        if ansicompareText(aDoc[1],strDoc[1])>=0 then
            break; 
        Count3:=0;
        for i:=1 to length(strDoc) do
        begin
            if strDoc[i]=',' then continue;
            for j:=1 to length(ADoc) do
            begin
                if aDoc[j]=',' then continue;
                if strDoc[i]=aDoc[j] then inc(Count3);
            end;
        end;
        if (count3>Count1) then
        begin
            Count1:=Count3;
            WordIndex1:=K;
        end;
        if (count3>Count2) then
        begin
            Count2:=Count3;
            WordIndex2:=K;
        end;
        form1.ADOTable1.Next;
        inc(K);
    end;
    form1.ADOTable1.Active:=false;
end;

//计算改了扩展名的文件名
function GetFileNameWithExt(FileName,FileExt:string):string;
var s,t:string;
begin
    if Not FileExists(FileName) then exit;
    s:=extractfileName(fileName);
    t:=extractfileExt(s);
    s:=copy(s,1,length(s)-length(t));
    s:=extractfilepath(FileName)+s+FileExt;
    result:=s;
end;

//取网页文本
function getHtmlText(filename:string):string;
var sl:TstringList;
    doctext,newtext:string;
    i:integer;
    enterBracket:boolean;
begin
    sl:=TStringList.Create;
    sl.LoadFromFile(filename);
    doctext:=sl.Text;
    enterBracket:=false;
    for i:=1 to length(doctext) do
    begin
        if doctext[i]='<' then enterBracket:=true;
        if not enterBracket then newtext:=newtext+doctext[i];
        if doctext[i]='>' then
        begin
            newtext:=newtext+' ';
            enterBracket:=false;
        end;
    end;
    Result:=newtext;
    sl.Free;
end;

procedure TForm1.FormCreate(Sender: TObject);
var s:string;
begin
//    if not directoryexists(extractfilepath(application.ExeName)+'Data') then
//    begin
//        showmessage('没有训练集目录Data');
//        application.Terminate;
//    end;
    Randomize();
    self.WindowState:=wsMaximized;
    slFileNames:=TStringList.Create;
    s:='Provider=Microsoft.Jet.OLEDB.4.0;Data Source='+
        extractfilepath(application.ExeName)+'DataBase.mdb;Persist Security Info=False';
    self.ADOConnection1.ConnectionString:=s;
    self.ADOConnection1.Connected:=true;
    self.ADOTable1.TableName:='DocsClass';
    self.ADOTable1.Active:=true;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -