📄 frmmain.frm
字号:
'------------------------------------------------
WordsRetrieve tmp
'------------------------------------------------
TimeUsed = CStr(Round(Timer - StartTime, 2))
ProgressBar1.Visible = Not ProgressBar1.Visible
StsBar1.Visible = Not StsBar1.Visible
frmMain.Caption = "取词完成"
DoEvents: Me.Refresh
KeyArray = dict.Keys: ItemArray = dict.Items
ListView1.ListItems.Clear
For i = 0 To dict.Count - 1
Set ItemX = ListView1.ListItems.Add(, , KeyArray(i))
ItemX.SubItems(1) = ItemArray(i)
ItemX.SubItems(2) = Format(ItemArray(i), "0000")
Next
With ListView1
.SortKey = 2
.SortOrder = lvwDescending
.Sorted = True
End With
If dict.Count < printMax Then printMax = dict.Count
For i = 1 To printMax
OutputS = OutputS & ListView1.ListItems(i).Text & vbTab & ListView1.ListItems(i).SubItems(1) & vbCrLf
Next i
rtext1.Text = OutputS
With StsBar1
.Panels(1).Text = "文长" & ProgressBar1.Max & "个字符"
.Panels(2).Text = "找到" & dict.Count & "个词汇"
.Panels(3).Text = "用时" & TimeUsed & "秒"
End With
End Sub
Private Sub picDrag_MouseDown(Button As Integer, Shift As Integer, X As Single, Y As Single)
' start dragging with left button
If Button <> 1 Then Exit Sub
picDrag.Visible = True
' use trick so vertical movement is not visible
picDrag.Height = 32000
picDrag.Top = -10000
picDrag.Drag
End Sub
Private Sub dir1_DragDrop(Source As Control, X As Single, Y As Single)
rtext1.Left = X
Layout
End Sub
Private Sub file1_DragDrop(Source As Control, X As Single, Y As Single)
rtext1.Left = X
Layout
End Sub
Private Sub rtext1_DragDrop(Source As Control, X As Single, Y As Single)
'X对应的是相对于目标控件rText中的坐标,所有应该转换到绝对坐标
rtext1.Left = X + rtext1.Left
Layout
End Sub
Private Sub Layout() '以rText1.left为标准对齐给控件
On Error Resume Next
With rtext1
.Move .Left, 0, ScaleWidth - .Left, ScaleHeight
Dir1.Move 0, 0, .Left, ScaleHeight / 2
File1.Move 0, ScaleHeight / 2, .Left, ScaleHeight / 2
picDrag.Move .Left - 25, 0, 50, ScaleHeight
ListView1.Move .Left, .Top, .Width, .Height
End With
With picDrag
.MousePointer = 9 'ccSizeEW ' to show resizing cursor
.BackColor = BackColor ' don't show this control, only the mouse cursor
End With
With StsBar1
ProgressBar1.Move .Left, .Top, .Width, .Height
End With
End Sub
Function IsChinese(CharX As String) As Boolean
Dim i&
i = Asc(CharX)
If i < 0 Then
Select Case i
Case -24251 To -23400 '中文标点的范围
IsChinese = False
Case Else
IsChinese = True
End Select
Else
Select Case i
Case 65 To 90, 97 To 122 '(A-Z,a-z) ;(0-9) 48 To 57,
IsChinese = True
Case Else
IsChinese = False
End Select
End If
End Function 'Case ",", "。", ":", ";", "!", "?", "……", "”", "“", "‘", "’", "、", "-"
Sub WordsRetrieve(ByVal fName As String)
Const conWordLenMax = 10
Dim Sall$, l As String * 2 'L 为文章的头两个字符
Dim Lfirst As String * 1, Llast As String * 1
Dim Counts&, WordLen&
Dim NewWord$, iNext&
Dim StartTime As Single
Dim TotalLen&, i& 'i 计循环次数
'开始取词
Set ts = fso.OpenTextFile(fName, ForReading)
dict.RemoveAll
Sall = ts.ReadAll
Sall = UCase(Sall)
TotalLen = Len(Sall) 'Sall 最多可容纳近20亿个字符,totallen最大值为2亿,而一般文档的字数在几万到几百万之间,所以不会超出范围
ProgressBar1.Max = TotalLen
StartTime = Timer
'UserList
'----------------------------------------------------------------
Dim A
A = UserList.Keys
For i = 0 To UBound(A)
NewWord = A(i)
iNext = InStr(Sall, NewWord)
If iNext <> 0 Then
Counts = 1
Do
iNext = InStr(iNext + Len(NewWord), Sall, NewWord)
If iNext = 0 Then
Exit Do
Else
Counts = Counts + 1
End If
Loop
'将新词加入词典
dict.Add NewWord, Counts
'删除已取出的词汇
Sall = Replace(Sall, NewWord, " ")
End If
' If i Mod 10 = 0 Then DoEvents
Next
If Chk Then Exit Sub
'常规处理
'-------------------------------------------------------------------
i = 1
Do While Len(Sall) > 3 '因为要构成2个词至少需要4字符
l = Left(Sall, 2)
Lfirst = Left(l, 1): Llast = Right(l, 1)
If IsChinese(Lfirst) = False Or StopList.Exists(Lfirst) Then '说明如果为"。计",则把。删除,使"计"为首字符
Sall = Right(Sall, Len(Sall) - 1)
ElseIf IsChinese(Llast) = False Then '如果是"计。"或" ?",则把指针移到。后面的字符
Sall = Right(Sall, Len(Sall) - 2)
Else
iNext = InStr(3, Sall, l) '起步为3,即指从紧跟其后的字符找起
'如有匹配,找出最长词
If iNext <> 0 Then
WordLen = 0
Do
WordLen = WordLen + 1
NewWord = Left(Sall, 2 + WordLen)
If NewWord <> Mid(Sall, iNext, 2 + WordLen) Or IsChinese(Right(NewWord, 1)) = False Then Exit Do
Loop
If WordLen < conWordLenMax Then
NewWord = Left(NewWord, Len(NewWord) - 1) '得到最长词
'过滤掉虚词
If StopList.Exists(NewWord) Then
Sall = Replace(Sall, NewWord, " ")
ElseIf dict.Exists(NewWord) = False Then '如果是新词
'找出词频
Counts = 2 '初值为2次
Do
iNext = InStr(iNext + Len(NewWord), Sall, NewWord)
If iNext = 0 Then
Exit Do
Else
Counts = Counts + 1
End If
Loop
'将新词加入词典
dict.Add NewWord, Counts
'删除已取出的词汇
Sall = Replace(Sall, NewWord, " ")
Else: Sall = Right(Sall, Len(Sall) - 1) '处理由于前面的删词,造成的误配
End If
Else 'WordLen >= conWordLenMax
Sall = Replace(Sall, NewWord, " ", , 1)
End If
Else: Sall = Right(Sall, Len(Sall) - 1) '处理双字在全文中没有匹配的情形
End If
End If
i = i + 1
If i Mod 20 = 0 Then
ProgressBar1.Value = TotalLen - Len(Sall)
DoEvents
End If
Loop
ProgressBar1.Value = ProgressBar1.Max
End Sub
Private Sub StatusBar_Initialize()
Dim index&
For index = 1 To 2 '第一个面板已存在。
Set pnlX = StsBar1.Panels.Add()
Next index
'改变所有面板的 AutoSize。
For index = 1 To 3
StsBar1.Panels(index).AutoSize = sbrSpring
Next index
With StsBar1
.Panels(1).Text = "总字数"
.Panels(2).Text = "词数"
.Panels(3).Text = "用时"
End With
End Sub
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -