📄 asmcoun1.pas
字号:
{ ==========================================================================
Title: AsmCount
Programmer: David Neal Dubois
Description: Unit AsmCount provides objects for word counting.
Three objects are provided: CountObj, which counts
words in an ASCII file; SprintCountObj, which can count
words in a file created by Borland's Sprint word
processor; and AmiCountObj, designed to count words
in AmiPro files.
A word is defined as a sequence of non-separators. The
non-separators are the letters, digits, and apostrophe.
The counters for Sprint and AmiPro parse for control code
sequences. A control code is treated as a separator.
The formats in which Sprint and AmiPro store control codes
were determined empirically by David Gerrold. Sprint
brackets control codes by ASCII codes 15 and 14, (Ctrl-O and
Ctrl-N). Control codes may be nested. AmiPro indicates the
beginning of a control code by one of these three character
sequences: '<@', '<+', or '<-'. The code is terminated by
a '>'.
To use the counters, first initialize a *CountObj using the
Init constructor passing the file name as a parameter.
Then call the Count method. When the count method returns
the field WordCount will hold the number of words in the
file. Example:
var
Counter : CountObj;
begin
with Counter do
begin
Init ( 'Test' );
Count;
writeln ( 'number of words: ', WordCount );
Done;
end;
end.
The Count methods are written in assembly language for
maximum speed. Counting the words in a one megabyte ASCII
text file stored on RAM disk on a 386, 25 MHz machine takes
about 1.5 seconds.
Method: Count reads from the file in blocks of 65520 characters at
a time. As each character is processed, Count checks
a look-up table to determine whether the character is a
separator. When a non-separator is found followed by a
separator, a word is counted.
========================================================================== }
unit AsmCoun1;
interface
{ -----------------------------------------------------------------------
BlockSize - The maximum number of characters to be read from a file
at one time. This is set to 65520, the largest block
which can be allocated from the Turbo Pascal heap.
Init/Done - The Init constructor opens the file. The Done destructor
closes it.
Count - This method performs the actual word count.
InitCount - Used internally by Count. This method initializes the
look-up table, ensures the file is at its beginning
(in case Count was called twice), and allocates a block
of memory from the heap to act as a file buffer.
FiniCount - Used internally by Count. Disposes of the file buffer.
ReadBlock - Used by Count to read a block from the file. At this time
the ShowProgress method is called.
ShowProgress - Gives the user some indication of the Count method's
progress. Here it simply prints a period as each block
is read. This could be changed to produce a percentage-bar.
----------------------------------------------------------------------- }
const
{ Size in bytes of the file buffer, and the largest request passed to }
{ blockread by ReadBlock. }
BlockSize = 65520;
type
{ The file buffer: BlockSize characters, allocated on the heap by InitCount. }
BlockType = array [ 1 .. BlockSize ] of char;
BlockPtr = ^ BlockType;
CountPtr = ^ CountObj;
{ Word counter for plain ASCII files. Descendants override Count and, }
{ where needed, InitCount to handle other file formats. }
CountObj = object
{ Untyped file handle; assigned and opened by Init, closed by Done. }
TheFile : file;
{ Result of the most recent Count call; set to zero by Init. }
WordCount : longint;
constructor Init ( FileName : string );
destructor Done;
{ Count, InitCount and FiniCount are virtual so descendants can replace them. }
procedure Count; virtual;
procedure InitCount; virtual;
procedure FiniCount; virtual;
{ ReadBlock and ShowProgress are static methods shared by all descendants. }
procedure ReadBlock;
procedure ShowProgress;
end;
SprintCountPtr = ^ SprintCountObj;
{ Counter for Sprint files: replaces both the scanning loop and the table setup. }
SprintCountObj = object ( CountObj )
procedure Count; virtual;
procedure InitCount; virtual;
end;
{ Pointer to an AmiPro counter. It must point at AmiCountObj, not at the }
{ unrelated SprintCountObj sibling, so that dereferencing it yields the }
{ AmiPro object type. The forward reference is resolved by the }
{ declaration that follows in this same type section. }
AmiCountPtr = ^ AmiCountObj;
{ Counter for AmiPro files: only the scanning loop is replaced; table }
{ setup and buffer handling are inherited from CountObj. }
AmiCountObj = object ( CountObj )
procedure Count; virtual;
end;
implementation
{ -----------------------------------------------------------------------
These global variables are used internally by the Count method. Storing
these in the global data segment simplifies the assembly code which
accesses them.
Block - A pointer to the file buffer allocated by InitCount.
Table - The look-up table used to determine whether a character
is a separator.
Actual - When a block is read from a file, the actual number of bytes
which were read.
SaveBP - SprintCountObj.Count uses the BP register internally. It stores
the original value here for safe keeping.
----------------------------------------------------------------------- }
var
{ These are unit-level variables, so every counter object uses the same }
{ storage. NOTE(review): this looks like it rules out two counts being in }
{ progress at the same time - confirm against callers. }
{ Heap buffer allocated by InitCount and released by FiniCount. }
Block : BlockPtr;
{ One entry per character code: 1 = separator, 0 = part of a word. }
Table : array [ char ] of byte;
{ Number of bytes the last blockread in ReadBlock actually delivered. }
Actual : word;
{ Scratch slot for the caller's BP; not referenced by CountObj.Count. }
SaveBP : word;
{ -----------------------------------------------------------------------
ShowProgress - Let the user know that progress is being made.
----------------------------------------------------------------------- }
procedure CountObj . ShowProgress;
{ Progress hook called by ReadBlock once per block read. The body is }
{ deliberately empty: the original console output is commented out below. }
begin
{ write ( '.' ); } {Can't use this within a DLL!!! }
end;
{ -----------------------------------------------------------------------
Init - Open a binary file with a record size of 1 byte.
----------------------------------------------------------------------- }
constructor CountObj . Init ( FileName : string );
{ Open FileName as an untyped file with a record size of one byte and   }
{ clear WordCount.                                                      }
{                                                                       }
{ The counter only ever reads from the file, so the file is opened with }
{ read-only access. With the default FileMode of 2 (read/write) the     }
{ reset would fail on read-only files and read-only media. The sharing  }
{ and inheritance bits chosen by the caller are kept, and FileMode is   }
{ put back afterwards so other code in the program is not affected.     }
{                                                                       }
{ I/O errors are reported exactly as before: a run-time error under     }
{ $I+, or a pending IOResult under $I-.                                 }
var
  SavedMode : byte;
begin
  WordCount := 0;
  assign ( TheFile, FileName );
  SavedMode := FileMode;
  FileMode := SavedMode and $F8;      { access bits 0..2 = 0 : read-only }
  reset ( TheFile, 1 );
  FileMode := SavedMode;              { restore the caller's setting }
end;
{ -----------------------------------------------------------------------
Done - Close the file.
----------------------------------------------------------------------- }
destructor CountObj . Done;
{ Close the file that Init opened. The file buffer is not touched here; }
{ it is released by FiniCount. }
begin
close ( TheFile );
end;
{ -----------------------------------------------------------------------
InitCount - performs three tasks.
[1] Ensure the file pointer is set to the beginning of the
file.
[2] Allocate a file buffer from the heap.
[3] Generate the look-up table used to determine whether
a character is a separator. A separator is indicated
by a one stored in the table, while a non-separator
is indicated by a zero.
----------------------------------------------------------------------- }
procedure CountObj . InitCount;
{ Prepare for a counting pass: rewind the file, allocate the file       }
{ buffer, and build the separator look-up table. A table entry of 1     }
{ marks a separator, an entry of 0 marks a character that belongs to a  }
{ word.                                                                 }
const
  { Characters that make up words: apostrophe, digits and letters. }
  WordChars = [ '''', '0' .. '9', 'A' .. 'Z', 'a' .. 'z' ];
var
  Ch : char;
begin
  seek ( TheFile, 0 );
  new ( Block );

  { Start with every character marked as a separator ... }
  fillchar ( Table, sizeof ( Table ), 1 );

  { ... then clear the mark for the characters that form words. }
  for Ch := #0 to #255 do
    if Ch in WordChars then
      Table [ Ch ] := 0;
end;
{ -----------------------------------------------------------------------
FiniCount - Dispose of the file buffer.
----------------------------------------------------------------------- }
procedure CountObj . FiniCount;
{ Release the file buffer that InitCount allocated through the global }
{ Block pointer. }
begin
dispose ( Block );
end;
{ -----------------------------------------------------------------------
ReadBlock - Read a block from the file into the buffer. Call
ShowProgress so the user knows what's happening.
----------------------------------------------------------------------- }
procedure CountObj . ReadBlock;
{ Fetch the next chunk of the file into the buffer Block points to. }
{ Up to BlockSize bytes are requested; the number actually delivered is }
{ left in the global Actual, which Count treats as end-of-file when zero. }
begin
{ Progress hook runs before the read. }
ShowProgress;
blockread ( TheFile, Block ^, BlockSize, Actual );
end;
{ -----------------------------------------------------------------------
Count - Count the words in an ASCII text file.
Register usage: AL - indicates whether the current character is a
separator. One for separator, zero for
non-separator.
AH - indicates whether the previous character was a
separator.
BX - points to look-up table.
CX - number of characters left to be processed in
buffer.
DX:DI - 32-bit word count
ES:SI - points to next character to be processed.
----------------------------------------------------------------------- }
procedure CountObj . Count;
{ Count the words in the file and store the total in WordCount. }
{ The routine is split into two asm blocks with a Pascal call to ReadBlock }
{ between them. @CallReadBlock acts as a local subroutine: it is entered }
{ with a near call, saves registers at the end of the first asm block, }
{ lets the Pascal statement run, and then resumes in the second asm block, }
{ which either returns to the scanning loop or finishes at end of file. }
{ NOTE(review): this relies on the compiler emitting nothing between the }
{ asm blocks and the ReadBlock call that disturbs the stack - confirm }
{ before changing compiler options or adding statements there. }
var
{ Receives the 32-bit total from DX:DI at @Fini. }
TempCount : longint;
begin
InitCount;
asm
cld { Clear direction flag. }
xor DX, DX { Set word count to zero. }
mov DI, DX
mov AX, 0101h { Treat beginning of file as separators. }
call @CallReadBlock { Fill file buffer. }
@ProcessNormalChar:
mov AH, AL { Set last character separator flag. }
seges lodsb { Read a character from the buffer. }
{ xlat replaces the character in AL with its Table entry, using BX as base. }
xlat { Check look-up table. }
cmp AX, 0001h { AX is 1 if this character is a separator }
{ and the previous character isn't. }
jz @CountWord { If this is the case, count a word. }
{ loop decrements CX and falls through when the buffer is used up. }
loop @ProcessNormalChar { Process the next character. }
call @CallReadBlock { If we've processed the whole buffer then }
jmp @ProcessNormalChar { fill it again. }
@CountWord:
{ 32-bit increment of DX:DI: low word first, carry into the high word. }
add DI, 1 { Increment the word counter. }
adc DX, 0
loop @ProcessNormalChar { Process the next character. }
call @CallReadBlock
jmp @ProcessNormalChar
@CallReadBlock:
{ The four pushes here are matched by the four pops that open the next }
{ asm block. }
pushf { Before calling the ReadBlock method, }
push AX { save current state of registers. }
push DX
push DI
end; { asm }
ReadBlock; { Read a block of characters into buffer. }
asm
pop DI { Restore registers. }
pop DX
pop AX
popf
{ BX, ES:SI and CX are reloaded after every read, not preserved across it. }
mov BX, offset Table { Set BX to point to look-up table. }
les SI, [ Block ] { Load address of file buffer into ES:SI. }
mov CX, [ Actual ] { Find out number of characters in buffer. }
jcxz @EndOfFile { If there are no characters in buffer, }
{ then the end of file has been reached. }
retn { Return from CallReadBlock. }
@EndOfFile:
add SP, 2 { Pop and ignore CallReadBlock's return }
{ address. }
{ AL still holds the separator flag of the last character processed, or }
{ the initial 1 if the file was empty, so an empty file counts no word. }
cmp AL, 0 { Special consideration must be given to }
jnz @Fini { the end-of-file. If the last character }
add DI, 1 { processed was not a separator, count }
adc DX, 0 { a word. }
@Fini:
mov [ word ptr TempCount ], DI { Store the result in }
mov [ word ptr TempCount + 2 ], DX { TempCount. }
end; { asm }
FiniCount; { Dispose of file buffer. }
WordCount := TempCount; { Return word count. }
end;
{ -----------------------------------------------------------------------
The Sprint counter is similar to the ASCII counter, except that it
parses for Sprint control codes. Sprint brackets control codes by
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -