; File:       imgprcs5.asm
; Collection: The Art of Assembly Language Programming
; Language:   ASM
;
; IMGPRCS5.ASM
;
; An image processing program (Fourth optimization pass).
;
; This program blurs an eight-bit grayscale image by averaging a pixel
; in the image with the eight pixels around it.  The average is computed
; by (CurCell*8 + other 8 cells)/16, weighting the current cell by 50%.
;
; Because of the size of the image (almost 64K), the input and output
; matrices are in different segments.
;
; Version #1: Straight-forward translation from Pascal to Assembly.
;
; Version #2: Three major optimizations. (1) used movsd instruction rather
;	      than a loop to copy data from DataOut back to DataIn.
;	      (2) Used repeat..until forms for all loops.  (3) unrolled
;	      the innermost two loops (which is responsible for most of
;	      the performance improvement).
;
; Version #3: Used registers for all variables. Set up segment registers
;	      once and for all through the execution of the main loop so
;	      the code didn't have to reload ds each time through.  Computed
;	      index into each row only once (outside the j loop).
;
; Version #4: Eliminated copying data from DataOut to DataIn on each pass.
;	      Removed hazards.  Maintained common subexpressions. Did some
;	      more loop unrolling.
;
; Version #5: Converted data arrays to words rather than bytes and operated
;	      on 16-bit values.  Yielded minimal speedup.
;
;	Performance comparisons (66 MHz 80486 DX/2 system).
;
;	This code-	       2.4 seconds.
;	3rd optimization pass- 2.5 seconds.
;	2nd optimization pass-	 4 seconds.
;	1st optimization pass-	 6 seconds.
;	Original ASM code-	36 seconds.
;	Borland Pascal v7.0-	45 seconds.
;	Borland C++ v4.02-	29 seconds.
;	Microsoft C++ v8.00-	21 seconds.

		.xlist
		include 	stdlib.a
		includelib	stdlib.lib
		.list
		.386
		option		segment:use16



; dseg - the program's default data segment (ds is pointed here at
; start-up and again whenever a named variable has to be reached).

dseg		segment	para public 'data'

; Byte-per-pixel image buffer: 251 rows of 256 bytes.  The input file
; is read into it and the finished image is written out from it.
ImgData		byte	251 dup (256 dup (?))

; Zero-terminated file names handed to DOS in ds:dx (open / create).
InName		byte	"roller1.raw",0
OutName		byte	"roller2.raw",0

; Number of blur passes still to run; set from the user's input and
; decremented after every pass.
Iterations	word	0

dseg		ends


; This code makes the naughty assumption that the following
; segments are loaded contiguously in memory!  Also, because these
; segments are paragraph aligned, this code assumes that these segments
; will contain a full 65,536 bytes.  You cannot declare a segment with
; exactly 65,536 bytes in MASM.  However, the paragraph alignment option
; ensures that the extra byte of padding is added to the end of each
; segment.

; Word buffer 1, first part: receives the first 32,768 pixels (one word
; per pixel) when the image is widened.
DataSeg1	segment	para public 'ds1'
Data1a		byte	65535 dup (?)
DataSeg1	ends

; Word buffer 1, second part: receives the remaining (251*256)-32768
; pixels.  The code addresses it as a continuation of DataSeg1.
DataSeg2	segment	para public 'ds2'
Data1b		byte	65535 dup (?)
DataSeg2	ends

; Word buffer 2, first part: initialised with the same widened pixels as
; DataSeg1 and used as the other side of the ping-pong between passes.
DataSeg3	segment	para public 'ds3'
Data2a		byte	65535 dup (?)
DataSeg3	ends

; Word buffer 2, second part: initialised with the same widened pixels
; as DataSeg2.  The code addresses it as a continuation of DataSeg3.
DataSeg4	segment	para public 'ds4'
Data2b		byte	65535 dup (?)
DataSeg4	ends




cseg		segment	para public 'code'
		assume	cs:cseg, ds:dseg

;-----------------------------------------------------------------------
; Main - program entry point (16-bit real-mode DOS, UCR stdlib macros).
;
;  1. Reads the 256x251 eight-bit image named by InName into ImgData
;     and closes the input file.
;  2. Prompts for an iteration count; quits if it is <= 0.
;  3. Widens every pixel to a word and stores it in BOTH word buffers
;     (DataSeg1/DataSeg2 and DataSeg3/DataSeg4).  The blur never writes
;     the border rows/columns, so both buffers need a valid copy.
;  4. Blurs "Iterations" times, alternating buffer 1 -> buffer 2 and
;     buffer 2 -> buffer 1 so no copy-back is needed between passes.
;  5. Narrows the final buffer back to bytes in ImgData and writes it
;     to the file named by OutName.
;
; Each output pixel is ([i,j]*8 + the eight surrounding pixels) / 16.
; A row is 256 words = 200h bytes, hence the +/-200h displacements.
;
; Register roles inside every jloopN (two output pixels per trip):
;	si	byte offset of [i,j]; ds: = source buffer, es: = dest
;	dx	running sum for pixel [i,j]
;	bp	running sum for pixel [i,j+1]
;	ax	copy of [i,j] (it is a neighbour of [i,j+1])
;	bx	[i-1,j]+[i-1,j+1]+[i+1,j]+[i+1,j+1] (shared by both sums)
;	di	[i-1,j+2]+[i,j+2]+[i+1,j+2] (needed by [i,j+1] only)
;	ch	trips left in this row		cl	rows left
;
; The instruction order inside the jloops is hand-scheduled; do not
; reorder it casually.  The code relies on DataSeg1..DataSeg4 being
; loaded contiguously, 64K apart.
;-----------------------------------------------------------------------

Main		proc
		mov	ax, dseg
		mov	ds, ax
		meminit

		mov	ax, 3d00h	;Open input file for reading.
		lea	dx, InName
		int	21h
		jnc	GoodOpen
		print
		byte	"Could not open input file.",cr,lf,0
		jmp	Quit

; Read the whole image into ImgData (one byte per pixel).  The data is
; widened into the word buffers further down.

GoodOpen:	mov	bx, ax		;File handle.
		lea	dx, ImgData
		mov	cx, 256*251	;Size of data file to read.
		mov	ah, 3Fh
		int	21h

; The input file is no longer needed whether or not the read worked, so
; close it now.  The close call changes ax, so the byte count (or DOS
; error code) from the read is kept on the stack meanwhile.

		push	ax
		mov	ah, 3eh		;Close input file (handle in bx).
		int	21h
		pop	ax

		cmp	ax, 256*251	;See if we read the data.
		je	GoodRead
		print
		byte	"Did not read the file properly",cr,lf,0
		jmp	Quit

GoodRead:	print
		byte	"Enter number of iterations: ",0
		getsm
		atoi
		free
		mov	Iterations, ax
		cmp	ax, 0
		jle	Quit		;Nothing to do for zero/negative.

		printf
		byte	"Computing Result for %d iterations",cr,lf,0
		dword	Iterations



; Copy the data and expand it from eight bits to sixteen bits.
; The first loop handles the first 32,768 bytes, the second loop
; handles the remaining bytes.  Every word is stored twice: through
; fs:[di] into buffer 2 and through stosw (es:[di]) into buffer 1.

		cld				;lods/stos must count upward.

		mov	ax, DataSeg1
		mov	es, ax
		mov	ax, DataSeg3
		mov	fs, ax

		mov	ah, 0			;ax = zero-extended pixel.
		mov	cx, 32768
		lea	si, ImgData
		xor	di, di			;Output data is at ofs zero.
CopyLoop:	lodsb
		mov	fs:[di], ax		;Before stosw: it bumps di.
		stosw
		dec	cx
		jne	CopyLoop

		mov	di, DataSeg2		;di as scratch keeps ah = 0.
		mov	es, di
		mov	di, DataSeg4
		mov	fs, di
		mov	cx, (251*256) - 32768
		xor	di, di
CopyLoop1:	lodsb
		mov	fs:[di], ax
		stosw
		dec	cx
		jne	CopyLoop1

; hloop completes one iteration on the data moving it from Data1a/Data1b
; to Data2a/Data2b

hloop:		mov	ax, DataSeg1
		mov	ds, ax
		mov	ax, DataSeg3
		mov	es, ax

; Process rows 1..127 (the rows that start inside the first segment).
; NOTE: for row 127 the [si+200h] family of operands wraps around the
; end of the 64K segment, so that row's result is not final here; the
; second loop below recomputes row 127 and overwrites it.

		mov	cl, 127
		lea	si, Data1a+202h		;Start at [1,1]
iloop0:		mov	ch, 254/2		;# of times through loop.
jloop0:		mov	dx, [si]		;[i,j]
		mov	bx, [si-200h]		;[i-1,j]
		mov	ax, dx
		shl	dx, 3			;[i,j] * 8
		add	bx, [si-1feh]		;[i-1,j+1]
		mov	bp, [si+2]		;[i,j+1]
		add	bx, [si+200h]		;[i+1,j]
		add	dx, bp
		add	bx, [si+202h]		;[i+1,j+1]
		add	dx, [si-202h]		;[i-1,j-1]
		mov	di, [si-1fch]		;[i-1,j+2]
		add	dx, [si-2]		;[i,j-1]
		add	di, [si+4]		;[i,j+2]
		add	dx, [si+1feh]		;[i+1,j-1]
		add	di, [si+204h]		;[i+1,j+2]
		shl	bp, 3			;[i,j+1] * 8
		add	dx, bx
		add	bp, ax
		shr	dx, 4			;Divide by 16.
		add	bp, bx
		mov	es:[si], dx		;Store [i,j] entry.
		add	bp, di
		add	si, 4			;Affects next store operation!
		shr	bp, 4			;Divide by 16.
		dec	ch
		mov	es:[si-2], bp		;Store [i,j+1] entry.
		jne	jloop0			;Flags still from dec ch.

		add	si, 4			;Skip to start of next row.

		dec	cl
		jne	iloop0

; Process rows 127..249 (123 rows).  This requires that we switch from
; one segment to the next.  Note that the segments overlap: ds/es are
; backed up 40h paragraphs (1,024 bytes = two rows) from DataSeg2 and
; DataSeg4, so offset 0 is row 126 and si = 202h addresses [127,1].
; Row 127 is therefore redone here with row 128 properly in reach.

		mov	ax, DataSeg2
		sub	ax, 40h			;Back up to last 2 rows in DS1
		mov	ds, ax
		mov	ax, DataSeg4
		sub	ax, 40h			;Back up to last 2 rows in DS3
		mov	es, ax

		mov	cl, 251-127-1		;Remaining rows to process.
		mov	si, 202h		;[127,1] in the shifted window.
iloop1:		mov	ch, 254/2		;# of times through loop.
jloop1:		mov	dx, [si]		;[i,j]
		mov	bx, [si-200h]		;[i-1,j]
		mov	ax, dx
		shl	dx, 3			;[i,j] * 8
		add	bx, [si-1feh]		;[i-1,j+1]
		mov	bp, [si+2]		;[i,j+1]
		add	bx, [si+200h]		;[i+1,j]
		add	dx, bp
		add	bx, [si+202h]		;[i+1,j+1]
		add	dx, [si-202h]		;[i-1,j-1]
		mov	di, [si-1fch]		;[i-1,j+2]
		add	dx, [si-2]		;[i,j-1]
		add	di, [si+4]		;[i,j+2]
		add	dx, [si+1feh]		;[i+1,j-1]
		add	di, [si+204h]		;[i+1,j+2]
		shl	bp, 3			;[i,j+1] * 8
		add	dx, bx
		add	bp, ax
		shr	dx, 4			;Divide by 16
		add	bp, bx
		mov	es:[si], dx		;Store [i,j] entry.
		add	bp, di
		add	si, 4			;Affects next store operation!
		shr	bp, 4			;Divide by 16.
		dec	ch
		mov	es:[si-2], bp		;Store [i,j+1] entry.
		jne	jloop1			;Flags still from dec ch.

		add	si, 4			;Skip to start of next row.

		dec	cl
		jne	iloop1

		mov	ax, dseg		;Need ds = dseg to reach
		mov	ds, ax			; the Iterations variable.
		assume	ds:dseg

		dec	Iterations
		je	Done0			;Result is in DataSeg3/4.

; Unroll the iterations loop so we can move the data from DataSeg3/4 back
; to DataSeg1/2 without wasting extra time.  Other than the direction of the
; data movement, this code is virtually identical to the above.

		mov	ax, DataSeg3
		mov	ds, ax
		mov	ax, DataSeg1
		mov	es, ax

		mov	cl, 127			;Rows 1..127 (see note above).
		lea	si, Data1a+202h		;Offset of [1,1].
iloop2:		mov	ch, 254/2
jloop2:		mov	dx, [si]		;[i,j]
		mov	bx, [si-200h]		;[i-1,j]
		mov	ax, dx
		shl	dx, 3			;[i,j] * 8
		add	bx, [si-1feh]		;[i-1,j+1]
		mov	bp, [si+2]		;[i,j+1]
		add	bx, [si+200h]		;[i+1,j]
		add	dx, bp
		add	bx, [si+202h]		;[i+1,j+1]
		add	dx, [si-202h]		;[i-1,j-1]
		mov	di, [si-1fch]		;[i-1,j+2]
		add	dx, [si-2]		;[i,j-1]
		add	di, [si+4]		;[i,j+2]
		add	dx, [si+1feh]		;[i+1,j-1]
		add	di, [si+204h]		;[i+1,j+2]
		shl	bp, 3			;[i,j+1] * 8
		add	dx, bx
		add	bp, ax
		shr	dx, 4			;Divide by 16.
		add	bp, bx
		mov	es:[si], dx		;Store [i,j] entry.
		add	bp, di
		add	si, 4			;Affects next store operation!
		shr	bp, 4			;Divide by 16.
		dec	ch
		mov	es:[si-2], bp		;Store [i,j+1] entry.
		jne	jloop2			;Flags still from dec ch.

		add	si, 4			;Skip to start of next row.

		dec	cl
		jne	iloop2


		mov	ax, DataSeg4
		sub	ax, 40h			;Window starts at row 126.
		mov	ds, ax
		mov	ax, DataSeg2
		sub	ax, 40h
		mov	es, ax

		mov	cl, 251-127-1		;Rows 127..249.
		mov	si, 202h		;[127,1] in the shifted window.
iloop3:		mov	ch, 254/2
jloop3:		mov	dx, [si]		;[i,j]
		mov	bx, [si-200h]		;[i-1,j]
		mov	ax, dx
		shl	dx, 3			;[i,j] * 8
		add	bx, [si-1feh]		;[i-1,j+1]
		mov	bp, [si+2]		;[i,j+1]
		add	bx, [si+200h]		;[i+1,j]
		add	dx, bp
		add	bx, [si+202h]		;[i+1,j+1]
		add	dx, [si-202h]		;[i-1,j-1]
		mov	di, [si-1fch]		;[i-1,j+2]
		add	dx, [si-2]		;[i,j-1]
		add	di, [si+4]		;[i,j+2]
		add	dx, [si+1feh]		;[i+1,j-1]
		add	di, [si+204h]		;[i+1,j+2]
		shl	bp, 3			;[i,j+1] * 8
		add	dx, bx
		add	bp, ax
		shr	dx, 4			;Divide by 16.
		add	bp, bx
		mov	es:[si], dx		;Store [i,j] entry.
		add	bp, di
		add	si, 4			;Affects next store operation!
		shr	bp, 4			;Divide by 16.
		dec	ch
		mov	es:[si-2], bp		;Store [i,j+1] entry.
		jne	jloop3			;Flags still from dec ch.

		add	si, 4			;Skip to start of next row.

		dec	cl
		jne	iloop3

		mov	ax, dseg		;Need ds = dseg to reach
		mov	ds, ax			; the Iterations variable.
		assume	ds:dseg

		dec	Iterations
		je	Done2			;Result is in DataSeg1/2.
		jmp	hloop

; ax:bx = segments holding the first and second part of the final image.

Done2:		mov	ax, DataSeg1
		mov	bx, DataSeg2
		jmp	Finish

Done0:		mov	ax, DataSeg3
		mov	bx, DataSeg4
Finish:		mov	ds, ax
		print
		byte	"Writing result",cr,lf,0

; Convert data back to byte form and write to the output file:
; lodsw fetches a word pixel from ds:si, stosb keeps only its low byte.

		cld				;lods/stos must count upward.

		mov	ax, dseg
		mov	es, ax

		mov	cx, 32768
		lea	di, ImgData
		xor	si, si			;Input data is at ofs zero.
CopyLoop3:	lodsw
		stosb
		dec	cx
		jne	CopyLoop3

		mov	ds, bx			;Second part of the image.
		mov	cx, (251*256) - 32768
		xor	si, si
CopyLoop4:	lodsw
		stosb
		dec	cx
		jne	CopyLoop4


; Okay, write the data to the output file:

		mov	ah, 3ch		;Create output file.
		mov	cx, 0		;Normal file attributes.
		mov	dx, dseg
		mov	ds, dx
		lea	dx, OutName
		int	21h
		jnc	GoodCreate
		print
		byte	"Could not create output file.",cr,lf,0
		jmp	Quit

GoodCreate:	mov	bx, ax		;File handle.
		push	bx
		mov	dx, dseg	;Where the data can be found.
		mov	ds, dx
		lea	dx, ImgData
		mov	cx, 256*251	;Size of data file to write.
		mov	ah, 40h		;Write operation.
		int	21h
		pop	bx		;Retrieve handle for close.
		cmp	ax, 256*251	;See if we wrote the data.
		je	GoodWrite
		print
		byte	"Did not write the file properly",cr,lf,0
					;Fall through: the handle is still
					; open and must be closed anyway.

GoodWrite:	mov	ah, 3eh		;Close operation.
		int	21h


Quit:		ExitPgm			;DOS macro to quit program.
Main		endp

cseg		ends

; Program stack: 1024 copies of the 8-byte string "stack   " = 8,192
; bytes.  (Presumably the text fill is there so the stack area is easy
; to spot in a memory dump - confirm before relying on it.)
sseg		segment	para stack 'stack'
stk		byte	1024 dup ("stack   ")
sseg		ends

; Sixteen-byte segment declared after all the others.  Nothing in this
; file references LastBytes directly.
; NOTE(review): presumably the stdlib memory manager (meminit) uses this
; segment to locate the end of the program image - confirm against the
; stdlib documentation.
zzzzzzseg	segment	para public 'zzzzzz'
LastBytes	byte	16 dup (?)
zzzzzzseg	ends
		end	Main
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -