⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 optim.c

📁 window下的c编译器。
💻 C
📖 第 1 页 / 共 5 页
字号:
/*
	A peephole optimizer for lcc
	----------------------------

Section 1:
---------
Overall design.
The optimizer receives its input from the compiler as !n a!cii instruction
stream. When the function 'genasm' sees a label, it will send the contents of
the buffer to the optimizer with a call to the function 'SendToOptimizer'.
The entry point in this file is the function 'ChangeBlock', that performs the
following actions:
	1. Parses the instructions, setting flags for certain characteristics,
	setting register numbers, etc.
	2. Calls the 'Optimize block' function the scan the block for instruction
	to be changed/deleted.

The strategy of this peephole optimizer is to build a machine state, and
try to keep it coherent with the operations performed in a block. It will
try to eliminate redundant loads of registers, replace certains registers
with other free ones, replace certain sequences of instruction lcc generates
with other more efficient ones, etc. It is certainly limited to lcc, since
many of the (implicit) assumptions about the code would preclude its use
as a more general peephole optimizer.

*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <setjmp.h>
#define ANALYZER
#ifdef STANDALONE
#define DEBUGOPTIM
#include <time.h>
int vmask[] = { 1, 0 };
#endif
static int SwappedRegs;
extern int HasSwappedRegs(void);
extern int genlabel(int);
#include <assert.h>
#define MAXIDSIZE 50
/*
The 'Instruction' data structure. This is built now by the parser of
instructions, but it would naturally much more efficient to have the
characteristics of those instructions built in to the tables of the
compiler
*/
typedef struct tagInstruction {
	unsigned long Flags;
	unsigned char Name[12]; /* opcode */
	unsigned char src[MAXIDSIZE];  /* source of the data */
	unsigned char dst[MAXIDSIZE];  /* destination of the data */
	unsigned char *Start;   /* Pointer to the beginning of the instruction */
				/* in the assembler buffer */
	unsigned char *End;	/* pointer to the end of the instruction */
	unsigned short ChangedIdx; /* Index of the replace instruction in the */
				/* instruction table */
	unsigned short SrcReg;  /* Source register number */
	unsigned short DstReg;  /* destination reg number */
}Instruction;
/* instruction Flags field */
#define DELETED			0x1
#define CHANGED			0x2
#define ISCALL			0x4
#define CLOBBERS		0x8
#define ISLOCAL			0x10
#define ISARGUMENT		0x20
#define SRCDEREFERENCE	0x40
#define DSTDEREFERENCE	0x80
#define SRCOFFSET		0x100
#define DSTOFFSET		0x200
#define ISJUMP			0x400
#define ISFLOAT			0x800
#define TRUNCATED		0x1000
#define ISMOVL			0x2000
#define ISMOVSB			0x4000
#define	ISGENERICMOVE	0x8000
#define ISMOVSWL		0x10000
#define ISMOVSBL		0x20000
#define ISLEAL			0x40000
#define ISCOMPARE		0x80000
#define ISREGSRC		0x100000
#define ISREGDST		0x200000
#define ADDED			0x400000
#define STACKADJUST		0x800000

/*
The instruction table contains two parts:
1. The instructions that belong to the block being analyzed now.
2. The changed instructions.

Each changed instruction contains the index of the position of the changed
instruction contents in its ChangedIndex field, set by the function
'GetNextChangedInstruction'
*/
/* Number of instructions in the instruction table */
#define MAXINSTRUCTION 1600
/* Register numbers */
#define EAX		1
#define EBX		2
#define ECX		3
#define	EDX		4
#define ESI		5
#define EDI		6
#define EBP		7
#define	ESP		8
#define	AX		9
#define	BX		10
#define CX		11
#define DX		12
#define SI		13
#define DI		14
#define BP		15
#define SP		16
#define MM0		17
#define MM1		18
#define	MM2		19
#define MM3		20
#define	MM4		21
#define	MM5		22
#define	MM6		23
#define	MM7		24

static unsigned char *RegisterNames[] = {
	"NULL",
	"eax",
	"ebx",
	"ecx",
	"edx",
	"esi",
	"edi",
	"ebp",
	"esp"
	"mm0",
	"mm1",
	"mm2",
	"mm3",
	"mm4",
	"mm5",
	"mm6",
	"mm7"
};
int ediIsSaved=1,esiIsSaved=1;
static Instruction InstructionTable[MAXINSTRUCTION];
static int InstructionIndex = 0;
static int recursion = 0;
static jmp_buf bailout;
static int isByteValue(char *);
typedef struct tagbasicBlock {
	char Label[25];
	int calls;
	int NrOfInstructions;
	char Registers[SP+1];
	int Flags;
	unsigned short FirstCall; /* Index of the first 'call' instruction */
	unsigned short LastCall;  /* Index of the last 'call' instruction */
	unsigned short ChangedCounter;
	unsigned short deleted;
}BasicBlock;
extern int GetLastFunctionLabel(void);
static unsigned char *Nullst = "NULL";
#define MOVL_CODE	1819701101
#define JMP_CODE	7368042
#define JNE_CODE	6647402
#define ADDL_CODE	1818518625
#define PUSH_CODE	1752397168
#define CMPL_CODE	1819307363
#define REP_CODE	7366002
#define XORL_CODE	1819438968
#define XOR_CODE	7499640
#define ADD_CODE	6579297
#define ADDL_CODE	1818518625
#define SUB_CODE	6452595
#define SUBL_CODE	1818391923
#define MOVB_CODE	1651928941
#define MOVW_CODE	2004250477
#define INCL_CODE	1818455657
#define SARL_CODE	1819435379
#define ANDL_CODE	1818521185
#define ORL_CODE	7107183
#define RET_CODE	7628146
#define MOVS_CODE	1937141613
#define SHLL_CODE	1819043955
#define SHRL_CODE	1819437171
#define PUSH_CODE	1752397168
/*
We keep the state of the machine in this character vector.
Each position points towards a character string that
indicates the contents of the register.
*/
static unsigned char *State[25];
static int deleted = 0,redundant = 0;
static int lastLabel = 0;
static int hasRegisterVariables;
/*
The peephole optimizer can be compiled as a standalone module. It
will emit very verbose output...
*/
#ifdef DEBUGOPTIM
static unsigned char *buffer;
#define Printf printf
#define Printf1(a) printf(a)
#define Printf2(a,b) printf(a,b)
#define Printf3(a,b,c) printf(a,b,c)
extern int hasAllocA;
#else
#define Printf1(a)
#define Printf2(a,b)
#define Printf3(a,b,c)
extern int hasAllocA;
#endif
extern unsigned char *FindStringConstant(unsigned char *name);
extern int RegisterVariablesMask;
extern int ispow2(int);
#ifdef STANDALONE
int hasAllocA = 0;
main(int argc,char *argv[])
{
	FILE *f;
	int l,nl;
	unsigned char *newb;

	f = fopen(argv[1],"r");
	if (f == NULL) {
		fprintf(stderr,"Impossible to open %s\n",argv[1]);
		return(1);
	}
	fseek(f,0,SEEK_END);
	l = ftell(f);
	fseek(f,0,SEEK_SET);
	buffer = malloc(l+10);
	memset(buffer,0,l+10);
	l = fread(buffer,1,l,f);
	fclose(f);
	Printf("Optimizing %s\n",argv[1]);
	nl = ScanAsm(buffer,l,&newb);
	Printf("%d instructions deleted,redundant %d\n",deleted,redundant);
	f = fopen("output.out","w");
	fwrite(newb,1,nl,f);
	fclose(f);
}
static int genlabel(int n)
{
	static int label = 99999;

	label += n;
	return label - n;
}

#endif

/*
This routine searches the InstructionTable for an index where
it will put the next changed instruction. Those instructions
start at the end of the basic block.
*/
static Instruction *GetNextChangedInstruction(Instruction *Ins,BasicBlock *bb)
{
	Instruction *insDst;

	if (bb->ChangedCounter  >= MAXINSTRUCTION) {
		warning("Function too big for the optimizer\n");
		longjmp(bailout,1);
	}
	Ins->Flags |= CHANGED;
	Ins->ChangedIdx = bb->ChangedCounter;
	insDst = &InstructionTable[bb->ChangedCounter];
	bb->ChangedCounter++;
	memset(insDst,0,sizeof(Instruction));
	return(insDst);
}
static Instruction *ChangeInstruction(Instruction *ins,BasicBlock *bb,unsigned char *name,unsigned char *src,unsigned char *dst)
{
	Instruction *result;

	result = GetNextChangedInstruction(ins,bb);
	strcpy(result->Name,name);
	strcpy(result->src,src);
	strcpy(result->dst,dst);
	return(result);
}

#ifdef STANDALONE
/*
This routine searches the start of the basic block. It looks
for a label, that should start with the '_$' prefix. Change
this if the prefix is different.
*/
static unsigned char *FindBeginBlock(unsigned char *bp,BasicBlock *bb)
{
	if (*bp == '_' && bp[1] == '$') {
		unsigned char *p;
		p = bp+2;
		while (*p && *p != '\n' && *p != ':') p++;
		if (*p == ':') goto retgood;
		if (*p == 0) return(NULL);
	}
	while (*bp) {
		if (bp[0] == '\n' && bp[1] == '_') {
			bp++;
			if (bp[1] == '$') {
				int i;
retgood:
				i = 0;
				while (*bp && *bp != '\n') bb->Label[i++] = *bp++;
				if (*bp == '\n') bp++;
				return(bp);
			}
			else {
				unsigned char *p = bp;

				while (*p && *p != '\n') {
					if (*p == ':') {
						goto retgood;
					}
					else if (*p == ',') {
						bp = p;
						break;
					}
					p++;
				}
			}
		}
		else if (*bp == '.' && bp[1] == 'd' && bp[2] == 'a' && bp[3] == 't') {
scanfortext:
			bp++;
			for (;;) {
				while (*bp && *bp != '.')
					bp++;
				if (*bp == '.' && !strncmp(bp,".text",5))
					break;
				else if (*bp == 0) break;
				else if (*bp == '.') bp++;
			}
			if (bp == NULL) return(NULL);
			if (*bp == 0) return(NULL);
		}
		else if (*bp == '.' && bp[1] == 'b' && bp[2] == 's' && bp[3] == 's')
			goto scanfortext;
		else bp++;
	}
	return(NULL);
}
#endif
/* Converts a register name into a register number */
static int GetRegNumber(unsigned char *name)
{
	register unsigned char *n = name;
	if (*n == 'e') {
		n++;
		switch (*n++) {
			case 'a': return(EAX);
			case 'b': return (*n == 'x')? EBX : EBP;
			case 'c': return(ECX);
			case 'd': return( *n == 'x')? EDX : EDI;
			case 's': return (*n == 'i') ? ESI : ESP;
		}
		fprintf(stderr,"bad register name %c%c%c\n",name[0],name[1],name[2]);
		exit(1);
	}
	else if (*n == 'm' && n[1] == 'm') {
		return MM0 + n[2] - '0';
	}
	else {
		char tmp[3];
		tmp[0] = 'e';
		tmp[1] = *n++;
		tmp[2] = (*n == 'l') ? 'x' : *n;
		return(ESP+GetRegNumber(tmp));
	}
	return(0);
}

/*
Parser for one instruction. It separates the instruction into
opcode (Name field), source and destination.
*/
static unsigned char *AddNextInstruction(unsigned char *bp,BasicBlock *bb,unsigned char **pp)
{
	unsigned char *p,*start,*op;
	int i,regcount,firstCodeChar;
	Instruction *ins;

	start = p = bp;
	if (*p == '_' && p[1] == '$') {
		*pp = bp;
		return(NULL);
	}
	if (*bp == 1) {
		while (*bp == 1) bp++;
	}
	if (*bp == '\t') bp++;
	if (*bp == ';')
		goto skipline;
	if (*bp == '.') {
		if (!strncmp(bp,".data",5) || !strncmp(bp,".bss",4)) {
			bp = strstr(bp,".text");
			*pp = bp;
			return(bp);
		}
skipline:
		while (*bp && *bp != '\n') bp++;
		if (*bp == '\n') bp++;
		*pp = bp;
		if (*bp == 0) return(NULL);
		return(bp);
	}
	if (InstructionIndex < MAXINSTRUCTION-50) {
		ins = &InstructionTable[InstructionIndex];
		InstructionIndex++;
		memset(ins,0,sizeof(Instruction));
	}
	else {
		warning("Function too big for the optimizer\n");
		longjmp(bailout,1);
	}
	i = 0;
	firstCodeChar = *bp;
	if (firstCodeChar == 'j')
		ins->Flags |= ISJUMP;
	else if (firstCodeChar == 'f')
		ins->Flags |= ISFLOAT;
	op = ins->Name;
	while (*bp >= 'a' && *bp <= 'z') {
			op[i++] = *bp++;
	}
	if (i > 8) {
		op[i] = 0;
		fprintf(stderr,"overflow in instruction name %s ",ins->Name);
		while (*bp && *bp != '\n') fprintf(stderr,"%c",*bp++);
		fprintf(stderr,"\n");
		exit(1);
	}
	ins->Start = start;
	bb->NrOfInstructions++;
	op[i] = 0;
	if (i > 3 && firstCodeChar == 'm' && op[1] == 'o' &&
		op[2] == 'v') {
		if (op[3] == 'l') ins->Flags |= ISMOVL;
		else if (op[3] == 's' && op[4] == 'b' && op[5] == 0) {
			ins->Flags |= ISMOVSB;
		}
		else if (op[3] == 's' && (op[4] == 'w' || op[4] == 'b')
			&& op[5] == 'l') {
			if (op[4] == 'w') ins->Flags |= ISMOVSWL;
			else ins->Flags |= ISMOVSBL;
		}
		ins->Flags |= ISGENERICMOVE;
	}
	else if (*(unsigned long *)op == RET_CODE) {
		while (*bp && *bp != '\n') bp++;
		if (*bp == '\n') bp++;
		*pp = bp;
		ins->End = bp;
		return(NULL);
	}
	if (i == 4 && firstCodeChar == 'l' && op[1] == 'e' &&
		op[3] == 'l')
		ins->Flags |= ISLEAL;
	if (i == 4 && *(unsigned long *)op == CMPL_CODE) {
		ins->Flags |= ISCOMPARE;
	}
	while (*bp == ' ' || *bp == '\t') bp++;
	if (*bp == '\n') {
		bp++;
		ins->End = bp;
		*pp = bp;
		return(bp);
	}
	i = regcount = 0;
	if (*bp == '%') ins->Flags |= ISREGSRC;
	while (*bp && *bp != ',') {
		int c = *bp;
		if (c == '\n') break;
		if (i < MAXIDSIZE)
			ins->src[i++] = c;
		else ins->Flags |= TRUNCATED;
		bp++;
		if (c == '(') {
			ins->Flags |= SRCOFFSET;
			while (*bp && *bp != ')') {
				c = *bp++;
				if (i < 40)
					ins->src[i++] = c;
				if (c == ',') {
					ins->Flags |= SRCDEREFERENCE;
				}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -