📄 optim.c
字号:
/*
A peephole optimizer for lcc
----------------------------
Section 1:
---------
Overall design.
The optimizer receives its input from the compiler as !n a!cii instruction
stream. When the function 'genasm' sees a label, it will send the contents of
the buffer to the optimizer with a call to the function 'SendToOptimizer'.
The entry point in this file is the function 'ChangeBlock', that performs the
following actions:
1. Parses the instructions, setting flags for certain characteristics,
setting register numbers, etc.
2. Calls the 'Optimize block' function the scan the block for instruction
to be changed/deleted.
The strategy of this peephole optimizer is to build a machine state, and
try to keep it coherent with the operations performed in a block. It will
try to eliminate redundant loads of registers, replace certains registers
with other free ones, replace certain sequences of instruction lcc generates
with other more efficient ones, etc. It is certainly limited to lcc, since
many of the (implicit) assumptions about the code would preclude its use
as a more general peephole optimizer.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <setjmp.h>
#define ANALYZER
#ifdef STANDALONE
#define DEBUGOPTIM
#include <time.h>
int vmask[] = { 1, 0 };
#endif
static int SwappedRegs;
extern int HasSwappedRegs(void);
extern int genlabel(int);
#include <assert.h>
#define MAXIDSIZE 50
/*
The 'Instruction' data structure. This is built now by the parser of
instructions, but it would naturally much more efficient to have the
characteristics of those instructions built in to the tables of the
compiler
*/
typedef struct tagInstruction {
unsigned long Flags;
unsigned char Name[12]; /* opcode */
unsigned char src[MAXIDSIZE]; /* source of the data */
unsigned char dst[MAXIDSIZE]; /* destination of the data */
unsigned char *Start; /* Pointer to the beginning of the instruction */
/* in the assembler buffer */
unsigned char *End; /* pointer to the end of the instruction */
unsigned short ChangedIdx; /* Index of the replace instruction in the */
/* instruction table */
unsigned short SrcReg; /* Source register number */
unsigned short DstReg; /* destination reg number */
}Instruction;
/* instruction Flags field */
#define DELETED 0x1
#define CHANGED 0x2
#define ISCALL 0x4
#define CLOBBERS 0x8
#define ISLOCAL 0x10
#define ISARGUMENT 0x20
#define SRCDEREFERENCE 0x40
#define DSTDEREFERENCE 0x80
#define SRCOFFSET 0x100
#define DSTOFFSET 0x200
#define ISJUMP 0x400
#define ISFLOAT 0x800
#define TRUNCATED 0x1000
#define ISMOVL 0x2000
#define ISMOVSB 0x4000
#define ISGENERICMOVE 0x8000
#define ISMOVSWL 0x10000
#define ISMOVSBL 0x20000
#define ISLEAL 0x40000
#define ISCOMPARE 0x80000
#define ISREGSRC 0x100000
#define ISREGDST 0x200000
#define ADDED 0x400000
#define STACKADJUST 0x800000
/*
The instruction table contains two parts:
1. The instructions that belong to the block being analyzed now.
2. The changed instructions.
Each changed instruction contains the index of the position of the changed
instruction contents in its ChangedIndex field, set by the function
'GetNextChangedInstruction'
*/
/* Number of instructions in the instruction table */
#define MAXINSTRUCTION 1600
/* Register numbers */
#define EAX 1
#define EBX 2
#define ECX 3
#define EDX 4
#define ESI 5
#define EDI 6
#define EBP 7
#define ESP 8
#define AX 9
#define BX 10
#define CX 11
#define DX 12
#define SI 13
#define DI 14
#define BP 15
#define SP 16
#define MM0 17
#define MM1 18
#define MM2 19
#define MM3 20
#define MM4 21
#define MM5 22
#define MM6 23
#define MM7 24
static unsigned char *RegisterNames[] = {
"NULL",
"eax",
"ebx",
"ecx",
"edx",
"esi",
"edi",
"ebp",
"esp"
"mm0",
"mm1",
"mm2",
"mm3",
"mm4",
"mm5",
"mm6",
"mm7"
};
int ediIsSaved=1,esiIsSaved=1;
static Instruction InstructionTable[MAXINSTRUCTION];
static int InstructionIndex = 0;
static int recursion = 0;
static jmp_buf bailout;
static int isByteValue(char *);
typedef struct tagbasicBlock {
char Label[25];
int calls;
int NrOfInstructions;
char Registers[SP+1];
int Flags;
unsigned short FirstCall; /* Index of the first 'call' instruction */
unsigned short LastCall; /* Index of the last 'call' instruction */
unsigned short ChangedCounter;
unsigned short deleted;
}BasicBlock;
extern int GetLastFunctionLabel(void);
static unsigned char *Nullst = "NULL";
#define MOVL_CODE 1819701101
#define JMP_CODE 7368042
#define JNE_CODE 6647402
#define ADDL_CODE 1818518625
#define PUSH_CODE 1752397168
#define CMPL_CODE 1819307363
#define REP_CODE 7366002
#define XORL_CODE 1819438968
#define XOR_CODE 7499640
#define ADD_CODE 6579297
#define ADDL_CODE 1818518625
#define SUB_CODE 6452595
#define SUBL_CODE 1818391923
#define MOVB_CODE 1651928941
#define MOVW_CODE 2004250477
#define INCL_CODE 1818455657
#define SARL_CODE 1819435379
#define ANDL_CODE 1818521185
#define ORL_CODE 7107183
#define RET_CODE 7628146
#define MOVS_CODE 1937141613
#define SHLL_CODE 1819043955
#define SHRL_CODE 1819437171
#define PUSH_CODE 1752397168
/*
We keep the state of the machine in this character vector.
Each position points towards a character string that
indicates the contents of the register.
*/
static unsigned char *State[25];
static int deleted = 0,redundant = 0;
static int lastLabel = 0;
static int hasRegisterVariables;
/*
The peephole optimizer can be compiled as a standalone module. It
will emit very verbose output...
*/
#ifdef DEBUGOPTIM
static unsigned char *buffer;
#define Printf printf
#define Printf1(a) printf(a)
#define Printf2(a,b) printf(a,b)
#define Printf3(a,b,c) printf(a,b,c)
extern int hasAllocA;
#else
#define Printf1(a)
#define Printf2(a,b)
#define Printf3(a,b,c)
extern int hasAllocA;
#endif
extern unsigned char *FindStringConstant(unsigned char *name);
extern int RegisterVariablesMask;
extern int ispow2(int);
#ifdef STANDALONE
int hasAllocA = 0;
main(int argc,char *argv[])
{
FILE *f;
int l,nl;
unsigned char *newb;
f = fopen(argv[1],"r");
if (f == NULL) {
fprintf(stderr,"Impossible to open %s\n",argv[1]);
return(1);
}
fseek(f,0,SEEK_END);
l = ftell(f);
fseek(f,0,SEEK_SET);
buffer = malloc(l+10);
memset(buffer,0,l+10);
l = fread(buffer,1,l,f);
fclose(f);
Printf("Optimizing %s\n",argv[1]);
nl = ScanAsm(buffer,l,&newb);
Printf("%d instructions deleted,redundant %d\n",deleted,redundant);
f = fopen("output.out","w");
fwrite(newb,1,nl,f);
fclose(f);
}
static int genlabel(int n)
{
static int label = 99999;
label += n;
return label - n;
}
#endif
/*
This routine searches the InstructionTable for an index where
it will put the next changed instruction. Those instructions
start at the end of the basic block.
*/
static Instruction *GetNextChangedInstruction(Instruction *Ins,BasicBlock *bb)
{
Instruction *insDst;
if (bb->ChangedCounter >= MAXINSTRUCTION) {
warning("Function too big for the optimizer\n");
longjmp(bailout,1);
}
Ins->Flags |= CHANGED;
Ins->ChangedIdx = bb->ChangedCounter;
insDst = &InstructionTable[bb->ChangedCounter];
bb->ChangedCounter++;
memset(insDst,0,sizeof(Instruction));
return(insDst);
}
static Instruction *ChangeInstruction(Instruction *ins,BasicBlock *bb,unsigned char *name,unsigned char *src,unsigned char *dst)
{
Instruction *result;
result = GetNextChangedInstruction(ins,bb);
strcpy(result->Name,name);
strcpy(result->src,src);
strcpy(result->dst,dst);
return(result);
}
#ifdef STANDALONE
/*
This routine searches the start of the basic block. It looks
for a label, that should start with the '_$' prefix. Change
this if the prefix is different.
*/
static unsigned char *FindBeginBlock(unsigned char *bp,BasicBlock *bb)
{
if (*bp == '_' && bp[1] == '$') {
unsigned char *p;
p = bp+2;
while (*p && *p != '\n' && *p != ':') p++;
if (*p == ':') goto retgood;
if (*p == 0) return(NULL);
}
while (*bp) {
if (bp[0] == '\n' && bp[1] == '_') {
bp++;
if (bp[1] == '$') {
int i;
retgood:
i = 0;
while (*bp && *bp != '\n') bb->Label[i++] = *bp++;
if (*bp == '\n') bp++;
return(bp);
}
else {
unsigned char *p = bp;
while (*p && *p != '\n') {
if (*p == ':') {
goto retgood;
}
else if (*p == ',') {
bp = p;
break;
}
p++;
}
}
}
else if (*bp == '.' && bp[1] == 'd' && bp[2] == 'a' && bp[3] == 't') {
scanfortext:
bp++;
for (;;) {
while (*bp && *bp != '.')
bp++;
if (*bp == '.' && !strncmp(bp,".text",5))
break;
else if (*bp == 0) break;
else if (*bp == '.') bp++;
}
if (bp == NULL) return(NULL);
if (*bp == 0) return(NULL);
}
else if (*bp == '.' && bp[1] == 'b' && bp[2] == 's' && bp[3] == 's')
goto scanfortext;
else bp++;
}
return(NULL);
}
#endif
/* Converts a register name into a register number */
static int GetRegNumber(unsigned char *name)
{
register unsigned char *n = name;
if (*n == 'e') {
n++;
switch (*n++) {
case 'a': return(EAX);
case 'b': return (*n == 'x')? EBX : EBP;
case 'c': return(ECX);
case 'd': return( *n == 'x')? EDX : EDI;
case 's': return (*n == 'i') ? ESI : ESP;
}
fprintf(stderr,"bad register name %c%c%c\n",name[0],name[1],name[2]);
exit(1);
}
else if (*n == 'm' && n[1] == 'm') {
return MM0 + n[2] - '0';
}
else {
char tmp[3];
tmp[0] = 'e';
tmp[1] = *n++;
tmp[2] = (*n == 'l') ? 'x' : *n;
return(ESP+GetRegNumber(tmp));
}
return(0);
}
/*
Parser for one instruction. It separates the instruction into
opcode (Name field), source and destination.
*/
static unsigned char *AddNextInstruction(unsigned char *bp,BasicBlock *bb,unsigned char **pp)
{
unsigned char *p,*start,*op;
int i,regcount,firstCodeChar;
Instruction *ins;
start = p = bp;
if (*p == '_' && p[1] == '$') {
*pp = bp;
return(NULL);
}
if (*bp == 1) {
while (*bp == 1) bp++;
}
if (*bp == '\t') bp++;
if (*bp == ';')
goto skipline;
if (*bp == '.') {
if (!strncmp(bp,".data",5) || !strncmp(bp,".bss",4)) {
bp = strstr(bp,".text");
*pp = bp;
return(bp);
}
skipline:
while (*bp && *bp != '\n') bp++;
if (*bp == '\n') bp++;
*pp = bp;
if (*bp == 0) return(NULL);
return(bp);
}
if (InstructionIndex < MAXINSTRUCTION-50) {
ins = &InstructionTable[InstructionIndex];
InstructionIndex++;
memset(ins,0,sizeof(Instruction));
}
else {
warning("Function too big for the optimizer\n");
longjmp(bailout,1);
}
i = 0;
firstCodeChar = *bp;
if (firstCodeChar == 'j')
ins->Flags |= ISJUMP;
else if (firstCodeChar == 'f')
ins->Flags |= ISFLOAT;
op = ins->Name;
while (*bp >= 'a' && *bp <= 'z') {
op[i++] = *bp++;
}
if (i > 8) {
op[i] = 0;
fprintf(stderr,"overflow in instruction name %s ",ins->Name);
while (*bp && *bp != '\n') fprintf(stderr,"%c",*bp++);
fprintf(stderr,"\n");
exit(1);
}
ins->Start = start;
bb->NrOfInstructions++;
op[i] = 0;
if (i > 3 && firstCodeChar == 'm' && op[1] == 'o' &&
op[2] == 'v') {
if (op[3] == 'l') ins->Flags |= ISMOVL;
else if (op[3] == 's' && op[4] == 'b' && op[5] == 0) {
ins->Flags |= ISMOVSB;
}
else if (op[3] == 's' && (op[4] == 'w' || op[4] == 'b')
&& op[5] == 'l') {
if (op[4] == 'w') ins->Flags |= ISMOVSWL;
else ins->Flags |= ISMOVSBL;
}
ins->Flags |= ISGENERICMOVE;
}
else if (*(unsigned long *)op == RET_CODE) {
while (*bp && *bp != '\n') bp++;
if (*bp == '\n') bp++;
*pp = bp;
ins->End = bp;
return(NULL);
}
if (i == 4 && firstCodeChar == 'l' && op[1] == 'e' &&
op[3] == 'l')
ins->Flags |= ISLEAL;
if (i == 4 && *(unsigned long *)op == CMPL_CODE) {
ins->Flags |= ISCOMPARE;
}
while (*bp == ' ' || *bp == '\t') bp++;
if (*bp == '\n') {
bp++;
ins->End = bp;
*pp = bp;
return(bp);
}
i = regcount = 0;
if (*bp == '%') ins->Flags |= ISREGSRC;
while (*bp && *bp != ',') {
int c = *bp;
if (c == '\n') break;
if (i < MAXIDSIZE)
ins->src[i++] = c;
else ins->Flags |= TRUNCATED;
bp++;
if (c == '(') {
ins->Flags |= SRCOFFSET;
while (*bp && *bp != ')') {
c = *bp++;
if (i < 40)
ins->src[i++] = c;
if (c == ',') {
ins->Flags |= SRCDEREFERENCE;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -