📄 optimize.c
字号:
/* Because we really don't have an IR, this stuff is a little messy. */static intF(code, v0, v1) int code; int v0, v1;{ u_int hash; int val; struct valnode *p; hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8); hash %= MODULUS; for (p = hashtbl[hash]; p; p = p->next) if (p->code == code && p->v0 == v0 && p->v1 == v1) return p->val; val = ++curval; if (BPF_MODE(code) == BPF_IMM && (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) { vmap[val].const_val = v0; vmap[val].is_const = 1; } p = next_vnode++; p->val = val; p->code = code; p->v0 = v0; p->v1 = v1; p->next = hashtbl[hash]; hashtbl[hash] = p; return val;}static inline voidvstore(s, valp, newval, alter) struct stmt *s; int *valp; int newval; int alter;{ if (alter && *valp == newval) s->code = NOP; else *valp = newval;}static voidfold_op(s, v0, v1) struct stmt *s; int v0, v1;{ bpf_int32 a, b; a = vmap[v0].const_val; b = vmap[v1].const_val; switch (BPF_OP(s->code)) { case BPF_ADD: a += b; break; case BPF_SUB: a -= b; break; case BPF_MUL: a *= b; break; case BPF_DIV: if (b == 0) bpf_error("division by zero"); a /= b; break; case BPF_AND: a &= b; break; case BPF_OR: a |= b; break; case BPF_LSH: a <<= b; break; case BPF_RSH: a >>= b; break; case BPF_NEG: a = -a; break; default: abort(); } s->k = a; s->code = BPF_LD|BPF_IMM; done = 0;}static inline struct slist *this_op(s) struct slist *s;{ while (s != 0 && s->s.code == NOP) s = s->next; return s;}static voidopt_not(b) struct block *b;{ struct block *tmp = JT(b); JT(b) = JF(b); JF(b) = tmp;}static voidopt_peep(b) struct block *b;{ struct slist *s; struct slist *next, *last; int val; s = b->stmts; if (s == 0) return; last = s; for (/*empty*/; /*empty*/; s = next) { /* * Skip over nops. */ s = this_op(s); if (s == 0) break; /* nothing left in the block */ /* * Find the next real instruction after that one * (skipping nops). */ next = this_op(s->next); if (next == 0) break; /* no next instruction */ last = next; /* * st M[k] --> st M[k] * ldx M[k] tax */ if (s->s.code == BPF_ST && next->s.code == (BPF_LDX|BPF_MEM) && s->s.k == next->s.k) { done = 0; next->s.code = BPF_MISC|BPF_TAX; } /* * ld #k --> ldx #k * tax txa */ if (s->s.code == (BPF_LD|BPF_IMM) && next->s.code == (BPF_MISC|BPF_TAX)) { s->s.code = BPF_LDX|BPF_IMM; next->s.code = BPF_MISC|BPF_TXA; done = 0; } /* * This is an ugly special case, but it happens * when you say tcp[k] or udp[k] where k is a constant. */ if (s->s.code == (BPF_LD|BPF_IMM)) { struct slist *add, *tax, *ild; /* * Check that X isn't used on exit from this * block (which the optimizer might cause). * We know the code generator won't generate * any local dependencies. */ if (ATOMELEM(b->out_use, X_ATOM)) continue; /* * Check that the instruction following the ldi * is an addx, or it's an ldxms with an addx * following it (with 0 or more nops between the * ldxms and addx). */ if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B)) add = next; else add = this_op(next->next); if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X)) continue; /* * Check that a tax follows that (with 0 or more * nops between them). */ tax = this_op(add->next); if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX)) continue; /* * Check that an ild follows that (with 0 or more * nops between them). */ ild = this_op(tax->next); if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD || BPF_MODE(ild->s.code) != BPF_IND) continue; /* * We want to turn this sequence: * * (004) ldi #0x2 {s} * (005) ldxms [14] {next} -- optional * (006) addx {add} * (007) tax {tax} * (008) ild [x+0] {ild} * * into this sequence: * * (004) nop * (005) ldxms [14] * (006) nop * (007) nop * (008) ild [x+2] * * XXX We need to check that X is not * subsequently used, because we want to change * what'll be in it after this sequence. * * We know we can eliminate the accumulator * modifications earlier in the sequence since * it is defined by the last stmt of this sequence * (i.e., the last statement of the sequence loads * a value into the accumulator, so we can eliminate * earlier operations on the accumulator). */ ild->s.k += s->s.k; s->s.code = NOP; add->s.code = NOP; tax->s.code = NOP; done = 0; } } /* * If the comparison at the end of a block is an equality * comparison against a constant, and nobody uses the value * we leave in the A register at the end of a block, and * the operation preceding the comparison is an arithmetic * operation, we can sometime optimize it away. */ if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) && !ATOMELEM(b->out_use, A_ATOM)) { /* * We can optimize away certain subtractions of the * X register. */ if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) { val = b->val[X_ATOM]; if (vmap[val].is_const) { /* * If we have a subtract to do a comparison, * and the X register is a known constant, * we can merge this value into the * comparison: * * sub x -> nop * jeq #y jeq #(x+y) */ b->s.k += vmap[val].const_val; last->s.code = NOP; done = 0; } else if (b->s.k == 0) { /* * If the X register isn't a constant, * and the comparison in the test is * against 0, we can compare with the * X register, instead: * * sub x -> nop * jeq #0 jeq x */ last->s.code = NOP; b->s.code = BPF_JMP|BPF_JEQ|BPF_X; done = 0; } } /* * Likewise, a constant subtract can be simplified: * * sub #x -> nop * jeq #y -> jeq #(x+y) */ else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) { last->s.code = NOP; b->s.k += last->s.k; done = 0; } /* * And, similarly, a constant AND can be simplified * if we're testing against 0, i.e.: * * and #k nop * jeq #0 -> jset #k */ else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) && b->s.k == 0) { b->s.k = last->s.k; b->s.code = BPF_JMP|BPF_K|BPF_JSET; last->s.code = NOP; done = 0; opt_not(b); } } /* * jset #0 -> never * jset #ffffffff -> always */ if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) { if (b->s.k == 0) JT(b) = JF(b); if (b->s.k == 0xffffffff) JF(b) = JT(b); } /* * If the accumulator is a known constant, we can compute the * comparison result. */ val = b->val[A_ATOM]; if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) { bpf_int32 v = vmap[val].const_val; switch (BPF_OP(b->s.code)) { case BPF_JEQ: v = v == b->s.k; break; case BPF_JGT: v = (unsigned)v > b->s.k; break; case BPF_JGE: v = (unsigned)v >= b->s.k; break; case BPF_JSET: v &= b->s.k; break; default: abort(); } if (JF(b) != JT(b)) done = 0; if (v) JF(b) = JT(b); else JT(b) = JF(b); }}/* * Compute the symbolic value of expression of 's', and update * anything it defines in the value table 'val'. If 'alter' is true, * do various optimizations. This code would be cleaner if symbolic * evaluation and code transformations weren't folded together. */static voidopt_stmt(s, val, alter) struct stmt *s; int val[]; int alter;{ int op; int v; switch (s->code) { case BPF_LD|BPF_ABS|BPF_W: case BPF_LD|BPF_ABS|BPF_H: case BPF_LD|BPF_ABS|BPF_B: v = F(s->code, s->k, 0L); vstore(s, &val[A_ATOM], v, alter); break; case BPF_LD|BPF_IND|BPF_W: case BPF_LD|BPF_IND|BPF_H: case BPF_LD|BPF_IND|BPF_B: v = val[X_ATOM]; if (alter && vmap[v].is_const) { s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code); s->k += vmap[v].const_val; v = F(s->code, s->k, 0L); done = 0; } else v = F(s->code, s->k, v); vstore(s, &val[A_ATOM], v, alter); break; case BPF_LD|BPF_LEN: v = F(s->code, 0L, 0L); vstore(s, &val[A_ATOM], v, alter); break; case BPF_LD|BPF_IMM: v = K(s->k); vstore(s, &val[A_ATOM], v, alter); break; case BPF_LDX|BPF_IMM: v = K(s->k); vstore(s, &val[X_ATOM], v, alter); break; case BPF_LDX|BPF_MSH|BPF_B: v = F(s->code, s->k, 0L); vstore(s, &val[X_ATOM], v, alter); break; case BPF_ALU|BPF_NEG: if (alter && vmap[val[A_ATOM]].is_const) { s->code = BPF_LD|BPF_IMM; s->k = -vmap[val[A_ATOM]].const_val; val[A_ATOM] = K(s->k); } else val[A_ATOM] = F(s->code, val[A_ATOM], 0L); break; case BPF_ALU|BPF_ADD|BPF_K: case BPF_ALU|BPF_SUB|BPF_K: case BPF_ALU|BPF_MUL|BPF_K: case BPF_ALU|BPF_DIV|BPF_K: case BPF_ALU|BPF_AND|BPF_K: case BPF_ALU|BPF_OR|BPF_K: case BPF_ALU|BPF_LSH|BPF_K: case BPF_ALU|BPF_RSH|BPF_K: op = BPF_OP(s->code); if (alter) { if (s->k == 0) { /* don't optimize away "sub #0" * as it may be needed later to * fixup the generated math code */ if (op == BPF_ADD || op == BPF_LSH || op == BPF_RSH || op == BPF_OR) { s->code = NOP; break; } if (op == BPF_MUL || op == BPF_AND) { s->code = BPF_LD|BPF_IMM; val[A_ATOM] = K(s->k); break; } } if (vmap[val[A_ATOM]].is_const) { fold_op(s, val[A_ATOM], K(s->k)); val[A_ATOM] = K(s->k); break; } } val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k)); break; case BPF_ALU|BPF_ADD|BPF_X: case BPF_ALU|BPF_SUB|BPF_X: case BPF_ALU|BPF_MUL|BPF_X: case BPF_ALU|BPF_DIV|BPF_X: case BPF_ALU|BPF_AND|BPF_X: case BPF_ALU|BPF_OR|BPF_X: case BPF_ALU|BPF_LSH|BPF_X: case BPF_ALU|BPF_RSH|BPF_X: op = BPF_OP(s->code); if (alter && vmap[val[X_ATOM]].is_const) { if (vmap[val[A_ATOM]].is_const) { fold_op(s, val[A_ATOM], val[X_ATOM]); val[A_ATOM] = K(s->k); } else { s->code = BPF_ALU|BPF_K|op; s->k = vmap[val[X_ATOM]].const_val; done = 0; val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k)); } break; } /* * Check if we're doing something to an accumulator * that is 0, and simplify. This may not seem like * much of a simplification but it could open up further * optimizations. * XXX We could also check for mul by 1, etc. */ if (alter && vmap[val[A_ATOM]].is_const && vmap[val[A_ATOM]].const_val == 0) { if (op == BPF_ADD || op == BPF_OR) { s->code = BPF_MISC|BPF_TXA; vstore(s, &val[A_ATOM], val[X_ATOM], alter); break; } else if (op == BPF_MUL || op == BPF_DIV || op == BPF_AND || op == BPF_LSH || op == BPF_RSH) { s->code = BPF_LD|BPF_IMM; s->k = 0; vstore(s, &val[A_ATOM], K(s->k), alter); break; } else if (op == BPF_NEG) { s->code = NOP; break; } } val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]); break; case BPF_MISC|BPF_TXA: vstore(s, &val[A_ATOM], val[X_ATOM], alter); break; case BPF_LD|BPF_MEM: v = val[s->k]; if (alter && vmap[v].is_const) { s->code = BPF_LD|BPF_IMM; s->k = vmap[v].const_val; done = 0; } vstore(s, &val[A_ATOM], v, alter); break; case BPF_MISC|BPF_TAX: vstore(s, &val[X_ATOM], val[A_ATOM], alter); break; case BPF_LDX|BPF_MEM: v = val[s->k]; if (alter && vmap[v].is_const) { s->code = BPF_LDX|BPF_IMM; s->k = vmap[v].const_val; done = 0; } vstore(s, &val[X_ATOM], v, alter); break; case BPF_ST: vstore(s, &val[s->k], val[A_ATOM], alter); break; case BPF_STX: vstore(s, &val[s->k], val[X_ATOM], alter); break; }}static voiddeadstmt(s, last) register struct stmt *s; register struct stmt *last[];{ register int atom; atom = atomuse(s); if (atom >= 0) { if (atom == AX_ATOM) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -