📄 expmed.c
字号:
if (temp == 0) abort (); return temp;}enum alg_code { alg_add, alg_subtract, alg_compound };/* This structure records a sequence of operations. `ops' is the number of operations recorded. `cost' is their total cost. The operations are stored in `op' and the corresponding integer coefficients in `coeff'. These are the operations: alg_add Add to the total the multiplicand times the coefficient. alg_subtract Subtract the multiplicand times the coefficient. alg_compound This coefficient plus or minus the following one is multiplied into the total. The following operation is alg_add or alg_subtract to indicate whether to add or subtract the two coefficients. */#ifndef MAX_BITS_PER_WORD#define MAX_BITS_PER_WORD BITS_PER_WORD#endifstruct algorithm{ int cost; unsigned int ops; enum alg_code op[MAX_BITS_PER_WORD]; unsigned HOST_WIDE_INT coeff[MAX_BITS_PER_WORD];};/* Compute and return the best algorithm for multiplying by T. Assume that add insns cost ADD_COST and shifts cost SHIFT_COST. Return cost -1 if would cost more than MAX_COST. */static struct algorithmsynth_mult (t, add_cost, shift_cost, max_cost) unsigned HOST_WIDE_INT t; int add_cost, shift_cost; int max_cost;{ int m, n; struct algorithm *best_alg = (struct algorithm *)alloca (sizeof (struct algorithm)); struct algorithm *alg_in = (struct algorithm *)alloca (sizeof (struct algorithm)); unsigned int cost; /* No matter what happens, we want to return a valid algorithm. */ best_alg->cost = max_cost; best_alg->ops = 0; /* Is t an exponent of 2, so we can just do a shift? */ if ((t & -t) == t) { if (t > 1) { if (max_cost >= shift_cost) { best_alg->cost = shift_cost; best_alg->ops = 1; best_alg->op[0] = alg_add; best_alg->coeff[0] = t; } else best_alg->cost = -1; } else if (t == 1) { if (max_cost >= 0) best_alg->cost = 0; } else best_alg->cost = 0; return *best_alg; } /* If MAX_COST just permits as little as an addition (or less), we won't succeed in synthesizing an algorithm for t. Return immediately with an indication of failure. */ if (max_cost <= add_cost) { best_alg->cost = -1; return *best_alg; } /* Look for factors of t of the form t = q(2**m +- 1), 2 <= m <= floor(log2(t)) - 1. If we find such a factor, we can multiply by t using an algorithm that multiplies by q, shift the result by m and add/subtract it to itself. */ for (m = floor_log2 (t) - 1; m >= 2; m--) { HOST_WIDE_INT m_exp_2 = (HOST_WIDE_INT) 1 << m; HOST_WIDE_INT d; d = m_exp_2 + 1; if (t % d == 0) { HOST_WIDE_INT q = t / d; cost = add_cost + shift_cost * 2; *alg_in = synth_mult (q, add_cost, shift_cost, MIN (max_cost, best_alg->cost) - cost); if (alg_in->cost >= 0) { cost += alg_in->cost; if (cost < best_alg->cost) { struct algorithm *x; x = alg_in; alg_in = best_alg; best_alg = x; best_alg->coeff[best_alg->ops] = m_exp_2; best_alg->op[best_alg->ops++] = alg_compound; best_alg->coeff[best_alg->ops] = 1; best_alg->op[best_alg->ops++] = alg_add; best_alg->cost = cost; } } } d = m_exp_2 - 1; if (t % d == 0) { HOST_WIDE_INT q = t / d; cost = add_cost + shift_cost * 2; *alg_in = synth_mult (q, add_cost, shift_cost, MIN (max_cost, best_alg->cost) - cost); if (alg_in->cost >= 0) { cost += alg_in->cost; if (cost < best_alg->cost) { struct algorithm *x; x = alg_in; alg_in = best_alg; best_alg = x; best_alg->coeff[best_alg->ops] = m_exp_2; best_alg->op[best_alg->ops++] = alg_compound; best_alg->coeff[best_alg->ops] = 1; best_alg->op[best_alg->ops++] = alg_subtract; best_alg->cost = cost; } } } } /* Try load effective address instructions, i.e. do a*3, a*5, a*9. */ { HOST_WIDE_INT q; HOST_WIDE_INT w; q = t & -t; /* get out lsb */ w = (t - q) & -(t - q); /* get out next lsb */ if (w / q <= lea_max_mul) { cost = lea_cost + (q != 1 ? shift_cost : 0); *alg_in = synth_mult (t - q - w, add_cost, shift_cost, MIN (max_cost, best_alg->cost) - cost); if (alg_in->cost >= 0) { cost += alg_in->cost; /* Use <= to prefer this method to the factoring method when the cost appears the same, because this method uses fewer temporary registers. */ if (cost <= best_alg->cost) { struct algorithm *x; x = alg_in; alg_in = best_alg; best_alg = x; best_alg->coeff[best_alg->ops] = w; best_alg->op[best_alg->ops++] = alg_add; best_alg->coeff[best_alg->ops] = q; best_alg->op[best_alg->ops++] = alg_add; best_alg->cost = cost; } } } } /* Now, use the good old method to add or subtract at the leftmost 1-bit. */ { HOST_WIDE_INT q; HOST_WIDE_INT w; q = t & -t; /* get out lsb */ for (w = q; (w & t) != 0; w <<= 1) ; if ((w > q << 1) /* Reject the case where t has only two bits. Thus we prefer addition in that case. */ && !(t < w && w == q << 2)) { /* There are many bits in a row. Make 'em by subtraction. */ cost = add_cost; if (q != 1) cost += shift_cost; *alg_in = synth_mult (t + q, add_cost, shift_cost, MIN (max_cost, best_alg->cost) - cost); if (alg_in->cost >= 0) { cost += alg_in->cost; /* Use <= to prefer this method to the factoring method when the cost appears the same, because this method uses fewer temporary registers. */ if (cost <= best_alg->cost) { struct algorithm *x; x = alg_in; alg_in = best_alg; best_alg = x; best_alg->coeff[best_alg->ops] = q; best_alg->op[best_alg->ops++] = alg_subtract; best_alg->cost = cost; } } } else { /* There's only one bit at the left. Make it by addition. */ cost = add_cost; if (q != 1) cost += shift_cost; *alg_in = synth_mult (t - q, add_cost, shift_cost, MIN (max_cost, best_alg->cost) - cost); if (alg_in->cost >= 0) { cost += alg_in->cost; if (cost <= best_alg->cost) { struct algorithm *x; x = alg_in; alg_in = best_alg; best_alg = x; best_alg->coeff[best_alg->ops] = q; best_alg->op[best_alg->ops++] = alg_add; best_alg->cost = cost; } } } } if (best_alg->cost >= max_cost) best_alg->cost = -1; return *best_alg;}/* Perform a multiplication and return an rtx for the result. MODE is mode of value; OP0 and OP1 are what to multiply (rtx's); TARGET is a suggestion for where to store the result (an rtx). We check specially for a constant integer as OP1. If you want this check for OP0 as well, then before calling you should swap the two operands if OP0 would be constant. */rtxexpand_mult (mode, op0, op1, target, unsignedp) enum machine_mode mode; register rtx op0, op1, target; int unsignedp;{ rtx const_op1 = op1; /* If we are multiplying in DImode, it may still be a win to try to work with shifts and adds. */ if (GET_CODE (op1) == CONST_DOUBLE && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT && HOST_BITS_PER_INT <= BITS_PER_WORD) { if ((CONST_DOUBLE_HIGH (op1) == 0 && CONST_DOUBLE_LOW (op1) >= 0) || (CONST_DOUBLE_HIGH (op1) == -1 && CONST_DOUBLE_LOW (op1) < 0)) const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1)); } /* We used to test optimize here, on the grounds that it's better to produce a smaller program when -O is not used. But this causes such a terrible slowdown sometimes that it seems better to use synth_mult always. */ if (GET_CODE (const_op1) == CONST_INT && ! mult_is_very_cheap) { struct algorithm alg; struct algorithm neg_alg; int negate = 0; HOST_WIDE_INT absval = INTVAL (op1); rtx last; /* Try to do the computation two ways: multiply by the negative of OP1 and then negate, or do the multiplication directly. The latter is usually faster for positive numbers and the former for negative numbers, but the opposite can be faster if the original value has a factor of 2**m +/- 1, while the negated value does not or vice versa. */ alg = synth_mult (absval, add_cost, shift_cost, mult_cost); neg_alg = synth_mult (- absval, add_cost, shift_cost, (alg.cost >= 0 ? alg.cost : mult_cost) - negate_cost); if (neg_alg.cost >= 0 && neg_alg.cost + negate_cost < alg.cost) alg = neg_alg, negate = 1, absval = - absval; if (alg.cost >= 0) { /* If we found something, it must be cheaper than multiply. So use it. */ int opno = 0; rtx accum, tem; int factors_seen = 0; op0 = protect_from_queue (op0, 0); /* Avoid referencing memory over and over. For speed, but also for correctness when mem is volatile. */ if (GET_CODE (op0) == MEM) op0 = force_reg (mode, op0); if (alg.ops == 0) accum = copy_to_mode_reg (mode, op0); else { /* 1 if this is the last in a series of adds and subtracts. */ int last = (1 == alg.ops || alg.op[1] == alg_compound); int log = floor_log2 (alg.coeff[0]); if (! factors_seen && ! last) log -= floor_log2 (alg.coeff[1]); if (alg.op[0] != alg_add) abort (); accum = expand_shift (LSHIFT_EXPR, mode, op0, build_int_2 (log, 0), NULL_RTX, 0); } while (++opno < alg.ops) { int log = floor_log2 (alg.coeff[opno]); /* 1 if this is the last in a series of adds and subtracts. */ int last = (opno + 1 == alg.ops || alg.op[opno + 1] == alg_compound); /* If we have not yet seen any separate factors (alg_compound) then turn op0<<a1 + op0<<a2 + op0<<a3... into (op0<<(a1-a2) + op0)<<(a2-a3) + op0... */ switch (alg.op[opno]) { case alg_add: if (factors_seen) { tem = expand_shift (LSHIFT_EXPR, mode, op0, build_int_2 (log, 0), NULL_RTX, 0); accum = force_operand (gen_rtx (PLUS, mode, accum, tem), accum); } else { if (! last) log -= floor_log2 (alg.coeff[opno + 1]); accum = force_operand (gen_rtx (PLUS, mode, accum, op0), accum); accum = expand_shift (LSHIFT_EXPR, mode, accum, build_int_2 (log, 0), accum, 0); } break; case alg_subtract: if (factors_seen) { tem = expand_shift (LSHIFT_EXPR, mode, op0, build_int_2 (log, 0), NULL_RTX, 0); accum = force_operand (gen_rtx (MINUS, mode, accum, tem), accum); } else { if (! last) log -= floor_log2 (alg.coeff[opno + 1]); accum = force_operand (gen_rtx (MINUS, mode, accum, op0), accum); accum = expand_shift (LSHIFT_EXPR, mode, accum, build_int_2 (log, 0), accum, 0); } break; case alg_compound: factors_seen = 1; tem = expand_shift (LSHIFT_EXPR, mode, accum, build_int_2 (log, 0), NULL_RTX, 0); log = floor_log2 (alg.coeff[opno + 1]);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -