📄 unroll.c
字号:
can precondition the loop so as to make the exit tests unnecessary. Just like variable splitting, this is not safe if the loop is entered via a jump to the bottom. Also, can not do this if no strength reduce info, because precondition_loop_p uses this info. */ /* Must copy the loop body for preconditioning before the following find_splittable_regs call since that will emit insns which need to be after the preconditioned loop copies, but immediately before the unrolled loop copies. */ /* Also, it is not safe to split induction variables for the preconditioned copies of the loop body. If we split induction variables, then the code assumes that each induction variable can be represented as a function of its initial value and the loop iteration number. This is not true in this case, because the last preconditioned copy of the loop body could be any iteration from the first up to the `unroll_number-1'th, depending on the initial value of the iteration variable. Therefore we can not split induction variables here, because we can not calculate their value. Hence, this code must occur before find_splittable_regs is called. */ if (unroll_type == UNROLL_NAIVE && ! splitting_not_safe && strength_reduce_p) { rtx initial_value, final_value, increment; if (precondition_loop_p (&initial_value, &final_value, &increment, loop_start, loop_end)) { register rtx diff, temp; enum machine_mode mode; rtx *labels; int abs_inc, neg_inc; map->reg_map = (rtx *) alloca (maxregnum * sizeof (rtx)); map->const_equiv_map = (rtx *) alloca (maxregnum * sizeof (rtx)); map->const_age_map = (unsigned *) alloca (maxregnum * sizeof (unsigned)); map->const_equiv_map_size = maxregnum; global_const_equiv_map = map->const_equiv_map; global_const_equiv_map_size = maxregnum; init_reg_map (map, maxregnum); /* Limit loop unrolling to 4, since this will make 7 copies of the loop body. */ if (unroll_number > 4) unroll_number = 4; /* Save the absolute value of the increment, and also whether or not it is negative. */ neg_inc = 0; abs_inc = INTVAL (increment); if (abs_inc < 0) { abs_inc = - abs_inc; neg_inc = 1; } start_sequence (); /* Decide what mode to do these calculations in. Choose the larger of final_value's mode and initial_value's mode, or a full-word if both are constants. */ mode = GET_MODE (final_value); if (mode == VOIDmode) { mode = GET_MODE (initial_value); if (mode == VOIDmode) mode = word_mode; } else if (mode != GET_MODE (initial_value) && (GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (initial_value)))) mode = GET_MODE (initial_value); /* Calculate the difference between the final and initial values. Final value may be a (plus (reg x) (const_int 1)) rtx. Let the following cse pass simplify this if initial value is a constant. We must copy the final and initial values here to avoid improperly shared rtl. */ diff = expand_binop (mode, sub_optab, copy_rtx (final_value), copy_rtx (initial_value), NULL_RTX, 0, OPTAB_LIB_WIDEN); /* Now calculate (diff % (unroll * abs (increment))) by using an and instruction. */ diff = expand_binop (GET_MODE (diff), and_optab, diff, GEN_INT (unroll_number * abs_inc - 1), NULL_RTX, 0, OPTAB_LIB_WIDEN); /* Now emit a sequence of branches to jump to the proper precond loop entry point. */ labels = (rtx *) alloca (sizeof (rtx) * unroll_number); for (i = 0; i < unroll_number; i++) labels[i] = gen_label_rtx (); /* Check for the case where the initial value is greater than or equal to the final value. In that case, we want to execute exactly one loop iteration. The code below will fail for this case. */ emit_cmp_insn (initial_value, final_value, neg_inc ? LE : GE, NULL_RTX, mode, 0, 0); if (neg_inc) emit_jump_insn (gen_ble (labels[1])); else emit_jump_insn (gen_bge (labels[1])); JUMP_LABEL (get_last_insn ()) = labels[1]; LABEL_NUSES (labels[1])++; /* Assuming the unroll_number is 4, and the increment is 2, then for a negative increment: for a positive increment: diff = 0,1 precond 0 diff = 0,7 precond 0 diff = 2,3 precond 3 diff = 1,2 precond 1 diff = 4,5 precond 2 diff = 3,4 precond 2 diff = 6,7 precond 1 diff = 5,6 precond 3 */ /* We only need to emit (unroll_number - 1) branches here, the last case just falls through to the following code. */ /* ??? This would give better code if we emitted a tree of branches instead of the current linear list of branches. */ for (i = 0; i < unroll_number - 1; i++) { int cmp_const; enum rtx_code cmp_code; /* For negative increments, must invert the constant compared against, except when comparing against zero. */ if (i == 0) { cmp_const = 0; cmp_code = EQ; } else if (neg_inc) { cmp_const = unroll_number - i; cmp_code = GE; } else { cmp_const = i; cmp_code = LE; } emit_cmp_insn (diff, GEN_INT (abs_inc * cmp_const), cmp_code, NULL_RTX, mode, 0, 0); if (i == 0) emit_jump_insn (gen_beq (labels[i])); else if (neg_inc) emit_jump_insn (gen_bge (labels[i])); else emit_jump_insn (gen_ble (labels[i])); JUMP_LABEL (get_last_insn ()) = labels[i]; LABEL_NUSES (labels[i])++; } /* If the increment is greater than one, then we need another branch, to handle other cases equivalent to 0. */ /* ??? This should be merged into the code above somehow to help simplify the code here, and reduce the number of branches emitted. For the negative increment case, the branch here could easily be merged with the `0' case branch above. For the positive increment case, it is not clear how this can be simplified. */ if (abs_inc != 1) { int cmp_const; enum rtx_code cmp_code; if (neg_inc) { cmp_const = abs_inc - 1; cmp_code = LE; } else { cmp_const = abs_inc * (unroll_number - 1) + 1; cmp_code = GE; } emit_cmp_insn (diff, GEN_INT (cmp_const), cmp_code, NULL_RTX, mode, 0, 0); if (neg_inc) emit_jump_insn (gen_ble (labels[0])); else emit_jump_insn (gen_bge (labels[0])); JUMP_LABEL (get_last_insn ()) = labels[0]; LABEL_NUSES (labels[0])++; } sequence = gen_sequence (); end_sequence (); emit_insn_before (sequence, loop_start); /* Only the last copy of the loop body here needs the exit test, so set copy_end to exclude the compare/branch here, and then reset it inside the loop when get to the last copy. */ if (GET_CODE (last_loop_insn) == BARRIER) copy_end = PREV_INSN (PREV_INSN (last_loop_insn)); else if (GET_CODE (last_loop_insn) == JUMP_INSN) {#ifdef HAVE_cc0 /* The immediately preceding insn is a compare which we do not want to copy. */ copy_end = PREV_INSN (PREV_INSN (last_loop_insn));#else /* The immediately preceding insn may not be a compare, so we must copy it. */ copy_end = PREV_INSN (last_loop_insn);#endif } else abort (); for (i = 1; i < unroll_number; i++) { emit_label_after (labels[unroll_number - i], PREV_INSN (loop_start)); bzero ((char *) map->insn_map, max_insnno * sizeof (rtx)); bzero ((char *) map->const_equiv_map, maxregnum * sizeof (rtx)); bzero ((char *) map->const_age_map, maxregnum * sizeof (unsigned)); map->const_age = 0; for (j = 0; j < max_labelno; j++) if (local_label[j]) map->label_map[j] = gen_label_rtx (); for (j = FIRST_PSEUDO_REGISTER; j < max_reg_before_loop; j++) if (local_regno[j]) map->reg_map[j] = gen_reg_rtx (GET_MODE (regno_reg_rtx[j])); /* The last copy needs the compare/branch insns at the end, so reset copy_end here if the loop ends with a conditional branch. */ if (i == unroll_number - 1) { if (GET_CODE (last_loop_insn) == BARRIER) copy_end = PREV_INSN (PREV_INSN (last_loop_insn)); else copy_end = last_loop_insn; } /* None of the copies are the `last_iteration', so just pass zero for that parameter. */ copy_loop_body (copy_start, copy_end, map, exit_label, 0, unroll_type, start_label, loop_end, loop_start, copy_end); } emit_label_after (labels[0], PREV_INSN (loop_start)); if (GET_CODE (last_loop_insn) == BARRIER) { insert_before = PREV_INSN (last_loop_insn); copy_end = PREV_INSN (insert_before); } else {#ifdef HAVE_cc0 /* The immediately preceding insn is a compare which we do not want to copy. */ insert_before = PREV_INSN (last_loop_insn); copy_end = PREV_INSN (insert_before);#else /* The immediately preceding insn may not be a compare, so we must copy it. */ insert_before = last_loop_insn; copy_end = PREV_INSN (last_loop_insn);#endif } /* Set unroll type to MODULO now. */ unroll_type = UNROLL_MODULO; loop_preconditioned = 1; } } /* If reach here, and the loop type is UNROLL_NAIVE, then don't unroll the loop unless all loops are being unrolled. */ if (unroll_type == UNROLL_NAIVE && ! flag_unroll_all_loops) { if (loop_dump_stream) fprintf (loop_dump_stream, "Unrolling failure: Naive unrolling not being done.\n"); return; } /* At this point, we are guaranteed to unroll the loop. */ /* For each biv and giv, determine whether it can be safely split into a different variable for each unrolled copy of the loop body. We precalculate and save this info here, since computing it is expensive. Do this before deleting any instructions from the loop, so that back_branch_in_range_p will work correctly. */ if (splitting_not_safe) temp = 0; else temp = find_splittable_regs (unroll_type, loop_start, loop_end, end_insert_before, unroll_number); /* find_splittable_regs may have created some new registers, so must reallocate the reg_map with the new larger size, and must realloc the constant maps also. */ maxregnum = max_reg_num (); map->reg_map = (rtx *) alloca (maxregnum * sizeof (rtx)); init_reg_map (map, maxregnum); /* Space is needed in some of the map for new registers, so new_maxregnum is an (over)estimate of how many registers will exist at the end. */ new_maxregnum = maxregnum + (temp * unroll_number * 2); /* Must realloc space for the constant maps, because the number of registers may have changed. */ map->const_equiv_map = (rtx *) alloca (new_maxregnum * sizeof (rtx)); map->const_age_map = (unsigned *) alloca (new_maxregnum * sizeof (unsigned)); map->const_equiv_map_size = new_maxregnum; global_const_equiv_map = map->const_equiv_map; global_const_equiv_map_size = new_maxregnum; /* Search the list of bivs and givs to find ones which need to be remapped when split, and set their reg_map entry appropriately. */ for (bl = loop_iv_list; bl; bl = bl->next) { if (REGNO (bl->biv->src_reg) != bl->regno) map->reg_map[bl->regno] = bl->biv->src_reg;#if 0 /* Currently, non-reduced/final-value givs are never split. */ for (v = bl->giv; v; v = v->next_iv) if (REGNO (v->src_reg) != bl->regno) map->reg_map[REGNO (v->dest_reg)] = v->src_reg;#endif } /* If the loop is being partially unrolled, and the iteration variables are being split, and are being renamed for the split, then must fix up the compare/jump instruction at the end of the loop to refer to the new registers. This compare isn't copied, so the registers used in it will never be replaced if it isn't done here. */ if (unroll_type == UNROLL_MODULO) { insn = NEXT_INSN (copy_end); if (GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN) PATTERN (insn) = remap_split_bivs (PATTERN (insn)); } /* For unroll_number - 1 times, make a copy of each instruction between copy_start and copy_end, and insert these new instructions before the end of the loop. */ for (i = 0; i < unroll_number; i++) { bzero ((char *) map->insn_map, max_insnno * sizeof (rtx)); bzero ((char *) map->const_equiv_map, new_maxregnum * sizeof (rtx)); bzero ((char *) map->const_age_map, new_maxregnum * sizeof (unsigned)); map->const_age = 0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -