📄 unroll.c
字号:
/* We must limit it to max_reg_before_loop, because only these pseudo registers have valid regno_first_uid info. Any register created after that is unlikely to be local to the loop anyways. */ local_regno = (char *) alloca (max_reg_before_loop); bzero (local_regno, max_reg_before_loop); /* Mark all local registers, i.e. the ones which are referenced only inside the loop. */ if (INSN_UID (copy_end) < max_uid_for_loop) { int copy_start_luid = INSN_LUID (copy_start); int copy_end_luid = INSN_LUID (copy_end); /* If a register is used in the jump insn, we must not duplicate it since it will also be used outside the loop. */ if (GET_CODE (copy_end) == JUMP_INSN) copy_end_luid--; /* If copy_start points to the NOTE that starts the loop, then we must use the next luid, because invariant pseudo-regs moved out of the loop have their lifetimes modified to start here, but they are not safe to duplicate. */ if (copy_start == loop_start) copy_start_luid++; /* If a pseudo's lifetime is entirely contained within this loop, then we can use a different pseudo in each unrolled copy of the loop. This results in better code. */ for (j = FIRST_PSEUDO_REGISTER; j < max_reg_before_loop; ++j) if (REGNO_FIRST_UID (j) > 0 && REGNO_FIRST_UID (j) <= max_uid_for_loop && uid_luid[REGNO_FIRST_UID (j)] >= copy_start_luid && REGNO_LAST_UID (j) > 0 && REGNO_LAST_UID (j) <= max_uid_for_loop && uid_luid[REGNO_LAST_UID (j)] <= copy_end_luid) { /* However, we must also check for loop-carried dependencies. If the value the pseudo has at the end of iteration X is used by iteration X+1, then we can not use a different pseudo for each unrolled copy of the loop. */ /* A pseudo is safe if regno_first_uid is a set, and this set dominates all instructions from regno_first_uid to regno_last_uid. */ /* ??? This check is simplistic. We would get better code if this check was more sophisticated. */ if (set_dominates_use (j, REGNO_FIRST_UID (j), REGNO_LAST_UID (j), copy_start, copy_end)) local_regno[j] = 1; if (loop_dump_stream) { if (local_regno[j]) fprintf (loop_dump_stream, "Marked reg %d as local\n", j); else fprintf (loop_dump_stream, "Did not mark reg %d as local\n", j); } } } /* If this loop requires exit tests when unrolled, check to see if we can precondition the loop so as to make the exit tests unnecessary. Just like variable splitting, this is not safe if the loop is entered via a jump to the bottom. Also, can not do this if no strength reduce info, because precondition_loop_p uses this info. */ /* Must copy the loop body for preconditioning before the following find_splittable_regs call since that will emit insns which need to be after the preconditioned loop copies, but immediately before the unrolled loop copies. */ /* Also, it is not safe to split induction variables for the preconditioned copies of the loop body. If we split induction variables, then the code assumes that each induction variable can be represented as a function of its initial value and the loop iteration number. This is not true in this case, because the last preconditioned copy of the loop body could be any iteration from the first up to the `unroll_number-1'th, depending on the initial value of the iteration variable. Therefore we can not split induction variables here, because we can not calculate their value. Hence, this code must occur before find_splittable_regs is called. */ if (unroll_type == UNROLL_NAIVE && ! splitting_not_safe && strength_reduce_p) { rtx initial_value, final_value, increment; if (precondition_loop_p (&initial_value, &final_value, &increment, loop_start, loop_end)) { register rtx diff, temp; enum machine_mode mode; rtx *labels; int abs_inc, neg_inc; map->reg_map = (rtx *) alloca (maxregnum * sizeof (rtx)); map->const_equiv_map = (rtx *) alloca (maxregnum * sizeof (rtx)); map->const_age_map = (unsigned *) alloca (maxregnum * sizeof (unsigned)); map->const_equiv_map_size = maxregnum; global_const_equiv_map = map->const_equiv_map; global_const_equiv_map_size = maxregnum; init_reg_map (map, maxregnum); /* Limit loop unrolling to 4, since this will make 7 copies of the loop body. */ if (unroll_number > 4) unroll_number = 4; /* Save the absolute value of the increment, and also whether or not it is negative. */ neg_inc = 0; abs_inc = INTVAL (increment); if (abs_inc < 0) { abs_inc = - abs_inc; neg_inc = 1; } start_sequence (); /* Decide what mode to do these calculations in. Choose the larger of final_value's mode and initial_value's mode, or a full-word if both are constants. */ mode = GET_MODE (final_value); if (mode == VOIDmode) { mode = GET_MODE (initial_value); if (mode == VOIDmode) mode = word_mode; } else if (mode != GET_MODE (initial_value) && (GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (initial_value)))) mode = GET_MODE (initial_value); /* Calculate the difference between the final and initial values. Final value may be a (plus (reg x) (const_int 1)) rtx. Let the following cse pass simplify this if initial value is a constant. We must copy the final and initial values here to avoid improperly shared rtl. */ diff = expand_binop (mode, sub_optab, copy_rtx (final_value), copy_rtx (initial_value), NULL_RTX, 0, OPTAB_LIB_WIDEN); /* Now calculate (diff % (unroll * abs (increment))) by using an and instruction. */ diff = expand_binop (GET_MODE (diff), and_optab, diff, GEN_INT (unroll_number * abs_inc - 1), NULL_RTX, 0, OPTAB_LIB_WIDEN); /* Now emit a sequence of branches to jump to the proper precond loop entry point. */ labels = (rtx *) alloca (sizeof (rtx) * unroll_number); for (i = 0; i < unroll_number; i++) labels[i] = gen_label_rtx (); /* Check for the case where the initial value is greater than or equal to the final value. In that case, we want to execute exactly one loop iteration. The code below will fail for this case. This check does not apply if the loop has a NE comparison at the end. */ if (loop_comparison_code != NE) { emit_cmp_insn (initial_value, final_value, neg_inc ? LE : GE, NULL_RTX, mode, 0, 0); if (neg_inc) emit_jump_insn (gen_ble (labels[1])); else emit_jump_insn (gen_bge (labels[1])); JUMP_LABEL (get_last_insn ()) = labels[1]; LABEL_NUSES (labels[1])++; } /* Assuming the unroll_number is 4, and the increment is 2, then for a negative increment: for a positive increment: diff = 0,1 precond 0 diff = 0,7 precond 0 diff = 2,3 precond 3 diff = 1,2 precond 1 diff = 4,5 precond 2 diff = 3,4 precond 2 diff = 6,7 precond 1 diff = 5,6 precond 3 */ /* We only need to emit (unroll_number - 1) branches here, the last case just falls through to the following code. */ /* ??? This would give better code if we emitted a tree of branches instead of the current linear list of branches. */ for (i = 0; i < unroll_number - 1; i++) { int cmp_const; enum rtx_code cmp_code; /* For negative increments, must invert the constant compared against, except when comparing against zero. */ if (i == 0) { cmp_const = 0; cmp_code = EQ; } else if (neg_inc) { cmp_const = unroll_number - i; cmp_code = GE; } else { cmp_const = i; cmp_code = LE; } emit_cmp_insn (diff, GEN_INT (abs_inc * cmp_const), cmp_code, NULL_RTX, mode, 0, 0); if (i == 0) emit_jump_insn (gen_beq (labels[i])); else if (neg_inc) emit_jump_insn (gen_bge (labels[i])); else emit_jump_insn (gen_ble (labels[i])); JUMP_LABEL (get_last_insn ()) = labels[i]; LABEL_NUSES (labels[i])++; } /* If the increment is greater than one, then we need another branch, to handle other cases equivalent to 0. */ /* ??? This should be merged into the code above somehow to help simplify the code here, and reduce the number of branches emitted. For the negative increment case, the branch here could easily be merged with the `0' case branch above. For the positive increment case, it is not clear how this can be simplified. */ if (abs_inc != 1) { int cmp_const; enum rtx_code cmp_code; if (neg_inc) { cmp_const = abs_inc - 1; cmp_code = LE; } else { cmp_const = abs_inc * (unroll_number - 1) + 1; cmp_code = GE; } emit_cmp_insn (diff, GEN_INT (cmp_const), cmp_code, NULL_RTX, mode, 0, 0); if (neg_inc) emit_jump_insn (gen_ble (labels[0])); else emit_jump_insn (gen_bge (labels[0])); JUMP_LABEL (get_last_insn ()) = labels[0]; LABEL_NUSES (labels[0])++; } sequence = gen_sequence (); end_sequence (); emit_insn_before (sequence, loop_start); /* Only the last copy of the loop body here needs the exit test, so set copy_end to exclude the compare/branch here, and then reset it inside the loop when get to the last copy. */ if (GET_CODE (last_loop_insn) == BARRIER) copy_end = PREV_INSN (PREV_INSN (last_loop_insn)); else if (GET_CODE (last_loop_insn) == JUMP_INSN) {#ifdef HAVE_cc0 /* The immediately preceding insn is a compare which we do not want to copy. */ copy_end = PREV_INSN (PREV_INSN (last_loop_insn));#else /* The immediately preceding insn may not be a compare, so we must copy it. */ copy_end = PREV_INSN (last_loop_insn);#endif } else abort (); for (i = 1; i < unroll_number; i++) { emit_label_after (labels[unroll_number - i], PREV_INSN (loop_start)); bzero ((char *) map->insn_map, max_insnno * sizeof (rtx)); bzero ((char *) map->const_equiv_map, maxregnum * sizeof (rtx)); bzero ((char *) map->const_age_map, maxregnum * sizeof (unsigned)); map->const_age = 0; for (j = 0; j < max_labelno; j++) if (local_label[j]) map->label_map[j] = gen_label_rtx (); for (j = FIRST_PSEUDO_REGISTER; j < max_reg_before_loop; j++) if (local_regno[j]) map->reg_map[j] = gen_reg_rtx (GET_MODE (regno_reg_rtx[j])); /* The last copy needs the compare/branch insns at the end, so reset copy_end here if the loop ends with a conditional branch. */ if (i == unroll_number - 1) { if (GET_CODE (last_loop_insn) == BARRIER) copy_end = PREV_INSN (PREV_INSN (last_loop_insn)); else copy_end = last_loop_insn; } /* None of the copies are the `last_iteration', so just pass zero for that parameter. */ copy_loop_body (copy_start, copy_end, map, exit_label, 0, unroll_type, start_label, loop_end, loop_start, copy_end); } emit_label_after (labels[0], PREV_INSN (loop_start)); if (GET_CODE (last_loop_insn) == BARRIER) { insert_before = PREV_INSN (last_loop_insn); copy_end = PREV_INSN (insert_before); } else {#ifdef HAVE_cc0 /* The immediately preceding insn is a compare which we do not want to copy. */ insert_before = PREV_INSN (last_loop_insn); copy_end = PREV_INSN (insert_before);#else /* The immediately preceding insn may not be a compare, so we must copy it. */ insert_before = last_loop_insn; copy_end = PREV_INSN (last_loop_insn);#endif } /* Set unroll type to MODULO now. */ unroll_type = UNROLL_MODULO; loop_preconditioned = 1; } } /* If reach here, and the loop type is UNROLL_NAIVE, then don't unroll the loop unless all loops are being unrolled. */ if (unroll_type == UNROLL_NAIVE && ! flag_unroll_all_loops) { if (loop_dump_stream) fprintf (loop_dump_stream, "Unrolling failure: Naive unrolling not being done.\n"); return; } /* At this point, we are guaranteed to unroll the loop. */ /* For each biv and giv, determine whether it can be safely split into a different variable for each unrolled copy of the loop body. We precalculate and save this info here, since computing it is expensive. Do this before deleting any instructions from the loop, so that back_branch_in_range_p will work correctly. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -