📄 modsched.java
字号:
5 9 4 8 3 7 11 2 6 10 1 5 9 0 4 8 3 7 11 2 6 10 1 5 9 0 4 8 3 7 2 6 1 5 0 4 3 2 1 0 As, can be seen, there is a repeating pattern of instructions that looks like this: 3 7 11 2 6 10 1 5 9 0 4 8 This block can be repeated over, and in effect run three iterations of the original loop in parallel. This is the kernal, created during modulo scheduling. But there is time before this block first appears: 3 7 2 6 1 5 0 4 3 2 1 0 This is the prolog. To find this, we need to know how long the schedule is (in our example 12), where that instruction is in the kernal (in our case at cycle 3), and ii. Then we can copy each of these first iterations into the prolog. The first iteration runs from 8 to 11 within the kernal. We can find out how much ran before by subracting ii from the height of the loop. That is 11-4 = 7, and we need to copy instructions from the unrolled loop from cycle 0 to 7. The second iteration runs two full iterations of the kernal and it goes from 0 to 11 - 2*ii = 3 outside the kernal. So first, I calculate how far into the kernal, the first iteration reaches (which is actually the length of the loop - the execution time in the kernal of its last instructions, so if we had a schedule of length 10 with this kernal: 3 7 2 6 1 5 9 0 4 8 we would need to do 9-2 = 7, to find the top of the first iteration. Then We just need to go up in increments of ii, copying 0 to 7, then 0 to 3, and when more iterations run in parallel, more copies are necessary. */ int max = (int)(nums.schedLength - nums.lastInstExecTime-1); //for each iteration of the prolog for(int start = 0; start<max; start+=ii) { //copy its instructions from the unrolled loop to the prolog list InstructionList oneItProList = unrolledList.partialListCopy(0, max-start); /*System.out.println("one iteration of prolog "); oneItProList.printSchedule(); System.out.println(" "); System.out.println(" "); System.out.println("prolog after killing P's "); */ /*DependenceFlowGraph dfg2 = new DependenceFlowGraph(graph.getName() + "_" + node.getLabel().getName() + "_tmp_mod_scheduled"); BlockNode nodeTmp = new BlockNode(); nodeTmp.addInstructions(new ArrayList(oneItProList.getInstructions())); System.out.println("add instructions to node and instantiate DFG"); dfg2.generateGraph(nodeTmp, oneItProList); nodeTmp = null; System.out.println("created DFG"); oneItProList.saveDFG(dfg2); System.out.println("saved DFG");*/ //delete chains of loads and stores to get rid of the mod prim variables oneItProList.killLSChains(); /*oneItProList.printSchedule(); System.out.println(" "); System.out.println(" "); System.out.println("prolog after removing block to block copies ");*/ //delete block to block aliases oneItProList.killEmBlockToBlockCopies(); /*oneItProList.printSchedule(); System.out.println(" "); System.out.println(" "); System.out.println("prolog after removing block defining ckts ");*/ //delete groups of instructions that result only in the definition of //block operands oneItProList.killBlockDefiningCircuits(); /*oneItProList.printSchedule(); System.out.println(" "); System.out.println(" ");*/ //add this iteration to the whole prolog list proList.addAll(oneItProList); } proList.removeDuplicateInsts(); /*System.out.println("whole pro before removing duplicates "); proList.printSchedule(); System.out.println(" "); System.out.println(" ");*/ //remove duplicate sets of instructions //proList.removeDuplicateInsts(); /*System.out.println("final pro "); proList.printSchedule(); System.out.println(" "); System.out.println(" "); System.out.println(" "); System.out.println(" ");*/ /** ****************************************************************** epilog creation ****************************************************************** given the example above: 11 10 9 8 7 11 6 10 5 9 4 8 3 7 11 2 6 10 1 5 9 0 4 8 3 7 11 2 6 10 1 5 9 0 4 8 3 7 2 6 1 5 0 4 3 2 1 0 the epilog is this: 11 10 9 8 7 11 6 10 5 9 4 8 For this we just need to copy all the instructions from the end back to ii*iteration count. So for the second to last iteration 11 - ii = 7 and for the final 11- ii*2 = 3. */ max = (int)(Math.ceil(nums.schedLength/ii)*ii); InstructionList epiList = new InstructionList(); //for(int start=ii; start<=max+max%(ii); start+=ii) { //foreach iteration for(int start=ii; start<=max; start+=ii) { //System.out.println("start " + start); //System.out.println("ii " + ii); //System.out.println("max " + max); //int min = (int)(start - nums.fstStartTime + 1); // System.out.println("min " + min); //copy the iteration from the unrolled loop, but use new block variables //for each iteration, so that there will be no collisions of data InstructionList oneItEpiList = unrolledList.partialListCopyNewBlocks(start, max); /*System.out.println("one iteration of epilog "); oneItEpiList.printSchedule(); System.out.println(" "); System.out.println(" "); System.out.println("epilog after killing P's ");*/ /* DependenceFlowGraph dfg3 = new DependenceFlowGraph(graph.getName() + "_" + node.getLabel().getName() + "_tmp_mod_scheduled"); BlockNode nodeTmp2 = new BlockNode(); nodeTmp2.addInstructions(new ArrayList(oneItEpiList.getInstructions())); dfg3.generateGraph(nodeTmp2, oneItEpiList); oneItEpiList.saveDFG(dfg3);*/ //delete chains of loads and stores which were used to communicate over //the kernal loop boundaries oneItEpiList.killLSChains(); /*oneItEpiList.printSchedule(); System.out.println(" "); System.out.println(" "); System.out.println("epilog after removing block to block copies ");*/ //delete aliases caused by loading or storing blocks to blocks oneItEpiList.killEmBlockToBlockCopies(); /*oneItEpiList.printSchedule(); System.out.println(" "); System.out.println(" ");*/ //call addMuxes, to take note of primals that need mux trees and to //change the instructions to define an iteration and primal specific //block operand instead of storing to whichever primal it was previously //storing to muxAdder.addMuxes(oneItEpiList, muxGen, loopExitPredicate); /*System.out.println("epilog after adding muxes "); oneItEpiList.printSchedule(); System.out.println(" "); System.out.println(" ");*/ //add this iteration to the whole epilog list: epiList.addAll(oneItEpiList); } //often for many of the iterations, the final result that is to be stored //in the output primal has been calculated in the kernal and saved to one //of the modPrim transfer primals. In addition, often, many of the //iterations read from the same primal. If this is the case, the mux tree //can possibly be collapsed so that instead of trying to choose between //multiple loads of the same primal, it will simply take the value in the //primal once. Additionally, we only need one load, but multiple //iterations may //be loading that primal, each of which would use its own load instruction //to do so. Therefore, when I call findModPrimSources, to find out which //iterations use the value in a modPrim variable, so that mux tree //collapsing can occur, I also delete all those load statements, so //MuxTableGenerator can create one at the end for all. epiList.removeInsts(muxGen.findModPrimSources(epiList.getInstructions())); //Since MuxTableGenerator is creating its own modPrim loads, it can rewire //all uses of that modPrim to use its load. To do this, it first creates //a new block variable to hold the value after loading and then, changes //all instructions to use this. muxGen.replaceOpsWithNewMPrimBs(epiList.getInstructions()); /*System.out.println("removing mod prim loads "); epiList.printSchedule(); System.out.println(" "); System.out.println(" ");*/ //Here we add the mux tree, its control logic, the kernal predicate stuff, //and the new modPrim loads (see MuxTableGenerator.java for more) epiList.addAllInsts(muxGen.genMuxTable()); /*System.out.println("adding muxes "); epiList.printSchedule(); System.out.println(" "); System.out.println(" "); System.out.println("epilog after removing block defining ckts "); System.out.println("epilog after cseremoval ");*/ /** this is some really bad coding style, but sorry, I was feeling lazy */ //BlockNode nodeTmp = new BlockNode(); //HashSet cseIinstSet = epiList.getInstructions(); //epiList.removeInsts(cseIinstSet); //nodeTmp.addInstructions(new ArrayList(cseIinstSet)); //CSERemovalCode theCSErminator = new CSERemovalCode(); /*System.out.println("====================================================== "); System.out.println(" "); System.out.println("b4 " + nodeTmp.getInstructions());*/ //theCSErminator.removeEmCSEs(nodeTmp); //theCSErminator.removeEmCSEs(nodeTmp); //theCSErminator.removeEmCSEs(nodeTmp); /*System.out.println("afta " + nodeTmp.getInstructions()); System.out.println(" "); System.out.println("====================================================== ");*/ //epiList = new InstructionList(); //epiList.addAllInsts(nodeTmp.getInstructions()); /*epiList.printSchedule(); System.out.println(" "); System.out.println(" "); //delete all groups of instructions which only define a block variable System.out.println("epilog after removing block defining ckts ");*/ epiList.killBlockDefiningCircuits(); /*epiList.printSchedule(); System.out.println(" "); System.out.println(" ");*/ //delete duplicate instructions epiList.removeDuplicateInsts(); //delete any leftover stores to modprims since nobody else except the mod //sched blocks care about them deleteStoresToModPrims(epiList); /*System.out.println("final epi "); epiList.printSchedule();*/ //run op select on all the new instructions added by MuxTableGenerator for (Iterator it = epiList.getInstructions().iterator(); it.hasNext(); ) { Instruction instr = (Instruction)it.next(); _opSel.select(instr); } //create the epilog and prolog nodes: muxAdder.createProNEpiNodes(node, graph, proList, epiList, chipInfo); } /** The mod prim operands are registers used to transfer data between iterations of the kernal. In the example above: 3 7 11 2 6 10 1 5 9 0 4 8 At the end of cycle 3, all important values would be stored to a mod prim register, and loaded again in cycle 4, and the same would happen between 7 and 8. When the loop is unrolled and the epilog and prolog are created, these loads and stores are still there. The epilog does not need anymore of these mod prim operands, since the rest of the circuit does not care about internal loop values. Therefore, if there are any left after all the other changes to the epilog, this method is called to delete all stores left in the epilog to mod prim operands. */ public void deleteStoresToModPrims(InstructionList epiList) { for (Iterator vIt = ((HashSet)epiList.getInstSet().clone()).iterator(); vIt.hasNext();) { InstructionList.InstructionObject instObj = (InstructionList.InstructionObject) vIt.next(); Instruction inst = instObj.inst; if((Store.conforms(inst))&& (Store.getDestination(inst).getFullName().indexOf("modPrim")>=0)) epiList.remove(instObj); } } /** This method should probably be moved to the chipdef class. It initializes the operator use handlers (that is the code that counts how often individual modules are used and checks for overuse). */ public void initOpUseLists(BlockNode node, ChipDef chipDef) { for (Iterator it = node.getInstructions().iterator(); it.hasNext(); ) { Instruction instr = (Instruction)it.next(); chipDef.initOpUseLists(instr.operator()); } } private void sort(ArrayList o_list) { class DoubleCompare implements Comparator { public int compare(Object o1, Object o2) { if (o1 instanceof Instruction && o2 instanceof Instruction) { Instruction p1 = (Instruction)o1; Instruction p2 = (Instruction)o2; if (p1.getExecTime() > p2.getExecTime()) { return 1; } else if (p1.getExecTime() < p2.getExecTime()) { return -1; } else { return 0; } } else { throw new ClassCastException("Not Instruction"); } } } Collections.sort(o_list, new DoubleCompare()); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -