// multi_producer.uc
;------------------------------------------------------------------------------------
;
; I N T E L P R O P R I E T A R Y
;
; COPYRIGHT (c) 2001 BY INTEL CORPORATION. ALL RIGHTS
; RESERVED. NO PART OF THIS PROGRAM OR PUBLICATION MAY
; BE REPRODUCED, TRANSMITTED, TRANSCRIBED, STORED IN A
; RETRIEVAL SYSTEM, OR TRANSLATED INTO ANY LANGUAGE OR COMPUTER
; LANGUAGE IN ANY FORM OR BY ANY MEANS, ELECTRONIC, MECHANICAL,
; MAGNETIC, OPTICAL, CHEMICAL, MANUAL, OR OTHERWISE, WITHOUT
; THE PRIOR WRITTEN PERMISSION OF :
;
; INTEL CORPORATION
;
; 2200 MISSION COLLEGE BLVD
;
; SANTA CLARA, CALIFORNIA 95052-8119
;
;------------------------------------------------------------------------------------
// File : multi_producer.uc
//
// Description :
// This example illustrates how to use scratch rings by showing a multi-producer /
// multi-consumer ring. ME 0 & ME 1 are the producers; each produces (puts) one word (4 bytes)
// at a time onto the ring. ME 2 is the consumer, which gets one word at a time. The important
// thing to note here is that before producing, a producer should always check if the ring
// is full, and the consumer, before consuming, should always check if the ring is empty.
// Code for the consumer (ME 2) can be found in multi_consumer.uc
//
// Each ME uses all available threads for producing/consuming. Hence it is called
// multi-producer - multi consumer (they still do not produce in parallel,
// there is still some kind of strict order (sequence) among the threads)
//
// Another thing to learn from this example when multiple threads are producing in
// strict order is what happens when the ring is full. Do we stall (loop) in that
// thread or do we drop the packet (assuming we are producing packet buffer descriptor
// on to the ring) and proceed to the next thread (which, in all likelihood will
// drop the next packet and so on , until space is available in the ring)?
// We choose to stall (loop), because that'll maintain the strict order (which in turn
// will make the pipeline deterministic). Also, by stalling, we use fewer resources,
// so that these resources (like SRAM bandwidth etc.) are more readily available
// to the consumer threads, which can then finish their job quickly.
//
//
// NOTE:
// The decision to have two MEs produce and one ME consume is meant to hit the ring
// full condition quickly and is for illustration purposes only. In practice we should
// balance the production speed with the consumption speed for efficient utilisation of
// resources.
//
// Pseudo code for producer.
// 1. Initialise Scratch Ring.
// Scratch ring is initialised at address 0x1000. The size of the ring is set
// to 128 entries. Init the head and tail to 0.
// 2. Signal the consumer (so that it can consume as soon as it is produced)
// 3. If ring is full stall here, until the ring has space.
// 4. Produce onto the ring. (this is a lengthy operation, will take some time to complete)
// 5. Signal next thread.
// 5a Can do anything else here.
// 6. Wait for ring put to complete and a signal from prev. thread
// 7. Go back to producing (step 3)
//
//
// Note: Step 4 and Step 5. Even though Step 4 takes some time to complete, we do not wait
// for it before signalling the next thread. (We can right away signal the next thread to
// start producing a word.)
#include "scratchring.h"
#include "sig_macros.uc"
///////////////////////////////////////////////////////////////////////////////
// init_scratch_ring
//
// Description:
// Programs one scratch ring. The macro writes the ring's base/size word,
// its head and its tail with three cap[write] commands and then waits
// until all three writes have signalled completion.
//
// The CSR names SCRATCH_RING_BASE_<n>, SCRATCH_RING_HEAD_<n> and
// SCRATCH_RING_TAIL_<n>, and the size encoding RING_SIZE_<size>, are built
// by token pasting from the macro arguments (these names are expected to
// be defined in scratchring.h).
//
// Outputs:
// None
//
// Inputs:
// RBASE The base address of the ring in scratch memory. Should be
// aligned on a 4 byte boundary. This should be a constant.
// RSIZE The size of the ring. Either 128, 256, 512 or 1024. This
// should be a constant.
// RING Ring number (0-15). This should be a constant.
//
// Size: 8 instructions
//
#macro init_scratch_ring[RBASE, RSIZE, RING]
.begin
.sig base_wr_done, head_wr_done, tail_wr_done ; one completion signal per cap[write]
.reg _base_val ; plain base address
.reg $_base_word, $_head_word, $_tail_word ; transfer registers sourced by the cap[write]s
// Evaluate the arguments once. Without these define_eval lines the caller
// could not put spaces between the parameters, as in init[a, b, c].
#define_eval RB RBASE
#define_eval RS RSIZE
#define_eval RN RING
// Base/size word: base address OR'ed with the size encoding placed in [31:30].
immed[_base_val, RB] ; ring base address
alu_shf[$_base_word, _base_val, or, RING_SIZE_/**/RS, <<30] ; [31:30] = ring size encoding
// Head and tail both start out at 0.
immed[$_head_word, 0x0] ; initial ring head
immed[$_tail_word, 0x0] ; initial ring tail
// Issue the three CSR writes back to back and collect their signals later.
// Note: at most 4 commands can be queued to any external unit (sram, dram,
// cap, etc.); beyond that the ME stalls. The limit counts commands from
// all MEs, and staying within it is the programmer's responsibility.
// Queuing 3 here is safe as long as this is the only thread and ME
// issuing commands at this point.
cap[write, $_base_word, SCRATCH_RING_BASE_/**/RN], sig_done[base_wr_done] ; ring base and size
cap[write, $_head_word, SCRATCH_RING_HEAD_/**/RN], sig_done[head_wr_done] ; head = 0
cap[write, $_tail_word, SCRATCH_RING_TAIL_/**/RN], sig_done[tail_wr_done] ; tail = 0
ctx_arb[base_wr_done, head_wr_done, tail_wr_done] ; wait for all three writes
#undef RB
#undef RS
#undef RN
.end
#endm
// Producer main flow: thread 0 sets up the ring and wakes the consumer, then
// every thread loops: check ring full -> put one word -> signal the next
// thread -> wait for its own put and for the previous thread's signal.
//
// Register and signal declarations
// $wdata : transfer register that sources the scratch put
// ring   : RING_NUMBER << 2, used as the address operand of the scratch put
// @data  : value put on the ring, incremented after every put
//          (presumably shared by all threads of the ME, since only thread 0
//          initialises it -- TODO confirm)
.reg $wdata, ring, @data
.sig scr_put ; completion signal for the scratch put
.sig volatile wake_thrd, wake_cons ; signals waking producer, waking consumer
// We use manual allocation for these signals because we have problems
// using "visible" (.sig visible wake_cons in this file) and "remote" in multi_consumer.uc
// Until it is solved, we'll have to use this manual allocation.
// SIG_WAKE_THRD and SIG_WAKE_CONS are not defined in this file; presumably
// they come from one of the included headers.
.addr wake_thrd SIG_WAKE_THRD
.addr wake_cons SIG_WAKE_CONS
// Code for thread 0
// NOTE(review): there is no check on the ME number here, so every ME that runs
// this file executes this branch in its thread 0 (ring init included) -- confirm
// this is intended when two MEs are producers.
.if (ctx() == 0)
// Configure the scratch ring. Base @ 0x1000, size=128, Ring number = 0
init_scratch_ring[RING_BASE, RING_SIZE, RING_NUMBER]
// Signal the ME (ME 2) that is going to consume from the ring.
signal_me[0x02, wake_cons] ; Signal "consumer" ME 2, in cluster 0
// One-time init for thread 0: its ring operand and the first data word.
alu_shf[ring, --, b, RING_NUMBER, <<2] ; ring = RING_NUMBER << 2
immed[@data, 1] ; first value to be put into the ring
br[poll#] ; thread 0 starts producing without waiting for a signal.
.endif
// All other threads start here.
// First time init code.
alu_shf[ring, --, b, RING_NUMBER, <<2] ; ring = RING_NUMBER << 2
// Wait for signal from previous thread.
ctx_arb[wake_thrd]
// Common code for all threads.
poll#:
// Produce on the ring, only if the ring is not full.
// If ring is full, we stall by looping. Yielding to other threads is an option
// but is not very useful as we normally have the threads in one ME executing
// in strict order (to be deterministic) in a pipeline and we wouldn't want to
// disrupt this pipeline.
br_inp_state[RING_FULL, Full#] ; Check if ring is full
alu[$wdata, --, b, @data] ; copy the current value into the transfer register
scratch[put, $wdata, 0, ring, 1], sig_done[scr_put] ; Produce one word on the ring
alu[@data, @data, +, 1] ; Increment next value to be put
// Signal next thread so that next thread can start producing.
// Remember we don't have to wait for the scratch put to
// complete before signalling. Next thread can produce on to this
// ring before our put is completed.
// This is easily observed by setting break points here. The scratch put
// data of thread 0 is available in scratch memory only when you reach
// this break point for thread 2 or 3.
// The branch below is the hand-written form of the commented-out .if/.else.
//.if (ctx() != 7)
br=ctx[7, thread7#] ; context 7 takes the thread7# path instead
signal_next_ctx[wake_thrd] ; signal next thread in the same ME.
br[wait#] ; wait for next iteration.
//.else
thread7#:
signal_next_me[wake_thrd] ; Thread 7, so signal next ME.
//.endif
wait#:
ctx_arb[wake_thrd, scr_put] ; wait for scratch put and prev thread
br[poll#] ; Keep producing on the ring.
// The ring is full. We come here, instead of directly branching
// to poll#, to enable setting a breakpoint here, so that we know
// that the ring in fact gets full. The fact that two MEs produce and
// only one ME consumes makes sure we hit this condition soon.
Full#:
// It takes about 1600 cycles to hit ring full condition
nop ; set breakpoint here to see ring gets filled.
br[poll#] ; go back and check if ring is full (loops without yielding)
// End of multi_producer.uc