📄 rf_pq.c
字号:
/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Daniel Stodolsky
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
 * School of Computer Science
 * Carnegie Mellon University
 * Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * Code for RAID level 6 (P + Q) disk array architecture.
 *
 * $Locker: $
 * $Log: rf_pq.c,v $
 * Revision 1.32 1996/07/31 16:29:50 jimz
 * "fix" math on 32-bit machines using RF_LONGSHIFT
 * (may be incorrect)
 *
 * Revision 1.31 1996/07/31 15:35:01 jimz
 * evenodd changes; bugfixes for double-degraded archs, generalize
 * some formerly PQ-only functions
 *
 * Revision 1.30 1996/07/27 23:36:08 jimz
 * Solaris port of simulator
 *
 * Revision 1.29 1996/07/22 19:52:16 jimz
 * switched node params to RF_DagParam_t, a union of
 * a 64-bit int and a void *, for better portability
 * attempted hpux port, but failed partway through for
 * lack of a single C compiler capable of compiling all
 * source files
 *
 * Revision 1.28 1996/06/09 02:36:46 jimz
 * lots of little crufty cleanup- fixup whitespace
 * issues, comment #ifdefs, improve typing in some
 * places (esp size-related)
 *
 * Revision 1.27 1996/06/07 21:33:04 jimz
 * begin using consistent types for sector numbers,
 * stripe numbers, row+col numbers, recon unit numbers
 *
 * Revision 1.26 1996/06/02 17:31:48 jimz
 * Moved a lot of global stuff into array structure, where it belongs.
 * Fixed up paritylogging, pss modules in this manner. Some general
 * code cleanup. Removed lots of dead code, some dead files.
 *
 * Revision 1.25 1996/05/31 22:26:54 jimz
 * fix a lot of mapping problems, memory allocation problems
 * found some weird lock issues, fixed 'em
 * more code cleanup
 *
 * Revision 1.24 1996/05/30 23:22:16 jimz
 * bugfixes of serialization, timing problems
 * more cleanup
 *
 * Revision 1.23 1996/05/30 12:59:18 jimz
 * make etimer happier, more portable
 *
 * Revision 1.22 1996/05/27 18:56:37 jimz
 * more code cleanup
 * better typing
 * compiles in all 3 environments
 *
 * Revision 1.21 1996/05/24 22:17:04 jimz
 * continue code + namespace cleanup
 * typed a bunch of flags
 *
 * Revision 1.20 1996/05/24 04:28:55 jimz
 * release cleanup ckpt
 *
 * Revision 1.19 1996/05/23 21:46:35 jimz
 * checkpoint in code cleanup (release prep)
 * lots of types, function names have been fixed
 *
 * Revision 1.18 1996/05/23 00:33:23 jimz
 * code cleanup: move all debug decls to rf_options.c, all extern
 * debug decls to rf_options.h, all debug vars preceded by rf_
 *
 * Revision 1.17 1996/05/18 19:51:34 jimz
 * major code cleanup- fix syntax, make some types consistent,
 * add prototypes, clean out dead code, et cetera
 *
 * Revision 1.16 1996/05/17 14:52:04 wvcii
 * added prototyping to QDelta()
 * - changed buf params from volatile unsigned long * to char *
 * changed QDelta for kernel
 * - just bzero the buf since kernel doesn't include pq decode table
 *
 * Revision 1.15 1996/05/03 19:40:20 wvcii
 * added includes for dag library
 *
 * Revision 1.14 1995/12/12 18:10:06 jimz
 * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
 * fix 80-column brain damage in comments
 *
 * Revision 1.13 1995/11/30 16:19:55 wvcii
 * added copyright info
 *
 * Revision 1.12 1995/11/07 16:13:47 wvcii
 * changed PQDagSelect prototype
 * function no longer returns numHdrSucc, numTermAnt
 * note: this file contains node functions which should be
 * moved to rf_dagfuncs.c so that all node funcs are bundled together
 *
 * Revision 1.11 1995/10/04 03:50:33 wvcii
 * removed panics, minor code cleanup in dag selection
 *
 *
 */

#include "rf_archs.h"
#include "rf_types.h"
#include "rf_raid.h"
#include "rf_dag.h"
#include "rf_dagffrd.h"
#include "rf_dagffwr.h"
#include "rf_dagdegrd.h"
#include "rf_dagdegwr.h"
#include "rf_dagutils.h"
#include "rf_dagfuncs.h"
#include "rf_threadid.h"
#include "rf_etimer.h"
#include "rf_pqdeg.h"
#include "rf_general.h"
#include "rf_map.h"
#include "rf_pq.h"
#include "rf_sys.h"

/* Redundancy function tables for P: each entry pairs a node function with its display name. */
RF_RedFuncs_t rf_pFuncs = { rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P" };
RF_RedFuncs_t rf_pRecoveryFuncs = { rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func" };

/* Regular old-new P update: delegates directly to rf_RegularXorFunc. */
int rf_RegularONPFunc(RF_DagNode_t *node)
{
  return(rf_RegularXorFunc(node));
}

/*
   same as simpleONQ func, but the coefficient is always 1
   (implemented by delegating to rf_SimpleXorFunc)
*/
int rf_SimpleONPFunc(RF_DagNode_t *node)
{
  return(rf_SimpleXorFunc(node));
}

/* P recovery: delegates directly to rf_RecoveryXorFunc. */
int rf_RecoveryPFunc(RF_DagNode_t *node)
{
  return(rf_RecoveryXorFunc(node));
}

/* Regular P computation: delegates directly to rf_RegularXorFunc. */
int rf_RegularPFunc(RF_DagNode_t *node)
{
  return(rf_RegularXorFunc(node));
}

#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)

static void QDelta(char *dest, char *obuf, char *nbuf, unsigned length, unsigned char coeff);
static void rf_InvertQ(unsigned long *qbuf, unsigned long *abuf, unsigned length, unsigned coeff);

/* Redundancy function tables for Q and for combined PQ recovery. */
RF_RedFuncs_t rf_qFuncs = { rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q" };
RF_RedFuncs_t rf_qRecoveryFuncs = { rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func" };
RF_RedFuncs_t rf_pqRecoveryFuncs = { rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func" };

/*
   Select the DAG creation function for an access to a P+Q stripe.

   raidPtr    - array descriptor; its Layout supplies numDataCol
   type       - must satisfy RF_IO_IS_R_OR_W
   asmap      - access stripe map; numDataFailed and numParityFailed
                drive the selection
   createFunc - out: receives the chosen creation function, or NULL when
                more than two units in the group have failed

   Reads are dispatched on the number of failed data units, writes on the
   total number of failed units (data + parity).
*/
void rf_PQDagSelect(
  RF_Raid_t             *raidPtr,
  RF_IoType_t            type,
  RF_AccessStripeMap_t  *asmap,
  RF_VoidFuncPtr        *createFunc)
{
  RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
  unsigned ndfail = asmap->numDataFailed;
  unsigned npfail = asmap->numParityFailed;
  unsigned ntfail = npfail + ndfail;

  RF_ASSERT(RF_IO_IS_R_OR_W(type));
  if (ntfail > 2)
    {
      RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n");
      /* *infoFunc = */ *createFunc = NULL;
      return;
    }

  /* ok, we can do this I/O */
  if (type == RF_IO_TYPE_READ)
    {
      switch (ndfail)
        {
        case 0:
          /* fault free read */
          *createFunc = rf_CreateFaultFreeReadDAG; /* same as raid 5 */
          break;
        case 1:
          /* lost a single data unit */
          /* two cases:
                (1) parity is not lost.
                    do a normal raid 5 reconstruct read.
                (2) parity is lost.
                    do a reconstruct read using "q".
          */
          if (ntfail == 2) /* also lost redundancy */
            {
              if (asmap->failedPDAtwo->type == RF_PDA_TYPE_PARITY)
                *createFunc = rf_PQ_110_CreateReadDAG;
              else
                *createFunc = rf_PQ_101_CreateReadDAG;
            }
          else
            {
              /* P and Q are ok. But is there a failure
                 in some unaccessed data unit?
              */
              if (rf_NumFailedDataUnitsInStripe(raidPtr,asmap)==2)
                *createFunc = rf_PQ_200_CreateReadDAG;
              else
                *createFunc = rf_PQ_100_CreateReadDAG;
            }
          break;
        case 2:
          /* lost two data units */
          /* *infoFunc = PQOneTwo; */
          *createFunc = rf_PQ_200_CreateReadDAG;
          break;
        }
      return;
    }

  /* a write */
  switch (ntfail)
    {
    case 0: /* fault free */
      if (rf_suppressLocksAndLargeWrites ||
          (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) ||
           (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {

        *createFunc = rf_PQCreateSmallWriteDAG;
      }
      else {
        *createFunc = rf_PQCreateLargeWriteDAG;
      }
      break;

    case 1: /* single disk fault */
      if (npfail==1)
        {
          RF_ASSERT ((asmap->failedPDA->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDA->type == RF_PDA_TYPE_Q));
          if (asmap->failedPDA->type == RF_PDA_TYPE_Q)
            { /* q died, treat like normal mode raid5 write.*/
              if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
                  || rf_NumFailedDataUnitsInStripe(raidPtr,asmap))
                *createFunc = rf_PQ_001_CreateSmallWriteDAG;
              else
                *createFunc = rf_PQ_001_CreateLargeWriteDAG;
            }
          else
            { /* parity died, small write only updating Q */
              if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
                  || rf_NumFailedDataUnitsInStripe(raidPtr,asmap))
                *createFunc = rf_PQ_010_CreateSmallWriteDAG;
              else
                *createFunc = rf_PQ_010_CreateLargeWriteDAG;
            }
        }
      else
        { /* data missing.
             Do a P reconstruct write if only a single data unit
             is lost in the stripe, otherwise a PQ reconstruct
             write. */
          if (rf_NumFailedDataUnitsInStripe(raidPtr,asmap)==2)
            *createFunc = rf_PQ_200_CreateWriteDAG;
          else
            *createFunc = rf_PQ_100_CreateWriteDAG;
        }
      break;

    case 2: /* two disk faults */
      switch (npfail)
        {
        case 2: /* both p and q dead */
          *createFunc = rf_PQ_011_CreateWriteDAG;
          break;
        case 1: /* either p or q and dead data */
          RF_ASSERT(asmap->failedPDA->type == RF_PDA_TYPE_DATA);
          RF_ASSERT ((asmap->failedPDAtwo->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAtwo->type == RF_PDA_TYPE_Q));
          if (asmap->failedPDAtwo->type == RF_PDA_TYPE_Q)
            *createFunc = rf_PQ_101_CreateWriteDAG;
          else
            *createFunc = rf_PQ_110_CreateWriteDAG;
          break;
        case 0: /* double data loss */
          *createFunc = rf_PQ_200_CreateWriteDAG;
          break;
        }
      break;

    default: /* more than 2 disk faults */
      /* not reachable through the ntfail > 2 early return above; kept as a safety net */
      *createFunc = NULL;
      RF_PANIC();
    }
  return;
}

/*
   Used as a stop gap info function.
   Reports one successor and one antecedent; raidPtr and asmap are not read.
*/
static void PQOne(RF_Raid_t *raidPtr, int *nSucc, int *nAnte, RF_AccessStripeMap_t *asmap)
{
  *nSucc = *nAnte = 1;
}

/*
   Stop gap info function reporting one successor and two antecedents;
   raidPtr and asmap are not read.
*/
static void PQOneTwo(RF_Raid_t *raidPtr, int *nSucc, int *nAnte, RF_AccessStripeMap_t *asmap)
{
  *nSucc = 1;
  *nAnte = 2;
}

/*
   Large-write DAG creation for P+Q: forwards to the common large-write
   builder with 2 as the count argument (presumably the number of
   redundancy units, P and Q -- confirm against rf_CommonCreateLargeWriteDAG)
   and rf_RegularPQFunc as the redundancy function.
*/
RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)
{
  rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2, rf_RegularPQFunc, RF_FALSE);
}

/*
   Regular old-new Q update.

   The node carries 4d+3 params (asserted below), laid out as

     [2i], [2i+1]             old data pda_i, old data buffer_i   (0 <= i < d)
     [2d], [2d+1]             q pda, q buffer
     [2(d+1+i)], [2(d+1+i)+1] new data pda_i, new data buffer_i   (0 <= i < d)
     [4d+2]                   raidPtr

   For every old/new pair QDelta is applied to the q buffer at the byte
   offset of the pair's start sector within its stripe unit. The elapsed
   time is added to the trace record's q_us, and the node is woken
   explicitly. Always returns 0.
*/
int rf_RegularONQFunc(RF_DagNode_t *node)
{
  int np = node->numParams;
  int d;
  RF_Raid_t *raidPtr = (RF_Raid_t *)node->params[np-1].p;
  int i;
  RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  RF_Etimer_t timer;
  char *qbuf, *qpbuf;
  char *obuf, *nbuf;
  RF_PhysDiskAddr_t *old, *new;
  unsigned long coeff;
  unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;

  RF_ETIMER_START(timer);

  d = (np-3)/4;
  RF_ASSERT (4*d+3 == np);
  qbuf = (char *) node->params[2*d+1].p; /* q buffer*/
  for (i=0; i < d; i++)
    {
      old  = (RF_PhysDiskAddr_t *) node->params[2*i].p;
      obuf = (char *) node->params[2*i+1].p;
      new  = (RF_PhysDiskAddr_t *) node->params[2*(d+1+i)].p;
      nbuf = (char *) node->params[2*(d+1+i)+1].p;
      RF_ASSERT (new->numSector == old->numSector);
      RF_ASSERT (new->raidAddress == old->raidAddress);
      /* the stripe unit within the stripe tells us the coefficient to use
         for the multiply. */
      coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),new->raidAddress);
      /* reduce to the data unit offset within the stripe.
         NOTE(review): the earlier comment here said "then add one", but no
         increment is performed -- confirm what QDelta expects. */
      coeff = (coeff % raidPtr->Layout.numDataCol);
      /* shift into the q buffer by the sector offset within the stripe unit */
      qpbuf = qbuf + rf_RaidAddressToByte(raidPtr,old->startSector % secPerSU);
      QDelta(qpbuf,obuf,nbuf, rf_RaidAddressToByte(raidPtr, old->numSector),coeff);
    }

  RF_ETIMER_STOP(timer);
  RF_ETIMER_EVAL(timer);
  tracerec->q_us += RF_ETIMER_VAL_US(timer);
  rf_GenericWakeupFunc(node, 0);     /* call wake func explicitly since no I/O in this node */
  return(0);
}

/*
   See the SimpleXORFunc for the difference between a simple and regular func.
   These Q functions should be used for

         new q = Q(data,old data,old q)

   style updates and not for

         q = ( new data, new data, .... )

   computations.

   The simple q takes 2(2d+1)+1 params, where d is the number of old/new
   data buffer pairs processed. The order of params is

   old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ... old data pda_(d-1), old data buffer_(d-1)
   [2d] old q pda_0, old q buffer
   [2d+2] new data pda_0, new data buffer_0, ...                                  new data pda_(d-1), new data buffer_(d-1)
   raidPtr

   Unlike the regular variant, QDelta is applied at the start of the q
   buffer for every pair. Always returns 0.
*/
int rf_SimpleONQFunc(RF_DagNode_t *node)
{
  int np = node->numParams;
  int d;
  RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-1].p;
  int i;
  RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  RF_Etimer_t timer;
  char *qbuf;
  char *obuf, *nbuf;
  RF_PhysDiskAddr_t *old, *new;
  unsigned long coeff;

  RF_ETIMER_START(timer);

  d = (np-3)/4;
  RF_ASSERT (4*d+3 == np);
  qbuf = (char *) node->params[2*d+1].p; /* q buffer*/
  for (i=0; i < d; i++)
    {
      old  = (RF_PhysDiskAddr_t *) node->params[2*i].p;
      obuf = (char *) node->params[2*i+1].p;
      new  = (RF_PhysDiskAddr_t *) node->params[2*(d+1+i)].p;
      nbuf = (char *) node->params[2*(d+1+i)+1].p;
      RF_ASSERT (new->numSector == old->numSector);
      RF_ASSERT (new->raidAddress == old->raidAddress);
      /* the stripe unit within the stripe tells us the coefficient to use
         for the multiply. */
      coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),new->raidAddress);
      /* reduce to the data unit offset within the stripe.
         NOTE(review): the earlier comment here said "then add one", but no
         increment is performed -- confirm what QDelta expects. */
      coeff = (coeff % raidPtr->Layout.numDataCol);
      QDelta(qbuf,obuf,nbuf, rf_RaidAddressToByte(raidPtr, old->numSector),coeff);
    }

  RF_ETIMER_STOP(timer);
  RF_ETIMER_EVAL(timer);
  tracerec->q_us += RF_ETIMER_VAL_US(timer);
  rf_GenericWakeupFunc(node, 0);     /* call wake func explicitly since no I/O in this node */
  return(0);
}

/*
   Small-write DAG creation for P+Q: forwards to the common small-write
   builder with the P and Q old-new function tables.
*/
RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG)
{
  rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs);
}

/*
   Fold every data buffer of the node into qbuf via rf_IncQ.

   The node carries 2d+1 params (asserted below):

     [2i], [2i+1]  data pda_i, data buffer_i   (0 <= i < d)
     [2d]          raidPtr

   Each buffer is applied at the byte offset of its start sector within
   the stripe unit. The elapsed time is added to the trace record's q_us.
   The node is not woken here.
*/
static void RegularQSubr(RF_DagNode_t *node, char *qbuf)
{
  int np = node->numParams;
  int d;
  RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-1].p;
  unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
  int i;
  RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  RF_Etimer_t timer;
  char *obuf, *qpbuf;
  RF_PhysDiskAddr_t *old;
  unsigned long coeff;

  RF_ETIMER_START(timer);

  d = (np-1)/2;
  RF_ASSERT (2*d+1 == np);
  for (i=0; i < d; i++)
    {
      old  = (RF_PhysDiskAddr_t *) node->params[2*i].p;
      obuf = (char *) node->params[2*i+1].p;
      coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),old->raidAddress);
      /* reduce to the data unit offset within the stripe.
         NOTE(review): the earlier comment here said "then add one", but no
         increment is performed -- confirm what rf_IncQ expects. */
      coeff = (coeff % raidPtr->Layout.numDataCol);
      /* the input buffers may not all be aligned with the start of the
         stripe. so shift by their sector offset within the stripe unit */
      qpbuf = qbuf + rf_RaidAddressToByte(raidPtr,old->startSector % secPerSU);
      rf_IncQ((unsigned long *)qpbuf,(unsigned long *)obuf,rf_RaidAddressToByte(raidPtr, old->numSector),coeff);
    }

  RF_ETIMER_STOP(timer);
  RF_ETIMER_EVAL(timer);
  tracerec->q_us += RF_ETIMER_VAL_US(timer);
}

/*
   used in degraded writes.
*/
static void DegrQSubr(node)
  RF_DagNode_t *node;
{
  int np = node->numParams;
  int d;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -