📄 jfs_logmgr.c
字号:
/* * Copyright (c) International Business Machines Corp., 2000-2003 * Portions Copyright (c) Christoph Hellwig, 2001-2002 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *//* * jfs_logmgr.c: log manager * * for related information, see transaction manager (jfs_txnmgr.c), and * recovery manager (jfs_logredo.c). * * note: for detail, RTFS. * * log buffer manager: * special purpose buffer manager supporting log i/o requirements. * per log serial pageout of logpage * queuing i/o requests and redrive i/o at iodone * maintain current logpage buffer * no caching since append only * appropriate jfs buffer cache buffers as needed * * group commit: * transactions which wrote COMMIT records in the same in-memory * log page during the pageout of previous/current log page(s) are * committed together by the pageout of the page. * * TBD lazy commit: * transactions are committed asynchronously when the log page * containing it COMMIT is paged out when it becomes full; * * serialization: * . a per log lock serialize log write. * . a per log lock serialize group commit. * . a per log lock serialize log open/close; * * TBD log integrity: * careful-write (ping-pong) of last logpage to recover from crash * in overwrite. * detection of split (out-of-order) write of physical sectors * of last logpage via timestamp at end of each sector * with its mirror data array at trailer). * * alternatives: * lsn - 64-bit monotonically increasing integer vs * 32-bit lspn and page eor. */#include <linux/fs.h>#include <linux/locks.h>#include <linux/blkdev.h>#include <linux/interrupt.h>#include <linux/smp_lock.h>#include <linux/completion.h>#include "jfs_incore.h"#include "jfs_filsys.h"#include "jfs_metapage.h"#include "jfs_txnmgr.h"#include "jfs_debug.h"/* * lbuf's ready to be redriven. Protected by log_redrive_lock (jfsIO thread) */static struct lbuf *log_redrive_list;static spinlock_t log_redrive_lock = SPIN_LOCK_UNLOCKED;DECLARE_WAIT_QUEUE_HEAD(jfs_IO_thread_wait);/* * log read/write serialization (per log) */#define LOG_LOCK_INIT(log) init_MUTEX(&(log)->loglock)#define LOG_LOCK(log) down(&((log)->loglock))#define LOG_UNLOCK(log) up(&((log)->loglock))/* * log group commit serialization (per log) */#define LOGGC_LOCK_INIT(log) spin_lock_init(&(log)->gclock)#define LOGGC_LOCK(log) spin_lock_irq(&(log)->gclock)#define LOGGC_UNLOCK(log) spin_unlock_irq(&(log)->gclock)#define LOGGC_WAKEUP(tblk) wake_up_all(&(tblk)->gcwait)/* * log sync serialization (per log) */#define LOGSYNC_DELTA(logsize) min((logsize)/8, 128*LOGPSIZE)#define LOGSYNC_BARRIER(logsize) ((logsize)/4)/*#define LOGSYNC_DELTA(logsize) min((logsize)/4, 256*LOGPSIZE)#define LOGSYNC_BARRIER(logsize) ((logsize)/2)*//* * log buffer cache synchronization */static spinlock_t jfsLCacheLock = SPIN_LOCK_UNLOCKED;#define LCACHE_LOCK(flags) spin_lock_irqsave(&jfsLCacheLock, flags)#define LCACHE_UNLOCK(flags) spin_unlock_irqrestore(&jfsLCacheLock, flags)/* * See __SLEEP_COND in jfs_locks.h */#define LCACHE_SLEEP_COND(wq, cond, flags) \do { \ if (cond) \ break; \ __SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \} while (0)#define LCACHE_WAKEUP(event) wake_up(event)/* * lbuf buffer cache (lCache) control *//* log buffer manager pageout control (cumulative, inclusive) */#define lbmREAD 0x0001#define lbmWRITE 0x0002 /* enqueue at tail of write queue; * init pageout if at head of queue; */#define lbmRELEASE 0x0004 /* remove from write queue * at completion of pageout; * do not free/recycle it yet: * caller will free it; */#define lbmSYNC 0x0008 /* do not return to freelist * when removed from write queue; */#define lbmFREE 0x0010 /* return to freelist * at completion of pageout; * the buffer may be recycled; */#define lbmDONE 0x0020#define lbmERROR 0x0040#define lbmGC 0x0080 /* lbmIODone to perform post-GC processing * of log page */#define lbmDIRECT 0x0100/* * external references */extern void txLazyUnlock(struct tblock * tblk);extern int jfs_stop_threads;extern struct completion jfsIOwait;/* * forward references */static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck);static int lmNextPage(struct jfs_log * log);static int lmLogFileSystem(struct jfs_log * log, char *uuid, int activate);static int lbmLogInit(struct jfs_log * log);static void lbmLogShutdown(struct jfs_log * log);static struct lbuf *lbmAllocate(struct jfs_log * log, int);static void lbmFree(struct lbuf * bp);static void lbmfree(struct lbuf * bp);static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block);static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);static int lbmIOWait(struct lbuf * bp, int flag);static void lbmIODone(struct buffer_head *bh, int);static void lbmStartIO(struct lbuf * bp);static void lmGCwrite(struct jfs_log * log, int cant_block);static int lmLogSync(struct jfs_log * log, int nosyncwait);/* * statistics */#ifdef CONFIG_JFS_STATISTICSstruct lmStat { uint commit; /* # of commit */ uint pagedone; /* # of page written */ uint submitted; /* # of pages submitted */ uint full_page; /* # of full pages submitted */ uint partial_page; /* # of partial pages submitted */} lmStat;#endif/* * NAME: lmLog() * * FUNCTION: write a log record; * * PARAMETER: * * RETURN: lsn - offset to the next log record to write (end-of-log); * -1 - error; * * note: todo: log error handler */int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck){ int lsn; int diffp, difft; struct metapage *mp = NULL; jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p", log, tblk, lrd, tlck); LOG_LOCK(log); /* log by (out-of-transaction) JFS ? */ if (tblk == NULL) goto writeRecord; /* log from page ? */ if (tlck == NULL || tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL) goto writeRecord; /* * initialize/update page/transaction recovery lsn */ lsn = log->lsn; LOGSYNC_LOCK(log); /* * initialize page lsn if first log write of the page */ if (mp->lsn == 0) { mp->log = log; mp->lsn = lsn; log->count++; /* insert page at tail of logsynclist */ list_add_tail(&mp->synclist, &log->synclist); } /* * initialize/update lsn of tblock of the page * * transaction inherits oldest lsn of pages associated * with allocation/deallocation of resources (their * log records are used to reconstruct allocation map * at recovery time: inode for inode allocation map, * B+-tree index of extent descriptors for block * allocation map); * allocation map pages inherit transaction lsn at * commit time to allow forwarding log syncpt past log * records associated with allocation/deallocation of * resources only after persistent map of these map pages * have been updated and propagated to home. */ /* * initialize transaction lsn: */ if (tblk->lsn == 0) { /* inherit lsn of its first page logged */ tblk->lsn = mp->lsn; log->count++; /* insert tblock after the page on logsynclist */ list_add(&tblk->synclist, &mp->synclist); } /* * update transaction lsn: */ else { /* inherit oldest/smallest lsn of page */ logdiff(diffp, mp->lsn, log); logdiff(difft, tblk->lsn, log); if (diffp < difft) { /* update tblock lsn with page lsn */ tblk->lsn = mp->lsn; /* move tblock after page on logsynclist */ list_del(&tblk->synclist); list_add(&tblk->synclist, &mp->synclist); } } LOGSYNC_UNLOCK(log); /* * write the log record */ writeRecord: lsn = lmWriteRecord(log, tblk, lrd, tlck); /* * forward log syncpt if log reached next syncpt trigger */ logdiff(diffp, lsn, log); if (diffp >= log->nextsync) lsn = lmLogSync(log, 0); /* update end-of-log lsn */ log->lsn = lsn; LOG_UNLOCK(log); /* return end-of-log address */ return lsn;}/* * NAME: lmWriteRecord() * * FUNCTION: move the log record to current log page * * PARAMETER: cd - commit descriptor * * RETURN: end-of-log address * * serialization: LOG_LOCK() held on entry/exit */static intlmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck){ int lsn = 0; /* end-of-log address */ struct lbuf *bp; /* dst log page buffer */ struct logpage *lp; /* dst log page */ caddr_t dst; /* destination address in log page */ int dstoffset; /* end-of-log offset in log page */ int freespace; /* free space in log page */ caddr_t p; /* src meta-data page */ caddr_t src; int srclen; int nbytes; /* number of bytes to move */ int i; int len; struct linelock *linelock; struct lv *lv; struct lvd *lvd; int l2linesize; len = 0; /* retrieve destination log page to write */ bp = (struct lbuf *) log->bp; lp = (struct logpage *) bp->l_ldata; dstoffset = log->eor; /* any log data to write ? */ if (tlck == NULL) goto moveLrd; /* * move log record data */ /* retrieve source meta-data page to log */ if (tlck->flag & tlckPAGELOCK) { p = (caddr_t) (tlck->mp->data); linelock = (struct linelock *) & tlck->lock; } /* retrieve source in-memory inode to log */ else if (tlck->flag & tlckINODELOCK) { if (tlck->type & tlckDTREE) p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot; else p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot; linelock = (struct linelock *) & tlck->lock; }#ifdef _JFS_WIP else if (tlck->flag & tlckINLINELOCK) { inlinelock = (struct inlinelock *) & tlck; p = (caddr_t) & inlinelock->pxd; linelock = (struct linelock *) & tlck; }#endif /* _JFS_WIP */ else { jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck); return 0; /* Probably should trap */ } l2linesize = linelock->l2linesize; moveData: ASSERT(linelock->index <= linelock->maxcnt); lv = linelock->lv; for (i = 0; i < linelock->index; i++, lv++) { if (lv->length == 0) continue; /* is page full ? */ if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) { /* page become full: move on to next page */ lmNextPage(log); bp = log->bp; lp = (struct logpage *) bp->l_ldata; dstoffset = LOGPHDRSIZE; } /* * move log vector data */ src = (u8 *) p + (lv->offset << l2linesize); srclen = lv->length << l2linesize; len += srclen; while (srclen > 0) { freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset; nbytes = min(freespace, srclen); dst = (caddr_t) lp + dstoffset; memcpy(dst, src, nbytes); dstoffset += nbytes; /* is page not full ? */ if (dstoffset < LOGPSIZE - LOGPTLRSIZE) break; /* page become full: move on to next page */ lmNextPage(log); bp = (struct lbuf *) log->bp; lp = (struct logpage *) bp->l_ldata; dstoffset = LOGPHDRSIZE; srclen -= nbytes; src += nbytes; } /* * move log vector descriptor */ len += 4; lvd = (struct lvd *) ((caddr_t) lp + dstoffset); lvd->offset = cpu_to_le16(lv->offset); lvd->length = cpu_to_le16(lv->length); dstoffset += 4; jfs_info("lmWriteRecord: lv offset:%d length:%d", lv->offset, lv->length); } if ((i = linelock->next)) { linelock = (struct linelock *) lid_to_tlock(i); goto moveData; } /* * move log record descriptor */ moveLrd: lrd->length = cpu_to_le16(len); src = (caddr_t) lrd; srclen = LOGRDSIZE; while (srclen > 0) { freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset; nbytes = min(freespace, srclen); dst = (caddr_t) lp + dstoffset; memcpy(dst, src, nbytes); dstoffset += nbytes; srclen -= nbytes; /* are there more to move than freespace of page ? */ if (srclen) goto pageFull; /* * end of log record descriptor */ /* update last log record eor */ log->eor = dstoffset; bp->l_eor = dstoffset; lsn = (log->page << L2LOGPSIZE) + dstoffset; if (lrd->type & cpu_to_le16(LOG_COMMIT)) { tblk->clsn = lsn; jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn, bp->l_eor); INCREMENT(lmStat.commit); /* # of commit */ /* * enqueue tblock for group commit: * * enqueue tblock of non-trivial/synchronous COMMIT * at tail of group commit queue * (trivial/asynchronous COMMITs are ignored by * group commit.) */ LOGGC_LOCK(log); /* init tblock gc state */ tblk->flag = tblkGC_QUEUE; tblk->bp = log->bp; tblk->pn = log->page; tblk->eor = log->eor; /* enqueue transaction to commit queue */ tblk->cqnext = NULL; if (log->cqueue.head) { log->cqueue.tail->cqnext = tblk; log->cqueue.tail = tblk; } else log->cqueue.head = log->cqueue.tail = tblk; LOGGC_UNLOCK(log); } jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x", le16_to_cpu(lrd->type), log->bp, log->page, dstoffset); /* page not full ? */ if (dstoffset < LOGPSIZE - LOGPTLRSIZE) return lsn; pageFull: /* page become full: move on to next page */ lmNextPage(log); bp = (struct lbuf *) log->bp; lp = (struct logpage *) bp->l_ldata; dstoffset = LOGPHDRSIZE; src += nbytes; } return lsn;}/* * NAME: lmNextPage() * * FUNCTION: write current page and allocate next page. * * PARAMETER: log * * RETURN: 0 * * serialization: LOG_LOCK() held on entry/exit */static int lmNextPage(struct jfs_log * log){ struct logpage *lp; int lspn; /* log sequence page number */ int pn; /* current page number */ struct lbuf *bp; struct lbuf *nextbp; struct tblock *tblk; /* get current log page number and log sequence page number */ pn = log->page; bp = log->bp; lp = (struct logpage *) bp->l_ldata; lspn = le32_to_cpu(lp->h.page); LOGGC_LOCK(log); /* * write or queue the full page at the tail of write queue */ /* get the tail tblk on commit queue */ tblk = log->cqueue.tail; /* every tblk who has COMMIT record on the current page, * and has not been committed, must be on commit queue * since tblk is queued at commit queueu at the time * of writing its COMMIT record on the page before * page becomes full (even though the tblk thread * who wrote COMMIT record may have been suspended * currently); */ /* is page bound with outstanding tail tblk ? */ if (tblk && tblk->pn == pn) { /* mark tblk for end-of-page */ tblk->flag |= tblkGC_EOP; if (log->cflag & logGC_PAGEOUT) { /* if page is not already on write queue, * just enqueue (no lbmWRITE to prevent redrive) * buffer to wqueue to ensure correct serial order
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -