📄 xfs_log.c
	tail_lsn = xfs_trans_tail_ail(mp);
	s = GRANT_LOCK(log);
	if (tail_lsn != 0) {
		log->l_tail_lsn = tail_lsn;
	} else {
		tail_lsn = log->l_tail_lsn = log->l_last_sync_lsn;
	}
	GRANT_UNLOCK(log, s);

	return tail_lsn;
}	/* xlog_assign_tail_lsn */

/*
 * Return the space in the log between the tail and the head.  The head
 * is passed in the cycle/bytes formal parms.  In the special case where
 * the reserve head has wrapped past the tail, this calculation is no
 * longer valid.  In this case, just return 0 which means there is no space
 * in the log.  This works for all places where this function is called
 * with the reserve head.  Of course, if the write head were to ever
 * wrap the tail, we should blow up.  Rather than catch this case here,
 * we depend on other ASSERTions in other parts of the code.  XXXmiken
 *
 * This code also handles the case where the reservation head is behind
 * the tail.  The details of this case are described below, but the end
 * result is that we return the size of the log as the amount of space left.
 */
int
xlog_space_left(xlog_t *log, int cycle, int bytes)
{
	int free_bytes;
	int tail_bytes;
	int tail_cycle;

	tail_bytes = BBTOB(BLOCK_LSN(log->l_tail_lsn));
	tail_cycle = CYCLE_LSN(log->l_tail_lsn);
	if ((tail_cycle == cycle) && (bytes >= tail_bytes)) {
		free_bytes = log->l_logsize - (bytes - tail_bytes);
	} else if ((tail_cycle + 1) < cycle) {
		return 0;
	} else if (tail_cycle < cycle) {
		ASSERT(tail_cycle == (cycle - 1));
		free_bytes = tail_bytes - bytes;
	} else {
		/*
		 * The reservation head is behind the tail.
		 * In this case we just want to return the size of the
		 * log as the amount of space left.
		 */
		xfs_fs_cmn_err(CE_ALERT, log->l_mp,
			"xlog_space_left: head behind tail\n"
			"  tail_cycle = %d, tail_bytes = %d\n"
			"  GH   cycle = %d, GH   bytes = %d",
			tail_cycle, tail_bytes, cycle, bytes);
		ASSERT(0);
		free_bytes = log->l_logsize;
	}
	return free_bytes;
}	/* xlog_space_left */
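/*
 * Illustration (not part of the original file): a minimal userspace
 * sketch of the cycle/bytes arithmetic that xlog_space_left() performs
 * above.  The log is a circular range of "logsize" bytes; the head and
 * tail are each tracked as a (cycle, bytes) pair, where the cycle
 * counts complete wraps around the log.  All names here are
 * hypothetical stand-ins, not XFS symbols.
 */
static int
sketch_space_left(int logsize, int tail_cycle, int tail_bytes,
		  int head_cycle, int head_bytes)
{
	if (tail_cycle == head_cycle && head_bytes >= tail_bytes)
		return logsize - (head_bytes - tail_bytes);
	if (tail_cycle + 1 == head_cycle)
		return tail_bytes - head_bytes;	/* head wrapped onto the next cycle */
	if (tail_cycle + 1 < head_cycle)
		return 0;			/* reserve head overran the tail */
	return logsize;				/* head behind tail: corrupt state */
}
/*
 * E.g. with a 1 MB log: on the same cycle, a head 64 KB past the tail
 * leaves sketch_space_left(1048576, 7, 131072, 7, 196608) == 983040
 * bytes, while a head that has wrapped and sits 64 KB short of the
 * tail leaves sketch_space_left(1048576, 7, 131072, 8, 65536) == 65536.
 */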
/*
 * Log function which is called when an io completes.
 *
 * The log manager needs its own routine, in order to control what
 * happens with the buffer after the write completes.
 */
void
xlog_iodone(xfs_buf_t *bp)
{
	xlog_in_core_t	*iclog;
	xlog_t		*l;
	int		aborted;

	iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
	ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long) 2);
	XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
	aborted = 0;

	/*
	 * Some versions of cpp barf on the recursive definition of
	 * ic_log -> hic_fields.ic_log and expand ic_log twice when
	 * it is passed through two macros.  Workaround broken cpp.
	 */
	l = iclog->ic_log;

	/*
	 * If the ordered flag has been removed by a lower
	 * layer, it means the underlying device no longer supports
	 * barrier I/O. Warn loudly and turn off barriers.
	 */
	if ((l->l_mp->m_flags & XFS_MOUNT_BARRIER) && !XFS_BUF_ORDERED(bp)) {
		l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER;
		xfs_fs_cmn_err(CE_WARN, l->l_mp,
				"xlog_iodone: Barriers are no longer supported"
				" by device. Disabling barriers\n");
		xfs_buftrace("XLOG_IODONE BARRIERS OFF", bp);
	}

	/*
	 * Race to shutdown the filesystem if we see an error.
	 */
	if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), l->l_mp,
			XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) {
		xfs_ioerror_alert("xlog_iodone", l->l_mp, bp, XFS_BUF_ADDR(bp));
		XFS_BUF_STALE(bp);
		xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR);
		/*
		 * This flag will be propagated to the trans-committed
		 * callback routines to let them know that the log-commit
		 * didn't succeed.
		 */
		aborted = XFS_LI_ABORTED;
	} else if (iclog->ic_state & XLOG_STATE_IOERROR) {
		aborted = XFS_LI_ABORTED;
	}

	/* log I/O is always issued ASYNC */
	ASSERT(XFS_BUF_ISASYNC(bp));
	xlog_state_done_syncing(iclog, aborted);
	/*
	 * do not reference the buffer (bp) here as we could race
	 * with it being freed after writing the unmount record to the
	 * log.
	 */
}	/* xlog_iodone */

/*
 * The bdstrat callback function for log bufs. This gives us a central
 * place to trap bufs in case we get hit by a log I/O error and need to
 * shutdown.  Actually, in practice, even when we didn't get a log error,
 * we transition the iclogs to IOERROR state *after* flushing all existing
 * iclogs to disk.  This is because we don't want any more new transactions
 * to be started or completed afterwards.
 */
STATIC int
xlog_bdstrat_cb(struct xfs_buf *bp)
{
	xlog_in_core_t	*iclog;

	iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);

	if ((iclog->ic_state & XLOG_STATE_IOERROR) == 0) {
		/* note for irix bstrat will need struct bdevsw passed
		 * Fix the following macro if the code ever is merged
		 */
		XFS_bdstrat(bp);
		return 0;
	}

	xfs_buftrace("XLOG__BDSTRAT IOERROR", bp);
	XFS_BUF_ERROR(bp, EIO);
	XFS_BUF_STALE(bp);
	xfs_biodone(bp);
	return XFS_ERROR(EIO);
}

/*
 * Return size of each in-core log record buffer.
 *
 * All machines get 8 x 32KB buffers by default, unless tuned otherwise.
 *
 * If the filesystem blocksize is too large, we may need to choose a
 * larger size since the directory code currently logs entire blocks.
 */
STATIC void
xlog_get_iclog_buffer_size(xfs_mount_t	*mp,
			   xlog_t	*log)
{
	int size;
	int xhdrs;

	if (mp->m_logbufs <= 0)
		log->l_iclog_bufs = XLOG_MAX_ICLOGS;
	else
		log->l_iclog_bufs = mp->m_logbufs;

	/*
	 * Buffer size passed in from mount system call.
	 */
	if (mp->m_logbsize > 0) {
		size = log->l_iclog_size = mp->m_logbsize;
		log->l_iclog_size_log = 0;
		while (size != 1) {
			log->l_iclog_size_log++;
			size >>= 1;
		}

		if (XFS_SB_VERSION_HASLOGV2(&mp->m_sb)) {
			/* # headers = size / 32K
			 * one header holds cycles from 32K of data
			 */
			xhdrs = mp->m_logbsize / XLOG_HEADER_CYCLE_SIZE;
			if (mp->m_logbsize % XLOG_HEADER_CYCLE_SIZE)
				xhdrs++;
			log->l_iclog_hsize = xhdrs << BBSHIFT;
			log->l_iclog_heads = xhdrs;
		} else {
			ASSERT(mp->m_logbsize <= XLOG_BIG_RECORD_BSIZE);
			log->l_iclog_hsize = BBSIZE;
			log->l_iclog_heads = 1;
		}
		goto done;
	}

	/* All machines use 32KB buffers by default. */
	log->l_iclog_size = XLOG_BIG_RECORD_BSIZE;
	log->l_iclog_size_log = XLOG_BIG_RECORD_BSHIFT;

	/* the default log size is 16k or 32k which is one header sector */
	log->l_iclog_hsize = BBSIZE;
	log->l_iclog_heads = 1;

	/*
	 * For 16KB, we use 3 32KB buffers.  For 32KB block sizes, we use
	 * 4 32KB buffers.  For 64KB block sizes, we use 8 32KB buffers.
	 */
	if (mp->m_sb.sb_blocksize >= 16*1024) {
		log->l_iclog_size = XLOG_BIG_RECORD_BSIZE;
		log->l_iclog_size_log = XLOG_BIG_RECORD_BSHIFT;
		if (mp->m_logbufs <= 0) {
			switch (mp->m_sb.sb_blocksize) {
			case 16*1024:			/* 16 KB */
				log->l_iclog_bufs = 3;
				break;
			case 32*1024:			/* 32 KB */
				log->l_iclog_bufs = 4;
				break;
			case 64*1024:			/* 64 KB */
				log->l_iclog_bufs = 8;
				break;
			default:
				xlog_panic("XFS: Invalid blocksize");
				break;
			}
		}
	}

done:
	/* are we being asked to make the sizes selected above visible? */
	if (mp->m_logbufs == 0)
		mp->m_logbufs = log->l_iclog_bufs;
	if (mp->m_logbsize == 0)
		mp->m_logbsize = log->l_iclog_size;
}	/* xlog_get_iclog_buffer_size */
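/*
 * Illustration (not part of the original file): the two computations
 * in xlog_get_iclog_buffer_size() above, as standalone helpers.  The
 * shift loop derives log2 of the power-of-two buffer size, and the
 * header count rounds up so that each 32 KB of data gets one v2
 * header.  SKETCH_HEADER_CYCLE_SIZE is a hypothetical stand-in for
 * XLOG_HEADER_CYCLE_SIZE.
 */
#define SKETCH_HEADER_CYCLE_SIZE	(32 * 1024)

static int
sketch_size_log(int size)		/* log2 of a power-of-two size */
{
	int size_log = 0;

	while (size != 1) {
		size_log++;
		size >>= 1;
	}
	return size_log;		/* sketch_size_log(262144) == 18 */
}

static int
sketch_num_headers(int logbsize)	/* one header per 32 KB, rounded up */
{
	int xhdrs = logbsize / SKETCH_HEADER_CYCLE_SIZE;

	if (logbsize % SKETCH_HEADER_CYCLE_SIZE)
		xhdrs++;
	return xhdrs;			/* sketch_num_headers(262144) == 8 */
}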
/*
 * This routine initializes some of the log structure for a given mount point.
 * Its primary purpose is to fill in enough so that recovery can occur.
 * However, some other stuff may be filled in too.
 */
STATIC xlog_t *
xlog_alloc_log(xfs_mount_t	*mp,
	       xfs_buftarg_t	*log_target,
	       xfs_daddr_t	blk_offset,
	       int		num_bblks)
{
	xlog_t			*log;
	xlog_rec_header_t	*head;
	xlog_in_core_t		**iclogp;
	xlog_in_core_t		*iclog, *prev_iclog = NULL;
	xfs_buf_t		*bp;
	int			i;
	int			iclogsize;

	log = (xlog_t *)kmem_zalloc(sizeof(xlog_t), KM_SLEEP);

	log->l_mp	   = mp;
	log->l_targ	   = log_target;
	log->l_logsize     = BBTOB(num_bblks);
	log->l_logBBstart  = blk_offset;
	log->l_logBBsize   = num_bblks;
	log->l_covered_state = XLOG_STATE_COVER_IDLE;
	log->l_flags	   |= XLOG_ACTIVE_RECOVERY;

	log->l_prev_block  = -1;
	ASSIGN_ANY_LSN_HOST(log->l_tail_lsn, 1, 0);
	/* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */
	log->l_last_sync_lsn = log->l_tail_lsn;
	log->l_curr_cycle  = 1;	    /* 0 is bad since this is initial value */
	log->l_grant_reserve_cycle = 1;
	log->l_grant_write_cycle = 1;

	if (XFS_SB_VERSION_HASSECTOR(&mp->m_sb)) {
		log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT;
		ASSERT(log->l_sectbb_log <= mp->m_sectbb_log);
		/* for larger sector sizes, must have v2 or external log */
		ASSERT(log->l_sectbb_log == 0 ||
			log->l_logBBstart == 0 ||
			XFS_SB_VERSION_HASLOGV2(&mp->m_sb));
		ASSERT(mp->m_sb.sb_logsectlog >= BBSHIFT);
	}
	log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1;

	xlog_get_iclog_buffer_size(mp, log);

	bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp);
	XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
	XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
	XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
	ASSERT(XFS_BUF_ISBUSY(bp));
	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
	log->l_xbuf = bp;

	spinlock_init(&log->l_icloglock, "iclog");
	spinlock_init(&log->l_grant_lock, "grhead_iclog");
	initnsema(&log->l_flushsema, 0, "ic-flush");
	xlog_state_ticket_alloc(log);  /* wait until after icloglock inited */

	/* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
	ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);

	iclogp = &log->l_iclog;
	/*
	 * The amount of memory to allocate for the iclog structure is
	 * rather funky due to the way the structure is defined.  It is
	 * done this way so that we can use different sizes for machines
	 * with different amounts of memory.  See the definition of
	 * xlog_in_core_t in xfs_log_priv.h for details.
	 */
	iclogsize = log->l_iclog_size;
	ASSERT(log->l_iclog_size >= 4096);
	for (i = 0; i < log->l_iclog_bufs; i++) {
		*iclogp = (xlog_in_core_t *)
			  kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP);
		iclog = *iclogp;
		iclog->ic_prev = prev_iclog;
		prev_iclog = iclog;

		bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp);
		if (!XFS_BUF_CPSEMA(bp))
			ASSERT(0);
		XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
		XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
		XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
		iclog->ic_bp = bp;
		iclog->hic_data = bp->b_addr;

		log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header);

		head = &iclog->ic_header;
		memset(head, 0, sizeof(xlog_rec_header_t));
		INT_SET(head->h_magicno, ARCH_CONVERT, XLOG_HEADER_MAGIC_NUM);
		INT_SET(head->h_version, ARCH_CONVERT,
			XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? 2 : 1);
		INT_SET(head->h_size, ARCH_CONVERT, log->l_iclog_size);
		/* new fields */
		INT_SET(head->h_fmt, ARCH_CONVERT, XLOG_FMT);
		memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));

		iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
		iclog->ic_state = XLOG_STATE_ACTIVE;
		iclog->ic_log = log;
		iclog->ic_callback_tail = &(iclog->ic_callback);
		iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize;

		ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
		ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0);
		sv_init(&iclog->ic_forcesema, SV_DEFAULT, "iclog-force");
		sv_init(&iclog->ic_writesema, SV_DEFAULT, "iclog-write");

		iclogp = &iclog->ic_next;
	}
	*iclogp = log->l_iclog;			/* complete ring */
	log->l_iclog->ic_prev = prev_iclog;	/* re-write 1st prev ptr */

	return log;
}	/* xlog_alloc_log */
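/*
 * Illustration (not part of the original file): the pointer-to-pointer
 * idiom the loop above uses to build the iclog ring.  "linkp" always
 * addresses the link that should receive the next node, so the first
 * element needs no special case, and the final assignment closes the
 * circle.  The types here are hypothetical, not XFS structures.
 */
struct sketch_node {
	struct sketch_node	*next;
	struct sketch_node	*prev;
};

static void
sketch_build_ring(struct sketch_node *nodes, int count)
{
	struct sketch_node	*head = NULL, *prev = NULL;
	struct sketch_node	**linkp = &head;
	int			i;

	if (count == 0)
		return;
	for (i = 0; i < count; i++) {
		nodes[i].prev = prev;
		prev = &nodes[i];
		*linkp = &nodes[i];	/* hook into the previous link */
		linkp = &nodes[i].next;	/* the next node goes here */
	}
	*linkp = head;			/* complete ring */
	head->prev = prev;		/* re-write 1st prev ptr */
}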
/*
 * Write out the commit record of a transaction associated with the given
 * ticket.  Return the lsn of the commit record.
 */
STATIC int
xlog_commit_record(xfs_mount_t	*mp,
		   xlog_ticket_t *ticket,
		   xlog_in_core_t **iclog,
		   xfs_lsn_t	*commitlsnp)
{
	int		error;
	xfs_log_iovec_t	reg[1];

	reg[0].i_addr = NULL;
	reg[0].i_len = 0;
	XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_COMMIT);

	ASSERT_ALWAYS(iclog);
	if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp,
			       iclog, XLOG_COMMIT_TRANS))) {
		xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
	}
	return error;
}	/* xlog_commit_record */

/*
 * Push on the buffer cache code if we ever use more than 75% of the on-disk
 * log space.  This code pushes on the lsn which would supposedly free up
 * the 25% which we want to leave free.  We may need to adopt a policy which
 * pushes on an lsn which is further along in the log once we reach the high
 * water mark.  In this manner, we would be creating a low water mark.
 */
void
xlog_grant_push_ail(xfs_mount_t	*mp,
		    int		need_bytes)
{
	xlog_t		*log = mp->m_log;	/* pointer to the log */
	xfs_lsn_t	tail_lsn;		/* lsn of the log tail */
	xfs_lsn_t	threshold_lsn = 0;	/* lsn we'd like to be at */
	int		free_blocks;		/* free blocks left to write to */
	int		free_bytes;		/* free bytes left to write to */
	int		threshold_block;	/* block in lsn we'd like to be at */
	int		threshold_cycle;	/* lsn cycle we'd like to be at */
	int		free_threshold;
	SPLDECL(s);

	ASSERT(BTOBB(need_bytes) < log->l_logBBsize);

	s = GRANT_LOCK(log);
	free_bytes = xlog_space_left(log,
				     log->l_grant_reserve_cycle,
				     log->l_grant_reserve_bytes);
	tail_lsn = log->l_tail_lsn;
	free_blocks = BTOBBT(free_bytes);

	/*
	 * Set the threshold for the minimum number of free blocks in the
	 * log to the maximum of what the caller needs, one quarter of the
	 * log, and 256 blocks.
	 */
	free_threshold = BTOBB(need_bytes);
	free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2));
	free_threshold = MAX(free_threshold, 256);
	if (free_blocks < free_threshold) {
		threshold_block = BLOCK_LSN(tail_lsn) + free_threshold;
		threshold_cycle = CYCLE_LSN(tail_lsn);
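/*
 * Illustration (not part of the original file): the threshold clamp
 * computed just above, as a standalone helper.  The minimum number of
 * free blocks to keep in the log is the largest of what the caller
 * needs, one quarter of the log, and 256 blocks; BTOBB() is modeled
 * here as a round-up divide by the 512-byte basic block size.
 */
#define SKETCH_BBSIZE	512

static int
sketch_free_threshold(int need_bytes, int log_bblocks)
{
	int threshold = (need_bytes + SKETCH_BBSIZE - 1) / SKETCH_BBSIZE;

	if (threshold < (log_bblocks >> 2))
		threshold = log_bblocks >> 2;	/* one quarter of the log */
	if (threshold < 256)
		threshold = 256;		/* absolute floor */
	return threshold;	/* e.g. sketch_free_threshold(8192, 262144) == 65536 */
}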