📄 xfs_log.c
字号:
if (threshold_block >= log->l_logBBsize) { threshold_block -= log->l_logBBsize; threshold_cycle += 1; } ASSIGN_ANY_LSN_HOST(threshold_lsn, threshold_cycle, threshold_block); /* Don't pass in an lsn greater than the lsn of the last * log record known to be on disk. */ if (XFS_LSN_CMP(threshold_lsn, log->l_last_sync_lsn) > 0) threshold_lsn = log->l_last_sync_lsn; } GRANT_UNLOCK(log, s); /* * Get the transaction layer to kick the dirty buffers out to * disk asynchronously. No point in trying to do this if * the filesystem is shutting down. */ if (threshold_lsn && !XLOG_FORCED_SHUTDOWN(log)) xfs_trans_push_ail(mp, threshold_lsn);} /* xlog_grant_push_ail *//* * Flush out the in-core log (iclog) to the on-disk log in an asynchronous * fashion. Previously, we should have moved the current iclog * ptr in the log to point to the next available iclog. This allows further * write to continue while this code syncs out an iclog ready to go. * Before an in-core log can be written out, the data section must be scanned * to save away the 1st word of each BBSIZE block into the header. We replace * it with the current cycle count. Each BBSIZE block is tagged with the * cycle count because there in an implicit assumption that drives will * guarantee that entire 512 byte blocks get written at once. In other words, * we can't have part of a 512 byte block written and part not written. By * tagging each block, we will know which blocks are valid when recovering * after an unclean shutdown. * * This routine is single threaded on the iclog. No other thread can be in * this routine with the same iclog. Changing contents of iclog can there- * fore be done without grabbing the state machine lock. Updating the global * log will require grabbing the lock though. * * The entire log manager uses a logical block numbering scheme. Only * log_sync (and then only bwrite()) know about the fact that the log may * not start with block zero on a given device. The log block start offset * is added immediately before calling bwrite(). */intxlog_sync(xlog_t *log, xlog_in_core_t *iclog){ xfs_caddr_t dptr; /* pointer to byte sized element */ xfs_buf_t *bp; int i, ops; uint count; /* byte count of bwrite */ uint count_init; /* initial count before roundup */ int roundoff; /* roundoff to BB or stripe */ int split = 0; /* split write into two regions */ int error; SPLDECL(s); int v2 = XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb); XFS_STATS_INC(xs_log_writes); ASSERT(iclog->ic_refcnt == 0); /* Add for LR header */ count_init = log->l_iclog_hsize + iclog->ic_offset; /* Round out the log write size */ if (v2 && log->l_mp->m_sb.sb_logsunit > 1) { /* we have a v2 stripe unit to use */ count = XLOG_LSUNITTOB(log, XLOG_BTOLSUNIT(log, count_init)); } else { count = BBTOB(BTOBB(count_init)); } roundoff = count - count_init; ASSERT(roundoff >= 0); ASSERT((v2 && log->l_mp->m_sb.sb_logsunit > 1 && roundoff < log->l_mp->m_sb.sb_logsunit) || (log->l_mp->m_sb.sb_logsunit <= 1 && roundoff < BBTOB(1))); /* move grant heads by roundoff in sync */ s = GRANT_LOCK(log); xlog_grant_add_space(log, roundoff); GRANT_UNLOCK(log, s); /* put cycle number in every block */ xlog_pack_data(log, iclog, roundoff); /* real byte length */ if (v2) { INT_SET(iclog->ic_header.h_len, ARCH_CONVERT, iclog->ic_offset + roundoff); } else { INT_SET(iclog->ic_header.h_len, ARCH_CONVERT, iclog->ic_offset); } /* put ops count in correct order */ ops = iclog->ic_header.h_num_logops; INT_SET(iclog->ic_header.h_num_logops, ARCH_CONVERT, ops); bp = iclog->ic_bp; ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long)1); XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2); XFS_BUF_SET_ADDR(bp, BLOCK_LSN(INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT))); XFS_STATS_ADD(xs_log_blocks, BTOBB(count)); /* Do we need to split this write into 2 parts? */ if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) { split = count - (BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp))); count = BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp)); iclog->ic_bwritecnt = 2; /* split into 2 writes */ } else { iclog->ic_bwritecnt = 1; } XFS_BUF_SET_COUNT(bp, count); XFS_BUF_SET_FSPRIVATE(bp, iclog); /* save for later */ XFS_BUF_ZEROFLAGS(bp); XFS_BUF_BUSY(bp); XFS_BUF_ASYNC(bp); /* * Do an ordered write for the log block. * Its unnecessary to flush the first split block in the log wrap case. */ if (!split && (log->l_mp->m_flags & XFS_MOUNT_BARRIER)) XFS_BUF_ORDERED(bp); ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); xlog_verify_iclog(log, iclog, count, B_TRUE); /* account for log which doesn't start at block #0 */ XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); /* * Don't call xfs_bwrite here. We do log-syncs even when the filesystem * is shutting down. */ XFS_BUF_WRITE(bp); if ((error = XFS_bwrite(bp))) { xfs_ioerror_alert("xlog_sync", log->l_mp, bp, XFS_BUF_ADDR(bp)); return error; } if (split) { bp = iclog->ic_log->l_xbuf; ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long)1); XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2); XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */ XFS_BUF_SET_PTR(bp, (xfs_caddr_t)((__psint_t)&(iclog->ic_header)+ (__psint_t)count), split); XFS_BUF_SET_FSPRIVATE(bp, iclog); XFS_BUF_ZEROFLAGS(bp); XFS_BUF_BUSY(bp); XFS_BUF_ASYNC(bp); if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) XFS_BUF_ORDERED(bp); dptr = XFS_BUF_PTR(bp); /* * Bump the cycle numbers at the start of each block * since this part of the buffer is at the start of * a new cycle. Watch out for the header magic number * case, though. */ for (i=0; i<split; i += BBSIZE) { INT_MOD(*(uint *)dptr, ARCH_CONVERT, +1); if (INT_GET(*(uint *)dptr, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM) INT_MOD(*(uint *)dptr, ARCH_CONVERT, +1); dptr += BBSIZE; } ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); /* account for internal log which doesn't start at block #0 */ XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); XFS_BUF_WRITE(bp); if ((error = XFS_bwrite(bp))) { xfs_ioerror_alert("xlog_sync (split)", log->l_mp, bp, XFS_BUF_ADDR(bp)); return error; } } return 0;} /* xlog_sync *//* * Deallocate a log structure */voidxlog_dealloc_log(xlog_t *log){ xlog_in_core_t *iclog, *next_iclog; xlog_ticket_t *tic, *next_tic; int i; iclog = log->l_iclog; for (i=0; i<log->l_iclog_bufs; i++) { sv_destroy(&iclog->ic_forcesema); sv_destroy(&iclog->ic_writesema); xfs_buf_free(iclog->ic_bp);#ifdef XFS_LOG_TRACE if (iclog->ic_trace != NULL) { ktrace_free(iclog->ic_trace); }#endif next_iclog = iclog->ic_next; kmem_free(iclog, sizeof(xlog_in_core_t)); iclog = next_iclog; } freesema(&log->l_flushsema); spinlock_destroy(&log->l_icloglock); spinlock_destroy(&log->l_grant_lock); /* XXXsup take a look at this again. */ if ((log->l_ticket_cnt != log->l_ticket_tcnt) && !XLOG_FORCED_SHUTDOWN(log)) { xfs_fs_cmn_err(CE_WARN, log->l_mp, "xlog_dealloc_log: (cnt: %d, total: %d)", log->l_ticket_cnt, log->l_ticket_tcnt); /* ASSERT(log->l_ticket_cnt == log->l_ticket_tcnt); */ } else { tic = log->l_unmount_free; while (tic) { next_tic = tic->t_next; kmem_free(tic, NBPP); tic = next_tic; } } xfs_buf_free(log->l_xbuf);#ifdef XFS_LOG_TRACE if (log->l_trace != NULL) { ktrace_free(log->l_trace); } if (log->l_grant_trace != NULL) { ktrace_free(log->l_grant_trace); }#endif log->l_mp->m_log = NULL; kmem_free(log, sizeof(xlog_t));} /* xlog_dealloc_log *//* * Update counters atomically now that memcpy is done. *//* ARGSUSED */static inline voidxlog_state_finish_copy(xlog_t *log, xlog_in_core_t *iclog, int record_cnt, int copy_bytes){ SPLDECL(s); s = LOG_LOCK(log); iclog->ic_header.h_num_logops += record_cnt; iclog->ic_offset += copy_bytes; LOG_UNLOCK(log, s);} /* xlog_state_finish_copy *//* * print out info relating to regions written which consume * the reservation */STATIC voidxlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket){ uint i; uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t); /* match with XLOG_REG_TYPE_* in xfs_log.h */ static char *res_type_str[XLOG_REG_TYPE_MAX] = { "bformat", "bchunk", "efi_format", "efd_format", "iformat", "icore", "iext", "ibroot", "ilocal", "iattr_ext", "iattr_broot", "iattr_local", "qformat", "dquot", "quotaoff", "LR header", "unmount", "commit", "trans header" }; static char *trans_type_str[XFS_TRANS_TYPE_MAX] = { "SETATTR_NOT_SIZE", "SETATTR_SIZE", "INACTIVE", "CREATE", "CREATE_TRUNC", "TRUNCATE_FILE", "REMOVE", "LINK", "RENAME", "MKDIR", "RMDIR", "SYMLINK", "SET_DMATTRS", "GROWFS", "STRAT_WRITE", "DIOSTRAT", "WRITE_SYNC", "WRITEID", "ADDAFORK", "ATTRINVAL", "ATRUNCATE", "ATTR_SET", "ATTR_RM", "ATTR_FLAG", "CLEAR_AGI_BUCKET", "QM_SBCHANGE", "DUMMY1", "DUMMY2", "QM_QUOTAOFF", "QM_DQALLOC", "QM_SETQLIM", "QM_DQCLUSTER", "QM_QINOCREATE", "QM_QUOTAOFF_END", "SB_UNIT", "FSYNC_TS", "GROWFSRT_ALLOC", "GROWFSRT_ZERO", "GROWFSRT_FREE", "SWAPEXT" }; xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_write: reservation summary:\n" " trans type = %s (%u)\n" " unit res = %d bytes\n" " current res = %d bytes\n" " total reg = %u bytes (o/flow = %u bytes)\n" " ophdrs = %u (ophdr space = %u bytes)\n" " ophdr + reg = %u bytes\n" " num regions = %u\n", ((ticket->t_trans_type <= 0 || ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ? "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]), ticket->t_trans_type, ticket->t_unit_res, ticket->t_curr_res, ticket->t_res_arr_sum, ticket->t_res_o_flow, ticket->t_res_num_ophdrs, ophdr_spc, ticket->t_res_arr_sum + ticket->t_res_o_flow + ophdr_spc, ticket->t_res_num); for (i = 0; i < ticket->t_res_num; i++) { uint r_type = ticket->t_res_arr[i].r_type; cmn_err(CE_WARN, "region[%u]: %s - %u bytes\n", i, ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ? "bad-rtype" : res_type_str[r_type-1]), ticket->t_res_arr[i].r_len); }}/* * Write some region out to in-core log * * This will be called when writing externally provided regions or when * writing out a commit record for a given transaction. * * General algorithm: * 1. Find total length of this write. This may include adding to the * lengths passed in. * 2. Check whether we violate the tickets reservation. * 3. While writing to this iclog * A. Reserve as much space in this iclog as can get * B. If this is first write, save away start lsn * C. While writing this region: * 1. If first write of transaction, write start record * 2. Write log operation header (header per region) * 3. Find out if we can fit entire region into this iclog * 4. Potentially, verify destination memcpy ptr * 5. Memcpy (partial) region * 6. If partial copy, release iclog; otherwise, continue * copying more regions into current iclog * 4. Mark want sync bit (in simulation mode) * 5. Release iclog for potential flush to on-disk log. * * ERRORS: * 1. Panic if reservation is overrun. This should never happen since * reservation amounts are generated internal to the filesystem. * NOTES: * 1. Tickets are single threaded data structures. * 2. The XLOG_END_TRANS & XLOG_CONTINUE_TRANS flags are passed down to the * syncing routine. When a single log_write region needs to span * multiple in-core logs, the XLOG_CONTINUE_TRANS bit should be set * on all log operation writes which don't contain the end of the * region. The XLOG_END_TRANS bit is used for the in-core log * operation which contains the end of the continued log_write region. * 3. When xlog_state_get_iclog_space() grabs the rest of the current iclog, * we don't really know exactly how much space will be used. As a result, * we don't update ic_offset until the end when we know exactly how many * bytes have been written out. */intxlog_write(xfs_mount_t * mp, xfs_log_iovec_t reg[], int nentries, xfs_log_ticket_t tic, xfs_lsn_t *start_lsn, xlog_in_core_t **commit_iclog, uint flags){ xlog_t *log = mp->m_log; xlog_ticket_t *ticket = (xlog_ticket_t *)tic; xlog_in_core_t *iclog = NULL; /* ptr to current in-core log */ xlog_op_header_t *logop_head; /* ptr to log operation header */ __psint_t ptr; /* copy address into data region */ int len; /* # xlog_write() bytes 2 still copy */ int index; /* region index currently copying */ int log_offset; /* offset (from 0) into data region */ int start_rec_copy; /* # bytes to copy for start record */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -