📄 trx0trx.c
字号:
trx->no = ut_dulint_max; } if (undo->dict_operation) { trx->dict_operation = undo->dict_operation; trx->table_id = undo->table_id; } if (!undo->empty) { trx->undo_no = ut_dulint_add(undo->top_undo_no, 1); } trx_list_insert_ordered(trx); undo = UT_LIST_GET_NEXT(undo_list, undo); } undo = UT_LIST_GET_FIRST(rseg->update_undo_list); while (undo != NULL) { trx = trx_get_on_id(undo->trx_id); if (NULL == trx) { trx = trx_create(NULL); trx->id = undo->trx_id; trx->xid = undo->xid; if (undo->state != TRX_UNDO_ACTIVE) { /* Prepared transactions are left in the prepared state waiting for a commit or abort decision from MySQL */ if (undo->state == TRX_UNDO_PREPARED) { fprintf(stderr,"InnoDB: Transaction %lu %lu was in the XA prepared state.\n", ut_dulint_get_high(trx->id), ut_dulint_get_low(trx->id)); if (srv_force_recovery == 0) { trx->conc_state = TRX_PREPARED; } else { fprintf(stderr,"InnoDB: Since innodb_force_recovery > 0, we will rollback it anyway.\n"); trx->conc_state = TRX_ACTIVE; } } else { trx->conc_state = TRX_COMMITTED_IN_MEMORY; } /* We give a dummy value for the trx number */ trx->no = trx->id; } else { trx->conc_state = TRX_ACTIVE; /* A running transaction always has the number field inited to ut_dulint_max */ trx->no = ut_dulint_max; } trx->rseg = rseg; trx_list_insert_ordered(trx); if (undo->dict_operation) { trx->dict_operation = undo->dict_operation; trx->table_id = undo->table_id; } } trx->update_undo = undo; if ((!undo->empty) && (ut_dulint_cmp(undo->top_undo_no, trx->undo_no) >= 0)) { trx->undo_no = ut_dulint_add(undo->top_undo_no, 1); } undo = UT_LIST_GET_NEXT(undo_list, undo); } rseg = UT_LIST_GET_NEXT(rseg_list, rseg); }}/**********************************************************************Assigns a rollback segment to a transaction in a round-robin fashion.Skips the SYSTEM rollback segment if another is available. */UNIV_INLINEulinttrx_assign_rseg(void)/*=================*/ /* out: assigned rollback segment id */{ trx_rseg_t* rseg = trx_sys->latest_rseg;#ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&kernel_mutex));#endif /* UNIV_SYNC_DEBUG */loop: /* Get next rseg in a round-robin fashion */ rseg = UT_LIST_GET_NEXT(rseg_list, rseg); if (rseg == NULL) { rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list); } /* If it is the SYSTEM rollback segment, and there exist others, skip it */ if ((rseg->id == TRX_SYS_SYSTEM_RSEG_ID) && (UT_LIST_GET_LEN(trx_sys->rseg_list) > 1)) { goto loop; } trx_sys->latest_rseg = rseg; return(rseg->id);}/********************************************************************Starts a new transaction. */ibooltrx_start_low(/*==========*/ /* out: TRUE */ trx_t* trx, /* in: transaction */ ulint rseg_id)/* in: rollback segment id; if ULINT_UNDEFINED is passed, the system chooses the rollback segment automatically in a round-robin fashion */{ trx_rseg_t* rseg;#ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&kernel_mutex));#endif /* UNIV_SYNC_DEBUG */ ut_ad(trx->rseg == NULL); if (trx->type == TRX_PURGE) { trx->id = ut_dulint_zero; trx->conc_state = TRX_ACTIVE; trx->start_time = time(NULL); return(TRUE); } ut_ad(trx->conc_state != TRX_ACTIVE); if (rseg_id == ULINT_UNDEFINED) { rseg_id = trx_assign_rseg(); } rseg = trx_sys_get_nth_rseg(trx_sys, rseg_id); trx->id = trx_sys_get_new_trx_id(); /* The initial value for trx->no: ut_dulint_max is used in read_view_open_now: */ trx->no = ut_dulint_max; trx->rseg = rseg; trx->conc_state = TRX_ACTIVE; trx->start_time = time(NULL); UT_LIST_ADD_FIRST(trx_list, trx_sys->trx_list, trx); return(TRUE);}/********************************************************************Starts a new transaction. */ibooltrx_start(/*======*/ /* out: TRUE */ trx_t* trx, /* in: transaction */ ulint rseg_id)/* in: rollback segment id; if ULINT_UNDEFINED is passed, the system chooses the rollback segment automatically in a round-robin fashion */{ ibool ret; mutex_enter(&kernel_mutex); ret = trx_start_low(trx, rseg_id); mutex_exit(&kernel_mutex); return(ret);}/********************************************************************Commits a transaction. */voidtrx_commit_off_kernel(/*==================*/ trx_t* trx) /* in: transaction */{ page_t* update_hdr_page; dulint lsn; trx_rseg_t* rseg; trx_undo_t* undo; ibool must_flush_log = FALSE; mtr_t mtr; #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&kernel_mutex));#endif /* UNIV_SYNC_DEBUG */ trx->must_flush_log_later = FALSE; rseg = trx->rseg; if (trx->insert_undo != NULL || trx->update_undo != NULL) { mutex_exit(&kernel_mutex); mtr_start(&mtr); must_flush_log = TRUE; /* Change the undo log segment states from TRX_UNDO_ACTIVE to some other state: these modifications to the file data structure define the transaction as committed in the file based world, at the serialization point of the log sequence number lsn obtained below. */ mutex_enter(&(rseg->mutex)); if (trx->insert_undo != NULL) { trx_undo_set_state_at_finish(trx, trx->insert_undo, &mtr); } undo = trx->update_undo; if (undo) { mutex_enter(&kernel_mutex); trx->no = trx_sys_get_new_trx_no(); mutex_exit(&kernel_mutex); /* It is not necessary to obtain trx->undo_mutex here because only a single OS thread is allowed to do the transaction commit for this transaction. */ update_hdr_page = trx_undo_set_state_at_finish(trx, undo, &mtr); /* We have to do the cleanup for the update log while holding the rseg mutex because update log headers have to be put to the history list in the order of the trx number. */ trx_undo_update_cleanup(trx, update_hdr_page, &mtr); } mutex_exit(&(rseg->mutex)); /* Update the latest MySQL binlog name and offset info in trx sys header if MySQL binlogging is on or the database server is a MySQL replication slave */ if (trx->mysql_log_file_name && trx->mysql_log_file_name[0] != '\0') { trx_sys_update_mysql_binlog_offset( trx->mysql_log_file_name, trx->mysql_log_offset, TRX_SYS_MYSQL_LOG_INFO, &mtr); trx->mysql_log_file_name = NULL; } if (trx->mysql_master_log_file_name[0] != '\0') { /* This database server is a MySQL replication slave */ trx_sys_update_mysql_binlog_offset( trx->mysql_master_log_file_name, trx->mysql_master_log_pos, TRX_SYS_MYSQL_MASTER_LOG_INFO, &mtr); } /* The following call commits the mini-transaction, making the whole transaction committed in the file-based world, at this log sequence number. The transaction becomes 'durable' when we write the log to disk, but in the logical sense the commit in the file-based data structures (undo logs etc.) happens here. NOTE that transaction numbers, which are assigned only to transactions with an update undo log, do not necessarily come in exactly the same order as commit lsn's, if the transactions have different rollback segments. To get exactly the same order we should hold the kernel mutex up to this point, adding to to the contention of the kernel mutex. However, if a transaction T2 is able to see modifications made by a transaction T1, T2 will always get a bigger transaction number and a bigger commit lsn than T1. */ /*--------------*/ mtr_commit(&mtr); /*--------------*/ lsn = mtr.end_lsn; mutex_enter(&kernel_mutex); } ut_ad(trx->conc_state == TRX_ACTIVE || trx->conc_state == TRX_PREPARED);#ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&kernel_mutex));#endif /* UNIV_SYNC_DEBUG */ /* The following assignment makes the transaction committed in memory and makes its changes to data visible to other transactions. NOTE that there is a small discrepancy from the strict formal visibility rules here: a human user of the database can see modifications made by another transaction T even before the necessary log segment has been flushed to the disk. If the database happens to crash before the flush, the user has seen modifications from T which will never be a committed transaction. However, any transaction T2 which sees the modifications of the committing transaction T, and which also itself makes modifications to the database, will get an lsn larger than the committing transaction T. In the case where the log flush fails, and T never gets committed, also T2 will never get committed. */ /*--------------------------------------*/ trx->conc_state = TRX_COMMITTED_IN_MEMORY; /*--------------------------------------*/ lock_release_off_kernel(trx); if (trx->global_read_view) { read_view_close(trx->global_read_view); mem_heap_empty(trx->global_read_view_heap); trx->global_read_view = NULL; } trx->read_view = NULL; if (must_flush_log) { mutex_exit(&kernel_mutex); if (trx->insert_undo != NULL) { trx_undo_insert_cleanup(trx); } /* NOTE that we could possibly make a group commit more efficient here: call os_thread_yield here to allow also other trxs to come to commit! */ /*-------------------------------------*/ /* Depending on the my.cnf options, we may now write the log buffer to the log files, making the transaction durable if the OS does not crash. We may also flush the log files to disk, making the transaction durable also at an OS crash or a power outage. The idea in InnoDB's group commit is that a group of transactions gather behind a trx doing a physical disk write to log files, and when that physical write has been completed, one of those transactions does a write which commits the whole group. Note that this group commit will only bring benefit if there are > 2 users in the database. Then at least 2 users can gather behind one doing the physical log write to disk. If we are calling trx_commit() under MySQL's binlog mutex, we will delay possible log write and flush to a separate function trx_commit_complete_for_mysql(), which is only called when the thread has released the binlog mutex. This is to make the group commit algorithm to work. Otherwise, the MySQL binlog mutex would serialize all commits and prevent a group of transactions from gathering. */ if (trx->flush_log_later) { /* Do nothing yet */ trx->must_flush_log_later = TRUE; } else if (srv_flush_log_at_trx_commit == 0) { /* Do nothing */ } else if (srv_flush_log_at_trx_commit == 1) { if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { /* Write the log but do not flush it to disk */ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); } else { /* Write the log to the log files AND flush them to disk */ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); } } else if (srv_flush_log_at_trx_commit == 2) { /* Write the log but do not flush it to disk */ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); } else { ut_error; } trx->commit_lsn = lsn; /*-------------------------------------*/ mutex_enter(&kernel_mutex); } /* Free savepoints */ trx_roll_savepoints_free(trx, NULL); trx->conc_state = TRX_NOT_STARTED; trx->rseg = NULL; trx->undo_no = ut_dulint_zero; trx->last_sql_stat_start.least_undo_no = ut_dulint_zero; ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0); ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0); UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);}/********************************************************************Cleans up a transaction at database startup. The cleanup is needed ifthe transaction already got to the middle of a commit when the databasecrashed, andf we cannot roll it back. */voidtrx_cleanup_at_db_startup(/*======================*/ trx_t* trx) /* in: transaction */{ if (trx->insert_undo != NULL) { trx_undo_insert_cleanup(trx); } trx->conc_state = TRX_NOT_STARTED; trx->rseg = NULL; trx->undo_no = ut_dulint_zero; trx->last_sql_stat_start.least_undo_no = ut_dulint_zero; UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);}/************************************************************************Assigns a read view for a consistent read query. All the consistent readswithin the same transaction will get the same read view, which is createdwhen this function is first called for a new started transaction. */read_view_t*trx_assign_read_view(/*=================*/ /* out: consistent read view */ trx_t* trx) /* in: active transaction */{ ut_ad(trx->conc_state == TRX_ACTIVE); if (trx->read_view) { return(trx->read_view); } mutex_enter(&kernel_mutex); if (!trx->read_view) { trx->read_view = read_view_open_now(trx, trx->global_read_view_heap); trx->global_read_view = trx->read_view; } mutex_exit(&kernel_mutex); return(trx->read_view);}/********************************************************************Commits a transaction. NOTE that the kernel mutex is temporarily released. */staticvoidtrx_handle_commit_sig_off_kernel(/*=============================*/ trx_t* trx, /* in: transaction */ que_thr_t** next_thr) /* in/out: next query thread to run; if the value which is passed in is a pointer to a NULL pointer, then the calling function can start running a new query thread */{ trx_sig_t* sig; trx_sig_t* next_sig; #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&kernel_mutex));#endif /* UNIV_SYNC_DEBUG */ trx->que_state = TRX_QUE_COMMITTING;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -