📄 fil0fil.c
字号:
/******************************************************The tablespace memory cache(c) 1995 Innobase OyCreated 10/25/1995 Heikki Tuuri*******************************************************/#include "fil0fil.h"#include "mem0mem.h"#include "sync0sync.h"#include "hash0hash.h"#include "os0file.h"#include "os0sync.h"#include "mach0data.h"#include "ibuf0ibuf.h"#include "buf0buf.h"#include "buf0flu.h"#include "buf0lru.h"#include "log0log.h"#include "log0recv.h"#include "fsp0fsp.h"#include "srv0srv.h"#include "srv0start.h"#include "mtr0mtr.h"#include "mtr0log.h"#include "dict0dict.h" /* IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE =============================================The tablespace cache is responsible for providing fast read/write access totablespaces and logs of the database. File creation and deletion is donein other modules which know more of the logic of the operation, however.A tablespace consists of a chain of files. The size of the files does nothave to be divisible by the database block size, because we may just leavethe last incomplete block unused. When a new file is appended to thetablespace, the maximum size of the file is also specified. At the moment,we think that it is best to extend the file to its maximum size already atthe creation of the file, because then we can avoid dynamically extendingthe file when more space is needed for the tablespace.A block's position in the tablespace is specified with a 32-bit unsignedinteger. The files in the chain are thought to be catenated, and the blockcorresponding to an address n is the nth block in the catenated file (wherethe first block is named the 0th block, and the incomplete block fragmentsat the end of files are not taken into account). A tablespace can be extendedby appending a new file at the end of the chain.Our tablespace concept is similar to the one of Oracle.To acquire more speed in disk transfers, a technique called disk striping issometimes used. 
This means that logical block addresses are divided in a
round-robin fashion across several disks. Windows NT supports disk striping,
so there we do not need to support it in the database. Disk striping is
implemented in hardware in RAID disks. We conclude that it is not necessary
to implement it in the database. Oracle 7 does not support disk striping,
either.

Another trick used at some database sites is replacing tablespace files by
raw disks, that is, the whole physical disk drive, or a partition of it, is
opened as a single file, and it is accessed through byte offsets calculated
from the start of the disk or the partition. This is recommended in some
books on database tuning to achieve more speed in i/o. Using raw disk
certainly prevents the OS from fragmenting disk space, but it is not clear
if it really adds speed. We measured on the Pentium 100 MHz + NT + NTFS file
system + EIDE Conner disk only a negligible difference in speed when reading
from a file, versus reading from a raw disk.

To have fast access to a tablespace or a log file, we put the data structures
to a hash table. Each tablespace and log file is given a unique 32-bit
identifier.

Some operating systems do not support many open files at the same time,
though NT seems to tolerate at least 900 open files. Therefore, we put the
open files in an LRU-list. If we need to open another file, we may close the
file at the end of the LRU-list. When an i/o-operation is pending on a file,
the file cannot be closed. We take the file nodes with pending i/o-operations
out of the LRU-list and keep a count of pending operations. When an operation
completes, we decrement the count and return the file node to the LRU-list if
the count drops to zero. */

/* When mysqld is run, the default directory "." 
is the mysqld datadir,
but in the MySQL Embedded Server Library and ibbackup it is not the default
directory, and we must set the base file path explicitly */

const char*	fil_path_to_mysql_datadir	= ".";

/* The number of fsyncs done to the log */
ulint	fil_n_log_flushes			= 0;

/* NOTE(review): presumably the numbers of log and tablespace flushes that
are currently in progress; nothing in this part of the file updates them --
TODO confirm against the flush code */
ulint	fil_n_pending_log_flushes		= 0;
ulint	fil_n_pending_tablespace_flushes	= 0;

/* Null file address */
fil_addr_t	fil_addr_null = {FIL_NULL, 0};

/* File node of a tablespace or the log data space: describes one file in
the file chain of a space */
struct fil_node_struct {
	fil_space_t*	space;	/* backpointer to the space where this node
				belongs */
	char*		name;	/* path to the file */
	ibool		open;	/* TRUE if file open */
	os_file_t	handle;	/* OS handle to the file, if file open */
	ibool		is_raw_disk;/* TRUE if the 'file' is actually a raw
				device or a raw disk partition */
	ulint		size;	/* size of the file in database pages, 0 if
				not known yet; the possible last incomplete
				megabyte may be ignored if space == 0 */
	ulint		n_pending;
				/* count of pending i/o's on this file;
				closing of the file is not allowed if
				this is > 0 */
	ulint		n_pending_flushes;
				/* count of pending flushes on this file;
				closing of the file is not allowed if
				this is > 0 */
	ib_longlong	modification_counter;/* when we write to the file we
				increment this by one */
	ib_longlong	flush_counter;/* up to what modification_counter value
				we have flushed the modifications to disk;
				the node has unflushed writes when
				modification_counter > flush_counter */
	UT_LIST_NODE_T(fil_node_t) chain;
				/* link field for the file chain */
	UT_LIST_NODE_T(fil_node_t) LRU;
				/* link field for the LRU list */
	ulint		magic_n;/* set to FIL_NODE_MAGIC_N when the node is
				created in fil_node_create() */
};

/* Value stored in fil_node_struct.magic_n */
#define	FIL_NODE_MAGIC_N	89389

/* Tablespace or log data space: let us call them by a common name space */
struct fil_space_struct {
	char*		name;	/* space name = the path to the first file in
				it */
	ulint		id;	/* space id */
	ib_longlong	tablespace_version;
				/* in DISCARD/IMPORT this timestamp is used to
				check if we should ignore an insert buffer
				merge request for a page because it actually
				was for the previous incarnation of the
				space */
	ibool		mark;	/* this is set to TRUE at database startup if
				the space corresponds to a table in the InnoDB
				data dictionary; so we can print a warning of
				orphaned tablespaces */
	ibool		stop_ios;/* TRUE if we want to rename the .ibd file of
				tablespace and want to stop temporarily
				posting of new i/o requests on the file */
	ibool		stop_ibuf_merges;
				/* we set this TRUE when we start deleting a
				single-table tablespace */
	ibool		is_being_deleted;
				/* this is set to TRUE when we start
				deleting a single-table tablespace and its
				file; when this flag is set no further i/o
				or flush requests can be placed on this space,
				though there may be such requests still being
				processed on this space */
	ulint		purpose;/* FIL_TABLESPACE, FIL_LOG, or FIL_ARCH_LOG */
	UT_LIST_BASE_NODE_T(fil_node_t) chain;
				/* base node for the file chain */
	ulint		size;	/* space size in pages; 0 if a single-table
				tablespace whose size we do not know yet;
				last incomplete megabytes in data files may be
				ignored if space == 0 */
	ulint		n_reserved_extents;
				/* number of reserved free extents for
				ongoing operations like B-tree page split */
	ulint		n_pending_flushes; /* this is > 0 when flushing
				the tablespace to disk; dropping of the
				tablespace is forbidden if this is > 0 */
	ulint		n_pending_ibuf_merges;/* this is > 0 when merging
				insert buffer entries to a page so that we
				may need to access the ibuf bitmap page in the
				tablespace: dropping of the tablespace is
				forbidden if this is > 0 */
	hash_node_t	hash;	/* hash chain node for the hash table of
				spaces keyed on the space id
				(fil_system->spaces) */
	hash_node_t	name_hash;/* hash chain node for the name_hash
				table */
	rw_lock_t	latch;	/* latch protecting the file space storage
				allocation */
	UT_LIST_NODE_T(fil_space_t) unflushed_spaces;
				/* list of spaces with at least one unflushed
				file we have written to */
	ibool		is_in_unflushed_spaces; /* TRUE if this space is
				currently in the list above */
	UT_LIST_NODE_T(fil_space_t) space_list;
				/* list of all spaces */
	ibuf_data_t*	ibuf_data;
				/* insert buffer data */
	ulint		magic_n;/* NOTE(review): presumably holds
				FIL_SPACE_MAGIC_N; the assignment is not in
				this part of the file -- TODO confirm */
};

#define	FIL_SPACE_MAGIC_N	89472

/* The tablespace memory cache; also the totality of logs 
= the log data space,
is stored here; below we talk about tablespaces, but also the ib_logfiles
form a 'space' and it is handled here */

typedef	struct fil_system_struct	fil_system_t;

struct fil_system_struct {
	mutex_t		mutex;		/* The mutex protecting the cache */
	hash_table_t*	spaces;		/* The hash table of spaces in the
					system; they are hashed on the space
					id */
	hash_table_t*	name_hash;	/* hash table based on the space
					name */
	UT_LIST_BASE_NODE_T(fil_node_t) LRU;
					/* base node for the LRU list of the
					most recently used open files with no
					pending i/o's; if we start an i/o on
					the file, we first remove it from this
					list, and return it to the start of
					the list when the i/o ends;
					log files and the system tablespace are
					not put to this list: they are opened
					after the startup, and kept open until
					shutdown */
	UT_LIST_BASE_NODE_T(fil_space_t) unflushed_spaces;
					/* base node for the list of those
					tablespaces whose files contain
					unflushed writes; those spaces have
					at least one file node where
					modification_counter > flush_counter */
	ulint		n_open;		/* number of files currently open */
	ulint		max_n_open;	/* n_open is not allowed to exceed
					this */
	ib_longlong	modification_counter;/* when we write to a file we
					increment this by one */
	ulint		max_assigned_id;/* maximum space id in the existing
					tables, or assigned during the time
					mysqld has been up; at an InnoDB
					startup we scan the data dictionary
					and set here the maximum of the
					space id's of the tables there */
	ib_longlong	tablespace_version;
					/* a counter which is incremented for
					every space object memory creation;
					every space mem object gets a
					'timestamp' from this; in DISCARD/
					IMPORT this is used to check if we
					should ignore an insert buffer merge
					request */
	UT_LIST_BASE_NODE_T(fil_space_t) space_list;
					/* list of all file spaces */
};

/* The tablespace memory cache. This variable is NULL before the module is
initialized. 
*/
fil_system_t*	fil_system	= NULL;

/* The tablespace memory cache hash table size */
#define	FIL_SYSTEM_HASH_SIZE	50 /* TODO: make bigger! */

/* Forward declarations of file-local functions follow. */

/************************************************************************
NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!

Prepares a file node for i/o. Opens the file if it is closed. Updates the
pending i/o's field in the node and the system appropriately. Takes the node
off the LRU list if it is in the LRU list. The caller must hold the fil_system
mutex. */
static
void
fil_node_prepare_for_io(
/*====================*/
	fil_node_t*	node,	/* in: file node */
	fil_system_t*	system,	/* in: tablespace memory cache */
	fil_space_t*	space);	/* in: space */
/************************************************************************
Updates the data structures when an i/o operation finishes. Updates the
pending i/o's field in the node appropriately. */
static
void
fil_node_complete_io(
/*=================*/
	fil_node_t*	node,	/* in: file node */
	fil_system_t*	system,	/* in: tablespace memory cache */
	ulint		type);	/* in: OS_FILE_WRITE or OS_FILE_READ; marks
				the node as modified if
				type == OS_FILE_WRITE */
/***********************************************************************
Checks if a single-table tablespace for a given table name exists in the
tablespace memory cache. */
static
ulint
fil_get_space_id_for_table(
/*=======================*/
				/* out: space id, ULINT_UNDEFINED if not
				found */
	const char*	name);	/* in: table name in the standard
				'databasename/tablename' format */
/***********************************************************************
Returns the version number of a tablespace, -1 if not found. 
*/ib_longlongfil_space_get_version(/*==================*/ /* out: version number, -1 if the tablespace does not exist in the memory cache */ ulint id) /* in: space id */{ fil_system_t* system = fil_system; fil_space_t* space; ib_longlong version = -1; ut_ad(system); mutex_enter(&(system->mutex)); HASH_SEARCH(hash, system->spaces, id, space, space->id == id); if (space) { version = space->tablespace_version; } mutex_exit(&(system->mutex)); return(version);}/***********************************************************************Returns the latch of a file space. */rw_lock_t*fil_space_get_latch(/*================*/ /* out: latch protecting storage allocation */ ulint id) /* in: space id */{ fil_system_t* system = fil_system; fil_space_t* space; ut_ad(system); mutex_enter(&(system->mutex)); HASH_SEARCH(hash, system->spaces, id, space, space->id == id); ut_a(space); mutex_exit(&(system->mutex)); return(&(space->latch));}/***********************************************************************Returns the type of a file space. */ulintfil_space_get_type(/*===============*/ /* out: FIL_TABLESPACE or FIL_LOG */ ulint id) /* in: space id */{ fil_system_t* system = fil_system; fil_space_t* space; ut_ad(system); mutex_enter(&(system->mutex)); HASH_SEARCH(hash, system->spaces, id, space, space->id == id); ut_a(space); mutex_exit(&(system->mutex)); return(space->purpose);}/***********************************************************************Returns the ibuf data of a file space. */ibuf_data_t*fil_space_get_ibuf_data(/*====================*/ /* out: ibuf data for this space */ ulint id) /* in: space id */{ fil_system_t* system = fil_system; fil_space_t* space; ut_ad(system); ut_a(id == 0); mutex_enter(&(system->mutex)); HASH_SEARCH(hash, system->spaces, id, space, space->id == id); mutex_exit(&(system->mutex)); ut_a(space); return(space->ibuf_data);}/**************************************************************************Checks if all the file nodes in a space are flushed. 
The caller must holdthe fil_system mutex. */staticiboolfil_space_is_flushed(/*=================*/ /* out: TRUE if all are flushed */ fil_space_t* space) /* in: space */{ fil_node_t* node;#ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&(fil_system->mutex)));#endif /* UNIV_SYNC_DEBUG */ node = UT_LIST_GET_FIRST(space->chain); while (node) { if (node->modification_counter > node->flush_counter) { return(FALSE); } node = UT_LIST_GET_NEXT(chain, node); } return(TRUE);}/***********************************************************************Appends a new file to the chain of files of a space. File must be closed. */voidfil_node_create(/*============*/ const char* name, /* in: file name (file must be closed) */ ulint size, /* in: file size in database blocks, rounded downwards to an integer */ ulint id, /* in: space id where to append */ ibool is_raw) /* in: TRUE if a raw device or a raw disk partition */{ fil_system_t* system = fil_system; fil_node_t* node; fil_space_t* space; ut_a(system); ut_a(name); mutex_enter(&(system->mutex)); node = mem_alloc(sizeof(fil_node_t)); node->name = mem_strdup(name); node->open = FALSE; ut_a(!is_raw || srv_start_raw_disk_in_use); node->is_raw_disk = is_raw; node->size = size; node->magic_n = FIL_NODE_MAGIC_N; node->n_pending = 0; node->n_pending_flushes = 0; node->modification_counter = 0; node->flush_counter = 0; HASH_SEARCH(hash, system->spaces, id, space, space->id == id); if (!space) { ut_print_timestamp(stderr); fprintf(stderr," InnoDB: Error: Could not find tablespace %lu for\n""InnoDB: file ", (ulong) id); ut_print_filename(stderr, name); fputs(" in the tablespace memory cache.\n", stderr); mem_free(node->name);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -