jfs_dtree.c
/*
 *   Copyright (C) International Business Machines Corp., 2000-2004
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 *	jfs_dtree.c: directory B+-tree manager
 *
 * B+-tree with variable length key directory:
 *
 * each directory page is structured as an array of 32-byte
 * directory entry slots initialized as a freelist
 * to avoid search/compaction of free space at insertion.
 * when an entry is inserted, a number of slots are allocated
 * from the freelist as required to store the variable length data
 * of the entry; when the entry is deleted, its slots
 * are returned to the freelist.
 *
 * a leaf entry stores the full name as key and the file serial number
 * (aka inode number) as data.
 * an internal/router entry stores a suffix compressed name
 * as key and a simple extent descriptor as data.
 *
 * each directory page maintains a sorted entry index table
 * which stores the start slot index of sorted entries
 * to allow binary search on the table.
 *
 * a directory starts as a root/leaf page in the on-disk inode
 * inline data area.
 * when it becomes full, it starts a leaf of an external extent
 * of length 1 block. each time the first leaf becomes full,
 * it is extended rather than split (its size is doubled),
 * until its length becomes 4 KBytes; from then on the extent is split
 * with a new 4 KByte extent when it becomes full,
 * to reduce external fragmentation of small directories.
 *
 * blah, blah, blah, for linear scan of directory in pieces by
 * readdir().
 *
 *
 *	case-insensitive directory file system
 *
 * names are stored in a case-sensitive way in the leaf entry,
 * but stored, searched and compared in case-insensitive (uppercase) order
 * (i.e., both search key and entry key are folded for search/compare):
 * (note that case-sensitive order is BROKEN in storage, e.g.,
 *  sensitive: Ad, aB, aC, aD -> insensitive: aB, aC, aD, Ad
 *
 *  entries which fold to the same key make up an equivalence class
 *  whose members are stored as a contiguous cluster (may cross page boundary)
 *  but whose order is arbitrary and act as duplicates, e.g.,
 *  abc, Abc, aBc, abC)
 *
 * once a match is found at a leaf, a forward/backward scan is required:
 * in a case-insensitive search, for duplicates;
 * in a case-sensitive search, for the exact match
 *
 * a router entry must be created/stored in a case-insensitive way
 * in the internal entry:
 * (the right most key of the left page and the left most key of the right page
 * are folded, and its suffix compression is propagated as the router
 * key in the parent)
 * (e.g., if a split occurs between <abc> and <aBd>, <ABD> rather than <aB>
 * should be made the router key for the split)
 *
 * case-insensitive search:
 *
 *	fold search key;
 *
 *	case-insensitive search of B-tree:
 *	for an internal entry, the router key is already folded;
 *	for a leaf entry, fold the entry key before comparison.
 *
 *	if (leaf entry case-insensitive match found)
 *		if (next entry satisfies case-insensitive match)
 *			return EDUPLICATE;
 *		if (prev entry satisfies case-insensitive match)
 *			return EDUPLICATE;
 *		return match;
 *	else
 *		return no match;
 *
 *	serialization:
 * the target directory inode lock is held on entry/exit
 * of all main directory service routines.
 *
 *	log based recovery:
 */

#include <linux/fs.h>
#include "jfs_incore.h"
#include "jfs_superblock.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_dmap.h"
#include "jfs_unicode.h"
#include "jfs_debug.h"

/* dtree split parameter */
struct dtsplit {
	struct metapage *mp;
	s16 index;
	s16 nslot;
	struct component_name *key;
	ddata_t *data;
	struct pxdlist *pxdlist;
};

#define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot)

/* get page buffer for specified block address */
#define DT_GETPAGE(IP, BN, MP, SIZE, P, RC)\
{\
	BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot)\
	if (!(RC))\
	{\
		if (((P)->header.nextindex > (((BN)==0)?DTROOTMAXSLOT:(P)->header.maxslot)) ||\
		    ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT)))\
		{\
			BT_PUTPAGE(MP);\
			jfs_error((IP)->i_sb, "DT_GETPAGE: dtree page corrupt");\
			MP = NULL;\
			RC = -EIO;\
		}\
	}\
}

/* for consistency */
#define DT_PUTPAGE(MP) BT_PUTPAGE(MP)

#define DT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
	BT_GETSEARCH(IP, LEAF, BN, MP, dtpage_t, P, INDEX, i_dtroot)

/*
 * forward references
 */
static int dtSplitUp(tid_t tid, struct inode *ip,
		     struct dtsplit * split, struct btstack * btstack);

static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
		       struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rxdp);

static int dtExtendPage(tid_t tid, struct inode *ip,
			struct dtsplit * split, struct btstack * btstack);

static int dtSplitRoot(tid_t tid, struct inode *ip,
		       struct dtsplit * split, struct metapage ** rmpp);

static int dtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
		      dtpage_t * fp, struct btstack * btstack);

static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p);

static int dtReadFirst(struct inode *ip, struct btstack * btstack);

static int dtReadNext(struct inode *ip,
		      loff_t * offset, struct btstack * btstack);

static int dtCompare(struct component_name * key, dtpage_t * p, int si);

static int ciCompare(struct component_name * key, dtpage_t * p, int si,
		     int flag);

static void dtGetKey(dtpage_t * p, int i, struct component_name * key,
		     int flag);

static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp,
			      int ri, struct component_name * key, int flag);

static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key,
			  ddata_t * data, struct dt_lock **);

static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp,
			struct dt_lock ** sdtlock, struct dt_lock ** ddtlock,
			int do_index);

static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock);

static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock);

static void dtLinelockFreelist(dtpage_t * p, int m, struct dt_lock ** dtlock);

#define ciToUpper(c)	UniStrupr((c)->name)
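/*
 * Illustrative sketch, not part of the original jfs_dtree.c: the duplicate
 * detection rule described in the header comment above.  Once a leaf entry
 * matches the case-folded search key, its sorted neighbours must be checked
 * as well, because all members of one equivalence class (abc, Abc, aBc, ...)
 * are clustered but stored in arbitrary order.  ci_leaf_match() is a
 * hypothetical stand-in for the ciCompare()-based leaf comparison, and the
 * case where the cluster crosses into a sibling page is ignored here.
 */
static int ci_lookup_sketch(dtpage_t * p, int index,
			    struct component_name * key,
			    int (*ci_leaf_match) (dtpage_t *, int,
						  struct component_name *))
{
	if (!ci_leaf_match(p, index, key))
		return 0;	/* no match */

	/* a neighbour folding to the same key makes the name ambiguous */
	if ((index + 1 < p->header.nextindex &&
	     ci_leaf_match(p, index + 1, key)) ||
	    (index > 0 && ci_leaf_match(p, index - 1, key)))
		return 2;	/* duplicate ("EDUPLICATE" in the comment above) */

	return 1;		/* unique case-insensitive match */
}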
/*
 *	read_index_page()
 *
 *	Reads a page of a directory's index table.
 *	Having metadata mapped into the directory inode's address space
 *	presents a multitude of problems.  We avoid this by mapping to
 *	the absolute address space outside of the *_metapage routines
 */
static struct metapage *read_index_page(struct inode *inode, s64 blkno)
{
	int rc;
	s64 xaddr;
	int xflag;
	s32 xlen;

	rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
	if (rc || (xlen == 0))
		return NULL;

	return read_metapage(inode, xaddr, PSIZE, 1);
}

/*
 *	get_index_page()
 *
 *	Same as read_index_page(), but gets a new page without reading
 */
static struct metapage *get_index_page(struct inode *inode, s64 blkno)
{
	int rc;
	s64 xaddr;
	int xflag;
	s32 xlen;

	rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
	if (rc || (xlen == 0))
		return NULL;

	return get_metapage(inode, xaddr, PSIZE, 1);
}

/*
 *	find_index()
 *
 *	Returns dtree page containing directory table entry for specified
 *	index and pointer to its entry.
 *
 *	mp must be released by caller.
 */
static struct dir_table_slot *find_index(struct inode *ip, u32 index,
					 struct metapage ** mp, s64 *lblock)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	s64 blkno;
	s64 offset;
	int page_offset;
	struct dir_table_slot *slot;
	static int maxWarnings = 10;

	if (index < 2) {
		if (maxWarnings) {
			jfs_warn("find_entry called with index = %d", index);
			maxWarnings--;
		}
		return 0;
	}

	if (index >= jfs_ip->next_index) {
		jfs_warn("find_entry called with index >= next_index");
		return 0;
	}

	if (jfs_ip->next_index <= (MAX_INLINE_DIRTABLE_ENTRY + 1)) {
		/*
		 * Inline directory table
		 */
		*mp = 0;
		slot = &jfs_ip->i_dirtable[index - 2];
	} else {
		offset = (index - 2) * sizeof(struct dir_table_slot);
		page_offset = offset & (PSIZE - 1);
		blkno = ((offset + 1) >> L2PSIZE) <<
		    JFS_SBI(ip->i_sb)->l2nbperpage;

		if (*mp && (*lblock != blkno)) {
			release_metapage(*mp);
			*mp = 0;
		}
		if (*mp == 0) {
			*lblock = blkno;
			*mp = read_index_page(ip, blkno);
		}
		if (*mp == 0) {
			jfs_err("free_index: error reading directory table");
			return 0;
		}

		slot = (struct dir_table_slot *) ((char *) (*mp)->data +
						  page_offset);
	}
	return slot;
}

static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp,
			      u32 index)
{
	struct tlock *tlck;
	struct linelock *llck;
	struct lv *lv;

	tlck = txLock(tid, ip, mp, tlckDATA);
	llck = (struct linelock *) tlck->lock;

	if (llck->index >= llck->maxcnt)
		llck = txLinelock(llck);
	lv = &llck->lv[llck->index];

	/*
	 *	Linelock slot size is twice the size of directory table
	 *	slot size.  512 entries per page.
	 */
	lv->offset = ((index - 2) & 511) >> 1;
	lv->length = 1;
	llck->index++;
}
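/*
 * Illustrative sketch, not part of the original file: the lv->offset
 * arithmetic used by lock_index() above.  A 4 KB index page holds 512
 * eight-byte dir_table_slot entries, and one linelock granule covers
 * 16 bytes, i.e. two adjacent entries, hence the mask and the shift.
 * index_linelock_granule_sketch() is a hypothetical helper added only
 * to show the mapping with worked numbers.
 */
static inline int index_linelock_granule_sketch(u32 index)
{
	/*
	 * index   2 -> granule   0	(first pair of the page)
	 * index   3 -> granule   0	(shares its granule with index 2)
	 * index 513 -> granule 255	(last pair of the page)
	 * index 514 -> granule   0	(first pair of the next page)
	 */
	return ((index - 2) & 511) >> 1;
}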
/*
 *	add_index()
 *
 *	Adds an entry to the directory index table.  This is used to provide
 *	each directory entry with a persistent index in which to resume
 *	directory traversals
 */
static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot)
{
	struct super_block *sb = ip->i_sb;
	struct jfs_sb_info *sbi = JFS_SBI(sb);
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	u64 blkno;
	struct dir_table_slot *dirtab_slot;
	u32 index;
	struct linelock *llck;
	struct lv *lv;
	struct metapage *mp;
	s64 offset;
	uint page_offset;
	struct tlock *tlck;
	s64 xaddr;

	ASSERT(DO_INDEX(ip));

	if (jfs_ip->next_index < 2) {
		jfs_warn("add_index: next_index = %d.  Resetting!",
			 jfs_ip->next_index);
		jfs_ip->next_index = 2;
	}

	index = jfs_ip->next_index++;

	if (index <= MAX_INLINE_DIRTABLE_ENTRY) {
		/*
		 * i_size reflects size of index table, or 8 bytes per entry.
		 */
		ip->i_size = (loff_t) (index - 1) << 3;

		/*
		 * dir table fits inline within inode
		 */
		dirtab_slot = &jfs_ip->i_dirtable[index-2];
		dirtab_slot->flag = DIR_INDEX_VALID;
		dirtab_slot->slot = slot;
		DTSaddress(dirtab_slot, bn);

		set_cflag(COMMIT_Dirtable, ip);

		return index;
	}
	if (index == (MAX_INLINE_DIRTABLE_ENTRY + 1)) {
		struct dir_table_slot temp_table[12];

		/*
		 * It's time to move the inline table to an external
		 * page and begin to build the xtree
		 */
		if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr))
			goto clean_up;	/* No space */

		/*
		 * Save the table, we're going to overwrite it with the
		 * xtree root
		 */
		memcpy(temp_table, &jfs_ip->i_dirtable, sizeof(temp_table));

		/*
		 * Initialize empty x-tree
		 */
		xtInitRoot(tid, ip);

		/*
		 * Allocate the first block & add it to the xtree
		 */
		if (xtInsert(tid, ip, 0, 0, sbi->nbperpage, &xaddr, 0)) {
			/* This really shouldn't fail */
			jfs_warn("add_index: xtInsert failed!");
			memcpy(&jfs_ip->i_dirtable, temp_table,
			       sizeof (temp_table));
			goto clean_up;
		}
		ip->i_size = PSIZE;
		ip->i_blocks += LBLK2PBLK(sb, sbi->nbperpage);

		if ((mp = get_index_page(ip, 0)) == 0) {
			jfs_err("add_index: get_metapage failed!");
			xtTruncate(tid, ip, 0, COMMIT_PWMAP);
			memcpy(&jfs_ip->i_dirtable, temp_table,
			       sizeof (temp_table));
			goto clean_up;
		}
		tlck = txLock(tid, ip, mp, tlckDATA);
		llck = (struct linelock *) & tlck->lock;
		ASSERT(llck->index == 0);
		lv = &llck->lv[0];

		lv->offset = 0;
		lv->length = 6;	/* tlckDATA slot size is 16 bytes */
		llck->index++;

		memcpy(mp->data, temp_table, sizeof(temp_table));

		mark_metapage_dirty(mp);
		release_metapage(mp);

		/*
		 * Logging is now directed by xtree tlocks
		 */
		clear_cflag(COMMIT_Dirtable, ip);
	}

	offset = (index - 2) * sizeof(struct dir_table_slot);
	page_offset = offset & (PSIZE - 1);
	blkno = ((offset + 1) >> L2PSIZE) << sbi->l2nbperpage;
	if (page_offset == 0) {
		/*
		 * This will be the beginning of a new page
		 */
		xaddr = 0;
		if (xtInsert(tid, ip, 0, blkno, sbi->nbperpage, &xaddr, 0)) {
			jfs_warn("add_index: xtInsert failed!");
			goto clean_up;
		}
		ip->i_size += PSIZE;
		ip->i_blocks += LBLK2PBLK(sb, sbi->nbperpage);

		if ((mp = get_index_page(ip, blkno)))
			memset(mp->data, 0, PSIZE);	/* Just looks better */
		else
			xtTruncate(tid, ip, offset, COMMIT_PWMAP);
	} else
		mp = read_index_page(ip, blkno);

	if (mp == 0) {
		jfs_err("add_index: get/read_metapage failed!");
		goto clean_up;
	}

	lock_index(tid, ip, mp, index);

	dirtab_slot =
	    (struct dir_table_slot *) ((char *) mp->data + page_offset);
	dirtab_slot->flag = DIR_INDEX_VALID;
	dirtab_slot->slot = slot;
	DTSaddress(dirtab_slot, bn);

	mark_metapage_dirty(mp);
	release_metapage(mp);

	return index;

      clean_up:

	jfs_ip->next_index--;

	return 0;
}
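/*
 * Illustrative sketch, not part of the original file: the addressing
 * arithmetic shared by find_index() and add_index() above.  A persistent
 * index maps to a byte offset in the external table (8 bytes per entry,
 * first entry at index 2); the low bits locate the entry within a 4 KB
 * page and the remaining bits select the logical block, scaled by
 * l2nbperpage (log2 of filesystem blocks per page).  The "+ 1" mirrors
 * the expression used by the routines above.
 */
static void index_to_location_sketch(u32 index, int l2nbperpage,
				     s64 *blkno, int *page_offset)
{
	s64 offset = (index - 2) * sizeof(struct dir_table_slot);

	*page_offset = offset & (PSIZE - 1);
	*blkno = ((offset + 1) >> L2PSIZE) << l2nbperpage;

	/*
	 * e.g. with 4 KB filesystem blocks (l2nbperpage == 0):
	 *   index 513 -> offset 4088 -> blkno 0, page_offset 4088
	 *   index 514 -> offset 4096 -> blkno 1, page_offset 0
	 */
}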
/*
 *	free_index()
 *
 *	Marks an entry to the directory index table as free.
 */
static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next)
{
	struct dir_table_slot *dirtab_slot;
	s64 lblock;
	struct metapage *mp = 0;

	dirtab_slot = find_index(ip, index, &mp, &lblock);

	if (dirtab_slot == 0)
		return;

	dirtab_slot->flag = DIR_INDEX_FREE;
	dirtab_slot->slot = dirtab_slot->addr1 = 0;
	dirtab_slot->addr2 = cpu_to_le32(next);

	if (mp) {
		lock_index(tid, ip, mp, index);
		mark_metapage_dirty(mp);
		release_metapage(mp);
	} else
		set_cflag(COMMIT_Dirtable, ip);
}

/*
 *	modify_index()
 *
 *	Changes an entry in the directory index table
 */
static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn,
			 int slot, struct metapage ** mp, u64 *lblock)
{
	struct dir_table_slot *dirtab_slot;

	dirtab_slot = find_index(ip, index, mp, lblock);

	if (dirtab_slot == 0)
		return;

	DTSaddress(dirtab_slot, bn);
	dirtab_slot->slot = slot;

	if (*mp) {
		lock_index(tid, ip, *mp, index);
		mark_metapage_dirty(*mp);
	} else
		set_cflag(COMMIT_Dirtable, ip);
}

/*
 *	read_index()
 *
 *	reads a directory table slot
 */
static int read_index(struct inode *ip, u32 index,
		      struct dir_table_slot * dirtab_slot)
{
	s64 lblock;
	struct metapage *mp = 0;
	struct dir_table_slot *slot;

	slot = find_index(ip, index, &mp, &lblock);
	if (slot == 0) {
		return -EIO;
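/*
 * Illustrative sketch, not part of the original file: a minimal model of
 * the 32-byte slot freelist described in the header comment at the top of
 * this file.  A directory page is an array of fixed-size slots chained
 * into a freelist; inserting an entry pops as many slots as its variable
 * length data needs and chains them together, and deleting an entry
 * pushes its slots back.  struct slot_model and alloc_slots_sketch() are
 * simplifications for illustration, not the on-disk dtslot layout.
 */
struct slot_model {
	s8 next;		/* next slot of the entry or freelist, -1 ends the chain */
	char data[31];		/* 32-byte slot, as in the header comment */
};

/* pop nslot slots from the freelist headed by *freelist and chain them */
static int alloc_slots_sketch(struct slot_model *page, s8 *freelist, int nslot)
{
	int head = *freelist;
	int s = head;
	int n;

	if (head < 0)
		return -1;		/* freelist empty */

	for (n = 1; n < nslot; n++) {
		if (page[s].next < 0)
			return -1;	/* not enough free slots */
		s = page[s].next;
	}

	*freelist = page[s].next;	/* unlink the run from the freelist */
	page[s].next = -1;		/* terminate the entry's slot chain */
	return head;			/* first slot of the new entry */
}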