📄 i2o_block.c
/*
 * I2O Random Block Storage Class OSM
 *
 * (C) Copyright 1999 Red Hat Software
 *
 * Written by Alan Cox, Building Number Three Ltd
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * This is a beta test release. Most of the good code was taken
 * from the nbd driver by Pavel Machek, who in turn took some of it
 * from loop.c. Isn't free software great for reusability 8)
 *
 * Fixes/additions:
 *	Steve Ralston:
 *		Multiple device handling error fixes,
 *		Added a queue depth.
 *	Alan Cox:
 *		FC920 has an rmw bug. Don't OR in the end marker.
 *		Removed queue walk, fixed for 64bitness.
 *	Deepak Saxena:
 *		Independent queues per IOP
 *		Support for dynamic device creation/deletion
 *		Code cleanup
 *		Support for larger I/Os through merge* functions
 *		(taken from DAC960 driver)
 *	Boji T Kannanthanam:
 *		Reduced the timeout during RAID 5 creation.
 *		This is to prevent a race condition when a RAID volume
 *		is created and immediately deleted.
 *
 * To do:
 *	Serial number scanning to find duplicates for FC multipathing
 *	Remove the random timeout in the code needed for RAID 5
 *	volume creation.
 */

#include <linux/major.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/ioctl.h>
#include <linux/i2o.h>
#include <linux/blkdev.h>
#include <linux/blkpg.h>
#include <linux/malloc.h>
#include <linux/hdreg.h>
#include <linux/notifier.h>
#include <linux/reboot.h>

#include <asm/uaccess.h>
#include <asm/semaphore.h>
#include <asm/io.h>
#include <asm/atomic.h>
#include <linux/smp_lock.h>
#include <linux/wait.h>

#define MAJOR_NR I2O_MAJOR

#include <linux/blk.h>

#define MAX_I2OB	16

#define MAX_I2OB_DEPTH	128
#define MAX_I2OB_RETRIES 4

//#define DRIVERDEBUG
#ifdef DRIVERDEBUG
#define DEBUG( s ) printk( s )
#else
#define DEBUG( s )
#endif

/*
 * Events that this OSM is interested in
 */
#define I2OB_EVENT_MASK		(I2O_EVT_IND_BSA_VOLUME_LOAD |	\
				 I2O_EVT_IND_BSA_VOLUME_UNLOAD | \
				 I2O_EVT_IND_BSA_VOLUME_UNLOAD_REQ | \
				 I2O_EVT_IND_BSA_CAPACITY_CHANGE)

/*
 * I2O Block Error Codes - should be in a header file really...
 */
#define I2O_BSA_DSC_SUCCESS		0x0000
#define I2O_BSA_DSC_MEDIA_ERROR		0x0001
#define I2O_BSA_DSC_ACCESS_ERROR	0x0002
#define I2O_BSA_DSC_DEVICE_FAILURE	0x0003
#define I2O_BSA_DSC_DEVICE_NOT_READY	0x0004
#define I2O_BSA_DSC_MEDIA_NOT_PRESENT	0x0005
#define I2O_BSA_DSC_MEDIA_LOCKED	0x0006
#define I2O_BSA_DSC_MEDIA_FAILURE	0x0007
#define I2O_BSA_DSC_PROTOCOL_FAILURE	0x0008
#define I2O_BSA_DSC_BUS_FAILURE		0x0009
#define I2O_BSA_DSC_ACCESS_VIOLATION	0x000A
#define I2O_BSA_DSC_WRITE_PROTECTED	0x000B
#define I2O_BSA_DSC_DEVICE_RESET	0x000C
#define I2O_BSA_DSC_VOLUME_CHANGED	0x000D
#define I2O_BSA_DSC_TIMEOUT		0x000E

/*
 * Some of these can be made smaller later
 */
static int i2ob_blksizes[MAX_I2OB<<4];
static int i2ob_hardsizes[MAX_I2OB<<4];
static int i2ob_sizes[MAX_I2OB<<4];
static int i2ob_media_change_flag[MAX_I2OB];
static u32 i2ob_max_sectors[MAX_I2OB<<4];

static int i2ob_context;

/*
 * I2O Block device descriptor
 */
struct i2ob_device
{
	struct i2o_controller *controller;
	struct i2o_device *i2odev;
	int unit;
	int tid;
	int flags;
	int refcnt;
	struct request *head, *tail;
	request_queue_t *req_queue;
	int max_segments;
	int done_flag;
};

/*
 * FIXME:
 * We should cache align these to avoid ping-ponging lines on SMP
 * boxes under heavy I/O load...
 */
struct i2ob_request
{
	struct i2ob_request *next;
	struct request *req;
	int num;
};
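
/*
 * A minimal sketch of the cache alignment the FIXME above suggests,
 * assuming this tree provides __cacheline_aligned via <linux/cache.h>
 * (an assumption; the driver does not currently do this):
 *
 *	#include <linux/cache.h>
 *
 *	struct i2ob_request
 *	{
 *		struct i2ob_request *next;
 *		struct request *req;
 *		int num;
 *	} __cacheline_aligned;
 *
 * Each element of a request_queue[] array would then start on its own
 * cache line, so two CPUs completing different requests do not
 * write-share a line.
 */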

/*
 * Per-IOP request queue information
 *
 * We have a separate request_queue_t per IOP so that a heavily
 * loaded I2O block device on an IOP does not starve block devices
 * across all I2O controllers.
 */
struct i2ob_iop_queue
{
	atomic_t queue_depth;
	struct i2ob_request request_queue[MAX_I2OB_DEPTH];
	struct i2ob_request *i2ob_qhead;
	request_queue_t req_queue;
};
static struct i2ob_iop_queue *i2ob_queues[MAX_I2O_CONTROLLERS] = {NULL};

/*
 * Each I2O disk is one of these.
 */
static struct i2ob_device i2ob_dev[MAX_I2OB<<4];
static int i2ob_dev_count = 0;
static struct hd_struct i2ob[MAX_I2OB<<4];
static struct gendisk i2ob_gendisk;	/* Declared later */

/*
 * Mutex and spin lock for event handling synchronization
 * evt_msg contains the last event.
 */
DECLARE_MUTEX(i2ob_evt_sem);
static spinlock_t i2ob_evt_lock = SPIN_LOCK_UNLOCKED;
static unsigned int evt_msg[MSG_FRAME_SIZE>>2];
DECLARE_WAIT_QUEUE_HEAD(i2ob_evt_wait);

static struct timer_list i2ob_timer;
static int i2ob_timer_started = 0;

static void i2o_block_reply(struct i2o_handler *, struct i2o_controller *,
	struct i2o_message *);
static void i2ob_new_device(struct i2o_controller *, struct i2o_device *);
static void i2ob_del_device(struct i2o_controller *, struct i2o_device *);
static void i2ob_reboot_event(void);
static int i2ob_install_device(struct i2o_controller *, struct i2o_device *, int);
static void i2ob_end_request(struct request *);
static void i2ob_request(request_queue_t *);
static int i2ob_init_iop(unsigned int);
static request_queue_t *i2ob_get_queue(kdev_t);
static int i2ob_query_device(struct i2ob_device *, int, int, void *, int);
static int do_i2ob_revalidate(kdev_t, int);
static int i2ob_evt(void *);

static int evt_pid = 0;
static int evt_running = 0;

/*
 * I2O OSM registration structure...keeps getting bigger and bigger :)
 */
static struct i2o_handler i2o_block_handler =
{
	i2o_block_reply,
	i2ob_new_device,
	i2ob_del_device,
	i2ob_reboot_event,
	"I2O Block OSM",
	0,
	I2O_CLASS_RANDOM_BLOCK_STORAGE
};

/*
 * Get a message
 */
static u32 i2ob_get(struct i2ob_device *dev)
{
	struct i2o_controller *c = dev->controller;
	return I2O_POST_READ32(c);
}
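
/*
 * I2O_POST_READ32() pops a free inbound message frame offset from the
 * controller's FIFO. A sketch of the usual caller-side check, assuming
 * the standard I2O convention that an empty queue reads back as all
 * ones (usage sketch only, not new driver code):
 *
 *	u32 m = i2ob_get(dev);
 *	if (m == 0xFFFFFFFF)
 *		return;		(no frame free; back off and retry)
 */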

/*
 * Turn a Linux block request into an I2O block read/write.
 */
static int i2ob_send(u32 m, struct i2ob_device *dev, struct i2ob_request *ireq, u32 base, int unit)
{
	struct i2o_controller *c = dev->controller;
	int tid = dev->tid;
	unsigned long msg;
	unsigned long mptr;
	u64 offset;
	struct request *req = ireq->req;
	struct buffer_head *bh = req->bh;
	int count = req->nr_sectors<<9;
	char *last = NULL;
	unsigned short size = 0;

	// printk(KERN_INFO "i2ob_send called\n");
	/* Map the message to a virtual address */
	msg = c->mem_offset + m;

	/*
	 * Build the message based on the request.
	 */
	__raw_writel(i2ob_context|(unit<<8), msg+8);
	__raw_writel(ireq->num, msg+12);
	__raw_writel(req->nr_sectors << 9, msg+20);

	/*
	 * This can be optimised later - just want to be sure it's right
	 * for starters.
	 */
	offset = ((u64)(req->sector+base)) << 9;
	__raw_writel(offset & 0xFFFFFFFF, msg+24);
	__raw_writel(offset>>32, msg+28);
	mptr = msg+32;

	if(req->cmd == READ)
	{
		__raw_writel(I2O_CMD_BLOCK_READ<<24|HOST_TID<<12|tid, msg+4);
		/* We don't yet do cache/readahead and other magic */
		__raw_writel(1<<16, msg+16);
		while(bh != NULL)
		{
			if(bh->b_data == last)
			{
				size += bh->b_size;
				last += bh->b_size;
				if(bh->b_reqnext)
					__raw_writel(0x14000000|(size), mptr-8);
				else
					__raw_writel(0xD4000000|(size), mptr-8);
			}
			else
			{
				if(bh->b_reqnext)
					__raw_writel(0x10000000|(bh->b_size), mptr);
				else
					__raw_writel(0xD0000000|(bh->b_size), mptr);
				__raw_writel(virt_to_bus(bh->b_data), mptr+4);
				mptr += 8;
				size = bh->b_size;
				last = bh->b_data + size;
			}
			count -= bh->b_size;
			bh = bh->b_reqnext;
		}
	}
	else if(req->cmd == WRITE)
	{
		__raw_writel(I2O_CMD_BLOCK_WRITE<<24|HOST_TID<<12|tid, msg+4);
		/*
		 * Allow replies to come back once data is cached in the
		 * controller. This allows us to handle writes quickly,
		 * thus giving more of the queue to reads.
		 */
		__raw_writel(0x00000010, msg+16);
		while(bh != NULL)
		{
			if(bh->b_data == last)
			{
				size += bh->b_size;
				last += bh->b_size;
				if(bh->b_reqnext)
					__raw_writel(0x14000000|(size), mptr-8);
				else
					__raw_writel(0xD4000000|(size), mptr-8);
			}
			else
			{
				if(bh->b_reqnext)
					__raw_writel(0x14000000|(bh->b_size), mptr);
				else
					__raw_writel(0xD4000000|(bh->b_size), mptr);
				__raw_writel(virt_to_bus(bh->b_data), mptr+4);
				mptr += 8;
				size = bh->b_size;
				last = bh->b_data + size;
			}
			count -= bh->b_size;
			bh = bh->b_reqnext;
		}
	}

	__raw_writel(I2O_MESSAGE_SIZE(mptr-msg)>>2 | SGL_OFFSET_8, msg);

	if(req->current_nr_sectors > i2ob_max_sectors[unit])
		printk("Gathered sectors %ld.\n", req->current_nr_sectors);

	if(count != 0)
	{
		printk(KERN_ERR "Request count botched by %d.\n", count);
	}

	i2o_post_message(c, m);
	atomic_inc(&i2ob_queues[c->unit]->queue_depth);

	return 0;
}
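
/*
 * For reference, the scatter-gather flag words hand-built above decode,
 * as best matched against the I2O specification's simple SGL element
 * flags (a reading aid, not taken from this driver):
 *
 *	0x10000000	simple address element
 *	0x04000000	direction bit
 *	0x40000000	end of buffer
 *	0x80000000	last element of the list
 *
 * so 0xD0000000 is "simple | end-of-buffer | last", and 0xD4000000 is
 * the same with the direction bit set.
 */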

/*
 * Remove a request from the _locked_ request list. We update both the
 * list chain and if this is the last item the tail pointer. Caller
 * must hold the lock.
 */
static inline void i2ob_unhook_request(struct i2ob_request *ireq, unsigned int iop)
{
	ireq->next = i2ob_queues[iop]->i2ob_qhead;
	i2ob_queues[iop]->i2ob_qhead = ireq;
}

/*
 * Request completion handler
 */
static inline void i2ob_end_request(struct request *req)
{
	/*
	 * Loop until all of the buffers that are linked
	 * to this request have been marked updated and
	 * unlocked.
	 */
	while (end_that_request_first(req, !req->errors, "i2o block"));

	/*
	 * It is now ok to complete the request.
	 */
	end_that_request_last(req);
}

/*
 * Request merging functions
 */
static inline int i2ob_new_segment(request_queue_t *q, struct request *req,
				   int __max_segments)
{
	int max_segments = i2ob_dev[MINOR(req->rq_dev)].max_segments;

	if (__max_segments < max_segments)
		max_segments = __max_segments;

	if (req->nr_segments < max_segments) {
		req->nr_segments++;
		q->elevator.nr_segments++;
		return 1;
	}
	return 0;
}

static int i2ob_back_merge(request_queue_t *q, struct request *req,
			   struct buffer_head *bh, int __max_segments)
{
	if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data)
		return 1;
	return i2ob_new_segment(q, req, __max_segments);
}

static int i2ob_front_merge(request_queue_t *q, struct request *req,
			    struct buffer_head *bh, int __max_segments)
{
	if (bh->b_data + bh->b_size == req->bh->b_data)
		return 1;
	return i2ob_new_segment(q, req, __max_segments);
}

static int i2ob_merge_requests(request_queue_t *q,
			       struct request *req,
			       struct request *next,
			       int __max_segments)
{
	int max_segments = i2ob_dev[MINOR(req->rq_dev)].max_segments;
	int total_segments = req->nr_segments + next->nr_segments;
	int same_segment;

	if (__max_segments < max_segments)
		max_segments = __max_segments;

	same_segment = 0;
	if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data)
	{
		total_segments--;
		same_segment = 1;
	}

	if (total_segments > max_segments)
		return 0;

	q->elevator.nr_segments -= same_segment;
	req->nr_segments = total_segments;
	return 1;
}
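
/*
 * These merge hooks do nothing until they are wired into a request
 * queue. A sketch of how such hooks are registered on a 2.4-era
 * request_queue_t, as done in the driver's queue init path (which
 * falls outside this excerpt):
 *
 *	request_queue_t *q = &i2ob_queues[iop]->req_queue;
 *	q->back_merge_fn = i2ob_back_merge;
 *	q->front_merge_fn = i2ob_front_merge;
 *	q->merge_requests_fn = i2ob_merge_requests;
 */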

/*
 * OSM reply handler. This gets all the message replies.
 */
static void i2o_block_reply(struct i2o_handler *h, struct i2o_controller *c, struct i2o_message *msg)
{
	unsigned long flags;
	struct i2ob_request *ireq = NULL;
	u8 st;
	u32 *m = (u32 *)msg;
	u8 unit = (m[2]>>8)&0xF0;	/* low 4 bits are partition */
	struct i2ob_device *dev = &i2ob_dev[(unit&0xF0)];

	/*
	 * FAILed message
	 */
	if(m[0] & (1<<13))
	{
		/*
		 * FAILed message from controller.
		 * We increment the error count and abort it.
		 *
		 * In theory this will never happen. The I2O block class
		 * specification states that block devices never return
		 * FAILs but instead use the REQ status field...but
		 * better be on the safe side since no one really follows
		 * the spec to the book :)
		 */
		ireq = &i2ob_queues[c->unit]->request_queue[m[3]];
		ireq->req->errors++;

		spin_lock_irqsave(&io_request_lock, flags);
		i2ob_unhook_request(ireq, c->unit);
		i2ob_end_request(ireq->req);
		spin_unlock_irqrestore(&io_request_lock, flags);

		/* Now flush the message by making it a NOP */
		m[0] &= 0x00FFFFFF;
		m[0] |= (I2O_CMD_UTIL_NOP)<<24;
		i2o_post_message(c, virt_to_bus(m));

		return;
	}

	if(msg->function == I2O_CMD_UTIL_EVT_REGISTER)
	{
		spin_lock(&i2ob_evt_lock);
		memcpy(&evt_msg, m, msg->size);
		spin_unlock(&i2ob_evt_lock);
		wake_up_interruptible(&i2ob_evt_wait);
		return;
	}

	if(!dev->i2odev)
	{
		/*
		 * This is a HACK, but Intel Integrated RAID allows the user
		 * to delete a volume that is claimed, locked, and in use
		 * by the OS. We have to check for a reply from a
		 * non-existent device and flag it as an error or the system
		 * goes kaput...
		 */
		ireq = &i2ob_queues[c->unit]->request_queue[m[3]];
		ireq->req->errors++;
		printk(KERN_WARNING "I2O Block: Data transfer to deleted device!\n");
		spin_lock_irqsave(&io_request_lock, flags);
		i2ob_unhook_request(ireq, c->unit);
		i2ob_end_request(ireq->req);
		spin_unlock_irqrestore(&io_request_lock, flags);
		return;
	}

	/*
	 * Let's see what is cooking. We stuffed the
	 * request in the context.
	 */
	ireq = &i2ob_queues[c->unit]->request_queue[m[3]];
	st = m[4]>>24;

	if(st != 0)
	{
		char *bsa_errors[] =
		{
			"Success",
			"Media Error",
			"Failure communicating to device",
			"Device Failure",
			"Device is not ready",
			"Media not present",
			"Media is locked by another user",
			"Media has failed",
			"Failure communicating to device",
			"Device bus failure",
			"Device is locked by another user",
			"Device is write protected",
			"Device has reset",
			"Volume has changed, waiting for acknowledgement",
			"Timeout"	/* I2O_BSA_DSC_TIMEOUT */
		};

		printk(KERN_ERR "\n/dev/%s error: %s", dev->i2odev->dev_name,
			bsa_errors[m[4]&0xFFFF]);
		if(m[4]&0x00FF0000)
			printk(" - DDM attempted %d retries", (m[4]>>16)&0x00FF);
		printk("\n");
		ireq->req->errors++;
	}
	else
		ireq->req->errors = 0;

	/*
	 * Dequeue the request. We use irqsave locks as one day we
	 * may be running polled controllers from a BH...
	 */
	spin_lock_irqsave(&io_request_lock, flags);
	i2ob_unhook_request(ireq, c->unit);
	i2ob_end_request(ireq->req);
	atomic_dec(&i2ob_queues[c->unit]->queue_depth);

	/*
	 * We may be able to do more I/O
	 */
	i2ob_request(dev->req_queue);
	spin_unlock_irqrestore(&io_request_lock, flags);
}

/*
 * Event handler. Needs to be a separate thread b/c we may have
 * to do things like scan a partition table, or query parameters
 * which cannot be done from an interrupt or from a bottom half.
 */
static int i2ob_evt(void *dummy)
{
	unsigned int evt;
	unsigned int flags;
	int unit;
	int i;

	lock_kernel();
	daemonize();
	unlock_kernel();