File: raw-2.2.18.FULL.diff

package info (click to toggle)
kernel-patch-kiobuf 2.2.18-3
links: PTS
area: main
in suites: woody
size: 104 kB
ctags: 7
sloc: makefile: 30
file content (1117 lines) | stat: -rw-r--r-- 26,128 bytes
--- linux-2.2.18.raw/drivers/char/Makefile.~1~	Mon Dec 11 00:49:41 2000
+++ linux-2.2.18.raw/drivers/char/Makefile	Fri Dec 15 16:34:40 2000
@@ -20,7 +20,7 @@
 
 O_TARGET := char.o
 M_OBJS   :=
-O_OBJS   := tty_io.o n_tty.o tty_ioctl.o mem.o random.o
+O_OBJS   := tty_io.o n_tty.o tty_ioctl.o mem.o random.o raw.o
 OX_OBJS  := pty.o misc.o
 obj-y 	 :=
 obj-m	 :=
--- linux-2.2.18.raw/drivers/char/mem.c.~1~	Mon Dec 11 00:49:42 2000
+++ linux-2.2.18.raw/drivers/char/mem.c	Fri Dec 15 16:34:41 2000
@@ -17,6 +17,7 @@
 #include <linux/joystick.h>
 #include <linux/i2c.h>
 #include <linux/capability.h>
+#include <linux/raw.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -608,6 +609,7 @@
 	if (register_chrdev(MEM_MAJOR,"mem",&memory_fops))
 		printk("unable to get major %d for memory devs\n", MEM_MAJOR);
 	rand_initialize();
+	raw_init();
 #if defined (CONFIG_FB)
 	fbmem_init();
 #endif
--- linux-2.2.18.raw/drivers/char/raw.c.~1~	Fri Dec 15 16:34:41 2000
+++ linux-2.2.18.raw/drivers/char/raw.c	Fri Dec 15 16:34:41 2000
@@ -0,0 +1,388 @@
+/*
+ * linux/drivers/char/raw.c
+ *
+ * Front-end raw character devices.  These can be bound to any block
+ * devices to provide genuine Unix raw character device semantics.
+ *
+ * We reserve minor number 0 for a control interface.  ioctl()s on this
+ * device are used to bind the other minor numbers to block devices.
+ */
+
+#include <linux/fs.h>
+#include <linux/iobuf.h>
+#include <linux/major.h>
+#include <linux/blkdev.h>
+#include <linux/raw.h>
+#include <asm/uaccess.h>
+
+#define dprintk(x...) 
+
+static kdev_t raw_device_bindings[256] = {};
+static int raw_device_inuse[256] = {};
+static int raw_device_sector_size[256] = {};
+static int raw_device_sector_bits[256] = {};
+
+extern struct file_operations * get_blkfops(unsigned int major);
+
+static ssize_t rw_raw_dev(int rw, struct file *, char *, size_t, loff_t *);
+
+ssize_t	raw_read(struct file *, char *, size_t, loff_t *);
+ssize_t	raw_write(struct file *, const char *, size_t, loff_t *);
+int	raw_open(struct inode *, struct file *);
+int	raw_release(struct inode *, struct file *);
+int	raw_ctl_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
+
+
+static struct file_operations raw_fops = {
+	NULL,		/* llseek */
+	raw_read,	/* read */
+	raw_write,	/* write */
+	NULL,		/* readdir */
+	NULL,		/* poll */
+	NULL,		/* ioctl */
+	NULL,		/* mmap */
+	raw_open,	/* open */
+	NULL,		/* flush */
+	raw_release,	/* release */
+	NULL		/* fsync */
+};
+
+static struct file_operations raw_ctl_fops = {
+	NULL,		/* llseek */
+	NULL,		/* read */
+	NULL,		/* write */
+	NULL,		/* readdir */
+	NULL,		/* poll */
+	raw_ctl_ioctl,	/* ioctl */
+	NULL,		/* mmap */
+	raw_open,	/* open */
+	NULL,		/* flush */
+	NULL,		/* no special release code */
+	NULL		/* fsync */
+};
+
+
+
+void __init raw_init(void)
+{
+	register_chrdev(RAW_MAJOR, "raw", &raw_fops);
+}
+
+
+/*
+ * The raw IO open and release code needs to fake appropriate
+ * open/release calls to the underlying block devices.  
+ */
+
+static int bdev_open(kdev_t dev, int mode)
+{
+	int err = 0;
+	struct file dummy_file = {};
+	struct dentry dummy_dentry = {};
+	struct inode * inode = get_empty_inode();
+	
+	if (!inode)
+		return -ENOMEM;
+	
+	dummy_file.f_op = get_blkfops(MAJOR(dev));
+	if (!dummy_file.f_op) {
+		err = -ENODEV;
+		goto done;
+	}
+	
+	if (dummy_file.f_op->open) {
+		inode->i_rdev = dev;
+		dummy_dentry.d_inode = inode;
+		dummy_file.f_dentry = &dummy_dentry;
+		dummy_file.f_mode = mode;
+		err = dummy_file.f_op->open(inode, &dummy_file);
+	}
+
+ done:
+	iput(inode);
+	return err;
+}
+
+static int bdev_close(kdev_t dev)
+{
+	int err;
+	struct inode * inode = get_empty_inode();
+
+	if (!inode)
+		return -ENOMEM;
+	
+	inode->i_rdev = dev;
+	err = blkdev_release(inode);
+	iput(inode);
+	return err;
+}
+
+
+
+/* 
+ * Open/close code for raw IO.
+ */
+
+int raw_open(struct inode *inode, struct file *filp)
+{
+	int minor;
+	kdev_t bdev;
+	int err;
+	int sector_size;
+	int sector_bits;
+
+	minor = MINOR(inode->i_rdev);
+	
+	/* 
+	 * Is it the control device? 
+	 */
+	
+	if (minor == 0) {
+		filp->f_op = &raw_ctl_fops;
+		return 0;
+	}
+	
+	/*
+	 * No, it is a normal raw device.  All we need to do on open is
+	 * to check that the device is bound, and force the underlying
+	 * block device to a sector-size blocksize. 
+	 */
+
+	bdev = raw_device_bindings[minor];
+	if (bdev == NODEV) 
+		return -ENODEV;
+
+	err = bdev_open(bdev, filp->f_mode);
+	if (err)
+		return err;
+	
+	/*
+	 * Don't change the blocksize if we already have users using
+	 * this device 
+	 */
+
+	if (raw_device_inuse[minor]++)
+		return 0;
+	
+	/* 
+	 * Don't interfere with mounted devices: we cannot safely set
+	 * the blocksize on a device which is already mounted.  
+	 */
+	
+	sector_size = 512;
+	if (lookup_vfsmnt(bdev) != NULL) {
+		if (blksize_size[MAJOR(bdev)])
+			sector_size = blksize_size[MAJOR(bdev)][MINOR(bdev)];
+	} else {
+		if (hardsect_size[MAJOR(bdev)])
+			sector_size = hardsect_size[MAJOR(bdev)][MINOR(bdev)];
+	}
+
+	set_blocksize(bdev, sector_size);
+	raw_device_sector_size[minor] = sector_size;
+
+	for (sector_bits = 0; !(sector_size & 1); )
+		sector_size>>=1, sector_bits++;
+	raw_device_sector_bits[minor] = sector_bits;
+	
+	return 0;
+}
+
+int raw_release(struct inode *inode, struct file *filp)
+{
+	int minor;
+	kdev_t bdev;
+	
+	minor = MINOR(inode->i_rdev);
+	bdev = raw_device_bindings[minor];
+	bdev_close(bdev);
+	raw_device_inuse[minor]--;
+	return 0;
+}
+
+
+
+/*
+ * Deal with ioctls against the raw-device control interface, to bind
+ * and unbind other raw devices.  
+ */
+
+int raw_ctl_ioctl(struct inode *inode, 
+		  struct file *flip,
+		  unsigned int command, 
+		  unsigned long arg)
+{
+	struct raw_config_request rq;
+	int err = 0;
+	int minor;
+	
+	switch (command) {
+	case RAW_SETBIND:
+	case RAW_GETBIND:
+
+		/* First, find out which raw minor we want */
+
+		err = copy_from_user(&rq, (void *) arg, sizeof(rq));
+		if (err)
+			break;
+		
+		minor = rq.raw_minor;
+		if (minor == 0 || minor > MINORMASK) {
+			err = -EINVAL;
+			break;
+		}
+
+		if (command == RAW_SETBIND) {
+			/* 
+			 * For now, we don't need to check that the underlying
+			 * block device is present or not: we can do that when
+			 * the raw device is opened.  Just check that the
+			 * major/minor numbers make sense. 
+			 */
+
+			if ((rq.block_major == NODEV && 
+			     rq.block_minor != NODEV) ||
+			    rq.block_major > MAX_BLKDEV ||
+			    rq.block_minor > MINORMASK) {
+				err = -EINVAL;
+				break;
+			}
+			
+			if (raw_device_inuse[minor]) {
+				err = -EBUSY;
+				break;
+			}
+			raw_device_bindings[minor] = 
+				MKDEV(rq.block_major, rq.block_minor);
+		} else {
+			rq.block_major = MAJOR(raw_device_bindings[minor]);
+			rq.block_minor = MINOR(raw_device_bindings[minor]);
+			err = copy_to_user((void *) arg, &rq, sizeof(rq));
+		}
+		break;
+		
+	default:
+		err = -EINVAL;
+	}
+	
+	return err;
+}
+
+
+
+ssize_t	raw_read(struct file *filp, char * buf, 
+		 size_t size, loff_t *offp)
+{
+	return rw_raw_dev(READ, filp, buf, size, offp);
+}
+
+ssize_t	raw_write(struct file *filp, const char *buf, 
+		  size_t size, loff_t *offp)
+{
+	return rw_raw_dev(WRITE, filp, (char *) buf, size, offp);
+}
+
+#define SECTOR_BITS 9
+#define SECTOR_SIZE (1U << SECTOR_BITS)
+#define SECTOR_MASK (SECTOR_SIZE - 1)
+
+ssize_t	rw_raw_dev(int rw, struct file *filp, char *buf, 
+		   size_t size, loff_t *offp)
+{
+	struct kiobuf * iobuf;
+	int		err;
+	unsigned long	blocknr, blocks;
+	unsigned long	b[KIO_MAX_SECTORS];
+	size_t		transferred;
+	int		iosize;
+	int		i;
+	int		minor;
+	kdev_t		dev;
+	unsigned long	limit;
+
+	int		sector_size, sector_bits, sector_mask;
+	int		max_sectors;
+	
+	/*
+	 * First, a few checks on device size limits 
+	 */
+
+	minor = MINOR(filp->f_dentry->d_inode->i_rdev);
+	dev = raw_device_bindings[minor];
+	sector_size = raw_device_sector_size[minor];
+	sector_bits = raw_device_sector_bits[minor];
+	sector_mask = sector_size- 1;
+	max_sectors = KIO_MAX_SECTORS >> (sector_bits - 9);
+	
+	if (blk_size[MAJOR(dev)])
+		limit = (((loff_t) blk_size[MAJOR(dev)][MINOR(dev)]) << BLOCK_SIZE_BITS) >> sector_bits;
+	else
+		limit = INT_MAX;
+	dprintk ("rw_raw_dev: dev %d:%d (+%d)\n",
+		 MAJOR(dev), MINOR(dev), limit);
+	
+	if ((*offp & sector_mask) || (size & sector_mask))
+		return -EINVAL;
+	if ((*offp >> sector_bits) >= limit)  {
+		if (size)
+			return -ENXIO;
+		return 0;
+	}
+
+	/* 
+	 * We'll just use one kiobuf
+	 */
+
+	err = alloc_kiovec(1, &iobuf);
+	if (err)
+		return err;
+
+	/*
+	 * Split the IO into KIO_MAX_SECTORS chunks, mapping and
+	 * unmapping the single kiobuf as we go to perform each chunk of
+	 * IO.  
+	 */
+
+	transferred = 0;
+	blocknr = *offp >> sector_bits;
+	while (size > 0) {
+		blocks = size >> sector_bits;
+		if (blocks > max_sectors)
+			blocks = max_sectors;
+		if (blocks > limit - blocknr)
+			blocks = limit - blocknr;
+		if (!blocks)
+			break;
+
+		iosize = blocks << sector_bits;
+		
+		err = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize);
+		if (err)
+			break;
+		
+		for (i=0; i < blocks; i++)
+ 			b[i] = blocknr++;
+		
+		err = brw_kiovec(rw, 1, &iobuf, dev, b, sector_size, 0);
+
+		if (err >= 0) {
+			transferred += err;
+			size -= err;
+			buf += err;
+		}
+
+		unmap_kiobuf(iobuf);
+
+		if (err != iosize)
+			break;
+	}
+	
+	free_kiovec(1, &iobuf);
+
+	if (transferred) {
+		*offp += transferred;
+		return transferred;
+	}
+	
+	return err;
+}
--- linux-2.2.18.raw/fs/buffer.c.~1~	Mon Dec 11 00:49:44 2000
+++ linux-2.2.18.raw/fs/buffer.c	Fri Dec 15 16:34:41 2000
@@ -43,6 +43,7 @@
 #include <linux/file.h>
 #include <linux/init.h>
 #include <linux/quotaops.h>
+#include <linux/iobuf.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -1259,6 +1260,180 @@
 bad_count:
 	printk ("Whoops: end_buffer_io_async: b_count != 1 on async io.\n");
 	return;
+}
+
+
+/*
+ * For brw_kiovec: submit a set of buffer_head temporary IOs and wait
+ * for them to complete.  Clean up the buffer_heads afterwards.  
+ */
+
+#define dprintk(x...)
+
+static int do_kio(int rw, int nr, struct buffer_head *bh[], int size)
+{
+	int iosize;
+	int i;
+	int err;
+	struct buffer_head *tmp;
+
+	dprintk ("do_kio start\n");
+	
+	ll_rw_block(rw, nr, bh);
+	iosize = err = 0;
+	
+	for (i = nr; --i >= 0; ) {
+		tmp = bh[i];
+		wait_on_buffer(tmp);
+		if (!buffer_uptodate(tmp)) {
+			err = -EIO;
+			/* We are waiting on bh'es in reverse order so
+                           clearing iosize on error calculates the
+                           amount of IO before the first error. */
+			iosize = 0;
+		}
+		
+		put_unused_buffer_head(tmp);
+		iosize += size;
+	}
+	wake_up(&buffer_wait);
+	
+	dprintk ("do_kio end %d %d\n", iosize, err);
+	
+	if (iosize)
+		return iosize;
+	else
+		return err;
+}
+
+/*
+ * Start I/O on a physical range of kernel memory, defined by a vector
+ * of kiobuf structs (much like a user-space iovec list).
+ *
+ * IO is submitted asynchronously: you need to check page->locked,
+ * page->uptodate, and maybe wait on page->wait.
+ *
+ * It is up to the caller to make sure that there are enough blocks
+ * passed in to completely map the iobufs to disk.  */
+
+int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], 
+	       kdev_t dev, unsigned long b[], int size, int bmap)
+{
+	int		err;
+	int		length;
+	int		transferred;
+	int		i;
+	int		bufind;
+	int		pageind;
+	int		bhind;
+	int		offset;
+	unsigned long	blocknr;
+	struct kiobuf *	iobuf = NULL;
+	unsigned long	page;
+	struct page *	map;
+	struct buffer_head *tmp, *bh[KIO_MAX_SECTORS];
+
+	/* 
+	 * First, do some alignment and validity checks 
+	 */
+	for (i = 0; i < nr; i++) {
+		iobuf = iovec[i];
+		if ((iobuf->offset & (size-1)) ||
+		    (iobuf->length & (size-1)))
+			return -EINVAL;
+		if (!iobuf->nr_pages)
+			panic("brw_kiovec: iobuf not initialised");
+	}
+
+	/* DEBUG */
+#if 0
+	return iobuf->length;
+#endif
+	dprintk ("brw_kiovec: start\n");
+	
+	/* 
+	 * OK to walk down the iovec doing page IO on each page we find. 
+	 */
+	bufind = bhind = transferred = err = 0;
+	for (i = 0; i < nr; i++) {
+		iobuf = iovec[i];
+		offset = iobuf->offset;
+		length = iobuf->length;
+		dprintk ("iobuf %d %d %d\n", offset, length, size);
+
+		for (pageind = 0; pageind < iobuf->nr_pages; pageind++) {
+			page = iobuf->pagelist[pageind];
+			map  = iobuf->maplist[pageind];
+
+			while (length > 0) {
+				blocknr = b[bufind++];
+				tmp = get_unused_buffer_head(0);
+				if (!tmp) {
+					err = -ENOMEM;
+					goto error;
+				}
+				
+				tmp->b_dev = B_FREE;
+				tmp->b_size = size;
+				tmp->b_data = (char *) (page + offset);
+				tmp->b_this_page = tmp;
+
+				init_buffer(tmp, dev, blocknr,
+					    end_buffer_io_sync, NULL);
+				if (rw == WRITE) {
+					set_bit(BH_Uptodate, &tmp->b_state);
+					set_bit(BH_Dirty, &tmp->b_state);
+				}
+
+				dprintk ("buffer %d (%d) at %p\n", 
+					 bhind, tmp->b_blocknr, tmp->b_data);
+				bh[bhind++] = tmp;
+				length -= size;
+				offset += size;
+
+				/* 
+				 * Start the IO if we have got too much or if
+				 * this is the end of the last iobuf 
+				 */
+				if (bhind >= KIO_MAX_SECTORS) {
+					err = do_kio(rw, bhind, bh, size);
+					if (err >= 0)
+						transferred += err;
+					else
+						goto finished;
+					bhind = 0;
+				}
+				
+				if (offset >= PAGE_SIZE) {
+					offset = 0;
+					break;
+				}
+			} /* End of block loop */
+		} /* End of page loop */		
+	} /* End of iovec loop */
+
+	/* Is there any IO still left to submit? */
+	if (bhind) {
+		err = do_kio(rw, bhind, bh, size);
+		if (err >= 0)
+			transferred += err;
+		else
+			goto finished;
+	}
+
+ finished:
+	dprintk ("brw_kiovec: end (%d, %d)\n", transferred, err);
+	if (transferred)
+		return transferred;
+	return err;
+
+ error:
+	/* We got an error allocation the bh'es.  Just free the current
+           buffer_heads and exit. */
+	for (i = 0; i < bhind; i++)
+		put_unused_buffer_head(bh[i]);
+	wake_up(&buffer_wait);
+	goto finished;
 }
 
 /*
--- linux-2.2.18.raw/fs/iobuf.c.~1~	Fri Dec 15 16:34:41 2000
+++ linux-2.2.18.raw/fs/iobuf.c	Fri Dec 15 16:34:41 2000
@@ -0,0 +1,106 @@
+/*
+ * iobuf.c
+ *
+ * Keep track of the general-purpose IO-buffer structures used to track
+ * abstract kernel-space io buffers.
+ * 
+ */
+
+#include <linux/iobuf.h>
+#include <linux/malloc.h>
+#include <linux/slab.h>
+
+static kmem_cache_t *kiobuf_cachep;
+
+void __init kiobuf_init(void)
+{
+	kiobuf_cachep =  kmem_cache_create("kiobuf",
+					   sizeof(struct kiobuf),
+					   0,
+					   SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if(!kiobuf_cachep)
+		panic("Cannot create kernel iobuf cache\n");
+}
+
+
+int alloc_kiovec(int nr, struct kiobuf **bufp)
+{
+	int i;
+	struct kiobuf *iobuf;
+	
+	for (i = 0; i < nr; i++) {
+		iobuf = kmem_cache_alloc(kiobuf_cachep, SLAB_KERNEL);
+		if (!iobuf) {
+			free_kiovec(i, bufp);
+			return -ENOMEM;
+		}
+		
+		memset(iobuf, 0, sizeof(*iobuf));
+		iobuf->array_len = KIO_STATIC_PAGES;
+		iobuf->pagelist  = iobuf->page_array;
+		iobuf->maplist   = iobuf->map_array;
+		*bufp++ = iobuf;
+	}
+	
+	return 0;
+}
+
+void free_kiovec(int nr, struct kiobuf **bufp) 
+{
+	struct kiobuf *iobuf;
+	int i;
+	
+	for (i = 0; i < nr; i++) {
+		iobuf = bufp[i];
+		if (iobuf->array_len > KIO_STATIC_PAGES) {
+			kfree (iobuf->pagelist);
+			kfree (iobuf->maplist);
+		}
+		kmem_cache_free(kiobuf_cachep, bufp[i]);
+	}
+}
+
+int expand_kiobuf(struct kiobuf *iobuf, int wanted)
+{
+	unsigned long *	pagelist;
+	struct page ** maplist;
+	
+	if (iobuf->array_len >= wanted)
+		return 0;
+	
+	pagelist = (unsigned long *) 
+		kmalloc(wanted * sizeof(unsigned long), GFP_KERNEL);
+	if (!pagelist)
+		return -ENOMEM;
+	
+	maplist = (struct page **) 
+		kmalloc(wanted * sizeof(struct page **), GFP_KERNEL);
+	if (!maplist) {
+		kfree(pagelist);
+		return -ENOMEM;
+	}
+
+	/* Did it grow while we waited? */
+	if (iobuf->array_len >= wanted) {
+		kfree(pagelist);
+		kfree(maplist);
+		return 0;
+	}
+	
+	memcpy (pagelist, iobuf->pagelist, 
+		iobuf->array_len * sizeof(unsigned long));
+	memcpy (maplist,  iobuf->maplist,   
+		iobuf->array_len * sizeof(struct page **));
+
+	if (iobuf->array_len > KIO_STATIC_PAGES) {
+		kfree (iobuf->pagelist);
+		kfree (iobuf->maplist);
+	}
+	
+	iobuf->pagelist  = pagelist;
+	iobuf->maplist   = maplist;
+	iobuf->array_len = wanted;
+	return 0;
+}
+
+
--- linux-2.2.18.raw/fs/Makefile.~1~	Thu Aug 26 01:29:49 1999
+++ linux-2.2.18.raw/fs/Makefile	Fri Dec 15 16:34:41 2000
@@ -13,7 +13,7 @@
 O_OBJS    = open.o read_write.o devices.o file_table.o buffer.o \
 		super.o  block_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
 		ioctl.o readdir.o select.o fifo.o locks.o filesystems.o \
-		dcache.o inode.o attr.o bad_inode.o file.o $(BINFMTS) 
+		dcache.o inode.o attr.o bad_inode.o file.o iobuf.o $(BINFMTS) 
 
 MOD_LIST_NAME := FS_MODULES
 ALL_SUB_DIRS = coda minix ext2 fat msdos vfat proc isofs nfs umsdos ntfs \
--- linux-2.2.18.raw/include/linux/iobuf.h.~1~	Fri Dec 15 16:34:41 2000
+++ linux-2.2.18.raw/include/linux/iobuf.h	Fri Dec 15 16:34:41 2000
@@ -0,0 +1,70 @@
+/*
+ * iobuf.h
+ *
+ * Defines the structures used to track abstract kernel-space io buffers.
+ *
+ */
+
+#ifndef __LINUX_IOBUF_H
+#define __LINUX_IOBUF_H
+
+#include <linux/mm.h>
+#include <linux/init.h>
+
+/*
+ * The kiobuf structure describes a physical set of pages reserved
+ * locked for IO.  The reference counts on each page will have been
+ * incremented, and the flags field will indicate whether or not we have
+ * pre-locked all of the pages for IO.
+ *
+ * kiobufs may be passed in arrays to form a kiovec, but we must
+ * preserve the property that no page is present more than once over the
+ * entire iovec.
+ */
+
+#define KIO_MAX_ATOMIC_IO	64 /* in kb */
+#define KIO_MAX_ATOMIC_BYTES	(64 * 1024)
+#define KIO_STATIC_PAGES	(KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10))
+#define KIO_MAX_SECTORS		(KIO_MAX_ATOMIC_IO * 2)
+
+struct kiobuf 
+{
+	int		nr_pages;	/* Pages actually referenced */
+	int		array_len;	/* Space in the allocated lists */
+	int		offset;		/* Offset to start of valid data */
+	int		length;		/* Number of valid bytes of data */
+
+	/* Keep separate track of the physical addresses and page
+	 * structs involved.  If we do IO to a memory-mapped device
+	 * region, there won't necessarily be page structs defined for
+	 * every address. */
+
+	unsigned long *	pagelist;
+	struct page **	maplist;
+
+	unsigned int	locked : 1;	/* If set, pages has been locked */
+	
+	/* Always embed enough struct pages for 64k of IO */
+	unsigned long	page_array[KIO_STATIC_PAGES];
+	struct page *	map_array[KIO_STATIC_PAGES];
+};
+
+
+/* mm/memory.c */
+
+int	map_user_kiobuf(int rw, struct kiobuf *, unsigned long va, size_t len);
+void	unmap_kiobuf(struct kiobuf *iobuf);
+
+/* fs/iobuf.c */
+
+void __init kiobuf_init(void);
+int	alloc_kiovec(int nr, struct kiobuf **);
+void	free_kiovec(int nr, struct kiobuf **);
+int	expand_kiobuf(struct kiobuf *, int);
+
+/* fs/buffer.c */
+
+int	brw_kiovec(int rw, int nr, struct kiobuf *iovec[], 
+		   kdev_t dev, unsigned long b[], int size, int bmap);
+
+#endif /* __LINUX_IOBUF_H */
--- linux-2.2.18.raw/include/linux/major.h.~1~	Mon Dec 11 00:49:44 2000
+++ linux-2.2.18.raw/include/linux/major.h	Fri Dec 15 16:34:41 2000
@@ -126,6 +126,8 @@
 
 #define AURORA_MAJOR 79
 
+#define RAW_MAJOR	162
+
 #define UNIX98_PTY_MASTER_MAJOR	128
 #define UNIX98_PTY_MAJOR_COUNT	8
 #define UNIX98_PTY_SLAVE_MAJOR	(UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT)
--- linux-2.2.18.raw/include/linux/raw.h.~1~	Fri Dec 15 16:34:41 2000
+++ linux-2.2.18.raw/include/linux/raw.h	Fri Dec 15 16:34:41 2000
@@ -0,0 +1,23 @@
+#ifndef __LINUX_RAW_H
+#define __LINUX_RAW_H
+
+#include <linux/types.h>
+
+#define RAW_SETBIND	_IO( 0xac, 0 )
+#define RAW_GETBIND	_IO( 0xac, 1 )
+
+struct raw_config_request 
+{
+	int	raw_minor;
+	__u64	block_major;
+	__u64	block_minor;
+};
+
+#ifdef __KERNEL__
+
+/* drivers/char/raw.c */
+extern void raw_init(void);
+
+#endif /* __KERNEL__ */
+
+#endif /* __LINUX_RAW_H */
--- linux-2.2.18.raw/init/main.c.~1~	Mon Dec 11 00:49:44 2000
+++ linux-2.2.18.raw/init/main.c	Fri Dec 15 16:34:41 2000
@@ -22,6 +22,7 @@
 #include <linux/smp_lock.h>
 #include <linux/blk.h>
 #include <linux/hdreg.h>
+#include <linux/iobuf.h>
 #include <linux/init.h>
 
 #include <asm/io.h>
@@ -1441,6 +1442,7 @@
 #ifdef CONFIG_ARCH_S390
 	ccwcache_init();
 #endif
+	kiobuf_init();
 	signals_init();
 	inode_init();
 	file_table_init();
--- linux-2.2.18.raw/kernel/ksyms.c.~1~	Mon Dec 11 00:49:44 2000
+++ linux-2.2.18.raw/kernel/ksyms.c	Fri Dec 15 16:34:41 2000
@@ -37,6 +37,7 @@
 #include <linux/poll.h>
 #include <linux/mm.h>
 #include <linux/capability.h>
+#include <linux/iobuf.h>
 
 #if defined(CONFIG_PROC_FS)
 #include <linux/proc_fs.h>
@@ -252,6 +253,14 @@
 EXPORT_SYMBOL(max_sectors);
 EXPORT_SYMBOL(max_segments);
 EXPORT_SYMBOL(max_readahead);
+
+/* kiobuf support */
+EXPORT_SYMBOL(map_user_kiobuf);
+EXPORT_SYMBOL(unmap_kiobuf);
+EXPORT_SYMBOL(alloc_kiovec);
+EXPORT_SYMBOL(free_kiovec);
+EXPORT_SYMBOL(expand_kiobuf);
+EXPORT_SYMBOL(brw_kiovec);
 
 /* tty routines */
 EXPORT_SYMBOL(tty_hangup);
--- linux-2.2.18.raw/mm/memory.c.~1~	Mon Dec 11 00:49:44 2000
+++ linux-2.2.18.raw/mm/memory.c	Fri Dec 15 16:34:41 2000
@@ -37,6 +37,8 @@
 #include <linux/mman.h>
 #include <linux/swap.h>
 #include <linux/smp_lock.h>
+#include <linux/pagemap.h>
+#include <linux/iobuf.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -395,6 +397,221 @@
 		if (mm->rss < 0)
 			mm->rss = 0;
 	}
+}
+
+
+/*
+ * Do a quick page-table lookup for a single page. 
+ */
+static unsigned long get_page(unsigned long address, int write) 
+{
+	pgd_t *pgd;
+	pmd_t *pmd;
+
+	pgd = pgd_offset(current->mm, address);
+	pmd = pmd_offset(pgd, address);
+	if (pmd) {
+		pte_t * pte = pte_offset(pmd, address);
+		if (pte && pte_present(*pte)) {
+			if (!write ||
+			    (pte_write(*pte) && pte_dirty(*pte)))
+				return pte_page(*pte);
+		}
+	}
+	
+	return 0;
+}
+
+/* 
+ * Given a physical address, is there a useful struct page pointing to it?
+ */
+
+static struct page * get_page_map(unsigned long page)
+{
+	struct page *map;
+	
+	if (MAP_NR(page) >= max_mapnr)
+		return 0;
+	if (page == ZERO_PAGE(page))
+		return 0;
+	map = mem_map + MAP_NR(page);
+	if (PageReserved(map))
+		return 0;
+	return map;
+}
+
+/*
+ * Force in an entire range of pages from the current process's user VA,
+ * and pin and lock the pages for IO.  
+ */
+
+#define dprintk(x...)
+int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len)
+{
+	unsigned long		ptr, end;
+	int			err;
+	struct mm_struct *	mm;
+	struct vm_area_struct *	vma;
+	unsigned long		page;
+	struct page *		map;
+	int			doublepage = 0;
+	int			repeat = 0;
+	int			i;
+	 /* if we read from disk it means we write to memory */
+	int			writemem = (rw == READ);
+	
+	/* Make sure the iobuf is not already mapped somewhere. */
+	if (iobuf->nr_pages)
+		return -EINVAL;
+
+	mm = current->mm;
+	dprintk ("map_user_kiobuf: begin\n");
+	
+	ptr = va & PAGE_MASK;
+	end = (va + len + PAGE_SIZE - 1) & PAGE_MASK;
+	err = expand_kiobuf(iobuf, (end - ptr) >> PAGE_SHIFT);
+	if (err)
+		return err;
+
+ repeat:
+	down(&mm->mmap_sem);
+
+	err = -EFAULT;
+	vma = NULL;
+	iobuf->locked = writemem;
+	iobuf->offset = va & ~PAGE_MASK;
+	iobuf->length = len;
+	
+	i = 0;
+	
+	/* 
+	 * First of all, try to fault in all of the necessary pages
+	 */
+	while (ptr < end) {
+		if (!vma || ptr >= vma->vm_end) {
+			vma = find_vma(mm, ptr);
+			if (!vma)
+				goto out;
+			if (vma->vm_start > ptr) {
+				if (!(vma->vm_flags & VM_GROWSDOWN))
+					goto out;
+				if (expand_stack(vma, ptr))
+					goto out;
+			}
+			err = -EACCES;
+			if (writemem) {
+				if (!(vma->vm_flags & VM_WRITE))
+					goto out;
+			} else {
+				if (!(vma->vm_flags & VM_READ))
+					goto out;
+			}
+			err = -EFAULT;
+		}
+		while (!(page = get_page(ptr, writemem))) {
+			int ret;
+
+			ret = handle_mm_fault(current, vma, ptr, writemem);
+			if (ret <= 0) {
+				if (!ret)
+					goto out;
+				else {
+					err = -ENOMEM;
+					goto out;
+				}
+			}
+		}
+		map = get_page_map(page);
+		if (map) {
+			if (writemem) {
+				/*
+				 * Lock down the pages only if we're going
+				 * to write to memory. If if we're reading
+				 * from memory we're free to go ahead
+				 * only after pinning the page on the
+				 * physical side.
+				 */
+				if (PageLocked(map))
+					goto retry;
+				set_bit(PG_locked, &map->flags);
+			}
+			flush_dcache_page(page_address(map));
+			atomic_inc(&map->count);
+		}
+		dprintk ("Installing page %p %p: %d\n", (void *)page, map, i);
+		iobuf->pagelist[i] = page;
+		iobuf->maplist[i] = map;
+		iobuf->nr_pages = ++i;
+		
+		ptr += PAGE_SIZE;
+	}
+
+	up(&mm->mmap_sem);
+	dprintk ("map_user_kiobuf: end OK\n");
+	return 0;
+
+ out:
+	up(&mm->mmap_sem);
+	unmap_kiobuf(iobuf);
+	dprintk ("map_user_kiobuf: end %d\n", err);
+	return err;
+
+ retry:
+
+	/* 
+	 * Undo the locking so far, wait on the page we got to, and try again.
+	 */
+	unmap_kiobuf(iobuf);
+	up(&mm->mmap_sem);
+	ptr = va & PAGE_MASK;
+
+	/* 
+	 * Did the release also unlock the page we got stuck on?
+	 */
+	if (!PageLocked(map)) {
+		/* If so, we may well have the page mapped twice in the
+		 * IO address range.  Bad news.  Of course, it _might_
+		 * just be a coincidence, but if it happens more than
+		 * once, chances are we have a double-mapped page. */
+		if (++doublepage >= 3) {
+			return -EINVAL;
+		}
+	}
+	
+	/*
+	 * Try again...
+	 */
+	wait_on_page(map);
+	if (++repeat < 16)
+		goto repeat;
+	return -EAGAIN;
+}
+
+
+/*
+ * Unmap all of the pages referenced by a kiobuf.  We release the pages,
+ * and unlock them if they were locked. 
+ */
+
+void unmap_kiobuf (struct kiobuf *iobuf) 
+{
+	int i;
+	struct page *map;
+	
+	for (i = 0; i < iobuf->nr_pages; i++) {
+		map = iobuf->maplist[i];
+		
+		if (map) {
+			if (iobuf->locked) {
+				clear_bit(PG_locked, &map->flags);
+				wake_up(&map->wait);
+			}
+			__free_page(map);
+		}
+	}
+	
+	iobuf->nr_pages = 0;
+	iobuf->locked = 0;
 }
 
 static inline void zeromap_pte_range(pte_t * pte, unsigned long address,