This should fix it:
diff -urN 2.4.13pre6/fs/block_dev.c o_direct/fs/block_dev.c
--- 2.4.13pre6/fs/block_dev.c Sun Oct 21 20:03:47 2001
+++ o_direct/fs/block_dev.c Tue Oct 23 14:18:35 2001
@@ -113,6 +113,11 @@
return 0;
}
+static int blkdev_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize)
+{
+ return generic_direct_IO(rw, inode, iobuf, blocknr, blocksize, blkdev_get_block);
+}
+
static int blkdev_writepage(struct page * page)
{
return block_write_full_page(page, blkdev_get_block);
@@ -632,6 +637,7 @@
sync_page: block_sync_page,
prepare_write: blkdev_prepare_write,
commit_write: blkdev_commit_write,
+ direct_IO: blkdev_direct_IO,
};
struct file_operations def_blk_fops = {
diff -urN 2.4.13pre6/fs/buffer.c o_direct/fs/buffer.c
--- 2.4.13pre6/fs/buffer.c Sun Oct 21 20:03:47 2001
+++ o_direct/fs/buffer.c Tue Oct 23 14:18:35 2001
@@ -1942,6 +1942,47 @@
return tmp.b_blocknr;
}
+int generic_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize, get_block_t * get_block)
+{
+ int i, nr_blocks, retval;
+ unsigned long * blocks = iobuf->blocks;
+
+ nr_blocks = iobuf->length / blocksize;
+ /* build the blocklist */
+ for (i = 0; i < nr_blocks; i++, blocknr++) {
+ struct buffer_head bh;
+
+ bh.b_state = 0;
+ bh.b_dev = inode->i_dev;
+ bh.b_size = blocksize;
+
+ retval = get_block(inode, blocknr, &bh, rw == READ ? 0 : 1);
+ if (retval)
+ goto out;
+
+ if (rw == READ) {
+ if (buffer_new(&bh))
+ BUG();
+ if (!buffer_mapped(&bh)) {
+ /* there was a hole in the file: no block is mapped here */
+ blocks[i] = -1UL;
+ continue;
+ }
+ } else {
+ if (buffer_new(&bh))
+ unmap_underlying_metadata(&bh);
+ if (!buffer_mapped(&bh))
+ BUG();
+ }
+ blocks[i] = bh.b_blocknr;
+ }
+
+ retval = brw_kiovec(rw, 1, &iobuf, inode->i_dev, iobuf->blocks, blocksize);
+
+ out:
+ return retval;
+}
+
/*
* IO completion routine for a buffer_head being used for kiobuf IO: we
* can't dispatch the kiobuf callback until io_count reaches 0.
diff -urN 2.4.13pre6/fs/ext2/inode.c o_direct/fs/ext2/inode.c
--- 2.4.13pre6/fs/ext2/inode.c Sun Oct 21 20:03:47 2001
+++ o_direct/fs/ext2/inode.c Tue Oct 23 14:18:35 2001
@@ -592,13 +592,18 @@
{
return generic_block_bmap(mapping,block,ext2_get_block);
}
+static int ext2_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize)
+{
+ return generic_direct_IO(rw, inode, iobuf, blocknr, blocksize, ext2_get_block);
+}
struct address_space_operations ext2_aops = {
readpage: ext2_readpage,
writepage: ext2_writepage,
sync_page: block_sync_page,
prepare_write: ext2_prepare_write,
commit_write: generic_commit_write,
- bmap: ext2_bmap
+ bmap: ext2_bmap,
+ direct_IO: ext2_direct_IO,
};
/*
diff -urN 2.4.13pre6/include/linux/fs.h o_direct/include/linux/fs.h
--- 2.4.13pre6/include/linux/fs.h Sun Oct 21 20:03:51 2001
+++ o_direct/include/linux/fs.h Tue Oct 23 14:18:35 2001
@@ -1368,6 +1368,7 @@
int generic_block_bmap(struct address_space *, long, get_block_t *);
int generic_commit_write(struct file *, struct page *, unsigned, unsigned);
int block_truncate_page(struct address_space *, loff_t, get_block_t *);
+extern int generic_direct_IO(int, struct inode *, struct kiobuf *, unsigned long, int, get_block_t *);
extern void create_empty_buffers(struct page *, kdev_t, unsigned long);
extern int waitfor_one_page(struct page*);
diff -urN 2.4.13pre6/kernel/ksyms.c o_direct/kernel/ksyms.c
--- 2.4.13pre6/kernel/ksyms.c Sun Oct 21 20:03:52 2001
+++ o_direct/kernel/ksyms.c Tue Oct 23 14:18:47 2001
@@ -199,6 +199,7 @@
EXPORT_SYMBOL(unlock_buffer);
EXPORT_SYMBOL(__wait_on_buffer);
EXPORT_SYMBOL(___wait_on_page);
+EXPORT_SYMBOL(generic_direct_IO);
EXPORT_SYMBOL(block_write_full_page);
EXPORT_SYMBOL(block_read_full_page);
EXPORT_SYMBOL(block_prepare_write);
diff -urN 2.4.13pre6/mm/filemap.c o_direct/mm/filemap.c
--- 2.4.13pre6/mm/filemap.c Sun Oct 21 20:03:52 2001
+++ o_direct/mm/filemap.c Tue Oct 23 14:18:35 2001
@@ -1356,6 +1356,87 @@
UPDATE_ATIME(inode);
}
+static ssize_t generic_file_direct_IO(int rw, struct file * filp, char * buf, size_t count, loff_t offset)
+{
+ ssize_t retval;
+ int new_iobuf, chunk_size, blocksize_mask, blocksize, blocksize_bits, iosize, progress;
+ struct kiobuf * iobuf;
+ struct inode * inode = filp->f_dentry->d_inode;
+ struct address_space * mapping = inode->i_mapping;
+
+ new_iobuf = 0;
+ iobuf = filp->f_iobuf;
+ if (test_and_set_bit(0, &filp->f_iobuf_lock)) {
+ /*
+ * A parallel read/write is using the preallocated iobuf
+ * so just run slow and allocate a new one.
+ */
+ retval = alloc_kiovec(1, &iobuf);
+ if (retval)
+ goto out;
+ new_iobuf = 1;
+ }
+
+ blocksize = 1 << inode->i_blkbits;
+ blocksize_bits = inode->i_blkbits;
+ blocksize_mask = blocksize - 1;
+ chunk_size = KIO_MAX_ATOMIC_IO << 10;
+
+ retval = -EINVAL;
+ if ((offset & blocksize_mask) || (count & blocksize_mask))
+ goto out_free;
+ if (!mapping->a_ops->direct_IO)
+ goto out_free;
+
+ /*
+ * Flush exclusively the _data_ to disk; metadata must remain
+ * completely asynchronous or performance will go to /dev/null.
+ */
+ filemap_fdatasync(mapping);
+ retval = fsync_inode_data_buffers(inode);
+ filemap_fdatawait(mapping);
+ if (retval < 0)
+ goto out_free;
+
+ progress = retval = 0;
+ while (count > 0) {
+ iosize = count;
+ if (iosize > chunk_size)
+ iosize = chunk_size;
+
+ retval = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize);
+ if (retval)
+ break;
+
+ retval = mapping->a_ops->direct_IO(rw, inode, iobuf, (offset+progress) >> blocksize_bits, blocksize);
+
+ if (rw == READ && retval > 0)
+ mark_dirty_kiobuf(iobuf, retval);
+
+ if (retval >= 0) {
+ count -= retval;
+ buf += retval;
+ progress += retval;
+ }
+
+ unmap_kiobuf(iobuf);
+
+ if (retval != iosize)
+ break;
+ }
+
+ if (progress)
+ retval = progress;
+
+ out_free:
+ if (!new_iobuf)
+ clear_bit(0, &filp->f_iobuf_lock);
+ else
+ free_kiovec(1, &iobuf);
+ out:
+ return retval;
+}
+
int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
{
char *kaddr;
@@ -1389,6 +1470,9 @@
if ((ssize_t) count < 0)
return -EINVAL;
+ if (filp->f_flags & O_DIRECT)
+ goto o_direct;
+
retval = -EFAULT;
if (access_ok(VERIFY_WRITE, buf, count)) {
retval = 0;
@@ -1407,7 +1491,29 @@
retval = desc.error;
}
}
+ out:
return retval;
+
+ o_direct:
+ {
+ loff_t pos = *ppos, size;
+ struct address_space *mapping = filp->f_dentry->d_inode->i_mapping;
+ struct inode *inode = mapping->host;
+
+ retval = 0;
+ if (!count)
+ goto out; /* skip atime */
+ size = inode->i_size;
+ if (pos < size) {
+ if (pos + count > size)
+ count = size - pos;
+ retval = generic_file_direct_IO(READ, filp, buf, count, pos);
+ if (retval > 0)
+ *ppos = pos + retval;
+ }
+ UPDATE_ATIME(filp->f_dentry->d_inode);
+ goto out;
+ }
}
static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset , unsigned long size)
@@ -2660,7 +2766,8 @@
written = 0;
- if (file->f_flags & O_APPEND)
+ /* FIXME: this is for backwards compatibility with 2.4 */
+ if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND)
pos = inode->i_size;
/*
@@ -2740,6 +2847,9 @@
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
mark_inode_dirty_sync(inode);
+ if (file->f_flags & O_DIRECT)
+ goto o_direct;
+
do {
unsigned long index, offset;
long page_fault;
@@ -2814,6 +2924,7 @@
if ((status >= 0) && (file->f_flags & O_SYNC))
status = generic_osync_inode(inode, OSYNC_METADATA|OSYNC_DATA);
+out_status:
err = written ? written : status;
out:
@@ -2822,6 +2933,25 @@
fail_write:
status = -EFAULT;
goto unlock;
+
+o_direct:
+ written = generic_file_direct_IO(WRITE, file, (char *) buf, count, pos);
+ if (written > 0) {
+ loff_t end = pos + written;
+ if (end > inode->i_size && !S_ISBLK(inode->i_mode)) {
+ inode->i_size = end;
+ mark_inode_dirty(inode);
+ }
+ *ppos = end;
+ invalidate_inode_pages2(mapping);
+ }
+ /*
+ * Sync the fs metadata but not the minor inode changes and
+ * of course not the data as we did direct DMA for the IO.
+ */
+ if (written >= 0 && file->f_flags & O_SYNC)
+ status = generic_osync_inode(inode, OSYNC_METADATA);
+ goto out_status;
}
void __init page_cache_init(unsigned long mempages)
Andrea
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/