The method is: open /dev/futex, use sys_futex(FUTEX_AWAIT) to attach
the resulting fd to a particular futex, then use select or poll on that
fd (or set the fd up for SIGIO delivery and wait for the signal).
You need to call FUTEX_AWAIT again after poll succeeds or SIGIO is
delivered (i.e. it's one-shot). Calling FUTEX_AWAIT while a futex is
already outstanding on the fd drops the old futex in favour of the new one.
The reason for this method is that it's pretty convenient for
programs, and since each outstanding futex pins a page down, tying it
to a struct file * means we get an implicit limit on pinned pages.
Code below. Feedback welcome.
Rusty.
-- Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

Name: Waker can unpin page, rather than waiting process
Author: Rusty Russell
Status: Tested in 2.5.20
Depends: Futex/copy-from-user.patch.gz Futex/unpin-page-fix.patch.gz
Depends: Futex/waitq.patch.gz
D: This changes the implementation so that the waker actually unpins
D: the page. This is preparation for the async interface, where the
D: process which registered interest is not in the kernel.
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.20.19104/kernel/futex.c linux-2.5.20.19104.updated/kernel/futex.c --- linux-2.5.20.19104/kernel/futex.c Thu Jun 6 17:13:46 2002 +++ linux-2.5.20.19104.updated/kernel/futex.c Thu Jun 6 17:14:30 2002 @@ -98,11 +98,13 @@ if (this->page == page && this->offset == offset) { list_del_init(i); tell_waiter(this); + unpin_page(this->page); num_woken++; if (num_woken >= num) break; } } spin_unlock(&futex_lock); + unpin_page(page); return num_woken; } @@ -192,9 +194,10 @@ } out: set_current_state(TASK_RUNNING); - /* Were we woken up anyway? */ + /* Were we woken up anyway? If so, it unpinned page. */ if (!unqueue_me(&q)) return 0; + unpin_page(page); return ret; } @@ -225,6 +228,7 @@ if (IS_ERR(page)) return PTR_ERR(page); + /* On success, these routines unpin the pages themselves. */ head = hash_futex(page, pos_in_page); switch (op) { case FUTEX_WAIT: @@ -236,7 +240,8 @@ default: ret = -EINVAL; } - unpin_page(page); + if (ret < 0) + unpin_page(page); return ret; } Name: Asynchronous interface for futexes Author: Rusty Russell Status: Tested on 2.5.20 Depends: Futex/comment-fix.patch.gz Futex/copy-from-user.patch.gz Depends: Futex/no-write-needed.patch.gz Futex/unpin-page-fix.patch.gz Depends: Futex/waitq.patch.gz Futex/waker-unpin-page.patch.gz
D: This patch adds a FUTEX_AWAIT and /dev/futex, for attaching futexes
D: to file descriptors, which can be used with poll, select or SIGIO.
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.20.15557/include/linux/futex.h linux-2.5.20.15557.updated/include/linux/futex.h --- linux-2.5.20.15557/include/linux/futex.h Sat May 25 14:34:59 2002 +++ linux-2.5.20.15557.updated/include/linux/futex.h Wed Jun 5 22:01:44 2002 @@ -4,5 +4,6 @@ /* Second argument to futex syscall */ #define FUTEX_WAIT (0) #define FUTEX_WAKE (1) +#define FUTEX_AWAIT (2) #endif diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.20.15557/kernel/futex.c linux-2.5.20.15557.updated/kernel/futex.c --- linux-2.5.20.15557/kernel/futex.c Wed Jun 5 22:01:41 2002 +++ linux-2.5.20.15557.updated/kernel/futex.c Wed Jun 5 22:02:09 2002 @@ -34,6 +34,10 @@ #include <linux/highmem.h> #include <linux/time.h> #include <linux/pagemap.h> +#include <linux/file.h> +#include <linux/slab.h> +#include <linux/devfs_fs_kernel.h> +#include <linux/poll.h> #include <asm/uaccess.h> /* Simple "sleep if unchanged" interface. */ @@ -41,11 +45,18 @@ /* FIXME: This may be way too small. --RR */ #define FUTEX_HASHBITS 6 +extern void send_sigio(struct fown_struct *fown, int fd, int band); + /* We use this instead of a normal wait_queue_t, so we can wake only the relevent ones (hashed queues may be shared) */ struct futex_q { struct list_head list; wait_queue_head_t waiters; + + /* For AWAIT, sigio sent using these. */ + int fd; + struct file *filp; + /* Page struct and offset within it. 
*/ struct page *page; unsigned int offset; @@ -54,6 +65,7 @@ /* The key for the hash is the address + index + offset within page */ static struct list_head futex_queues[1<<FUTEX_HASHBITS]; static spinlock_t futex_lock = SPIN_LOCK_UNLOCKED; +extern struct file_operations futex_fops; static inline struct list_head *hash_futex(struct page *page, unsigned long offset) @@ -73,9 +85,12 @@ page_cache_release(page); } +/* Waiter may be sitting in FUTEX_WAIT or poll, or async */ static inline void tell_waiter(struct futex_q *q) { wake_up_all(&q->waiters); + if (q->fd != -1) + send_sigio(&q->filp->f_owner, q->fd, POLL_IN); } static int futex_wake(struct list_head *head, @@ -113,6 +128,7 @@ add_wait_queue(&q->waiters, wait); q->page = page; q->offset = offset; + q->fd = -1; spin_lock(&futex_lock); list_add_tail(&q->list, head); @@ -196,6 +212,38 @@ return ret; } +static int futex_await(struct list_head *head, + struct page *page, + int offset, + int fd) +{ + struct file *filp; + struct futex_q *q; + + filp = fget(fd); + if (!filp || filp->f_op != &futex_fops) + return -EBADF; + q = filp->private_data; + + spin_lock(&futex_lock); + /* Eliminate any old notification, wake any pollers, release page. 
*/ + if (!list_empty(&q->list)) { + list_del(&q->list); + wake_up_all(&q->waiters); + unpin_page(q->page); + } + + q->filp = filp; + q->fd = fd; + q->page = page; + q->offset = offset; + list_add_tail(&q->list, head); + spin_unlock(&futex_lock); + fput(filp); + + return 0; +} + asmlinkage int sys_futex(void *uaddr, int op, int val, struct timespec *utime) { int ret; @@ -229,6 +277,9 @@ case FUTEX_WAIT: ret = futex_wait(head, page, pos_in_page, val, uaddr, time); break; + case FUTEX_AWAIT: + ret = futex_await(head, page, pos_in_page, val); + break; case FUTEX_WAKE: ret = futex_wake(head, page, pos_in_page, val); break; @@ -241,12 +292,68 @@ return ret; } +static int futex_open(struct inode *inode, struct file *filp) +{ + struct futex_q *q; + + q = kmalloc(sizeof(*q), GFP_KERNEL); + if (!q) + return -ENOMEM; + INIT_LIST_HEAD(&q->list); + init_waitqueue_head(&q->waiters); + + filp->private_data = q; + return 0; +} + +static int futex_close(struct inode *inode, struct file *filp) +{ + struct futex_q *q = filp->private_data; + + spin_lock(&futex_lock); + if (!list_empty(&q->list)) { + list_del(&q->list); + unpin_page(q->page); + BUG_ON(waitqueue_active(&q->waiters)); + } + spin_unlock(&futex_lock); + kfree(filp->private_data); + return 0; +} + +/* You need to do a FUTEX_AWAIT to arm this after each successful poll */ +static unsigned int futex_poll(struct file *filp, + struct poll_table_struct *wait) +{ + struct futex_q *q = filp->private_data; + int ret = 0; + + spin_lock(&futex_lock); + if (!list_empty(&q->list)) + poll_wait(filp, &q->waiters, wait); + else + ret = POLLIN | POLLRDNORM; + spin_unlock(&futex_lock); + + return ret; +} + +static struct file_operations futex_fops = { + open: futex_open, + release: futex_close, + poll: futex_poll, +}; + static int __init init(void) { + int futex_major; unsigned int i; for (i = 0; i < ARRAY_SIZE(futex_queues); i++) INIT_LIST_HEAD(&futex_queues[i]); + futex_major = devfs_register_chrdev(0, "futex", &futex_fops); + 
devfs_register(NULL, "futex", DEVFS_FL_NONE, futex_major, + 0, S_IFCHR | 0666, &futex_fops, NULL); return 0; } __initcall(init); - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/