[PATCH] task ornaments

David Howells (dhowells@redhat.com)
Fri, 25 Jan 2002 18:54:10 +0000


Hi Linus,

Assuming the syscall latency patch #1 has been applied, the attached patch adds
task ornaments to 2.5.3-pre5, which:

* permit notification of certain major process events (such as signals,
exec, fork/clone, exit, syscall entry/exit).

* will permit ptrace() functionality to be implemented without reparenting the
traced task.

* will make it easier to produce a Wine support module.

* may make it possible to abstract out the parent signal notification.

* (mingo@redhat.com has also expressed interest with respect to tux).

* (hch@caldera.com has also expressed interest with respect to the linux
ABI stuff).
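
To give a flavour of the interface, a module might attach an ornament along
these lines. This is just a sketch against the ops declared in
include/linux/taskornament.h and is not part of the patch; the example_*
names are made up, and module glue and error handling are elided:

	/* assumes <linux/sched.h> and <linux/taskornament.h> */

	/* log SIGSEGV delivery to the task this ornament is attached to
	 * (the signal op runs in the context of the owner process)
	 */
	static void example_signal(struct task_ornament *orn, int signr,
				   siginfo_t *info, int *action)
	{
		if (signr == SIGSEGV)
			printk("pid %d took SIGSEGV\n", current->pid);
		/* leave *action untouched to deliver the signal unchanged */
	}

	static struct task_ornament_operations example_ornament_ops = {
		name:		"example",
		owner:		THIS_MODULE,
		def_order:	100,
		signal:		example_signal,
	};

	static int example_attach(struct task_struct *target)
	{
		struct task_ornament *orn;
		int ret;

		orn = task_ornament_alloc(&example_ornament_ops);
		if (!orn)
			return -ENOMEM;

		ret = task_ornament_add(target, orn);
		ornput(orn);	/* on success the task now holds its own ref */
		return ret;
	}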

Most notable are the changes to the following files:

include/linux/sched.h
include/linux/taskornament.h
kernel/taskornament.c
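
An ornament attached that way can later be found and detached again roughly as
follows (same caveats as the sketch above):

	static void example_detach(struct task_struct *target)
	{
		struct task_ornament *orn;

		orn = task_ornament_find(target, &example_ornament_ops);
		if (orn) {
			task_ornament_remove(target, orn); /* drops the task's ref */
			ornput(orn);	/* drop the ref that find() took */
		}
	}

The attached ornaments are also visible through the new /proc/<pid>/ornaments
file; each entry starts with a line of the form "<name> <address> usage=NNN
order=XXXX", followed by whatever the ornament's proc_read op chooses to add.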

David

diff -uNr linux-work-253p5/arch/i386/kernel/ptrace.c linux-orn-253p5/arch/i386/kernel/ptrace.c
--- linux-work-253p5/arch/i386/kernel/ptrace.c Fri Jan 25 15:01:07 2002
+++ linux-orn-253p5/arch/i386/kernel/ptrace.c Fri Jan 25 17:25:46 2002
@@ -13,6 +13,7 @@
#include <linux/errno.h>
#include <linux/ptrace.h>
#include <linux/user.h>
+#include <linux/taskornament.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -456,9 +457,12 @@
__attribute__((regparm(3)))
void do_syscall_trace(struct pt_regs *regs, int entryexit)
{
+ task_ornament_notify_syscall(regs,entryexit);
+
if ((current->ptrace & (PT_PTRACED|PT_SYSCALLTRACE)) !=
(PT_PTRACED|PT_SYSCALLTRACE))
return;
+
/* the 0x80 provides a way for the tracing parent to distinguish
between a syscall stop and SIGTRAP delivery */
current->exit_code = SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
@@ -484,7 +488,5 @@
void do_notify_resume(struct pt_regs *regs, sigset_t *oldset,
struct task_work work_pending)
{
- /* deal with pending signal delivery */
- if (work_pending.sigpending)
- do_signal(regs,oldset);
+ task_ornament_notify_resume(regs,oldset,work_pending);
}
diff -uNr linux-work-253p5/arch/i386/kernel/signal.c linux-orn-253p5/arch/i386/kernel/signal.c
--- linux-work-253p5/arch/i386/kernel/signal.c Fri Jan 25 15:01:07 2002
+++ linux-orn-253p5/arch/i386/kernel/signal.c Fri Jan 25 16:35:57 2002
@@ -20,6 +20,7 @@
#include <linux/stddef.h>
#include <linux/tty.h>
#include <linux/personality.h>
+#include <linux/taskornament.h>
#include <asm/ucontext.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
@@ -606,6 +607,27 @@
if (!signr)
break;

+ if (!list_empty(&current->ornaments)) {
+ int action = task_ornament_notify_signal(signr,&info);
+ switch (action) {
+ case 0: /* no action */
+ break;
+ case -1: /* cancelled */
+ continue;
+
+ default:
+ /* change of signal
+ * - if the (new) signal is now blocked, requeue it
+ */
+ signr = action;
+ if (sigismember(&current->blocked,signr)) {
+ send_sig_info(signr,&info,current);
+ continue;
+ }
+ break;
+ }
+ }
+
if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) {
/* Let the debugger run. */
current->exit_code = signr;
diff -uNr linux-work-253p5/fs/exec.c linux-orn-253p5/fs/exec.c
--- linux-work-253p5/fs/exec.c Fri Jan 25 14:53:15 2002
+++ linux-orn-253p5/fs/exec.c Fri Jan 25 16:35:57 2002
@@ -35,6 +35,7 @@
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/personality.h>
+#include <linux/taskornament.h>
#define __NO_VERSION__
#include <linux/module.h>

@@ -548,6 +549,9 @@
if (retval) goto mmap_failed;

/* This is the point of no return */
+ if (!list_empty(&current->ornaments))
+ task_ornament_notify_execve();
+
release_old_signals(oldsig);

current->sas_ss_sp = current->sas_ss_size = 0;
diff -uNr linux-work-253p5/fs/proc/base.c linux-orn-253p5/fs/proc/base.c
--- linux-work-253p5/fs/proc/base.c Fri Jan 25 14:53:15 2002
+++ linux-orn-253p5/fs/proc/base.c Fri Jan 25 16:36:57 2002
@@ -26,6 +26,7 @@
#include <linux/seq_file.h>
#include <linux/namespace.h>
#include <linux/mm.h>
+#include <linux/taskornament.h>

/*
* For hysterical raisins we keep the same inumbers as in the old procfs.
@@ -289,6 +290,19 @@
release: mounts_release,
};

+static ssize_t pid_taskornlist_read(struct file * file, char * buf,
+ size_t count, loff_t *ppos)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct task_struct *task = inode->u.proc_i.task;
+
+ return proc_pid_read_taskornlist(task, file, buf, count, ppos);
+}
+
+static struct file_operations proc_taskornlist_operations = {
+ read: pid_taskornlist_read,
+};
+
#define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */

static ssize_t proc_info_read(struct file * file, char * buf,
@@ -541,6 +555,7 @@
PROC_PID_MAPS,
PROC_PID_CPU,
PROC_PID_MOUNTS,
+ PROC_PID_TASKORN,
PROC_PID_FD_DIR = 0x8000, /* 0x8000-0xffff */
};

@@ -557,6 +572,7 @@
#endif
E(PROC_PID_MAPS, "maps", S_IFREG|S_IRUGO),
E(PROC_PID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR),
+ E(PROC_PID_TASKORN, "ornaments", S_IFREG|S_IRUGO),
E(PROC_PID_CWD, "cwd", S_IFLNK|S_IRWXUGO),
E(PROC_PID_ROOT, "root", S_IFLNK|S_IRWXUGO),
E(PROC_PID_EXE, "exe", S_IFLNK|S_IRWXUGO),
@@ -920,6 +936,9 @@
inode->i_op = &proc_mem_inode_operations;
inode->i_fop = &proc_mem_operations;
break;
+ case PROC_PID_TASKORN:
+ inode->i_fop = &proc_taskornlist_operations;
+ break;
case PROC_PID_MOUNTS:
inode->i_fop = &proc_mounts_operations;
break;
diff -uNr linux-work-253p5/include/linux/init_task.h linux-orn-253p5/include/linux/init_task.h
--- linux-work-253p5/include/linux/init_task.h Fri Jan 25 15:12:17 2002
+++ linux-orn-253p5/include/linux/init_task.h Fri Jan 25 17:07:21 2002
@@ -86,6 +86,7 @@
pending: { NULL, &tsk.pending.head, {{0}}}, \
blocked: {{0}}, \
alloc_lock: SPIN_LOCK_UNLOCKED, \
+ ornaments: LIST_HEAD_INIT(tsk.ornaments), \
journal_info: NULL, \
}

diff -uNr linux-work-253p5/include/linux/sched.h linux-orn-253p5/include/linux/sched.h
--- linux-work-253p5/include/linux/sched.h Fri Jan 25 15:05:45 2002
+++ linux-orn-253p5/include/linux/sched.h Fri Jan 25 17:44:28 2002
@@ -364,6 +364,9 @@
/* Protection of (de-)allocation: mm, files, fs, tty */
spinlock_t alloc_lock;

+/* Task ornaments */
+ struct list_head ornaments; /* modify: hold alloc_lock and sigmask_lock */
+
/* journalling filesystem info */
void *journal_info;
};
diff -uNr linux-work-253p5/include/linux/taskornament.h linux-orn-253p5/include/linux/taskornament.h
--- linux-work-253p5/include/linux/taskornament.h Thu Jan 1 01:00:00 1970
+++ linux-orn-253p5/include/linux/taskornament.h Fri Jan 25 17:48:28 2002
@@ -0,0 +1,192 @@
+/* taskornament.h: task ornament definitions
+ *
+ * (c) Copyright 2001 David Howells (dhowells@redhat.com)
+ */
+
+#ifndef _H_F92CE22E_93FF_11D4_8781_0000C0005742
+#define _H_F92CE22E_93FF_11D4_8781_0000C0005742
+
+#ifdef __KERNEL__
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/fs.h>
+#include <asm/atomic.h>
+
+struct task_ornament_operations;
+struct task_ornament;
+struct task_struct;
+
+/*****************************************************************************/
+/*
+ * ornament that can be strung onto a task to intercept important task events
+ */
+struct task_ornament
+{
+ struct list_head to_list;
+ struct task_ornament_operations *to_ops;
+ struct task_struct *to_task; /* owner task */
+ atomic_t to_count;
+ int to_magic;
+ unsigned short to_order; /* list ordering priority (lowest first) */
+ unsigned short to_nfy_resume : 1; /* invoke resumption operation */
+ void *to_data;
+ void *to_data2;
+};
+
+/*****************************************************************************/
+/*
+ * the operations that allow an ornament to intercept task events
+ * - all ops but close() and proc_read() will be called in the context of the owner process
+ */
+struct task_ornament_operations
+{
+ /* ornament name */
+ const char *name;
+
+ /* responsible module */
+ struct module *owner;
+
+ /* default ornament sort key */
+ unsigned short def_order;
+
+ /* destructor
+ * - can sleep
+ */
+ void (*close)(struct task_ornament *ornament);
+
+ /* ornament specific information request
+ * - called from /proc/<pid>/ornaments implementation
+ * - must terminate any text it adds with newline chars
+ * - should return the next text insertion point
+ * - can sleep
+ */
+ char *(*proc_read)(struct task_ornament *ornament, char *ptr, char *end);
+
+ /* notification of exit (status in exit_code)
+ * - current->flags will have been marked PF_EXITING
+ * - the big kernel lock will be held by do_exit()
+ * - can sleep
+ */
+ void (*exit)(struct task_ornament *ornament, int exit_code);
+
+ /* notification of execve
+ * - can add/remove ornaments (including itself)
+ * - can sleep
+ */
+ void (*execve)(struct task_ornament *ornament);
+
+ /* notification that fork/clone has set up the new process and
+ * is just about to dispatch it
+ * - ornaments have to copy themselves to the child
+ * - can add/remove ornaments (including itself)
+ * - can sleep
+ */
+ void (*fork)(struct task_ornament *ornament,
+ struct task_struct *child,
+ unsigned long clone_flags);
+
+ /* notification of pending resumption of userspace execution
+ * - only called if orn->to_nfy_resume is set and tsk->work.notify_resume is set
+ * - can sleep, but preferably shouldn't
+ * - can add/remove ornaments (including itself)
+ */
+ void (*resume)(struct task_ornament *ornament);
+
+ /* notification of pending signal delivery
+ * - set *action to -1 to cancel the signal
+ * - set *action to new signal number and update *info to change the signal
+ * - leave *action unchanged otherwise
+ * - *action and *info may already have been changed by a previous ornament
+ * - can sleep
+ * - can add/remove ornaments (including itself)
+ */
+ void (*signal)(struct task_ornament *ornament, int signr, siginfo_t *info, int *action);
+
+ /* syscall trace hook
+ * - entryexit is 0 on entry, 1 on exit
+ */
+ void (*syscall_trace)(struct task_ornament *ornament, struct pt_regs *regs, int entryexit);
+};
+
+#define TASK_ORNAMENT_MAGIC 0x54614572
+
+#define task_ornament_valid(to) ((to)->to_magic==TASK_ORNAMENT_MAGIC)
+
+static __inline__
+void ornget(struct task_ornament *orn)
+{
+ atomic_inc(&orn->to_count);
+}
+
+extern void __ornput(struct task_ornament *orn);
+static __inline__
+void ornput(struct task_ornament *orn)
+{
+ if (atomic_dec_and_test(&orn->to_count))
+ __ornput(orn);
+}
+
+extern void task_ornament_init(void);
+extern struct task_ornament *task_ornament_alloc(struct task_ornament_operations *);
+extern int __task_ornament_add(struct task_struct *target, struct task_ornament *orn);
+extern void __task_ornament_remove(struct task_struct *target, struct task_ornament *orn);
+extern void exit_task_ornaments(int exit_code);
+extern int task_ornament_notify_signal(int signr, siginfo_t *info);
+extern void task_ornament_notify_execve(void);
+extern void task_ornament_notify_fork(struct task_struct *child, unsigned long clone_flags);
+extern int task_ornament_request_resume_service(struct task_ornament *orn);
+
+extern asmlinkage
+void FASTCALL(task_ornament_notify_syscall(struct pt_regs *regs, int entryexit));
+
+extern asmlinkage
+void FASTCALL(task_ornament_notify_resume(struct pt_regs *regs, sigset_t *oldset,
+ struct task_work work_pending));
+
+extern asmlinkage
+int FASTCALL(do_signal(struct pt_regs *regs, sigset_t *oldset));
+
+extern void syscall_trace(void);
+
+extern struct task_ornament *__task_ornament_find(struct task_struct *target,
+ const struct task_ornament_operations *type);
+
+static __inline__
+int task_ornament_add(struct task_struct *target, struct task_ornament *orn)
+{
+ int ret;
+ task_lock(target);
+ ret = __task_ornament_add(target,orn);
+ task_unlock(target);
+ return ret;
+}
+
+static __inline__
+void task_ornament_remove(struct task_struct *target, struct task_ornament *orn)
+{
+ task_lock(target);
+ __task_ornament_remove(target,orn);
+ task_unlock(target);
+ ornput(orn);
+}
+
+static __inline__
+struct task_ornament *task_ornament_find(struct task_struct *target,
+ const struct task_ornament_operations *type)
+{
+ struct task_ornament *result;
+ task_lock(target);
+ result = __task_ornament_find(target,type);
+ task_unlock(target);
+ return result;
+}
+
+extern struct file_operations task_ornament_ptrace_operations;
+extern ssize_t proc_pid_read_taskornlist(struct task_struct *tsk, struct file *file,
+ char *buf, size_t count, loff_t *ppos);
+
+#endif /* __KERNEL__ */
+
+#endif /* _H_F92CE22E_93FF_11D4_8781_0000C0005742 */
diff -uNr linux-work-253p5/kernel/Makefile linux-orn-253p5/kernel/Makefile
--- linux-work-253p5/kernel/Makefile Fri Jan 25 14:53:17 2002
+++ linux-orn-253p5/kernel/Makefile Fri Jan 25 16:37:51 2002
@@ -10,12 +10,12 @@
O_TARGET := kernel.o

export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o exec_domain.o \
- printk.o device.o
+ printk.o device.o taskornament.o

obj-y = sched.o dma.o fork.o exec_domain.o panic.o printk.o \
module.o exit.o itimer.o info.o time.o softirq.o resource.o \
sysctl.o acct.o capability.o ptrace.o timer.o user.o \
- signal.o sys.o kmod.o context.o device.o
+ signal.o sys.o kmod.o context.o device.o taskornament.o

obj-$(CONFIG_UID16) += uid16.o
obj-$(CONFIG_MODULES) += ksyms.o
diff -uNr linux-work-253p5/kernel/exit.c linux-orn-253p5/kernel/exit.c
--- linux-work-253p5/kernel/exit.c Fri Jan 25 14:53:17 2002
+++ linux-orn-253p5/kernel/exit.c Fri Jan 25 16:38:47 2002
@@ -18,6 +18,7 @@
#include <linux/acct.h>
#endif
#include <linux/file.h>
+#include <linux/taskornament.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -458,7 +459,6 @@
* as a result of our exiting, and if they have any stopped
* jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
*/
-
write_lock_irq(&tasklist_lock);
current->state = TASK_ZOMBIE;
do_notify_parent(current, current->exit_signal);
@@ -530,6 +530,7 @@
if (tsk->binfmt && tsk->binfmt->module)
__MOD_DEC_USE_COUNT(tsk->binfmt->module);

+ exit_task_ornaments(code);
tsk->exit_code = code;
exit_notify();
schedule();
diff -uNr linux-work-253p5/kernel/fork.c linux-orn-253p5/kernel/fork.c
--- linux-work-253p5/kernel/fork.c Fri Jan 25 15:01:07 2002
+++ linux-orn-253p5/kernel/fork.c Fri Jan 25 16:40:22 2002
@@ -22,6 +22,7 @@
#include <linux/namespace.h>
#include <linux/personality.h>
#include <linux/file.h>
+#include <linux/taskornament.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -677,6 +678,13 @@
if (retval)
goto bad_fork_cleanup_namespace;
p->semundo = NULL;
+
+ /*
+ * tell attached ornaments that they may want to duplicate themselves
+ */
+ INIT_LIST_HEAD(&p->ornaments);
+ if (!list_empty(&current->ornaments))
+ task_ornament_notify_fork(p,clone_flags);

/* Our parent execution domain becomes current domain
These must match for thread signalling to apply */
@@ -829,4 +837,6 @@
SLAB_HWCACHE_ALIGN, NULL, NULL);
if(!mm_cachep)
panic("vma_init: Cannot alloc mm_struct SLAB cache");
+
+ task_ornament_init();
}
diff -uNr linux-work-253p5/kernel/taskornament.c linux-orn-253p5/kernel/taskornament.c
--- linux-work-253p5/kernel/taskornament.c Thu Jan 1 01:00:00 1970
+++ linux-orn-253p5/kernel/taskornament.c Fri Jan 25 17:51:39 2002
@@ -0,0 +1,695 @@
+/* taskornament.c: task ornament handling
+ *
+ * Copyright (c) 2001 David Howells (dhowells@redhat.com)
+ */
+#include <linux/sched.h>
+#include <linux/taskornament.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <asm/uaccess.h>
+
+EXPORT_SYMBOL(__ornput);
+EXPORT_SYMBOL(task_ornament_alloc);
+EXPORT_SYMBOL(__task_ornament_add);
+EXPORT_SYMBOL(__task_ornament_remove);
+EXPORT_SYMBOL(__task_ornament_find);
+EXPORT_SYMBOL(task_ornament_request_resume_service);
+
+//#define ktrace(FMT,...) printk("[%05d] "FMT,current->pid,##__VA_ARGS__)
+#define ktrace(FMT,...)
+
+#define task_ornament_buoy_init(x) \
+do { \
+ INIT_LIST_HEAD(&(x)->to_list); \
+ (x)->to_ops = NULL; \
+ atomic_set(&(x)->to_count,0x3fffffff); \
+ (x)->to_magic = TASK_ORNAMENT_MAGIC; \
+} while(0)
+
+static inline void __add_orn(struct task_struct *target, struct task_ornament *orn,
+ struct task_ornament *after)
+{
+ list_add(&orn->to_list,&after->to_list);
+}
+
+static inline void __remove_orn(struct task_struct *target, struct task_ornament *orn)
+{
+ list_del(&orn->to_list);
+}
+
+/* SLAB cache for task_ornament structures */
+static kmem_cache_t *task_ornament_cachep;
+
+void __init task_ornament_init(void)
+{
+ task_ornament_cachep =
+ kmem_cache_create("task_ornament", sizeof(struct task_ornament), 0,
+ 0, NULL, NULL);
+ if (!task_ornament_cachep)
+ panic("cannot create task ornament SLAB cache");
+}
+
+/*****************************************************************************/
+/*
+ * allocate a task ornament
+ */
+struct task_ornament *task_ornament_alloc(struct task_ornament_operations *ops)
+{
+ struct task_ornament *orn;
+
+ orn = (struct task_ornament *) kmem_cache_alloc(task_ornament_cachep,SLAB_KERNEL);
+ if (orn) {
+ INIT_LIST_HEAD(&orn->to_list);
+ orn->to_task = NULL;
+ orn->to_ops = ops;
+ if (ops->owner)
+ __MOD_INC_USE_COUNT(ops->owner);
+ atomic_set(&orn->to_count,1);
+ orn->to_magic = TASK_ORNAMENT_MAGIC;
+ orn->to_order = ops->def_order;
+ orn->to_nfy_resume = 0;
+ orn->to_data = NULL;
+ orn->to_data2 = NULL;
+ }
+
+ return orn;
+} /* end task_ornament_alloc() */
+
+/*****************************************************************************/
+/*
+ * destroy an ornament
+ * - to_count should already be zero
+ */
+void __ornput(struct task_ornament *orn)
+{
+ struct task_ornament_operations *ops = orn->to_ops;
+
+ if (ops->close)
+ ops->close(orn);
+ if (ops->owner)
+ __MOD_DEC_USE_COUNT(ops->owner);
+
+ kmem_cache_free(task_ornament_cachep,orn);
+} /* end __ornput() */
+
+/*****************************************************************************/
+/*
+ * add an ornament to a task
+ * - must be called holding task_lock(target)
+ * - returns 0 if successful, -ESRCH if the target process is a zombie or is exiting
+ */
+int __task_ornament_add(struct task_struct *target, struct task_ornament *orn)
+{
+ struct list_head *_ptr;
+ int ret;
+
+ ktrace("==>__task_ornament_add(%d,%p)\n",target->pid,orn);
+
+ if (!task_ornament_valid(orn))
+ panic("Adding invalid task ornament: %p [caller %p]\n",
+ orn,__builtin_return_address(0));
+
+ ret = -ESRCH;
+ if (target->state!=TASK_ZOMBIE && !(target->flags&PF_EXITING)) {
+ list_for_each(_ptr,&target->ornaments) {
+ struct task_ornament *ptr = list_entry(_ptr,struct task_ornament,to_list);
+ if (ptr->to_order>orn->to_order)
+ break;
+ }
+
+ list_add_tail(&orn->to_list,_ptr);
+ ornget(orn);
+ orn->to_task = target;
+ ret = 0;
+ }
+
+ ktrace("<== __task_ornament_add() = %d\n",ret);
+
+ return ret;
+} /* end __task_ornament_add() */
+
+/*****************************************************************************/
+/*
+ * remove an ornament from a task
+ * - does nothing if the target process is a zombie or is exiting
+ * - doesn't decrement the ornament usage count (since the destructor might sleep)
+ */
+void __task_ornament_remove(struct task_struct *target, struct task_ornament *orn)
+{
+ ktrace("==>__task_ornament_remove(%d,%p)\n",target->pid,orn);
+
+ if (!task_ornament_valid(orn))
+ panic("Removing invalid task ornament: %p [caller %p]\n",
+ orn,__builtin_return_address(0));
+
+ if (target->state!=TASK_ZOMBIE && !(target->flags&PF_EXITING)) {
+ list_del(&orn->to_list);
+ orn->to_task = NULL;
+ }
+
+ ktrace("<==__task_ornament_remove()\n");
+} /* end __task_ornament_remove() */
+
+/*****************************************************************************/
+/*
+ * find an ornament of a particular type attached to a specific task
+ * - note that if an ornament is returned, then its counter will be incremented
+ * - must be called holding task_lock(target)
+ */
+struct task_ornament *__task_ornament_find(struct task_struct *target,
+ const struct task_ornament_operations *type)
+{
+ struct task_ornament *orn;
+ struct list_head *ptr;
+
+ if (!type) return NULL;
+
+ for (ptr=target->ornaments.next; ptr!=&target->ornaments; ptr=ptr->next) {
+ orn = list_entry(ptr,struct task_ornament,to_list);
+
+ if (!task_ornament_valid(orn))
+ panic("Find: invalid task ornament: %p [caller %p]\n",
+ orn,__builtin_return_address(0));
+
+ if (orn->to_ops==type)
+ goto found;
+ }
+
+ return NULL;
+
+ found:
+ ornget(orn);
+ return orn;
+} /* end __task_ornament_find() */
+
+/*****************************************************************************/
+/*
+ * send exit notifications to the current process's ornaments
+ * - this'll be called from do_exit() in kernel/exit.c
+ * - do_exit() holds the big kernel lock
+ * - the ornaments are removed from the list before they are notified
+ * - addition is prevented by PF_EXITING in current->flags
+ */
+void exit_task_ornaments(int exit_code)
+{
+ struct task_ornament *orn;
+ struct task_struct *tsk = current;
+
+ ktrace("==>exit_task_ornaments()\n");
+
+ /* eat the list */
+ task_lock(tsk);
+ while (!list_empty(&tsk->ornaments)) {
+ orn = list_entry(tsk->ornaments.next,struct task_ornament,to_list);
+ list_del_init(&orn->to_list);
+ task_unlock(tsk);
+
+ if (!task_ornament_valid(orn))
+ panic("Exit: invalid task ornament: %p\n",orn);
+
+ if (orn->to_ops && orn->to_ops->exit)
+ orn->to_ops->exit(orn,exit_code);
+ orn->to_task = NULL;
+
+ ornput(orn);
+ task_lock(tsk);
+ }
+
+ task_unlock(tsk);
+
+ ktrace("<==exit_task_ornaments()\n");
+
+} /* end exit_task_ornaments() */
+
+/*****************************************************************************/
+/*
+ * perform various tasks immediately prior to resuming userspace execution
+ * - deal with pending signals
+ * - poll ornaments that are requesting resumption notification
+ */
+void task_ornament_notify_resume(struct pt_regs *regs, sigset_t *oldset,
+ struct task_work work_pending)
+{
+ struct task_ornament buoy, *orn;
+ struct task_struct *tsk = current;
+ struct list_head *ptr;
+
+ ktrace("==>task_ornament_notify_resume(%p,%p,%08x)\n",
+ regs,oldset,*(__u32*)&work_pending);
+
+ /* deal with pending signal delivery */
+ if (work_pending.sigpending)
+ do_signal(regs,oldset);
+
+ /* deal with resumption notification */
+ if (work_pending.notify_resume) {
+ task_ornament_buoy_init(&buoy);
+ tsk->work.notify_resume = 0;
+
+ /* loop through all task ornaments, but be careful in case the
+ * list is rearranged. The buoy is used as a marker between the current
+ * position and the next
+ */
+ task_lock(tsk);
+
+ ptr = tsk->ornaments.next;
+ while (ptr != &tsk->ornaments) {
+ /* find the next ornament with a resume op */
+ orn = list_entry(ptr,struct task_ornament,to_list);
+
+ if (!task_ornament_valid(orn))
+ panic("Resume: invalid task ornament: %p\n",orn);
+
+ if (!orn->to_ops || !orn->to_ops->resume || !orn->to_nfy_resume) {
+ ptr = ptr->next;
+ continue;
+ }
+
+ /* make sure the ornament doesn't evaporate yet */
+ ornget(orn);
+
+ /* mark the queue so we know where our next ornament is */
+ buoy.to_order = orn->to_order;
+ __add_orn(tsk,&buoy,orn);
+ task_unlock(tsk);
+
+ /* call the operation */
+ orn->to_ops->resume(orn);
+ ornput(orn);
+
+ /* remove the marker */
+ task_lock(tsk);
+ ptr = buoy.to_list.next;
+ __remove_orn(tsk,&buoy);
+ }
+
+ task_unlock(tsk);
+ }
+
+ ktrace("<==task_ornament_notify_resume()\n");
+} /* end task_ornament_notify_resume() */
+
+/*****************************************************************************/
+/*
+ * send signal delivery notifications to the current process's ornaments
+ * - returns 0 to keep the signal
+ * - returns -1 to cancel the signal immediately
+ * - returns new signal number to change the signal (and has updated *info)
+ * - this'll be called from do_signal() in arch/xxx/kernel/signal.c
+ * - the list is _not_ locked whilst the notification routine is running
+ * - a buoy is placed in the list to mark the next ornament to be processed
+ */
+int task_ornament_notify_signal(int signr, siginfo_t *info)
+{
+ struct task_ornament buoy, *orn;
+ struct task_struct *tsk = current;
+ struct list_head *ptr;
+ int action;
+
+ ktrace("==>task_ornament_notify_signal(%d,%p)\n",signr,info);
+
+ task_ornament_buoy_init(&buoy);
+
+ /* loop through all task ornaments, but be careful in case the list is
+ * rearranged. The buoy is used as a marker between the current
+ * position and the next
+ */
+ task_lock(tsk);
+
+ action = 0;
+ ptr = tsk->ornaments.next;
+ while (ptr != &tsk->ornaments) {
+ /* find the next ornament with a signal op */
+ orn = list_entry(ptr,struct task_ornament,to_list);
+
+ if (!task_ornament_valid(orn))
+ panic("Signal: invalid task ornament: %p\n",orn);
+
+ if (!orn->to_ops || !orn->to_ops->signal) {
+ ptr = ptr->next;
+ continue;
+ }
+
+ /* make sure the ornament doesn't evaporate yet */
+ ornget(orn);
+
+ /* mark the queue so we know where our next ornament is */
+ buoy.to_order = orn->to_order;
+ __add_orn(tsk,&buoy,orn);
+ task_unlock(tsk);
+
+ /* call the operation */
+ orn->to_ops->signal(orn,signr,info,&action);
+ ornput(orn);
+
+ /* remove the marker */
+ task_lock(tsk);
+ ptr = buoy.to_list.next;
+ __remove_orn(tsk,&buoy);
+ }
+
+ task_unlock(tsk);
+
+ ktrace("<==task_ornament_notify_signal() = %d\n",action);
+
+ return action;
+} /* end task_ornament_notify_signal() */
+
+/*****************************************************************************/
+/*
+ * notify an ornament of the current process execve'ing itself
+ * - this'll be called from flush_old_exec() in fs/exec.c
+ * - the list is _not_ locked whilst the notification routine is running
+ * - a buoy is placed in the list to mark the next ornament to be processed
+ */
+void task_ornament_notify_execve(void)
+{
+ struct task_ornament buoy, *orn;
+ struct task_struct *tsk = current;
+ struct list_head *ptr;
+
+ ktrace("==>task_ornament_notify_execve()\n");
+
+ task_ornament_buoy_init(&buoy);
+
+ /* loop through all task ornaments, but be careful in case the
+ * list is rearranged. The buoy is used as a marker between the current
+ * position and the next
+ */
+ task_lock(tsk);
+
+ ptr = tsk->ornaments.next;
+ while (ptr != &tsk->ornaments) {
+ /* find the next ornament with an execve op */
+ orn = list_entry(ptr,struct task_ornament,to_list);
+
+ if (!task_ornament_valid(orn))
+ panic("Execve: invalid task ornament: %p\n",orn);
+
+ if (!orn->to_ops || !orn->to_ops->execve) {
+ ptr = ptr->next;
+ continue;
+ }
+
+ /* make sure the ornament doesn't evaporate yet */
+ ornget(orn);
+
+ /* mark the queue so we know where our next ornament is */
+ buoy.to_order = orn->to_order;
+ __add_orn(tsk,&buoy,orn);
+ task_unlock(tsk);
+
+ /* call the operation */
+ orn->to_ops->execve(orn);
+ ornput(orn);
+
+ /* remove the marker */
+ task_lock(tsk);
+ ptr = buoy.to_list.next;
+ __remove_orn(tsk,&buoy);
+ }
+
+ task_unlock(tsk);
+
+ ktrace("<==task_ornament_notify_execve()\n");
+
+} /* end task_ornament_notify_execve() */
+
+/*****************************************************************************/
+/*
+ * notify an ornament of the current process forking/cloning itself
+ * - this'll be called from do_fork() in kernel/fork.c
+ * - the list is _not_ locked whilst the notification routine is running
+ * - a buoy is placed in the list to mark the next ornament to be processed
+ * - a second buoy is placed at the end of the list to prevent processing of
+ * ornaments added during processing
+ */
+void task_ornament_notify_fork(struct task_struct *child, unsigned long clone_flags)
+{
+ struct task_ornament buoy, *orn;
+ struct task_struct *tsk = current;
+ struct list_head *ptr;
+
+ ktrace("==>task_ornament_notify_fork(%d,%lu)\n",child->pid,clone_flags);
+
+ task_ornament_buoy_init(&buoy);
+
+ /* loop through all task ornaments, but be careful in case the
+ * list is rearranged. The buoy is used as a marker between the current
+ * position and the next
+ */
+ task_lock(tsk);
+
+ ptr = tsk->ornaments.next;
+ while (ptr != &tsk->ornaments) {
+ /* skip over buoys from other processors */
+ if (ptr==&buoy.to_list)
+ break;
+
+ orn = list_entry(ptr,struct task_ornament,to_list);
+
+ if (!task_ornament_valid(orn))
+ panic("Fork: invalid task ornament: %p\n",orn);
+
+ if (!orn->to_ops || !orn->to_ops->fork) {
+ ptr = ptr->next;
+ continue;
+ }
+
+ /* make sure the ornament doesn't evaporate yet */
+ ornget(orn);
+
+ /* mark the queue so we know where our next ornament is */
+ buoy.to_order = orn->to_order;
+ __add_orn(tsk,&buoy,orn);
+ task_unlock(tsk);
+
+ /* call the operation */
+ orn->to_ops->fork(orn,child,clone_flags);
+ ornput(orn);
+
+ /* remove the marker */
+ task_lock(tsk);
+ ptr = buoy.to_list.next;
+ __remove_orn(tsk,&buoy);
+ }
+
+ task_unlock(tsk);
+
+ ktrace("<==task_ornament_notify_fork()\n");
+
+} /* end task_ornament_notify_fork() */
+
+/*****************************************************************************/
+/*
+ * notify an ornament of the current process entering or exiting a syscall
+ * - this'll be called from do_syscall_trace() in arch/xxx/kernel/ptrace.c
+ * - the list is _not_ locked whilst the notification routine is running
+ * - a buoy is placed in the list to mark the next ornament to be processed
+ * - a second buoy is placed at the end of the list to prevent processing of
+ * ornaments added during processing
+ */
+void task_ornament_notify_syscall(struct pt_regs *regs, int entryexit)
+{
+ struct task_ornament buoy, *orn;
+ struct task_struct *tsk = current;
+ struct list_head *ptr;
+
+ ktrace("==>task_ornament_notify_syscall(%d)\n",entryexit);
+
+ task_ornament_buoy_init(&buoy);
+
+ /* loop through all task ornaments, but be careful in case the
+ * list is rearranged. The buoy is used as a marker between the current
+ * position and the next
+ */
+ task_lock(tsk);
+
+ ptr = tsk->ornaments.next;
+ while (ptr != &tsk->ornaments) {
+ if (ptr==&buoy.to_list)
+ break;
+
+ orn = list_entry(ptr,struct task_ornament,to_list);
+
+ if (!task_ornament_valid(orn))
+ panic("SyscallTrace: invalid task ornament: %p\n",orn);
+
+ if (!orn->to_ops || !orn->to_ops->syscall_trace) {
+ ptr = ptr->next;
+ continue;
+ }
+
+ /* make sure the ornament doesn't evaporate yet */
+ ornget(orn);
+
+ /* mark the queue so we know where our next ornament is */
+ buoy.to_order = orn->to_order;
+ __add_orn(tsk,&buoy,orn);
+ task_unlock(tsk);
+
+ /* call the operation */
+ orn->to_ops->syscall_trace(orn,regs,entryexit);
+ ornput(orn);
+
+ /* remove the marker */
+ task_lock(tsk);
+ ptr = buoy.to_list.next;
+ __remove_orn(tsk,&buoy);
+ }
+
+ task_unlock(tsk);
+
+ ktrace("<==task_ornament_notify_syscall()\n");
+} /* end task_ornament_notify_syscall() */
+
+/*****************************************************************************/
+/*
+ * request userspace resumption notification service for an ornament on a task
+ */
+int task_ornament_request_resume_service(struct task_ornament *orn)
+{
+ struct task_struct *tsk;
+ int ret = -ESRCH;
+
+ ktrace("==>task_ornament_request_resume_service(%p)\n",orn);
+
+ read_lock(&tasklist_lock);
+
+ tsk = orn->to_task;
+ printk("-- setting flag on %p\n",tsk);
+ if (tsk) {
+ printk("-- (pid %d)\n",tsk->pid);
+ task_lock(tsk);
+ orn->to_nfy_resume = 1;
+ tsk->work.notify_resume = 1;
+ task_unlock(tsk);
+ ret = 0;
+ }
+
+ read_unlock(&tasklist_lock);
+
+ ktrace("<==task_ornament_request_resume_service() = %d\n",ret);
+
+ return ret;
+} /* end task_ornament_request_resume_service() */
+
+/*****************************************************************************/
+/*
+ * read a list of task ornaments attached to a process
+ * - bits 16 and up of the file offset select the ornament
+ * - bits 0-15 of the file offset give the byte position within that ornament's text
+ */
+ssize_t proc_pid_read_taskornlist(struct task_struct *tsk, struct file *file,
+ char *ubuf, size_t count, loff_t *ppos)
+{
+ struct task_ornament *orn;
+ struct list_head *_ptr;
+ char *scratch = NULL, *curr, *buf = NULL;
+ ssize_t ret;
+ loff_t npos;
+ int index, qty, pos;
+
+ /* get some scratch space */
+ ret = -ENOMEM;
+ buf = (char*) __get_free_page(GFP_KERNEL);
+ if (!buf)
+ goto out;
+
+ scratch = (char*) __get_free_page(GFP_KERNEL);
+ if (!scratch)
+ goto out;
+
+ /* get the n'th task ornament next */
+ npos = *ppos;
+ ret = 0;
+ if (count > PAGE_SIZE)
+ count = PAGE_SIZE; /* buf is only one page long */
+ while (count>0) {
+ index = npos >> 16;
+ orn = NULL;
+
+ task_lock(tsk);
+
+ list_for_each(_ptr,&tsk->ornaments) {
+ orn = list_entry(_ptr,struct task_ornament,to_list);
+ if (!orn->to_ops)
+ continue; /* ignore markers inserted into the list */
+ if (index==0)
+ break;
+ index--;
+ }
+
+ if (_ptr==&tsk->ornaments) {
+ task_unlock(tsk);
+ break;
+ }
+
+ /* make sure the ornament doesn't evaporate yet */
+ ornget(orn);
+ task_unlock(tsk);
+
+ /* make notes on this ornament */
+ curr = scratch;
+ curr += sprintf(curr,
+ "%s %p usage=%03d order=%04x\n"
+ "%s",
+ orn->to_ops->name,
+ orn,
+ atomic_read(&orn->to_count),
+ orn->to_order,
+ orn->to_nfy_resume ? " - Resume Service Requested\n" : "");
+
+ /* call the information gathering operation */
+ if (orn->to_ops->proc_read)
+ curr = orn->to_ops->proc_read(orn,curr,scratch+PAGE_SIZE-1);
+ ornput(orn);
+
+ /* distill the appropriate amount to the buffer */
+ qty = curr - scratch;
+ pos = npos & 0xFFFF;
+ npos -= pos;
+ if (pos < qty) {
+ /* some of the current scratch buffer should be transferred */
+ curr = scratch + pos;
+ qty -= pos;
+
+ if (qty > count) {
+ /* partial copy */
+ memcpy(buf+ret,curr,count);
+ ret += count;
+ qty -= count;
+ pos += count;
+ count = 0;
+ }
+ else {
+ /* complete copy */
+ memcpy(buf+ret,curr,qty);
+ ret += qty;
+ count -= qty;
+ qty = 0;
+ }
+
+ }
+ else {
+ /* already read everything this ornament has to offer */
+ qty = 0;
+ }
+
+ if (qty==0)
+ npos += 0x10000;
+ else
+ npos += pos; /* resume part-way through this ornament next time */
+ }
+
+ if (ret > PAGE_SIZE) BUG();
+
+ /* write the results to userspace */
+ if (ret>0) {
+ if (copy_to_user(ubuf,buf,ret))
+ ret = -EFAULT;
+ else
+ *ppos = npos;
+ }
+
+ out:
+ if (scratch) free_page((unsigned long)scratch);
+ if (buf) free_page((unsigned long)buf);
+ return ret;
+
+} /* end proc_pid_read_taskornlist() */