Here is the final full patch (two illustrative userspace sketches of the get_64bit() read and of the version-counter fallback follow after it):
diff -urNp race/fs/inode.c race-fix/fs/inode.c
--- race/fs/inode.c Tue Jul 23 18:46:47 2002
+++ race-fix/fs/inode.c Tue Jul 23 19:13:02 2002
@@ -111,6 +111,7 @@ static void init_once(void * foo, kmem_c
sema_init(&inode->i_sem, 1);
sema_init(&inode->i_zombie, 1);
spin_lock_init(&inode->i_data.i_shared_lock);
+ i_size_ordered_init(inode);
}
}
diff -urNp race/include/asm-i386/system.h race-fix/include/asm-i386/system.h
--- race/include/asm-i386/system.h Tue Jul 23 18:46:44 2002
+++ race-fix/include/asm-i386/system.h Tue Jul 23 18:47:10 2002
@@ -143,6 +143,8 @@ struct __xchg_dummy { unsigned long a[10
#define __xg(x) ((struct __xchg_dummy *)(x))
+#ifdef CONFIG_X86_CMPXCHG
+#define __ARCH_HAS_GET_SET_64BIT 1
/*
* The semantics of XCHGCMP8B are a bit strange, this is why
* there is a loop and the loading of %%eax and %%edx has to
@@ -167,7 +169,7 @@ static inline void __set_64bit (unsigned
"lock cmpxchg8b (%0)\n\t"
"jnz 1b"
: /* no outputs */
- : "D"(ptr),
+ : "r"(ptr),
"b"(low),
"c"(high)
: "ax","dx","memory");
@@ -197,6 +199,32 @@ static inline void __set_64bit_var (unsi
__set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \
__set_64bit(ptr, ll_low(value), ll_high(value)) )
+
+/*
+ * The memory clobber is needed on the read side only if
+ * an unsafe writer could run before the get_64bit, which should
+ * never be the case, but it is kept just to be safe.
+ */
+static inline unsigned long long get_64bit(unsigned long long * ptr)
+{
+ unsigned long low, high;
+
+ __asm__ __volatile__ (
+ "\n1:\t"
+ "movl (%2), %%eax\n\t"
+ "movl 4(%2), %%edx\n\t"
+ "movl %%eax, %%ebx\n\t"
+ "movl %%edx, %%ecx\n\t"
+ "lock cmpxchg8b (%2)\n\t"
+ "jnz 1b"
+ : "=&b" (low), "=&c" (high)
+ : "r" (ptr)
+ : "ax","dx","memory");
+
+ return low | ((unsigned long long) high << 32);
+}
+#endif /* CONFIG_X86_CMPXCHG */
+
/*
* Note: no "lock" prefix even on SMP: xchg always implies lock anyway
* Note 2: xchg has side effect, so that attribute volatile is necessary,
diff -urNp race/include/linux/fs.h race-fix/include/linux/fs.h
--- race/include/linux/fs.h Tue Jul 23 18:46:48 2002
+++ race-fix/include/linux/fs.h Tue Jul 23 19:13:51 2002
@@ -449,6 +449,13 @@ struct block_device {
struct list_head bd_inodes;
};
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP) && !defined(__ARCH_HAS_GET_SET_64BIT)
+#define __NEED_I_SIZE_ORDERED
+#define i_size_ordered_init(inode) do { (inode)->i_size_version1 = (inode)->i_size_version2 = 0; } while (0)
+#else
+#define i_size_ordered_init(inode) do { } while (0)
+#endif
+
struct inode {
struct list_head i_hash;
struct list_head i_list;
@@ -534,8 +541,65 @@ struct inode {
struct xfs_inode_info xfs_i;
void *generic_ip;
} u;
+#ifdef __NEED_I_SIZE_ORDERED
+ volatile int i_size_version1;
+ volatile int i_size_version2;
+#endif
};
+/*
+ * NOTE: on a 32bit arch with a preemptible kernel and
+ * a UP compile, i_size_read/write must be atomic
+ * with respect to the local cpu (unlike with preempt disabled),
+ * but they don't need to be atomic with respect to other cpus as in
+ * true SMP (so they either need to locally disable irqs around
+ * the read or, for example on x86, they can still be implemented as a
+ * cmpxchg8b without the need of the lock prefix).  For SMP compiles
+ * and 64bit archs it makes no difference whether preempt is enabled or not.
+ */
+static inline loff_t i_size_read(struct inode * inode)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#ifdef __ARCH_HAS_GET_SET_64BIT
+ return (loff_t) get_64bit((unsigned long long *) &inode->i_size);
+#else
+ loff_t i_size;
+ int v1, v2;
+
+ /* Retry if i_size was possibly modified while sampling. */
+ do {
+ v1 = inode->i_size_version1;
+ rmb();
+ i_size = inode->i_size;
+ rmb();
+ v2 = inode->i_size_version2;
+ } while (v1 != v2);
+
+ return i_size;
+#endif
+#elif BITS_PER_LONG==64 || !defined(CONFIG_SMP)
+ return inode->i_size;
+#endif
+}
+
+static inline void i_size_write(struct inode * inode, loff_t i_size)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#ifdef __ARCH_HAS_GET_SET_64BIT
+ set_64bit((unsigned long long *) &inode->i_size, (unsigned long long) i_size);
+#else
+ inode->i_size_version2++;
+ wmb();
+ inode->i_size = i_size;
+ wmb();
+ inode->i_size_version1++;
+ wmb(); /* make it visible ASAP */
+#endif
+#elif BITS_PER_LONG==64 || !defined(CONFIG_SMP)
+ inode->i_size = i_size;
+#endif
+}
+
static inline void inode_add_bytes(struct inode *inode, loff_t bytes)
{
inode->i_blocks += bytes >> 9;
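
For illustration only, here is a minimal userspace sketch of the idea
behind get_64bit(): cmpxchg8b always loads an atomic snapshot of the 8
bytes into edx:eax whether or not the compare succeeds, so CASing a
location against itself yields an untorn 64bit read. The
__sync_val_compare_and_swap builtin is only a stand-in for the hand
written asm above (gcc emits lock cmpxchg8b for it on i386 with
-march=i586 or later); the patch itself does not depend on it.

/* demo_get64.c -- sketch only, the kernel uses the inline asm above.
 * Build with: gcc -O2 -march=i586 demo_get64.c (or natively on x86-64) */
#include <stdio.h>

static unsigned long long shared = 0x0123456789abcdefULL;

static unsigned long long demo_get_64bit(unsigned long long *ptr)
{
	/* Possibly torn plain sample, used only as the "expected" CAS
	 * value: if it matches, cmpxchg8b rewrites the same value; if a
	 * writer raced us, cmpxchg8b still returns the current contents
	 * atomically, so either way we get an untorn 64bit snapshot
	 * (the asm above loops instead because it returns via ebx:ecx). */
	unsigned long long guess = *ptr;

	return __sync_val_compare_and_swap(ptr, guess, guess);
}

int main(void)
{
	printf("%#llx\n", demo_get_64bit(&shared));
	return 0;
}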
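And a self-contained sketch of the version-counter fallback used when
__ARCH_HAS_GET_SET_64BIT is not available (the same idea as the
kernel's seqlocks): the writer bumps i_size_version2 before and
i_size_version1 after updating i_size, the reader samples version1
first and version2 last and retries on mismatch. __sync_synchronize()
stands in for rmb()/wmb() here, and, as in the patch, a single writer
is assumed (i_size_write callers are serialized by i_sem). If you
delete the two counters, "torn" goes nonzero on a 32bit SMP box.

/* demo_ordered.c -- userspace sketch of the ordered-write protocol.
 * Build with: gcc -O2 -pthread demo_ordered.c (add -m32 on a 64bit
 * host, since a native 64bit store cannot tear in the first place). */
#include <pthread.h>
#include <stdio.h>

static volatile int v1, v2;
static long long value;			/* 64bit: two stores on 32bit */

static void ordered_write(long long newval)	/* single writer only */
{
	v2++;
	__sync_synchronize();		/* wmb(): bump v2 before the data */
	value = newval;
	__sync_synchronize();		/* wmb(): data before bumping v1 */
	v1++;
}

static long long ordered_read(void)
{
	long long snap;
	int a, b;

	do {
		a = v1;
		__sync_synchronize();	/* rmb() */
		snap = value;
		__sync_synchronize();	/* rmb() */
		b = v2;
	} while (a != b);		/* writer was mid-update: retry */

	return snap;
}

static void *writer(void *arg)
{
	long long i;

	(void) arg;
	for (i = 1; i < 50000000; i++)
		ordered_write(i | (i << 32));	/* halves always equal */
	return NULL;
}

int main(void)
{
	pthread_t t;
	long long i, snap, torn = 0;

	pthread_create(&t, NULL, writer, NULL);
	for (i = 0; i < 50000000; i++) {
		snap = ordered_read();
		if ((snap & 0xffffffffLL) != (snap >> 32))
			torn++;		/* stays 0 with the protocol */
	}
	pthread_join(t, NULL);
	printf("torn reads: %lld\n", torn);
	return 0;
}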
Andrea