> The normal way of solving this fairness problem is to make pending write
> locks block read lock attempts, so that the reader count is guaranteed
> to drop to zero as read locks are released. I haven't looked at the
> Linux implementation of rwlocks, so I don't know how hard this is to
> do. Or perhaps there's some other reason for not implementing it this
> way?
Actually, implementing fair spinlocks and fair rwlocks on the x86 arch is
very easy (at least, it is if you have XADD). Any arch which has CMPXCHG can
also do it, just not so easily.
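Roughly, the CMPXCHG-only version replaces the single XADD with a CAS loop.
Here's a sketch of that (illustrative only; it uses C11 atomics rather than
the kernel primitives, and the names are just for the demo):

#include <stdatomic.h>

typedef struct {
	atomic_uchar curr;	/* ticket currently being served */
	atomic_uchar ticket;	/* next ticket to be issued */
} cas_fairlock_t;

static inline void cas_fair_lock(cas_fairlock_t *lock)
{
	unsigned char t = atomic_load(&lock->ticket);

	/* emulate XADD: keep proposing ticket+1 until the CAS sticks;
	 * on failure, t is reloaded with the current ticket value */
	while (!atomic_compare_exchange_weak(&lock->ticket, &t, t + 1))
		;

	/* t is now my ticket; wait for it to be served */
	while (atomic_load(&lock->curr) != t)
		;	/* spin */
}

static inline void cas_fair_unlock(cas_fairlock_t *lock)
{
	atomic_fetch_add(&lock->curr, 1);	/* serve the next ticket */
}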
I've attached an implementation of a fair spinlock and an implementation of a
fair rwlock (which can be compiled and played with in u-space).
David
/* the following definitions are normally supplied by kernel headers; they
 * are given here (in simplified form) so this compiles in u-space */
#include <string.h>

typedef unsigned char u8;
typedef unsigned int u32;

#define LOCK_PREFIX "lock;"
#define LOCK_SECTION_START(extra) ".section .text.lock,\"ax\"\n" extra
#define LOCK_SECTION_END ".previous\n"

typedef struct fairlock {
	u8 curr;	/* ticket currently being served */
	u8 ticket;	/* next ticket to be issued */
} fairlock_t;

static inline void init_fairlock(fairlock_t *lock)
{
	memset(lock, 0, sizeof(*lock));
}
/*
* spin lock fairly
*/
static inline void fair_lock(fairlock_t *lock)
{
	u8 number = 1;

	__asm__ __volatile__(
		"# beginning fair_lock\n\t"
LOCK_PREFIX "	xaddb	%b2,%1\n\t"	/* number = lock->ticket; lock->ticket++; */
		"1:\n\t"
		"	cmpb	%b2,%0\n\t"
		"	jne	2f\n\t"		/* jump if number!=lock->curr */
		LOCK_SECTION_START("")
		"2:\n\t"
		"	rep; nop\n\t"
		"	jmp	1b\n"
		LOCK_SECTION_END
		"# ending fair_lock\n\t"
		: "=m"(lock->curr), "=m"(lock->ticket), "=q"(number)
		: "r"(lock), "m"(lock->curr), "m"(lock->ticket), "2"(number)
		: "memory", "cc");
}
/*
 * try to get the spin lock fairly, without waiting
 * - returns true if the lock was obtained
 */
static inline int fair_trylock(fairlock_t *lock)
{
	u32 tmp;
	int gotit;

	__asm__ __volatile__(
		"# beginning fair_trylock\n\t"
		"	movw	(%4),%%ax\n\t"	/* AL=curr, AH=ticket */
		"1:\n\t"
		"	cmpb	%%al,%%ah\n\t"
		"	je	3f\n\t"		/* jump if maybe we can get it */
		"	xorl	%3,%3\n\t"	/* lock is busy: give up */
		"2:\n\t"
		LOCK_SECTION_START("")
		"3:\n\t"
		"	leaw	0x100(%%eax),%w2\n\t"	/* [propose] ticket=ticket+1 (ticket is the high byte) */
LOCK_PREFIX "	cmpxchgw %w2,(%4)\n\t"
		"	je	2b\n\t"		/* jump if it worked */
		"	rep; nop\n\t"		/* didn't work; AL & AH have been updated */
		"	jmp	1b\n"
		LOCK_SECTION_END
		"# ending fair_trylock\n\t"
		: "=m"(lock->curr), "=m"(lock->ticket), "=r"(tmp), "=r"(gotit)
		: "r"(lock), "m"(lock->curr), "m"(lock->ticket), "3"(1)
		: "memory", "cc", "eax");
	return gotit;
}
/*
 * spin unlock fairly
 */
static inline void fair_unlock(fairlock_t *lock)
{
	__asm__ __volatile__(
		"# beginning fair_unlock\n\t"
LOCK_PREFIX "	incb	%0\n\t"		/* lock->curr++; */
		"# ending fair_unlock\n\t"
		: "=m"(lock->curr)
		: "m"(lock->curr)
		: "memory", "cc");
}
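A quick single-threaded sanity check of the above (illustrative only;
check_fairlock() is just a demo name):

fairlock_t demo;

int check_fairlock(void)
{
	init_fairlock(&demo);
	fair_lock(&demo);		/* ticket 0 issued; curr==0, so granted at once */
	fair_unlock(&demo);		/* curr advances to 1 */
	return fair_trylock(&demo);	/* curr==ticket==1, so this should succeed */
}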
/* rwlock.c: fair read/write spinlocks
*
* Copyright (c) 2002 David Howells (dhowells@redhat.com).
*/
#include <stdio.h>
#include <linux/types.h>
typedef unsigned char u8;
typedef unsigned int u32;
#define LOCK_PREFIX "lock;"
typedef struct rwlock {
	/* these member variables must be at the beginning and in this order */
	u8	rd_curr;	/* next reader ticket allowed to become active */
	u8	curr;		/* currently active ticket */
	u8	ticket;		/* next ticket to be issued */
	u8	__pad;
} __attribute__((aligned(4))) rwlock_t;
#define RWLOCK_UNLOCKED (rwlock_t) { 0, 0, 0, 0 }
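/*
 * worked example (added for illustration): how the three counters interact
 * for two readers followed by a writer, starting from RWLOCK_UNLOCKED
 *
 *				rd_curr	curr	ticket
 *	start			0	0	0
 *	A: read_lock		1	0	1	(ticket 0; rd_curr bumped)
 *	B: read_lock		2	0	2	(ticket 1; rd_curr bumped)
 *	C: write_lock spins	2	0	3	(ticket 2; waits for curr==2,
 *						 and rd_curr==2 blocks later readers)
 *	A: read_unlock		2	1	3
 *	B: read_unlock		2	2	3	(curr==2: C now holds the lock)
 *	C: write_unlock		3	3	3	(both bumped; lock idle again)
 */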
#define rwlock_debug(LOCK,WHERE) \
do { \
	printf(WHERE"{%02x,%02x,%02x}\n", \
	       (LOCK)->rd_curr, (LOCK)->curr, (LOCK)->ticket); \
} while(0)
/*
 * obtain a write lock
 * - pull the next ticket from lock->ticket (which is subsequently incremented)
 * - spin until lock->curr catches up to the value that lock->ticket had before the XADD
 * - lock->rd_curr is left equal to lock->curr (and thus to my ticket number) to prevent
 *   readers from being granted the lock
 */
static inline void write_lock(rwlock_t *lock)
{
	u32 eax;

	rwlock_debug(lock, "-->write_lock");
	asm volatile(
		"	# begin write_lock		\n"
		LOCK_PREFIX " xaddw %%ax,1(%3)		\n"	/* my ticket in AH */
		"0:	cmpb	%%al,%%ah		\n"	/* lock->curr in AL */
		"	jne	2f			\n"
		"	.section .text.lock,\"ax\"	\n"
		"2:					\n"
		"	rep; nop			\n"
		"	movb	1(%3),%%al		\n"	/* reload AL from lock->curr */
		"	jmp	0b			\n"
		"	.previous			\n"
		"	# end write_lock		\n"
		: "=m"(*lock), "=&a"(eax)
		: "m"(*lock), "r"(lock), "1"(0x0100)
		: "memory", "cc"
		);
	rwlock_debug(lock, "<--write_lock");
}
/*
* release a write lock
* - advance both lock->rd_curr and lock->curr by one to enable the next lock to be granted
*/
static inline void write_unlock(rwlock_t *lock)
{
	u32 eax;

	rwlock_debug(lock, "-->write_unlock");
	asm volatile(
		"	# begin write_unlock	\n"
		"	movw	0(%3),%%ax	\n"
		"	incb	%%al		\n"	/* lock->rd_curr++ */
		"	incb	%%ah		\n"	/* lock->curr++ */
		"	movw	%%ax,0(%3)	\n"
		"	# end write_unlock	\n"
		: "=m"(*lock), "=&a"(eax)
		: "m"(*lock), "r"(lock)
		: "memory", "cc"
		);
	rwlock_debug(lock, "<--write_unlock");
}
/*
* obtain a read lock
* - pull the next ticket from lock->ticket (which is subsequently incremented)
* - spin until lock->rd_curr catches up to the value that lock->ticket had before the XADD
* - lock->rd_curr is then advanced by one to allow the next read lock to be granted
* - lock->curr is irrelevant
*/
static inline void read_lock(rwlock_t *lock)
{
	u32 eax;

	rwlock_debug(lock, "-->read_lock");
	asm volatile(
		"	# begin read_lock		\n"
		LOCK_PREFIX " xaddb %%ah,2(%3)		\n"	/* my ticket in AH */
		"0:	movb	0(%3),%%al		\n"	/* AL = lock->rd_curr */
		"	cmpb	%%al,%%ah		\n"	/* if (ticket!=lock->rd_curr) */
		"	jne	2f			\n"	/* then jump */
		"	incb	0(%3)			\n"	/* lock->rd_curr++ */
		"	.section .text.lock,\"ax\"	\n"
		"2:					\n"
		"	rep; nop			\n"
		"	jmp	0b			\n"
		"	.previous			\n"
		"	# end read_lock			\n"
		: "=m"(*lock), "=&a"(eax)
		: "m"(*lock), "r"(lock), "1"(0x0100)
		: "memory", "cc"
		);
	rwlock_debug(lock, "<--read_lock");
}
/*
 * release a read lock
 * - just advance the lock->curr count so that any spinning write lock attempt can proceed
 */
static inline void read_unlock(rwlock_t *lock)
{
	rwlock_debug(lock, "-->read_unlock");
	asm volatile(
		"	# begin read_unlock	\n"
		LOCK_PREFIX "	incb	%0	\n"	/* lock->curr++ */
		"	# end read_unlock	\n"
		: "=m"(lock->curr)
		: "m"(lock->curr)
		: "memory", "cc"
		);
	rwlock_debug(lock, "<--read_unlock");
}
/*****************************************************************************/
/*
* testing stuff
*/
rwlock_t mylock = RWLOCK_UNLOCKED;

#define wibble() asm("nop")

void get_read_lock(void)
{
	wibble();
	read_lock(&mylock);
	read_lock(&mylock);
	read_unlock(&mylock);
	wibble();
	read_lock(&mylock);
	read_unlock(&mylock);
	read_unlock(&mylock);
	wibble();
}

void get_write_lock(void)
{
	wibble();
	write_lock(&mylock);
	wibble();
	write_unlock(&mylock);
	wibble();
}

int main(void)
{
	get_read_lock();
	get_write_lock();
	get_write_lock();
	get_read_lock();
	return 0;
}
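The calls above only exercise the lock from one thread. A threaded exerciser
might look like this (illustrative sketch, assuming POSIX threads; you'd want
rwlock_debug() compiled out first, or the printf calls will swamp the output;
call run_threaded_test() from main() instead of the sequential tests):

#include <pthread.h>

#define NTHREADS 4

/* odd-numbered threads take the write lock, even-numbered ones read */
static void *hammer(void *arg)
{
	long id = (long) arg;
	int i;

	for (i = 0; i < 100000; i++) {
		if (id & 1) {
			write_lock(&mylock);
			write_unlock(&mylock);
		} else {
			read_lock(&mylock);
			read_unlock(&mylock);
		}
	}
	return NULL;
}

void run_threaded_test(void)
{
	pthread_t th[NTHREADS];
	long i;

	for (i = 0; i < NTHREADS; i++)
		pthread_create(&th[i], NULL, hammer, (void *) i);
	for (i = 0; i < NTHREADS; i++)
		pthread_join(th[i], NULL);
}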