Skip to content

Commit

Permalink
Merge branch 'rwsem-optimizations'
Browse files Browse the repository at this point in the history
Merge rwsem optimizations from Michel Lespinasse:
 "These patches extend Alex Shi's work (which added write lock stealing
  on the rwsem slow path) in order to provide rwsem write lock stealing
  on the fast path (that is, without taking the rwsem's wait_lock).

  I have unfortunately been unable to push this through -next before due
  to Ingo Molnar / David Howells / Peter Zijlstra being busy with other
  things.  However, this has gotten some attention from Rik van Riel and
  Davidlohr Bueso who both commented that they felt this was ready for
  v3.10, and Ingo Molnar has said that he was OK with me pushing
  directly to you.  So, here goes :)

  Davidlohr got the following test results from pgbench running on a
  quad-core laptop:

    | db_size | clients  |  tps-vanilla   |   tps-rwsem  |
    +---------+----------+----------------+--------------+
    | 160 MB   |       1 |           5803 |         6906 | + 19.0%
    | 160 MB   |       2 |          13092 |        15931 |
    | 160 MB   |       4 |          29412 |        33021 |
    | 160 MB   |       8 |          32448 |        34626 |
    | 160 MB   |      16 |          32758 |        33098 |
    | 160 MB   |      20 |          26940 |        31343 | + 16.3%
    | 160 MB   |      30 |          25147 |        28961 |
    | 160 MB   |      40 |          25484 |        26902 |
    | 160 MB   |      50 |          24528 |        25760 |
    ------------------------------------------------------
    | 1.6 GB   |       1 |           5733 |         7729 | + 34.8%
    | 1.6 GB   |       2 |           9411 |        19009 | + 101.9%
    | 1.6 GB   |       4 |          31818 |        33185 |
    | 1.6 GB   |       8 |          33700 |        34550 |
    | 1.6 GB   |      16 |          32751 |        33079 |
    | 1.6 GB   |      20 |          30919 |        31494 |
    | 1.6 GB   |      30 |          28540 |        28535 |
    | 1.6 GB   |      40 |          26380 |        27054 |
    | 1.6 GB   |      50 |          25241 |        25591 |
    ------------------------------------------------------
    | 7.6 GB   |       1 |           5779 |         6224 |
    | 7.6 GB   |       2 |          10897 |        13611 | + 24.9%
    | 7.6 GB   |       4 |          32683 |        33108 |
    | 7.6 GB   |       8 |          33968 |        34712 |
    | 7.6 GB   |      16 |          32287 |        32895 |
    | 7.6 GB   |      20 |          27770 |        31689 | + 14.1%
    | 7.6 GB   |      30 |          26739 |        29003 |
    | 7.6 GB   |      40 |          24901 |        26683 |
    | 7.6 GB   |      50 |          17115 |        25925 | + 51.5%
    ------------------------------------------------------

  (Davidlohr also has one additional patch which further improves
  throughput, though I will ask him to send it directly to you as I have
  suggested some minor changes)."

* emailed patches from Michel Lespinasse <walken@google.com>:
  rwsem: no need for explicit signed longs
  x86 rwsem: avoid taking slow path when stealing write lock
  rwsem: do not block readers at head of queue if other readers are active
  rwsem: implement support for write lock stealing on the fastpath
  rwsem: simplify __rwsem_do_wake
  rwsem: skip initial trylock in rwsem_down_write_failed
  rwsem: avoid taking wait_lock in rwsem_down_write_failed
  rwsem: use cmpxchg for trying to steal write lock
  rwsem: more agressive lock stealing in rwsem_down_write_failed
  rwsem: simplify rwsem_down_write_failed
  rwsem: simplify rwsem_down_read_failed
  rwsem: move rwsem_down_failed_common code into rwsem_down_{read,write}_failed
  rwsem: shorter spinlocked section in rwsem_down_failed_common()
  rwsem: make the waiter type an enumeration rather than a bitmask
  • Loading branch information
Linus Torvalds committed May 7, 2013
2 parents 0f47c94 + b5f5418 commit c8de2fa
Show file tree
Hide file tree
Showing 3 changed files with 153 additions and 153 deletions.
28 changes: 21 additions & 7 deletions arch/x86/include/asm/rwsem.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
asm volatile("# beginning down_write\n\t"
LOCK_PREFIX " xadd %1,(%2)\n\t"
/* adds 0xffff0001, returns the old value */
" test %1,%1\n\t"
/* was the count 0 before? */
" test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t"
/* was the active mask 0 before? */
" jz 1f\n"
" call call_rwsem_down_write_failed\n"
"1:\n"
Expand All @@ -126,11 +126,25 @@ static inline void __down_write(struct rw_semaphore *sem)
*/
static inline int __down_write_trylock(struct rw_semaphore *sem)
{
long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
RWSEM_ACTIVE_WRITE_BIAS);
if (ret == RWSEM_UNLOCKED_VALUE)
return 1;
return 0;
long result, tmp;
asm volatile("# beginning __down_write_trylock\n\t"
" mov %0,%1\n\t"
"1:\n\t"
" test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t"
/* was the active mask 0 before? */
" jnz 2f\n\t"
" mov %1,%2\n\t"
" add %3,%2\n\t"
LOCK_PREFIX " cmpxchg %2,%0\n\t"
" jnz 1b\n\t"
"2:\n\t"
" sete %b1\n\t"
" movzbl %b1, %k1\n\t"
"# ending __down_write_trylock\n\t"
: "+m" (sem->count), "=&a" (result), "=&r" (tmp)
: "er" (RWSEM_ACTIVE_WRITE_BIAS)
: "memory", "cc");
return result;
}

/*
Expand Down
38 changes: 16 additions & 22 deletions lib/rwsem-spinlock.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,15 @@
#include <linux/sched.h>
#include <linux/export.h>

enum rwsem_waiter_type {
RWSEM_WAITING_FOR_WRITE,
RWSEM_WAITING_FOR_READ
};

struct rwsem_waiter {
struct list_head list;
struct task_struct *task;
unsigned int flags;
#define RWSEM_WAITING_FOR_READ 0x00000001
#define RWSEM_WAITING_FOR_WRITE 0x00000002
enum rwsem_waiter_type type;
};

int rwsem_is_locked(struct rw_semaphore *sem)
Expand Down Expand Up @@ -67,26 +70,17 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)

waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);

if (!wakewrite) {
if (waiter->flags & RWSEM_WAITING_FOR_WRITE)
goto out;
goto dont_wake_writers;
}

/*
* as we support write lock stealing, we can't set sem->activity
* to -1 here to indicate we get the lock. Instead, we wake it up
* to let it go get it again.
*/
if (waiter->flags & RWSEM_WAITING_FOR_WRITE) {
wake_up_process(waiter->task);
if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
if (wakewrite)
/* Wake up a writer. Note that we do not grant it the
* lock - it will have to acquire it when it runs. */
wake_up_process(waiter->task);
goto out;
}

/* grant an infinite number of read locks to the front of the queue */
dont_wake_writers:
woken = 0;
while (waiter->flags & RWSEM_WAITING_FOR_READ) {
do {
struct list_head *next = waiter->list.next;

list_del(&waiter->list);
Expand All @@ -96,10 +90,10 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
wake_up_process(tsk);
put_task_struct(tsk);
woken++;
if (list_empty(&sem->wait_list))
if (next == &sem->wait_list)
break;
waiter = list_entry(next, struct rwsem_waiter, list);
}
} while (waiter->type != RWSEM_WAITING_FOR_WRITE);

sem->activity += woken;

Expand Down Expand Up @@ -144,7 +138,7 @@ void __sched __down_read(struct rw_semaphore *sem)

/* set up my own style of waitqueue */
waiter.task = tsk;
waiter.flags = RWSEM_WAITING_FOR_READ;
waiter.type = RWSEM_WAITING_FOR_READ;
get_task_struct(tsk);

list_add_tail(&waiter.list, &sem->wait_list);
Expand Down Expand Up @@ -201,7 +195,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
/* set up my own style of waitqueue */
tsk = current;
waiter.task = tsk;
waiter.flags = RWSEM_WAITING_FOR_WRITE;
waiter.type = RWSEM_WAITING_FOR_WRITE;
list_add_tail(&waiter.list, &sem->wait_list);

/* wait for someone to release the lock */
Expand Down
Loading

0 comments on commit c8de2fa

Please sign in to comment.