Skip to content

Commit e3f2dde

Browse files
Ingo Molnar authored and Linus Torvalds committed
[PATCH] pi-futex: robust-futex exit
Fix robust PI-futexes to be properly unlocked on unexpected exit.

For this to work the kernel has to know whether a futex is a PI or a non-PI one, because the semantics are different. Since the space in relevant glibc data structures is extremely scarce, the best solution is to encode the 'PI' information in bit 0 of the robust list pointer. Existing (non-PI) glibc robust futexes have this bit always zero, so the ABI is kept. New glibc with PI-robust-futexes will set this bit.

Further fixes from Thomas Gleixner <[email protected]>

Signed-off-by: Ingo Molnar <[email protected]>
Signed-off-by: Ulrich Drepper <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent 627371d commit e3f2dde

File tree

3 files changed

+89
-39
lines changed

3 files changed

+89
-39
lines changed

include/linux/futex.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,8 @@ struct robust_list_head {
9696
long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout,
9797
u32 __user *uaddr2, u32 val2, u32 val3);
9898

99-
extern int handle_futex_death(u32 __user *uaddr, struct task_struct *curr);
99+
extern int
100+
handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi);
100101

101102
#ifdef CONFIG_FUTEX
102103
extern void exit_robust_list(struct task_struct *curr);

kernel/futex.c

Lines changed: 62 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -495,10 +495,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
495495
}
496496

497497
/*
498-
* We are the first waiter - try to look up the real owner and
499-
* attach the new pi_state to it:
498+
* We are the first waiter - try to look up the real owner and attach
499+
* the new pi_state to it, but bail out when the owner died bit is set
500+
* and TID = 0:
500501
*/
501502
pid = uval & FUTEX_TID_MASK;
503+
if (!pid && (uval & FUTEX_OWNER_DIED))
504+
return -ESRCH;
502505
p = futex_find_get_task(pid);
503506
if (!p)
504507
return -ESRCH;
@@ -579,16 +582,17 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
579582
* kept enabled while there is PI state around. We must also
580583
* preserve the owner died bit.)
581584
*/
582-
newval = (uval & FUTEX_OWNER_DIED) | FUTEX_WAITERS | new_owner->pid;
583-
584-
inc_preempt_count();
585-
curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
586-
dec_preempt_count();
585+
if (!(uval & FUTEX_OWNER_DIED)) {
586+
newval = FUTEX_WAITERS | new_owner->pid;
587587

588-
if (curval == -EFAULT)
589-
return -EFAULT;
590-
if (curval != uval)
591-
return -EINVAL;
588+
inc_preempt_count();
589+
curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
590+
dec_preempt_count();
591+
if (curval == -EFAULT)
592+
return -EFAULT;
593+
if (curval != uval)
594+
return -EINVAL;
595+
}
592596

593597
spin_lock_irq(&pi_state->owner->pi_lock);
594598
WARN_ON(list_empty(&pi_state->list));
@@ -1443,9 +1447,11 @@ static int futex_unlock_pi(u32 __user *uaddr)
14431447
* again. If it succeeds then we can return without waking
14441448
* anyone else up:
14451449
*/
1446-
inc_preempt_count();
1447-
uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
1448-
dec_preempt_count();
1450+
if (!(uval & FUTEX_OWNER_DIED)) {
1451+
inc_preempt_count();
1452+
uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
1453+
dec_preempt_count();
1454+
}
14491455

14501456
if (unlikely(uval == -EFAULT))
14511457
goto pi_faulted;
@@ -1478,9 +1484,11 @@ static int futex_unlock_pi(u32 __user *uaddr)
14781484
/*
14791485
* No waiters - kernel unlocks the futex:
14801486
*/
1481-
ret = unlock_futex_pi(uaddr, uval);
1482-
if (ret == -EFAULT)
1483-
goto pi_faulted;
1487+
if (!(uval & FUTEX_OWNER_DIED)) {
1488+
ret = unlock_futex_pi(uaddr, uval);
1489+
if (ret == -EFAULT)
1490+
goto pi_faulted;
1491+
}
14841492

14851493
out_unlock:
14861494
spin_unlock(&hb->lock);
@@ -1699,9 +1707,9 @@ sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr,
16991707
* Process a futex-list entry, check whether it's owned by the
17001708
* dying task, and do notification if so:
17011709
*/
1702-
int handle_futex_death(u32 __user *uaddr, struct task_struct *curr)
1710+
int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
17031711
{
1704-
u32 uval, nval;
1712+
u32 uval, nval, mval;
17051713

17061714
retry:
17071715
if (get_user(uval, uaddr))
@@ -1718,20 +1726,44 @@ int handle_futex_death(u32 __user *uaddr, struct task_struct *curr)
17181726
* thread-death.) The rest of the cleanup is done in
17191727
* userspace.
17201728
*/
1721-
nval = futex_atomic_cmpxchg_inatomic(uaddr, uval,
1722-
uval | FUTEX_OWNER_DIED);
1729+
mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
1730+
nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
1731+
17231732
if (nval == -EFAULT)
17241733
return -1;
17251734

17261735
if (nval != uval)
17271736
goto retry;
17281737

1729-
if (uval & FUTEX_WAITERS)
1730-
futex_wake(uaddr, 1);
1738+
/*
1739+
* Wake robust non-PI futexes here. The wakeup of
1740+
* PI futexes happens in exit_pi_state():
1741+
*/
1742+
if (!pi) {
1743+
if (uval & FUTEX_WAITERS)
1744+
futex_wake(uaddr, 1);
1745+
}
17311746
}
17321747
return 0;
17331748
}
17341749

1750+
/*
1751+
* Fetch a robust-list pointer. Bit 0 signals PI futexes:
*
* Reads one unsigned long from the user-space location @head with
* get_user(). On success, *entry receives that value with bit 0
* cleared, *pi receives bit 0 itself (non-zero means the entry is
* flagged as a PI futex), and 0 is returned. If the read faults,
* -EFAULT is returned and neither *entry nor *pi is written.
1752+
*/
1753+
static inline int fetch_robust_entry(struct robust_list __user **entry,
1754+
struct robust_list __user **head, int *pi)
1755+
{
1756+
unsigned long uentry;
1757+
1758+
if (get_user(uentry, (unsigned long *)head))
1759+
return -EFAULT;
1760+
1761+
*entry = (void *)(uentry & ~1UL);
1762+
*pi = uentry & 1;
1763+
1764+
return 0;
1765+
}
1766+
17351767
/*
17361768
* Walk curr->robust_list (very carefully, it's a userspace list!)
17371769
* and mark any locks found there dead, and notify any waiters.
@@ -1742,14 +1774,14 @@ void exit_robust_list(struct task_struct *curr)
17421774
{
17431775
struct robust_list_head __user *head = curr->robust_list;
17441776
struct robust_list __user *entry, *pending;
1745-
unsigned int limit = ROBUST_LIST_LIMIT;
1777+
unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
17461778
unsigned long futex_offset;
17471779

17481780
/*
17491781
* Fetch the list head (which was registered earlier, via
17501782
* sys_set_robust_list()):
17511783
*/
1752-
if (get_user(entry, &head->list.next))
1784+
if (fetch_robust_entry(&entry, &head->list.next, &pi))
17531785
return;
17541786
/*
17551787
* Fetch the relative futex offset:
@@ -1760,10 +1792,11 @@ void exit_robust_list(struct task_struct *curr)
17601792
* Fetch any possibly pending lock-add first, and handle it
17611793
* if it exists:
17621794
*/
1763-
if (get_user(pending, &head->list_op_pending))
1795+
if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
17641796
return;
1797+
17651798
if (pending)
1766-
handle_futex_death((void *)pending + futex_offset, curr);
1799+
handle_futex_death((void *)pending + futex_offset, curr, pip);
17671800

17681801
while (entry != &head->list) {
17691802
/*
@@ -1772,12 +1805,12 @@ void exit_robust_list(struct task_struct *curr)
17721805
*/
17731806
if (entry != pending)
17741807
if (handle_futex_death((void *)entry + futex_offset,
1775-
curr))
1808+
curr, pi))
17761809
return;
17771810
/*
17781811
* Fetch the next entry in the list:
17791812
*/
1780-
if (get_user(entry, &entry->next))
1813+
if (fetch_robust_entry(&entry, &entry->next, &pi))
17811814
return;
17821815
/*
17831816
* Avoid excessively long or circular lists:

kernel/futex_compat.c

Lines changed: 25 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,23 @@
1212

1313
#include <asm/uaccess.h>
1414

15+
16+
/*
17+
* Fetch a robust-list pointer. Bit 0 signals PI futexes:
*
* Compat variant: reads one compat_uptr_t from the user-space location
* @head into *uentry with get_user(). On success, *entry receives
* compat_ptr() of that value with bit 0 cleared, *pi receives bit 0
* itself (non-zero means the entry is flagged as a PI futex), and 0 is
* returned. If the read faults, -EFAULT is returned and neither *entry
* nor *pi is written.
*
* *uentry is left holding the raw fetched value, flag bit included.
* NOTE(review): a caller that converts *uentry back with compat_ptr()
* gets the flag bit as part of the address - confirm that is intended
* wherever the raw value is compared against a list address.
18+
*/
19+
static inline int
20+
fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
21+
compat_uptr_t *head, int *pi)
22+
{
23+
if (get_user(*uentry, head))
24+
return -EFAULT;
25+
26+
*entry = compat_ptr((*uentry) & ~1);
27+
*pi = (unsigned int)(*uentry) & 1;
28+
29+
return 0;
30+
}
31+
1532
/*
1633
* Walk curr->robust_list (very carefully, it's a userspace list!)
1734
* and mark any locks found there dead, and notify any waiters.
@@ -22,17 +39,16 @@ void compat_exit_robust_list(struct task_struct *curr)
2239
{
2340
struct compat_robust_list_head __user *head = curr->compat_robust_list;
2441
struct robust_list __user *entry, *pending;
42+
unsigned int limit = ROBUST_LIST_LIMIT, pi;
2543
compat_uptr_t uentry, upending;
26-
unsigned int limit = ROBUST_LIST_LIMIT;
2744
compat_long_t futex_offset;
2845

2946
/*
3047
* Fetch the list head (which was registered earlier, via
3148
* sys_set_robust_list()):
3249
*/
33-
if (get_user(uentry, &head->list.next))
50+
if (fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
3451
return;
35-
entry = compat_ptr(uentry);
3652
/*
3753
* Fetch the relative futex offset:
3854
*/
@@ -42,11 +58,11 @@ void compat_exit_robust_list(struct task_struct *curr)
4258
* Fetch any possibly pending lock-add first, and handle it
4359
* if it exists:
4460
*/
45-
if (get_user(upending, &head->list_op_pending))
61+
if (fetch_robust_entry(&upending, &pending,
62+
&head->list_op_pending, &pi))
4663
return;
47-
pending = compat_ptr(upending);
4864
if (upending)
49-
handle_futex_death((void *)pending + futex_offset, curr);
65+
handle_futex_death((void *)pending + futex_offset, curr, pi);
5066

5167
while (compat_ptr(uentry) != &head->list) {
5268
/*
@@ -55,15 +71,15 @@ void compat_exit_robust_list(struct task_struct *curr)
5571
*/
5672
if (entry != pending)
5773
if (handle_futex_death((void *)entry + futex_offset,
58-
curr))
74+
curr, pi))
5975
return;
6076

6177
/*
6278
* Fetch the next entry in the list:
6379
*/
64-
if (get_user(uentry, (compat_uptr_t *)&entry->next))
80+
if (fetch_robust_entry(&uentry, &entry,
81+
(compat_uptr_t *)&entry->next, &pi))
6582
return;
66-
entry = compat_ptr(uentry);
6783
/*
6884
* Avoid excessively long or circular lists:
6985
*/

0 commit comments

Comments
 (0)