Skip to content

Commit

Permalink
Merge branch 'for-next/seccomp' of git://git.kernel.org/pub/scm/linux…
Browse files Browse the repository at this point in the history
…/kernel/git/kees/linux.git
  • Loading branch information
Stephen Rothwell committed Aug 1, 2023
2 parents c021380 + 7d5cb68 commit ad4918d
Show file tree
Hide file tree
Showing 18 changed files with 357 additions and 28 deletions.
1 change: 1 addition & 0 deletions include/linux/completion.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ extern bool try_wait_for_completion(struct completion *x);
extern bool completion_done(struct completion *x);

extern void complete(struct completion *);
extern void complete_on_current_cpu(struct completion *x);
extern void complete_all(struct completion *);

#endif
2 changes: 1 addition & 1 deletion include/linux/swait.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ static inline bool swq_has_sleeper(struct swait_queue_head *wq)

extern void swake_up_one(struct swait_queue_head *q);
extern void swake_up_all(struct swait_queue_head *q);
extern void swake_up_locked(struct swait_queue_head *q);
extern void swake_up_locked(struct swait_queue_head *q, int wake_flags);

extern void prepare_to_swait_exclusive(struct swait_queue_head *q, struct swait_queue *wait, int state);
extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state);
Expand Down
3 changes: 3 additions & 0 deletions include/linux/wait.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq
}

int __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned int mode, void *key);
void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
unsigned int mode, void *key, wait_queue_entry_t *bookmark);
Expand Down Expand Up @@ -237,6 +238,8 @@ void __wake_up_pollfree(struct wait_queue_head *wq_head);
#define key_to_poll(m) ((__force __poll_t)(uintptr_t)(void *)(m))
#define wake_up_poll(x, m) \
__wake_up(x, TASK_NORMAL, 1, poll_to_key(m))
#define wake_up_poll_on_current_cpu(x, m) \
__wake_up_on_current_cpu(x, TASK_NORMAL, poll_to_key(m))
#define wake_up_locked_poll(x, m) \
__wake_up_locked_key((x), TASK_NORMAL, poll_to_key(m))
#define wake_up_interruptible_poll(x, m) \
Expand Down
4 changes: 4 additions & 0 deletions include/uapi/linux/seccomp.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ struct seccomp_notif_resp {
__u32 flags;
};

/* valid flags for SECCOMP_IOCTL_NOTIF_SET_FLAGS */
#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)

/* valid flags for seccomp_notif_addfd */
#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */
#define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */
Expand Down Expand Up @@ -150,4 +152,6 @@ struct seccomp_notif_addfd {
#define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \
struct seccomp_notif_addfd)

#define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64)

#endif /* _UAPI_LINUX_SECCOMP_H */
26 changes: 18 additions & 8 deletions kernel/sched/completion.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,23 @@
* Waiting for completion is typically a sync point, but not an exclusion point.
*/

/*
 * Record one more completion on @x and wake a single waiter, handing
 * @wake_flags through to swake_up_locked(). Everything happens with
 * x->wait.lock held and interrupts saved/restored around it.
 */
static void complete_with_flags(struct completion *x, int wake_flags)
{
	unsigned long irq_state;

	raw_spin_lock_irqsave(&x->wait.lock, irq_state);

	/* A counter already at UINT_MAX is left as it is. */
	if (x->done != UINT_MAX)
		x->done += 1;

	swake_up_locked(&x->wait, wake_flags);

	raw_spin_unlock_irqrestore(&x->wait.lock, irq_state);
}

/**
 * complete_on_current_cpu: - signals a single thread waiting on this completion
 * @x:  holds the state of this particular completion
 *
 * Forwards to complete_with_flags() with WF_CURRENT_CPU, i.e. the wakeup is
 * issued with the "prefer to move the wakee to the current CPU" wake flag.
 */
void complete_on_current_cpu(struct completion *x)
{
	/* No "return <expr>;" here: the function and its helper are both void. */
	complete_with_flags(x, WF_CURRENT_CPU);
}

/**
* complete: - signals a single thread waiting on this completion
* @x: holds the state of this particular completion
Expand All @@ -27,14 +44,7 @@
*/
void complete(struct completion *x)
{
unsigned long flags;

raw_spin_lock_irqsave(&x->wait.lock, flags);

if (x->done != UINT_MAX)
x->done++;
swake_up_locked(&x->wait);
raw_spin_unlock_irqrestore(&x->wait.lock, flags);
complete_with_flags(x, 0);
}
EXPORT_SYMBOL(complete);

Expand Down
5 changes: 2 additions & 3 deletions kernel/sched/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -4193,8 +4193,7 @@ bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success)
* Return: %true if @p->state changes (an actual wakeup was done),
* %false otherwise.
*/
static int
try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
{
unsigned long flags;
int cpu, success = 0;
Expand Down Expand Up @@ -7032,7 +7031,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void)
int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags,
void *key)
{
WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~WF_SYNC);
WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~(WF_SYNC|WF_CURRENT_CPU));
return try_to_wake_up(curr->private, mode, wake_flags);
}
EXPORT_SYMBOL(default_wake_function);
Expand Down
4 changes: 4 additions & 0 deletions kernel/sched/fair.c
Original file line number Diff line number Diff line change
Expand Up @@ -7821,6 +7821,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
if (wake_flags & WF_TTWU) {
record_wakee(p);

if ((wake_flags & WF_CURRENT_CPU) &&
cpumask_test_cpu(cpu, p->cpus_ptr))
return cpu;

if (sched_energy_enabled()) {
new_cpu = find_energy_efficient_cpu(p, prev_cpu);
if (new_cpu >= 0)
Expand Down
13 changes: 8 additions & 5 deletions kernel/sched/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -2135,12 +2135,13 @@ static inline int task_on_rq_migrating(struct task_struct *p)
}

/* Wake flags. The first three directly map to some SD flag value */
#define WF_EXEC 0x02 /* Wakeup after exec; maps to SD_BALANCE_EXEC */
#define WF_FORK 0x04 /* Wakeup after fork; maps to SD_BALANCE_FORK */
#define WF_TTWU 0x08 /* Wakeup; maps to SD_BALANCE_WAKE */
#define WF_EXEC 0x02 /* Wakeup after exec; maps to SD_BALANCE_EXEC */
#define WF_FORK 0x04 /* Wakeup after fork; maps to SD_BALANCE_FORK */
#define WF_TTWU 0x08 /* Wakeup; maps to SD_BALANCE_WAKE */

#define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */
#define WF_MIGRATED 0x20 /* Internal use, task got migrated */
#define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */
#define WF_MIGRATED 0x20 /* Internal use, task got migrated */
#define WF_CURRENT_CPU 0x40 /* Prefer to move the wakee to the current CPU. */

#ifdef CONFIG_SMP
static_assert(WF_EXEC == SD_BALANCE_EXEC);
Expand Down Expand Up @@ -3232,6 +3233,8 @@ static inline bool is_per_cpu_kthread(struct task_struct *p)
extern void swake_up_all_locked(struct swait_queue_head *q);
extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);

extern int try_to_wake_up(struct task_struct *tsk, unsigned int state, int wake_flags);

#ifdef CONFIG_PREEMPT_DYNAMIC
extern int preempt_dynamic_mode;
extern int sched_dynamic_mode(const char *str);
Expand Down
8 changes: 4 additions & 4 deletions kernel/sched/swait.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ EXPORT_SYMBOL(__init_swait_queue_head);
* If for some reason it would return 0, that means the previously waiting
* task is already running, so it will observe condition true (or has already).
*/
void swake_up_locked(struct swait_queue_head *q)
void swake_up_locked(struct swait_queue_head *q, int wake_flags)
{
struct swait_queue *curr;

if (list_empty(&q->task_list))
return;

curr = list_first_entry(&q->task_list, typeof(*curr), task_list);
wake_up_process(curr->task);
try_to_wake_up(curr->task, TASK_NORMAL, wake_flags);
list_del_init(&curr->task_list);
}
EXPORT_SYMBOL(swake_up_locked);
Expand All @@ -41,15 +41,15 @@ EXPORT_SYMBOL(swake_up_locked);
void swake_up_all_locked(struct swait_queue_head *q)
{
while (!list_empty(&q->task_list))
swake_up_locked(q);
swake_up_locked(q, 0);
}

void swake_up_one(struct swait_queue_head *q)
{
unsigned long flags;

raw_spin_lock_irqsave(&q->lock, flags);
swake_up_locked(q);
swake_up_locked(q, 0);
raw_spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(swake_up_one);
Expand Down
5 changes: 5 additions & 0 deletions kernel/sched/wait.c
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,11 @@ int __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
}
EXPORT_SYMBOL(__wake_up);

/*
 * Wake-up entry point that forwards to __wake_up_common_lock() with a count
 * of 1 and WF_CURRENT_CPU as the wake flag.
 */
void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned int mode, void *key)
{
	const int nr = 1;
	const int wake_flags = WF_CURRENT_CPU;

	__wake_up_common_lock(wq_head, mode, nr, wake_flags, key);
}

/*
* Same as __wake_up but called with the spinlock in wait_queue_head_t held.
*/
Expand Down
72 changes: 65 additions & 7 deletions kernel/seccomp.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,12 @@ struct seccomp_kaddfd {
* filter->notify_lock.
* @next_id: The id of the next request.
* @notifications: A list of struct seccomp_knotif elements.
* @flags: A set of SECCOMP_USER_NOTIF_FD_* flags.
*/

struct notification {
struct semaphore request;
atomic_t requests;
u32 flags;
u64 next_id;
struct list_head notifications;
};
Expand Down Expand Up @@ -1116,8 +1119,11 @@ static int seccomp_do_user_notification(int this_syscall,
list_add_tail(&n.list, &match->notif->notifications);
INIT_LIST_HEAD(&n.addfd);

up(&match->notif->request);
wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);
atomic_inc(&match->notif->requests);
if (match->notif->flags & SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP)
wake_up_poll_on_current_cpu(&match->wqh, EPOLLIN | EPOLLRDNORM);
else
wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);

/*
* This is where we wait for a reply from userspace.
Expand Down Expand Up @@ -1450,6 +1456,37 @@ find_notification(struct seccomp_filter *filter, u64 id)
return NULL;
}

/*
 * Wake callback installed by recv_wait_event().
 *
 * A wakeup is handed on to autoremove_wake_function() when no key was
 * supplied, or when the poll key carries EPOLLIN or EPOLLERR. Any other
 * keyed event is ignored and 0 is returned.
 */
static int recv_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync,
			      void *key)
{
	if (!key || (key_to_poll(key) & (EPOLLIN | EPOLLERR)))
		return autoremove_wake_function(wait, mode, sync, key);

	/* Event is not interesting for the receiver: skip the wakeup. */
	return 0;
}

/*
 * Wait until one pending request can be claimed from
 * filter->notif->requests.
 *
 * Returns 0 once atomic_dec_if_positive() has consumed a request, or the
 * non-zero value returned by prepare_to_wait_event().
 */
static int recv_wait_event(struct seccomp_filter *filter)
{
DEFINE_WAIT_FUNC(wait, recv_wake_function);
int ret;

/* Fast path: a request is already pending, claim it without queueing. */
if (atomic_dec_if_positive(&filter->notif->requests) >= 0)
return 0;

for (;;) {
ret = prepare_to_wait_event(&filter->wqh, &wait, TASK_INTERRUPTIBLE);

/*
 * Re-check the counter after prepare_to_wait_event() has run; presumably
 * this closes the window in which a request arrives before the task is
 * queued - confirm.
 */
if (atomic_dec_if_positive(&filter->notif->requests) >= 0)
break;

/*
 * NOTE(review): this path returns without calling finish_wait();
 * presumably prepare_to_wait_event() has already dequeued the entry
 * when it returns non-zero - confirm.
 */
if (ret)
return ret;

schedule();
}
finish_wait(&filter->wqh, &wait);
return 0;
}

static long seccomp_notify_recv(struct seccomp_filter *filter,
void __user *buf)
Expand All @@ -1467,7 +1504,7 @@ static long seccomp_notify_recv(struct seccomp_filter *filter,

memset(&unotif, 0, sizeof(unotif));

ret = down_interruptible(&filter->notif->request);
ret = recv_wait_event(filter);
if (ret < 0)
return ret;

Expand Down Expand Up @@ -1515,7 +1552,8 @@ static long seccomp_notify_recv(struct seccomp_filter *filter,
if (should_sleep_killable(filter, knotif))
complete(&knotif->ready);
knotif->state = SECCOMP_NOTIFY_INIT;
up(&filter->notif->request);
atomic_inc(&filter->notif->requests);
wake_up_poll(&filter->wqh, EPOLLIN | EPOLLRDNORM);
}
mutex_unlock(&filter->notify_lock);
}
Expand Down Expand Up @@ -1561,7 +1599,10 @@ static long seccomp_notify_send(struct seccomp_filter *filter,
knotif->error = resp.error;
knotif->val = resp.val;
knotif->flags = resp.flags;
complete(&knotif->ready);
if (filter->notif->flags & SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP)
complete_on_current_cpu(&knotif->ready);
else
complete(&knotif->ready);
out:
mutex_unlock(&filter->notify_lock);
return ret;
Expand Down Expand Up @@ -1591,6 +1632,22 @@ static long seccomp_notify_id_valid(struct seccomp_filter *filter,
return ret;
}

/*
 * Handler for SECCOMP_IOCTL_NOTIF_SET_FLAGS: replace the listener's flag
 * word with @flags.
 *
 * Returns 0 on success, -EINVAL if @flags has any bit set other than
 * SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, or the negative value returned by
 * mutex_lock_interruptible() when notify_lock could not be taken.
 */
static long seccomp_notify_set_flags(struct seccomp_filter *filter,
				     unsigned long flags)
{
	long err;

	if ((flags & ~SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP) != 0)
		return -EINVAL;

	err = mutex_lock_interruptible(&filter->notify_lock);
	if (err < 0)
		return err;

	/* The new flag word is stored under notify_lock. */
	filter->notif->flags = flags;
	mutex_unlock(&filter->notify_lock);

	return 0;
}

static long seccomp_notify_addfd(struct seccomp_filter *filter,
struct seccomp_notif_addfd __user *uaddfd,
unsigned int size)
Expand Down Expand Up @@ -1720,6 +1777,8 @@ static long seccomp_notify_ioctl(struct file *file, unsigned int cmd,
case SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR:
case SECCOMP_IOCTL_NOTIF_ID_VALID:
return seccomp_notify_id_valid(filter, buf);
case SECCOMP_IOCTL_NOTIF_SET_FLAGS:
return seccomp_notify_set_flags(filter, arg);
}

/* Extensible Argument ioctls */
Expand Down Expand Up @@ -1777,7 +1836,6 @@ static struct file *init_listener(struct seccomp_filter *filter)
if (!filter->notif)
goto out;

sema_init(&filter->notif->request, 0);
filter->notif->next_id = get_random_u64();
INIT_LIST_HEAD(&filter->notif->notifications);

Expand Down
3 changes: 3 additions & 0 deletions tools/arch/x86/include/uapi/asm/unistd_32.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,6 @@
#ifndef __NR_setns
#define __NR_setns 346
#endif
/* Fallback: define __NR_seccomp only when the headers did not provide it. */
#ifndef __NR_seccomp
#define __NR_seccomp 354
#endif
3 changes: 3 additions & 0 deletions tools/arch/x86/include/uapi/asm/unistd_64.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,6 @@
#ifndef __NR_getcpu
#define __NR_getcpu 309
#endif
/* Fallback: define __NR_seccomp only when the headers did not provide it. */
#ifndef __NR_seccomp
#define __NR_seccomp 317
#endif
1 change: 1 addition & 0 deletions tools/perf/bench/Build
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
perf-y += sched-messaging.o
perf-y += sched-pipe.o
perf-y += sched-seccomp-notify.o
perf-y += syscall.o
perf-y += mem-functions.o
perf-y += futex-hash.o
Expand Down
1 change: 1 addition & 0 deletions tools/perf/bench/bench.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ extern struct timeval bench__start, bench__end, bench__runtime;
int bench_numa(int argc, const char **argv);
int bench_sched_messaging(int argc, const char **argv);
int bench_sched_pipe(int argc, const char **argv);
int bench_sched_seccomp_notify(int argc, const char **argv);
int bench_syscall_basic(int argc, const char **argv);
int bench_syscall_getpgid(int argc, const char **argv);
int bench_syscall_fork(int argc, const char **argv);
Expand Down
Loading

0 comments on commit ad4918d

Please sign in to comment.