Skip to content

Commit

Permalink
Merge tag 'time-namespace-v5.11' of git://git.kernel.org/pub/scm/linu…
Browse files Browse the repository at this point in the history
…x/kernel/git/brauner/linux

Pull time namespace updates from Christian Brauner:
 "When time namespaces were introduced we neglected to virtualize the
  'btime' field in /proc/stat. This confuses tasks which are in another
  time namespace with a virtualized boottime which is common in some
  container workloads. This contains Michael's series to fix 'btime'
  which Thomas asked me to take through my tree.

  To fix 'btime' virtualization we simply subtract the offset of the
  time namespace's boottime from btime before printing the stats. Note
  that since the start_boottime of a process is measured from boottime and
  the boottime stamp is now shifted according to the time namespace's
  offset, the offset of the time namespace also needs to be applied
  before the process stats are given to userspace. This prevents
  processes shown by tools such as 'ps' from appearing as time travelers
  in the corresponding time namespace.

  Selftests are included to verify that btime virtualization in
  /proc/stat works as expected"

* tag 'time-namespace-v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux:
  namespace: make timens_on_fork() return nothing
  selftests/timens: added selftest for /proc/stat btime
  fs/proc: apply the time namespace offset to /proc/stat btime
  timens: additional helper functions for boottime offset handling
  • Loading branch information
Linus Torvalds committed Dec 15, 2020
2 parents 0ca2ce8 + 5c62634 commit 6d93a19
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 16 deletions.
6 changes: 4 additions & 2 deletions fs/proc/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/time.h>
#include <linux/time_namespace.h>
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/tty.h>
Expand Down Expand Up @@ -533,8 +534,9 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
priority = task_prio(task);
nice = task_nice(task);

/* convert nsec -> ticks */
start_time = nsec_to_clock_t(task->start_boottime);
/* apply timens offset for boottime and convert nsec -> ticks */
start_time =
nsec_to_clock_t(timens_add_boottime_ns(task->start_boottime));

seq_put_decimal_ull(m, "", pid_nr_ns(pid, ns));
seq_puts(m, " (");
Expand Down
3 changes: 3 additions & 0 deletions fs/proc/stat.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/time_namespace.h>
#include <linux/irqnr.h>
#include <linux/sched/cputime.h>
#include <linux/tick.h>
Expand Down Expand Up @@ -118,6 +119,8 @@ static int show_stat(struct seq_file *p, void *v)
irq = softirq = steal = 0;
guest = guest_nice = 0;
getboottime64(&boottime);
/* shift boot timestamp according to the timens offset */
timens_sub_boottime(&boottime);

for_each_possible_cpu(i) {
struct kernel_cpustat kcpustat;
Expand Down
28 changes: 25 additions & 3 deletions include/linux/time_namespace.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ struct time_namespace *copy_time_ns(unsigned long flags,
struct user_namespace *user_ns,
struct time_namespace *old_ns);
void free_time_ns(struct kref *kref);
int timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk);
void timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk);
struct vdso_data *arch_get_vdso_data(void *vvar_page);

static inline void put_time_ns(struct time_namespace *ns)
Expand Down Expand Up @@ -77,6 +77,20 @@ static inline void timens_add_boottime(struct timespec64 *ts)
*ts = timespec64_add(*ts, ns_offsets->boottime);
}

/*
 * Shift a nanosecond value by the boottime offset of the time namespace
 * that the current task belongs to, and return the shifted value.
 */
static inline u64 timens_add_boottime_ns(u64 nsec)
{
	struct time_namespace *ns = current->nsproxy->time_ns;

	return timespec64_to_ns(&ns->offsets.boottime) + nsec;
}

static inline void timens_sub_boottime(struct timespec64 *ts)
{
struct timens_offsets *ns_offsets = &current->nsproxy->time_ns->offsets;

*ts = timespec64_sub(*ts, ns_offsets->boottime);
}

ktime_t do_timens_ktime_to_host(clockid_t clockid, ktime_t tim,
struct timens_offsets *offsets);

Expand Down Expand Up @@ -122,14 +136,22 @@ struct time_namespace *copy_time_ns(unsigned long flags,
return old_ns;
}

static inline int timens_on_fork(struct nsproxy *nsproxy,
static inline void timens_on_fork(struct nsproxy *nsproxy,
struct task_struct *tsk)
{
return 0;
return;
}

static inline void timens_add_monotonic(struct timespec64 *ts) { }
static inline void timens_add_boottime(struct timespec64 *ts) { }

/*
 * Stub variant: no offset is applied, @nsec is handed back unchanged.
 * NOTE(review): presumably the build without time namespace support - confirm
 * against the surrounding #ifdef, which is not visible here.
 */
static inline u64 timens_add_boottime_ns(u64 nsec) { return nsec; }

/* Stub variant: leaves @ts untouched. */
static inline void timens_sub_boottime(struct timespec64 *ts) { }

static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim)
{
return tim;
Expand Down
7 changes: 1 addition & 6 deletions kernel/nsproxy.c
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,6 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
struct nsproxy *old_ns = tsk->nsproxy;
struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
struct nsproxy *new_ns;
int ret;

if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
CLONE_NEWPID | CLONE_NEWNET |
Expand All @@ -180,11 +179,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
if (IS_ERR(new_ns))
return PTR_ERR(new_ns);

ret = timens_on_fork(new_ns, tsk);
if (ret) {
free_nsproxy(new_ns);
return ret;
}
timens_on_fork(new_ns, tsk);

tsk->nsproxy = new_ns;
return 0;
Expand Down
6 changes: 2 additions & 4 deletions kernel/time/namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -308,22 +308,20 @@ static int timens_install(struct nsset *nsset, struct ns_common *new)
return 0;
}

int timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk)
void timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk)
{
struct ns_common *nsc = &nsproxy->time_ns_for_children->ns;
struct time_namespace *ns = to_time_ns(nsc);

/* create_new_namespaces() already incremented the ref counter */
if (nsproxy->time_ns == nsproxy->time_ns_for_children)
return 0;
return;

get_time_ns(ns);
put_time_ns(nsproxy->time_ns);
nsproxy->time_ns = ns;

timens_commit(tsk, ns);

return 0;
}

static struct user_namespace *timens_owner(struct ns_common *ns)
Expand Down
58 changes: 57 additions & 1 deletion tools/testing/selftests/timens/procfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,33 @@ static int read_proc_uptime(struct timespec *uptime)
return 0;
}

/*
 * Read the "btime" value from /proc/stat.
 *
 * @boottime_sec: filled with the parsed value on success.
 *
 * Returns 0 when a "btime <number>" line was found and parsed. Returns a
 * negative value when the file cannot be opened, when reading fails, or
 * when no such line exists.
 */
static int read_proc_stat_btime(unsigned long long *boottime_sec)
{
	FILE *proc;
	char line_buf[2048];
	int err;

	proc = fopen("/proc/stat", "r");
	if (proc == NULL) {
		pr_perror("Unable to open /proc/stat");
		return -1;
	}

	/* Scan line by line; only the "btime" line matches the format. */
	while (fgets(line_buf, sizeof(line_buf), proc)) {
		if (sscanf(line_buf, "btime %llu", boottime_sec) != 1)
			continue;
		fclose(proc);
		return 0;
	}

	/*
	 * fgets() returns NULL both at EOF and on error. Use the stream's
	 * error indicator to tell them apart instead of a possibly stale
	 * errno, and save errno before later calls can overwrite it.
	 */
	if (ferror(proc)) {
		err = errno;
		pr_perror("fgets");
		fclose(proc);
		return err ? -err : -1;
	}

	/* Clean EOF without ever seeing a btime line. */
	pr_err("failed to parse /proc/stat");
	fclose(proc);
	return -1;
}

static int check_uptime(void)
{
struct timespec uptime_new, uptime_old;
Expand Down Expand Up @@ -123,18 +150,47 @@ static int check_uptime(void)
return 0;
}

/*
 * Compare the /proc/stat "btime" value seen from the parent namespace with
 * the one seen from the child namespace. The child value must equal the
 * parent value minus TEN_DAYS_IN_SEC.
 * NOTE(review): TEN_DAYS_IN_SEC is presumably the boottime offset set up for
 * the child namespace - confirm against init_namespaces().
 *
 * Returns 0 when the values match, 1 on a read failure or a mismatch, and
 * the value of pr_err() when switching namespaces fails.
 */
static int check_stat_btime(void)
{
	unsigned long long host_btime, ns_btime, want_btime;

	/* Sample btime as seen from the parent namespace. */
	if (switch_ns(parent_ns))
		return pr_err("switch_ns(%d)", parent_ns);
	if (read_proc_stat_btime(&host_btime))
		return 1;

	/* Sample it again from inside the child namespace. */
	if (switch_ns(child_ns))
		return pr_err("switch_ns(%d)", child_ns);
	if (read_proc_stat_btime(&ns_btime))
		return 1;

	want_btime = host_btime - TEN_DAYS_IN_SEC;
	if (ns_btime == want_btime) {
		ksft_test_result_pass("Passed for /proc/stat btime\n");
		return 0;
	}

	pr_fail("btime in /proc/stat: old %llu, new %llu [%llu]",
		host_btime, ns_btime, want_btime);
	return 1;
}

int main(int argc, char *argv[])
{
int ret = 0;

nscheck();

ksft_set_plan(1);
ksft_set_plan(2);

if (init_namespaces())
return 1;

ret |= check_uptime();
ret |= check_stat_btime();

if (ret)
ksft_exit_fail();
Expand Down

0 comments on commit 6d93a19

Please sign in to comment.