Merge tag 'mm-hotfixes-stable-2024-11-09-22-40' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc fixes from Andrew Morton:
 "20 hotfixes, 14 of which are cc:stable.

  Three affect DAMON. Lorenzo's five-patch series to address the
  mmap_region error handling is here also.

  Apart from that, various singletons"

* tag 'mm-hotfixes-stable-2024-11-09-22-40' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
  mailmap: add entry for Thorsten Blum
  ocfs2: remove entry once instead of null-ptr-dereference in ocfs2_xa_remove()
  signal: restore the override_rlimit logic
  fs/proc: fix compile warning about variable 'vmcore_mmap_ops'
  ucounts: fix counter leak in inc_rlimit_get_ucounts()
  selftests: hugetlb_dio: check for initial conditions to skip in the start
  mm: fix docs for the kernel parameter ``thp_anon=``
  mm/damon/core: avoid overflow in damon_feed_loop_next_input()
  mm/damon/core: handle zero schemes apply interval
  mm/damon/core: handle zero {aggregation,ops_update} intervals
  mm/mlock: set the correct prev on failure
  objpool: fix to make percpu slot allocation more robust
  mm/page_alloc: keep track of free highatomic
  mm: resolve faulty mmap_region() error path behaviour
  mm: refactor arch_calc_vm_flag_bits() and arm64 MTE handling
  mm: refactor map_deny_write_exec()
  mm: unconditionally close VMAs on error
  mm: avoid unsafe VMA hook invocation when error arises on mmap hook
  mm/thp: fix deferred split unqueue naming and locking
  mm/thp: fix deferred split queue not partially_mapped
Linus Torvalds committed Nov 10, 2024
2 parents a558cc3 + c289f4d commit 28e4319
Showing 30 changed files with 329 additions and 172 deletions.
1 change: 1 addition & 0 deletions .mailmap
@@ -665,6 +665,7 @@ Tomeu Vizoso <tomeu@tomeuvizoso.net> <tomeu.vizoso@collabora.com>
Thomas Graf <tgraf@suug.ch>
Thomas Körper <socketcan@esd.eu> <thomas.koerper@esd.eu>
Thomas Pedersen <twp@codeaurora.org>
Thorsten Blum <thorsten.blum@linux.dev> <thorsten.blum@toblux.com>
Tiezhu Yang <yangtiezhu@loongson.cn> <kernelpatch@126.com>
Tingwei Zhang <quic_tingwei@quicinc.com> <tingwei@codeaurora.org>
Tirupathi Reddy <quic_tirupath@quicinc.com> <tirupath@codeaurora.org>
2 changes: 1 addition & 1 deletion Documentation/admin-guide/kernel-parameters.txt
@@ -6688,7 +6688,7 @@
0: no polling (default)

thp_anon= [KNL]
Format: <size>,<size>[KMG]:<state>;<size>-<size>[KMG]:<state>
Format: <size>[KMG],<size>[KMG]:<state>;<size>[KMG]-<size>[KMG]:<state>
state is one of "always", "madvise", "never" or "inherit".
Control the default behavior of the system with respect
to anonymous transparent hugepages.
2 changes: 1 addition & 1 deletion Documentation/admin-guide/mm/transhuge.rst
@@ -303,7 +303,7 @@ control by passing the parameter ``transparent_hugepage=always`` or
kernel command line.

Alternatively, each supported anonymous THP size can be controlled by
passing ``thp_anon=<size>,<size>[KMG]:<state>;<size>-<size>[KMG]:<state>``,
passing ``thp_anon=<size>[KMG],<size>[KMG]:<state>;<size>[KMG]-<size>[KMG]:<state>``,
where ``<size>`` is the THP size (must be a power of 2 of PAGE_SIZE and
supported anonymous THP) and ``<state>`` is one of ``always``, ``madvise``,
``never`` or ``inherit``.
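
For illustration only (not part of this diff), a boot command line written against the corrected format could mix comma-separated lists and ranges, e.g.

    thp_anon=16K-64K:always;128K,512K:inherit;256K:madvise;1M-2M:never

which would set the 16K through 64K sizes to ``always``, 128K and 512K to ``inherit``, 256K to ``madvise``, and 1M through 2M to ``never``, assuming those anonymous THP sizes are supported by the running kernel.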
10 changes: 7 additions & 3 deletions arch/arm64/include/asm/mman.h
@@ -6,6 +6,8 @@

#ifndef BUILD_VDSO
#include <linux/compiler.h>
#include <linux/fs.h>
#include <linux/shmem_fs.h>
#include <linux/types.h>

static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
@@ -31,19 +33,21 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
}
#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)

static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags)
static inline unsigned long arch_calc_vm_flag_bits(struct file *file,
unsigned long flags)
{
/*
* Only allow MTE on anonymous mappings as these are guaranteed to be
* backed by tags-capable memory. The vm_flags may be overridden by a
* filesystem supporting MTE (RAM-based).
*/
if (system_supports_mte() && (flags & MAP_ANONYMOUS))
if (system_supports_mte() &&
((flags & MAP_ANONYMOUS) || shmem_file(file)))
return VM_MTE_ALLOWED;

return 0;
}
#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags)
#define arch_calc_vm_flag_bits(file, flags) arch_calc_vm_flag_bits(file, flags)

static inline bool arch_validate_prot(unsigned long prot,
unsigned long addr __always_unused)
5 changes: 3 additions & 2 deletions arch/parisc/include/asm/mman.h
@@ -2,6 +2,7 @@
#ifndef __ASM_MMAN_H__
#define __ASM_MMAN_H__

#include <linux/fs.h>
#include <uapi/asm/mman.h>

/* PARISC cannot allow mdwe as it needs writable stacks */
@@ -11,7 +12,7 @@ static inline bool arch_memory_deny_write_exec_supported(void)
}
#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported

static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags)
static inline unsigned long arch_calc_vm_flag_bits(struct file *file, unsigned long flags)
{
/*
* The stack on parisc grows upwards, so if userspace requests memory
@@ -23,6 +24,6 @@ static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags)

return 0;
}
#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags)
#define arch_calc_vm_flag_bits(file, flags) arch_calc_vm_flag_bits(file, flags)

#endif /* __ASM_MMAN_H__ */
3 changes: 1 addition & 2 deletions fs/ocfs2/xattr.c
@@ -2036,8 +2036,7 @@ static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc,
rc = 0;
ocfs2_xa_cleanup_value_truncate(loc, "removing",
orig_clusters);
if (rc)
goto out;
goto out;
}
}

9 changes: 5 additions & 4 deletions fs/proc/vmcore.c
@@ -457,10 +457,6 @@ static vm_fault_t mmap_vmcore_fault(struct vm_fault *vmf)
#endif
}

static const struct vm_operations_struct vmcore_mmap_ops = {
.fault = mmap_vmcore_fault,
};

/**
* vmcore_alloc_buf - allocate buffer in vmalloc memory
* @size: size of buffer
@@ -488,6 +484,11 @@ static inline char *vmcore_alloc_buf(size_t size)
* virtually contiguous user-space in ELF layout.
*/
#ifdef CONFIG_MMU

static const struct vm_operations_struct vmcore_mmap_ops = {
.fault = mmap_vmcore_fault,
};

/*
* remap_oldmem_pfn_checked - do remap_oldmem_pfn_range replacing all pages
* reported as not being ram with the zero page.
28 changes: 22 additions & 6 deletions include/linux/mman.h
@@ -2,6 +2,7 @@
#ifndef _LINUX_MMAN_H
#define _LINUX_MMAN_H

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/percpu_counter.h>

@@ -94,7 +95,7 @@ static inline void vm_unacct_memory(long pages)
#endif

#ifndef arch_calc_vm_flag_bits
#define arch_calc_vm_flag_bits(flags) 0
#define arch_calc_vm_flag_bits(file, flags) 0
#endif

#ifndef arch_validate_prot
@@ -151,13 +152,13 @@ calc_vm_prot_bits(unsigned long prot, unsigned long pkey)
* Combine the mmap "flags" argument into "vm_flags" used internally.
*/
static inline unsigned long
calc_vm_flag_bits(unsigned long flags)
calc_vm_flag_bits(struct file *file, unsigned long flags)
{
return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) |
_calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) |
_calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) |
_calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) |
arch_calc_vm_flag_bits(flags);
arch_calc_vm_flag_bits(file, flags);
}

unsigned long vm_commit_limit(void);
@@ -188,16 +189,31 @@ static inline bool arch_memory_deny_write_exec_supported(void)
*
* d) mmap(PROT_READ | PROT_EXEC)
* mmap(PROT_READ | PROT_EXEC | PROT_BTI)
*
* This is only applicable if the user has set the Memory-Deny-Write-Execute
* (MDWE) protection mask for the current process.
*
* @old specifies the VMA flags the VMA originally possessed, and @new the ones
* we propose to set.
*
* Return: false if proposed change is OK, true if not ok and should be denied.
*/
static inline bool map_deny_write_exec(struct vm_area_struct *vma, unsigned long vm_flags)
static inline bool map_deny_write_exec(unsigned long old, unsigned long new)
{
/* If MDWE is disabled, we have nothing to deny. */
if (!test_bit(MMF_HAS_MDWE, &current->mm->flags))
return false;

if ((vm_flags & VM_EXEC) && (vm_flags & VM_WRITE))
/* If the new VMA is not executable, we have nothing to deny. */
if (!(new & VM_EXEC))
return false;

/* Under MDWE we do not accept newly writably executable VMAs... */
if (new & VM_WRITE)
return true;

if (!(vma->vm_flags & VM_EXEC) && (vm_flags & VM_EXEC))
/* ...nor previously non-executable VMAs becoming executable. */
if (!(old & VM_EXEC))
return true;

return false;
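
As a quick illustration of the refactored check (a sketch based on the hunk above, not code from the commit), with MDWE enabled for the current process the helper would resolve like this:

/* illustration only -- assumes MMF_HAS_MDWE is set for current */
map_deny_write_exec(VM_READ, VM_READ);                      /* false: new mapping is not executable */
map_deny_write_exec(VM_READ | VM_EXEC, VM_WRITE | VM_EXEC); /* true: would become writable and executable */
map_deny_write_exec(VM_READ, VM_READ | VM_EXEC);            /* true: previously non-executable, would gain VM_EXEC */
map_deny_write_exec(VM_READ | VM_EXEC, VM_READ | VM_EXEC);  /* false: keeps VM_EXEC it already had, never writable */

In short, under MDWE a mapping may retain VM_EXEC it already had, but it can neither gain VM_EXEC nor end up both writable and executable.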
1 change: 1 addition & 0 deletions include/linux/mmzone.h
@@ -823,6 +823,7 @@ struct zone {
unsigned long watermark_boost;

unsigned long nr_reserved_highatomic;
unsigned long nr_free_highatomic;

/*
* We don't know if the memory that we're going to allocate will be
3 changes: 2 additions & 1 deletion include/linux/user_namespace.h
@@ -141,7 +141,8 @@ static inline long get_rlimit_value(struct ucounts *ucounts, enum rlimit_type ty

long inc_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v);
bool dec_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v);
long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type);
long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type,
bool override_rlimit);
void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum rlimit_type type);
bool is_rlimit_overlimit(struct ucounts *ucounts, enum rlimit_type type, unsigned long max);

3 changes: 2 additions & 1 deletion kernel/signal.c
@@ -419,7 +419,8 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags,
*/
rcu_read_lock();
ucounts = task_ucounts(t);
sigpending = inc_rlimit_get_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING);
sigpending = inc_rlimit_get_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING,
override_rlimit);
rcu_read_unlock();
if (!sigpending)
return NULL;
9 changes: 5 additions & 4 deletions kernel/ucount.c
@@ -307,7 +307,8 @@ void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum rlimit_type type)
do_dec_rlimit_put_ucounts(ucounts, NULL, type);
}

long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type)
long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type,
bool override_rlimit)
{
/* Caller must hold a reference to ucounts */
struct ucounts *iter;
@@ -317,10 +318,11 @@ long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type)
for (iter = ucounts; iter; iter = iter->ns->ucounts) {
long new = atomic_long_add_return(1, &iter->rlimit[type]);
if (new < 0 || new > max)
goto unwind;
goto dec_unwind;
if (iter == ucounts)
ret = new;
max = get_userns_rlimit_max(iter->ns, type);
if (!override_rlimit)
max = get_userns_rlimit_max(iter->ns, type);
/*
* Grab an extra ucount reference for the caller when
* the rlimit count was previously 0.
@@ -334,7 +336,6 @@ long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type)
dec_unwind:
dec = atomic_long_sub_return(1, &iter->rlimit[type]);
WARN_ON_ONCE(dec < 0);
unwind:
do_dec_rlimit_put_ucounts(ucounts, iter, type);
return 0;
}
18 changes: 12 additions & 6 deletions lib/objpool.c
@@ -74,15 +74,21 @@ objpool_init_percpu_slots(struct objpool_head *pool, int nr_objs,
* warm caches and TLB hits. in default vmalloc is used to
* reduce the pressure of kernel slab system. as we know,
* mimimal size of vmalloc is one page since vmalloc would
* always align the requested size to page size
* always align the requested size to page size.
* but if vmalloc fails or it is not available (e.g. GFP_ATOMIC)
* allocate percpu slot with kmalloc.
*/
if ((pool->gfp & GFP_ATOMIC) == GFP_ATOMIC)
slot = kmalloc_node(size, pool->gfp, cpu_to_node(i));
else
slot = NULL;

if ((pool->gfp & (GFP_ATOMIC | GFP_KERNEL)) != GFP_ATOMIC)
slot = __vmalloc_node(size, sizeof(void *), pool->gfp,
cpu_to_node(i), __builtin_return_address(0));
if (!slot)
return -ENOMEM;

if (!slot) {
slot = kmalloc_node(size, pool->gfp, cpu_to_node(i));
if (!slot)
return -ENOMEM;
}
memset(slot, 0, size);
pool->cpu_slots[i] = slot;

42 changes: 28 additions & 14 deletions mm/damon/core.c
@@ -1412,7 +1412,7 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
damon_for_each_scheme(s, c) {
struct damos_quota *quota = &s->quota;

if (c->passed_sample_intervals != s->next_apply_sis)
if (c->passed_sample_intervals < s->next_apply_sis)
continue;

if (!s->wmarks.activated)
@@ -1456,17 +1456,31 @@ static unsigned long damon_feed_loop_next_input(unsigned long last_input,
unsigned long score)
{
const unsigned long goal = 10000;
unsigned long score_goal_diff = max(goal, score) - min(goal, score);
unsigned long score_goal_diff_bp = score_goal_diff * 10000 / goal;
unsigned long compensation = last_input * score_goal_diff_bp / 10000;
/* Set minimum input as 10000 to avoid compensation be zero */
const unsigned long min_input = 10000;
unsigned long score_goal_diff, compensation;
bool over_achieving = score > goal;

if (goal > score)
if (score == goal)
return last_input;
if (score >= goal * 2)
return min_input;

if (over_achieving)
score_goal_diff = score - goal;
else
score_goal_diff = goal - score;

if (last_input < ULONG_MAX / score_goal_diff)
compensation = last_input * score_goal_diff / goal;
else
compensation = last_input / goal * score_goal_diff;

if (over_achieving)
return max(last_input - compensation, min_input);
if (last_input < ULONG_MAX - compensation)
return last_input + compensation;
if (last_input > compensation + min_input)
return last_input - compensation;
return min_input;
return ULONG_MAX;
}

#ifdef CONFIG_PSI
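
To make the new compensation path concrete (an illustrative calculation, not part of the patch): with last_input = 1000000 and score = 5000 against the fixed goal of 10000, the loop is under-achieving, so score_goal_diff = 5000; last_input is far below ULONG_MAX / 5000, so the precise branch computes compensation = 1000000 * 5000 / 10000 = 500000 and the next input becomes 1500000. Only when last_input is large enough that the multiplication could overflow does the code divide by the goal first, trading a little precision for overflow safety.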
@@ -1622,7 +1636,7 @@ static void kdamond_apply_schemes(struct damon_ctx *c)
bool has_schemes_to_apply = false;

damon_for_each_scheme(s, c) {
if (c->passed_sample_intervals != s->next_apply_sis)
if (c->passed_sample_intervals < s->next_apply_sis)
continue;

if (!s->wmarks.activated)
@@ -1642,9 +1656,9 @@ static void kdamond_apply_schemes(struct damon_ctx *c)
}

damon_for_each_scheme(s, c) {
if (c->passed_sample_intervals != s->next_apply_sis)
if (c->passed_sample_intervals < s->next_apply_sis)
continue;
s->next_apply_sis +=
s->next_apply_sis = c->passed_sample_intervals +
(s->apply_interval_us ? s->apply_interval_us :
c->attrs.aggr_interval) / sample_interval;
}
@@ -2000,7 +2014,7 @@ static int kdamond_fn(void *data)
if (ctx->ops.check_accesses)
max_nr_accesses = ctx->ops.check_accesses(ctx);

if (ctx->passed_sample_intervals == next_aggregation_sis) {
if (ctx->passed_sample_intervals >= next_aggregation_sis) {
kdamond_merge_regions(ctx,
max_nr_accesses / 10,
sz_limit);
@@ -2018,7 +2032,7 @@ static int kdamond_fn(void *data)

sample_interval = ctx->attrs.sample_interval ?
ctx->attrs.sample_interval : 1;
if (ctx->passed_sample_intervals == next_aggregation_sis) {
if (ctx->passed_sample_intervals >= next_aggregation_sis) {
ctx->next_aggregation_sis = next_aggregation_sis +
ctx->attrs.aggr_interval / sample_interval;

@@ -2028,7 +2042,7 @@ static int kdamond_fn(void *data)
ctx->ops.reset_aggregated(ctx);
}

if (ctx->passed_sample_intervals == next_ops_update_sis) {
if (ctx->passed_sample_intervals >= next_ops_update_sis) {
ctx->next_ops_update_sis = next_ops_update_sis +
ctx->attrs.ops_update_interval /
sample_interval;
(diffs for the remaining 16 changed files are not shown)