Skip to content

Commit

Permalink
per-netns ipv4 sysctl_tcp_mem
Browse files Browse the repository at this point in the history
This patch allows each namespace to independently set up
its levels for tcp memory pressure thresholds. This patch
alone does not buy much: we need to make these values
per group of processes somehow. This is achieved in the
patches that follow in this patchset.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
CC: David S. Miller <davem@davemloft.net>
CC: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Glauber Costa authored and David S. Miller committed Dec 13, 2011
1 parent d1a4c0b commit 3dc43e3
Show file tree
Hide file tree
Showing 9 changed files with 57 additions and 22 deletions.
1 change: 1 addition & 0 deletions include/net/netns/ipv4.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ struct netns_ipv4 {
int current_rt_cache_rebuild_count;

unsigned int sysctl_ping_group_range[2];
long sysctl_tcp_mem[3];

atomic_t rt_genid;
atomic_t dev_addr_genid;
Expand Down
1 change: 0 additions & 1 deletion include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,6 @@ extern int sysctl_tcp_fack;
extern int sysctl_tcp_reordering;
extern int sysctl_tcp_ecn;
extern int sysctl_tcp_dsack;
extern long sysctl_tcp_mem[3];
extern int sysctl_tcp_wmem[3];
extern int sysctl_tcp_rmem[3];
extern int sysctl_tcp_app_win;
Expand Down
2 changes: 2 additions & 0 deletions net/ipv4/af_inet.c
Original file line number Diff line number Diff line change
Expand Up @@ -1672,6 +1672,8 @@ static int __init inet_init(void)
ip_static_sysctl_init();
#endif

tcp_prot.sysctl_mem = init_net.ipv4.sysctl_tcp_mem;

/*
* Add all the base protocols.
*/
Expand Down
51 changes: 44 additions & 7 deletions net/ipv4/sysctl_net_ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/nsproxy.h>
#include <linux/swap.h>
#include <net/snmp.h>
#include <net/icmp.h>
#include <net/ip.h>
Expand Down Expand Up @@ -174,6 +175,36 @@ static int proc_allowed_congestion_control(ctl_table *ctl,
return ret;
}

/*
 * Sysctl handler for the per-namespace "tcp_mem" entry.
 *
 * Reads or writes the three sysctl_tcp_mem values of the network
 * namespace of the calling task (current->nsproxy->net_ns).
 *
 * @ctl:    table entry being accessed; only its mode is consulted
 * @write:  non-zero for a write, zero for a read
 * @buffer: user-space buffer with the textual values
 * @lenp:   in/out length of @buffer
 * @ppos:   in/out file position
 *
 * Returns 0 on success, or the error reported by
 * proc_doulongvec_minmax().
 */
static int ipv4_tcp_mem(ctl_table *ctl, int write,
			void __user *buffer, size_t *lenp,
			loff_t *ppos)
{
	int ret;
	struct net *net = current->nsproxy->net_ns;
	/*
	 * Seed the scratch vector with the current values: the parser only
	 * stores as many entries as the user supplied, so a short write
	 * must leave the remaining thresholds unchanged rather than pick up
	 * uninitialized stack contents.
	 */
	unsigned long vec[3] = {
		net->ipv4.sysctl_tcp_mem[0],
		net->ipv4.sysctl_tcp_mem[1],
		net->ipv4.sysctl_tcp_mem[2],
	};
	/*
	 * Always operate on a local table entry so the shared @ctl is never
	 * modified; reads report the snapshot taken above.
	 */
	ctl_table tmp = {
		.data = &vec,
		.maxlen = sizeof(vec),
		.mode = ctl->mode,
	};

	ret = proc_doulongvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (ret || !write)
		return ret;

	/* Write succeeded: publish the (possibly partially updated) values. */
	net->ipv4.sysctl_tcp_mem[0] = vec[0];
	net->ipv4.sysctl_tcp_mem[1] = vec[1];
	net->ipv4.sysctl_tcp_mem[2] = vec[2];

	return 0;
}

static struct ctl_table ipv4_table[] = {
{
.procname = "tcp_timestamps",
Expand Down Expand Up @@ -432,13 +463,6 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_mem",
.data = &sysctl_tcp_mem,
.maxlen = sizeof(sysctl_tcp_mem),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax
},
{
.procname = "tcp_wmem",
.data = &sysctl_tcp_wmem,
Expand Down Expand Up @@ -721,6 +745,12 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = ipv4_ping_group_range,
},
{
.procname = "tcp_mem",
.maxlen = sizeof(init_net.ipv4.sysctl_tcp_mem),
.mode = 0644,
.proc_handler = ipv4_tcp_mem,
},
{ }
};

Expand All @@ -734,6 +764,7 @@ EXPORT_SYMBOL_GPL(net_ipv4_ctl_path);
static __net_init int ipv4_sysctl_init_net(struct net *net)
{
struct ctl_table *table;
unsigned long limit;

table = ipv4_net_table;
if (!net_eq(net, &init_net)) {
Expand Down Expand Up @@ -769,6 +800,12 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)

net->ipv4.sysctl_rt_cache_rebuild_count = 4;

limit = nr_free_buffer_pages() / 8;
limit = max(limit, 128UL);
net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3;
net->ipv4.sysctl_tcp_mem[1] = limit;
net->ipv4.sysctl_tcp_mem[2] = net->ipv4.sysctl_tcp_mem[0] * 2;

net->ipv4.ipv4_hdr = register_net_sysctl_table(net,
net_ipv4_ctl_path, table);
if (net->ipv4.ipv4_hdr == NULL)
Expand Down
11 changes: 2 additions & 9 deletions net/ipv4/tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -282,11 +282,9 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);

long sysctl_tcp_mem[3] __read_mostly;
int sysctl_tcp_wmem[3] __read_mostly;
int sysctl_tcp_rmem[3] __read_mostly;

EXPORT_SYMBOL(sysctl_tcp_mem);
EXPORT_SYMBOL(sysctl_tcp_rmem);
EXPORT_SYMBOL(sysctl_tcp_wmem);

Expand Down Expand Up @@ -3278,14 +3276,9 @@ void __init tcp_init(void)
sysctl_tcp_max_orphans = cnt / 2;
sysctl_max_syn_backlog = max(128, cnt / 256);

limit = nr_free_buffer_pages() / 8;
limit = max(limit, 128UL);
sysctl_tcp_mem[0] = limit / 4 * 3;
sysctl_tcp_mem[1] = limit;
sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;

/* Set per-socket limits to no more than 1/128 the pressure threshold */
limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7);
limit = ((unsigned long)init_net.ipv4.sysctl_tcp_mem[1])
<< (PAGE_SHIFT - 7);
max_share = min(4UL*1024*1024, limit);

sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
Expand Down
1 change: 0 additions & 1 deletion net/ipv4/tcp_ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -2623,7 +2623,6 @@ struct proto tcp_prot = {
.orphan_count = &tcp_orphan_count,
.memory_allocated = &tcp_memory_allocated,
.memory_pressure = &tcp_memory_pressure,
.sysctl_mem = sysctl_tcp_mem,
.sysctl_wmem = sysctl_tcp_wmem,
.sysctl_rmem = sysctl_tcp_rmem,
.max_header = MAX_TCP_HEADER,
Expand Down
9 changes: 6 additions & 3 deletions net/ipv4/tcp_memcontrol.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#include <net/tcp.h>
#include <net/tcp_memcontrol.h>
#include <net/sock.h>
#include <net/ip.h>
#include <linux/nsproxy.h>
#include <linux/memcontrol.h>
#include <linux/module.h>

Expand Down Expand Up @@ -28,16 +30,17 @@ int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss)
struct tcp_memcontrol *tcp;
struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
struct mem_cgroup *parent = parent_mem_cgroup(memcg);
struct net *net = current->nsproxy->net_ns;

cg_proto = tcp_prot.proto_cgroup(memcg);
if (!cg_proto)
return 0;

tcp = tcp_from_cgproto(cg_proto);

tcp->tcp_prot_mem[0] = sysctl_tcp_mem[0];
tcp->tcp_prot_mem[1] = sysctl_tcp_mem[1];
tcp->tcp_prot_mem[2] = sysctl_tcp_mem[2];
tcp->tcp_prot_mem[0] = net->ipv4.sysctl_tcp_mem[0];
tcp->tcp_prot_mem[1] = net->ipv4.sysctl_tcp_mem[1];
tcp->tcp_prot_mem[2] = net->ipv4.sysctl_tcp_mem[2];
tcp->tcp_memory_pressure = 0;

parent_cg = tcp_prot.proto_cgroup(parent);
Expand Down
2 changes: 2 additions & 0 deletions net/ipv6/af_inet6.c
Original file line number Diff line number Diff line change
Expand Up @@ -1116,6 +1116,8 @@ static int __init inet6_init(void)
if (err)
goto static_sysctl_fail;
#endif
tcpv6_prot.sysctl_mem = init_net.ipv4.sysctl_tcp_mem;

/*
* ipngwg API draft makes clear that the correct semantics
* for TCP and UDP is to consider one TCP and UDP instance
Expand Down
1 change: 0 additions & 1 deletion net/ipv6/tcp_ipv6.c
Original file line number Diff line number Diff line change
Expand Up @@ -2215,7 +2215,6 @@ struct proto tcpv6_prot = {
.memory_allocated = &tcp_memory_allocated,
.memory_pressure = &tcp_memory_pressure,
.orphan_count = &tcp_orphan_count,
.sysctl_mem = sysctl_tcp_mem,
.sysctl_wmem = sysctl_tcp_wmem,
.sysctl_rmem = sysctl_tcp_rmem,
.max_header = MAX_TCP_HEADER,
Expand Down

0 comments on commit 3dc43e3

Please sign in to comment.