Skip to content

Commit

Permalink
Merge branch 'net-ipv4-fixes-for-PMTU-when-link-MTU-changes'
Browse files Browse the repository at this point in the history
Sabrina Dubroca says:

====================
net: ipv4: fixes for PMTU when link MTU changes

The first patch adapts the changes that commit e9fa149 ("ipv6:
Reflect MTU changes on PMTU of exceptions for MTU-less routes") did in
IPv6 to IPv4: lower PMTU when the first hop's MTU drops below it, and
raise PMTU when the first hop was limiting PMTU discovery and its MTU
is increased.

The second patch fixes bugs introduced in commit d52e5a7 ("ipv4:
lock mtu in fnhe when received PMTU < net.ipv4.route.min_pmtu") that
only appear once the first patch is applied.

Selftests for these cases were introduced in net-next commit
e44e428 ("selftests: pmtu: add basic IPv4 and IPv6 PMTU tests")

v2: add cover letter, and fix a few small things in patch 1
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Oct 11, 2018
2 parents 7abab7b + 28d35bc commit 28b6bfe
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 9 deletions.
7 changes: 7 additions & 0 deletions include/linux/netdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -2458,6 +2458,13 @@ struct netdev_notifier_info {
struct netlink_ext_ack *extack;
};

/* Extended notifier payload: the common netdev_notifier_info header followed
 * by one event-specific value. A pointer to the embedded header is what gets
 * handed to the notifier chain, so the header has to stay at offset zero for
 * receivers that cast the pointer back to this type.
 */
struct netdev_notifier_info_ext {
struct netdev_notifier_info info; /* must be first */
union {
/* For NETDEV_CHANGEMTU: the MTU the device had before the change
 * being announced (the device itself already carries the new one).
 */
u32 mtu;
} ext;
};

struct netdev_notifier_change_info {
struct netdev_notifier_info info; /* must be first */
unsigned int flags_changed;
Expand Down
1 change: 1 addition & 0 deletions include/net/ip_fib.h
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev);
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force);
int fib_sync_down_addr(struct net_device *dev, __be32 local);
int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
void fib_sync_mtu(struct net_device *dev, u32 orig_mtu);

#ifdef CONFIG_IP_ROUTE_MULTIPATH
int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
Expand Down
28 changes: 26 additions & 2 deletions net/core/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -1752,6 +1752,28 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
}
EXPORT_SYMBOL(call_netdevice_notifiers);

/**
 *	call_netdevice_notifiers_mtu - run the netdevice notifier chain with
 *				       an extra u32 attached to the event
 *	@val: event value, handed to the notifiers unmodified
 *	@dev: net_device the event is about, handed to the notifiers unmodified
 *	@arg: u32 stored in the ext.mtu member of the extended info block
 *
 *	Packs @dev and @arg into a netdev_notifier_info_ext and forwards it
 *	through call_netdevice_notifiers_info(). Parameters and return value
 *	are as for raw_notifier_call_chain().
 */
static int call_netdevice_notifiers_mtu(unsigned long val,
					struct net_device *dev, u32 arg)
{
	struct netdev_notifier_info_ext ext_info = {
		.info = { .dev = dev },
		.ext = { .mtu = arg },
	};

	/* Only the embedded base struct is passed down the chain; it must
	 * live at offset zero so the pointer can be cast back to the
	 * extended type.
	 */
	BUILD_BUG_ON(offsetof(struct netdev_notifier_info_ext, info) != 0);

	return call_netdevice_notifiers_info(val, &ext_info.info);
}

#ifdef CONFIG_NET_INGRESS
static DEFINE_STATIC_KEY_FALSE(ingress_needed_key);

Expand Down Expand Up @@ -7574,14 +7596,16 @@ int dev_set_mtu_ext(struct net_device *dev, int new_mtu,
err = __dev_set_mtu(dev, new_mtu);

if (!err) {
err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
orig_mtu);
err = notifier_to_errno(err);
if (err) {
/* setting mtu back and notifying everyone again,
* so that they have a chance to revert changes.
*/
__dev_set_mtu(dev, orig_mtu);
call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
new_mtu);
}
}
return err;
Expand Down
12 changes: 8 additions & 4 deletions net/ipv4/fib_frontend.c
Original file line number Diff line number Diff line change
Expand Up @@ -1243,7 +1243,8 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct netdev_notifier_changeupper_info *info;
struct netdev_notifier_changeupper_info *upper_info = ptr;
struct netdev_notifier_info_ext *info_ext = ptr;
struct in_device *in_dev;
struct net *net = dev_net(dev);
unsigned int flags;
Expand Down Expand Up @@ -1278,16 +1279,19 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
fib_sync_up(dev, RTNH_F_LINKDOWN);
else
fib_sync_down_dev(dev, event, false);
/* fall through */
rt_cache_flush(net);
break;
case NETDEV_CHANGEMTU:
fib_sync_mtu(dev, info_ext->ext.mtu);
rt_cache_flush(net);
break;
case NETDEV_CHANGEUPPER:
info = ptr;
upper_info = ptr;
/* flush all routes if dev is linked to or unlinked from
* an L3 master device (e.g., VRF)
*/
if (info->upper_dev && netif_is_l3_master(info->upper_dev))
if (upper_info->upper_dev &&
netif_is_l3_master(upper_info->upper_dev))
fib_disable_ip(dev, NETDEV_DOWN, true);
break;
}
Expand Down
50 changes: 50 additions & 0 deletions net/ipv4/fib_semantics.c
Original file line number Diff line number Diff line change
Expand Up @@ -1470,6 +1470,56 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh,
return NOTIFY_DONE;
}

/* Propagate a change of the first hop's link MTU (@orig -> @new) into the
 * cached PMTU of every exception attached to @nh.
 *
 * Unlocked exceptions take the new MTU when either
 * - the new link MTU is smaller than the cached PMTU, or
 * - the cached PMTU was equal to the old link MTU, meaning the link was
 *   what limited discovery; with that limit moved, larger MTUs on the
 *   path can be discovered again.
 *
 * Locked exceptions (PMTU smaller than the minimal accepted PMTU) are a
 * special case:
 * - a new MTU greater than the cached PMTU changes nothing;
 * - otherwise the PMTU is set to the new MTU and the lock is released.
 */
static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe;
	int slot;

	hash = rcu_dereference_protected(nh->nh_exceptions, 1);
	if (!hash)
		return;	/* this next hop has no exception table */

	for (slot = 0; slot < FNHE_HASH_SIZE; slot++) {
		fnhe = rcu_dereference_protected(hash[slot].chain, 1);
		while (fnhe) {
			if (!fnhe->fnhe_mtu_locked) {
				if (new < fnhe->fnhe_pmtu ||
				    orig == fnhe->fnhe_pmtu)
					fnhe->fnhe_pmtu = new;
			} else if (new <= fnhe->fnhe_pmtu) {
				/* PMTU first, then drop the lock */
				fnhe->fnhe_pmtu = new;
				fnhe->fnhe_mtu_locked = false;
			}

			fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1);
		}
	}
}

/* Refresh the PMTU exceptions of every next hop that uses @dev after the
 * device's MTU has changed.
 *
 * @dev:      device whose MTU changed; dev->mtu is read as the new value
 * @orig_mtu: MTU the device had before the change
 */
void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
{
	struct hlist_head *chain;
	struct fib_nh *nh;

	chain = &fib_info_devhash[fib_devindex_hashfn(dev->ifindex)];

	hlist_for_each_entry(nh, chain, nh_hash) {
		/* skip next hops on this chain that are bound to another device */
		if (nh->nh_dev != dev)
			continue;

		nh_update_mtu(nh, dev->mtu, orig_mtu);
	}
}

/* Event force Flags Description
* NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host
* NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host
Expand Down
7 changes: 4 additions & 3 deletions net/ipv4/route.c
Original file line number Diff line number Diff line change
Expand Up @@ -1001,21 +1001,22 @@ out: kfree_skb(skb);
static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
struct dst_entry *dst = &rt->dst;
u32 old_mtu = ipv4_mtu(dst);
struct fib_result res;
bool lock = false;

if (ip_mtu_locked(dst))
return;

if (ipv4_mtu(dst) < mtu)
if (old_mtu < mtu)
return;

if (mtu < ip_rt_min_pmtu) {
lock = true;
mtu = ip_rt_min_pmtu;
mtu = min(old_mtu, ip_rt_min_pmtu);
}

if (rt->rt_pmtu == mtu &&
if (rt->rt_pmtu == mtu && !lock &&
time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
return;

Expand Down

0 comments on commit 28b6bfe

Please sign in to comment.