Skip to content

Commit

Permalink
Merge branch 'redirect_via_sock'
Browse files Browse the repository at this point in the history
As described in my patch series from the other day, we need to
rearrange redirect handling so that the local initiators of packets
(sockets, tunnels, xfrms, etc.) that implement the protocols compute
the route and pass this down into the ipv4/ipv6 routing code.

These changes here do so by implementing a new dst_ops->redirect
method.

No more do we have this funny code that tries several different sets
of routing keys to try and figure out which route the redirect should
actually be applied to.

No more do we have the problem wherein TOS rewriting causes problems
for us.

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Jul 12, 2012
2 parents 46d3cea + 1ed5c48 commit 3ec5a26
Show file tree
Hide file tree
Showing 36 changed files with 447 additions and 351 deletions.
1 change: 1 addition & 0 deletions include/net/dst_ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ struct dst_ops {
struct dst_entry * (*negative_advice)(struct dst_entry *);
void (*link_failure)(struct sk_buff *);
void (*update_pmtu)(struct dst_entry *dst, u32 mtu);
void (*redirect)(struct dst_entry *dst, struct sk_buff *skb);
int (*local_out)(struct sk_buff *skb);
struct neighbour * (*neigh_lookup)(const struct dst_entry *dst,
struct sk_buff *skb,
Expand Down
9 changes: 2 additions & 7 deletions include/net/ip6_route.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,17 +133,12 @@ extern int rt6_route_rcv(struct net_device *dev,
u8 *opt, int len,
const struct in6_addr *gwaddr);

extern void rt6_redirect(const struct in6_addr *dest,
const struct in6_addr *src,
const struct in6_addr *saddr,
struct neighbour *neigh,
u8 *lladdr,
int on_link);

extern void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
int oif, u32 mark);
extern void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk,
__be32 mtu);
extern void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark);
extern void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk);

struct netlink_callback;

Expand Down
2 changes: 2 additions & 0 deletions include/net/ipv6.h
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,8 @@ static inline void fl6_sock_release(struct ip6_flowlabel *fl)
atomic_dec(&fl->users);
}

extern void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info);

extern int ip6_ra_control(struct sock *sk, int sel);

extern int ipv6_parse_hopopts(struct sk_buff *skb);
Expand Down
50 changes: 50 additions & 0 deletions include/net/ndisc.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ enum {
#include <linux/icmpv6.h>
#include <linux/in6.h>
#include <linux/types.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>

#include <net/neighbour.h>

Expand Down Expand Up @@ -80,6 +82,54 @@ struct nd_opt_hdr {
__u8 nd_opt_len;
} __packed;

/* ND options */
/*
 * Set of pointers into a neighbour-discovery option area.  It is the
 * type ndisc_parse_options() takes as its 'ndopts' argument and returns.
 * The nd_opts_* macros that follow this struct give names to individual
 * nd_opt_array slots.
 */
struct ndisc_options {
/* One slot per option type, indexed by the ND_OPT_* value. */
struct nd_opt_hdr *nd_opt_array[__ND_OPT_ARRAY_MAX];
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Only present when CONFIG_IPV6_ROUTE_INFO is set.
 * NOTE(review): presumably the first/last route-information option
 * seen while parsing -- confirm against ndisc_parse_options().
 */
struct nd_opt_hdr *nd_opts_ri;
struct nd_opt_hdr *nd_opts_ri_end;
#endif
/*
 * NOTE(review): presumably the first/last option that is passed up to
 * user space -- confirm against ndisc_parse_options().
 */
struct nd_opt_hdr *nd_useropts;
struct nd_opt_hdr *nd_useropts_end;
};

#define nd_opts_src_lladdr nd_opt_array[ND_OPT_SOURCE_LL_ADDR]
#define nd_opts_tgt_lladdr nd_opt_array[ND_OPT_TARGET_LL_ADDR]
#define nd_opts_pi nd_opt_array[ND_OPT_PREFIX_INFO]
#define nd_opts_pi_end nd_opt_array[__ND_OPT_PREFIX_INFO_END]
#define nd_opts_rh nd_opt_array[ND_OPT_REDIRECT_HDR]
#define nd_opts_mtu nd_opt_array[ND_OPT_MTU]

#define NDISC_OPT_SPACE(len) (((len)+2+7)&~7)

extern struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
struct ndisc_options *ndopts);

/*
 * Return the padding between the option length and the start of the
 * link addr.  Currently only IP-over-InfiniBand needs this, although
 * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may
 * also need a pad of 2.
 *
 * @type: hardware type of the device (an ARPHRD_* value).
 *
 * Returns 2 for ARPHRD_INFINIBAND and 0 for every other type.
 *
 * This lives in a header, so it must be 'static inline': a plain
 * 'static' definition would give every including file its own copy and
 * trigger an unused-function warning wherever it is not called.
 */
static inline int ndisc_addr_option_pad(unsigned short type)
{
	switch (type) {
	case ARPHRD_INFINIBAND:
		return 2;
	default:
		return 0;
	}
}

/*
 * Locate the link-layer address carried in ND option @p.
 *
 * @p:   option header; the option payload starts right behind it.
 * @dev: device whose type and addr_len decide the expected layout.
 *
 * The option's length field counts 8-byte units, hence the shift.  The
 * option is accepted only when that length equals exactly the space
 * NDISC_OPT_SPACE() computes for the device's address length plus the
 * per-device-type pad.
 *
 * Returns a pointer to the address bytes (payload start plus pad), or
 * NULL when the option length does not match.
 */
static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p,
				      struct net_device *dev)
{
	int opt_bytes = p->nd_opt_len << 3;
	int pad = ndisc_addr_option_pad(dev->type);

	if (opt_bytes == NDISC_OPT_SPACE(dev->addr_len + pad))
		return (u8 *)(p + 1) + pad;

	return NULL;
}

static inline u32 ndisc_hashfn(const void *pkey, const struct net_device *dev, __u32 *hash_rnd)
{
const u32 *p32 = pkey;
Expand Down
5 changes: 3 additions & 2 deletions include/net/route.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,6 @@ extern struct ip_rt_acct __percpu *ip_rt_acct;

struct in_device;
extern int ip_rt_init(void);
extern void ip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw,
__be32 src, struct net_device *dev);
extern void rt_cache_flush(struct net *net, int how);
extern void rt_cache_flush_batch(struct net *net);
extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp);
Expand Down Expand Up @@ -181,6 +179,9 @@ static inline int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 s
extern void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
int oif, u32 mark, u8 protocol, int flow_flags);
extern void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu);
extern void ipv4_redirect(struct sk_buff *skb, struct net *net,
int oif, u32 mark, u8 protocol, int flow_flags);
extern void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk);
extern void ip_rt_send_redirect(struct sk_buff *skb);

extern unsigned int inet_addr_type(struct net *net, __be32 addr);
Expand Down
2 changes: 2 additions & 0 deletions include/net/sctp/sctp.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *,
void sctp_err_finish(struct sock *, struct sctp_association *);
void sctp_icmp_frag_needed(struct sock *, struct sctp_association *,
struct sctp_transport *t, __u32 pmtu);
void sctp_icmp_redirect(struct sock *, struct sctp_transport *,
struct sk_buff *);
void sctp_icmp_proto_unreachable(struct sock *sk,
struct sctp_association *asoc,
struct sctp_transport *t);
Expand Down
5 changes: 5 additions & 0 deletions net/bridge/br_netfilter.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@ static void fake_update_pmtu(struct dst_entry *dst, u32 mtu)
{
}

/*
 * ->redirect handler for fake_dst_ops (installed as its .redirect
 * member).  The body is empty: both arguments are ignored and nothing
 * is done.
 */
static void fake_redirect(struct dst_entry *dst, struct sk_buff *skb)
{
}

static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old)
{
return NULL;
Expand All @@ -136,6 +140,7 @@ static struct dst_ops fake_dst_ops = {
.family = AF_INET,
.protocol = cpu_to_be16(ETH_P_IP),
.update_pmtu = fake_update_pmtu,
.redirect = fake_redirect,
.cow_metrics = fake_cow_metrics,
.neigh_lookup = fake_neigh_lookup,
.mtu = fake_mtu,
Expand Down
11 changes: 11 additions & 0 deletions net/dccp/ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,14 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk,
} /* else let the usual retransmit timer handle it */
}

/*
 * Pass a received redirect on to the route attached to socket @sk.
 *
 * @skb: the packet handed to the error handler.
 * @sk:  socket the redirect was matched to.
 *
 * Looks up the socket's dst via __sk_dst_check() (cookie 0) and, when
 * one is returned, invokes that dst's ->redirect method with @skb.
 * Does nothing if no dst is returned.
 */
static void dccp_do_redirect(struct sk_buff *skb, struct sock *sk)
{
	struct dst_entry *cached = __sk_dst_check(sk, 0);

	if (!cached)
		return;

	cached->ops->redirect(cached, skb);
}

/*
* This routine is called by the ICMP module when it gets some sort of error
* condition. If err < 0 then the socket should be closed and the error
Expand Down Expand Up @@ -259,6 +267,9 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
}

switch (type) {
case ICMP_REDIRECT:
dccp_do_redirect(skb, sk);
goto out;
case ICMP_SOURCE_QUENCH:
/* Just silently ignore these. */
goto out;
Expand Down
7 changes: 7 additions & 0 deletions net/dccp/ipv6.c
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,13 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,

np = inet6_sk(sk);

if (type == NDISC_REDIRECT) {
struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

if (dst)
dst->ops->redirect(dst, skb);
}

if (type == ICMPV6_PKT_TOOBIG) {
struct dst_entry *dst = NULL;

Expand Down
6 changes: 6 additions & 0 deletions net/decnet/dn_route.c
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how);
static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
static void dn_dst_link_failure(struct sk_buff *);
static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu);
static void dn_dst_redirect(struct dst_entry *dst, struct sk_buff *skb);
static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
struct sk_buff *skb,
const void *daddr);
Expand Down Expand Up @@ -145,6 +146,7 @@ static struct dst_ops dn_dst_ops = {
.negative_advice = dn_dst_negative_advice,
.link_failure = dn_dst_link_failure,
.update_pmtu = dn_dst_update_pmtu,
.redirect = dn_dst_redirect,
.neigh_lookup = dn_dst_neigh_lookup,
};

Expand Down Expand Up @@ -292,6 +294,10 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu)
}
}

/*
 * ->redirect handler for dn_dst_ops (installed as its .redirect
 * member).  The body is empty: both arguments are ignored and nothing
 * is done.
 */
static void dn_dst_redirect(struct dst_entry *dst, struct sk_buff *skb)
{
}

/*
* When a route has been marked obsolete. (e.g. routing cache flush)
*/
Expand Down
18 changes: 13 additions & 5 deletions net/ipv4/ah4.c
Original file line number Diff line number Diff line change
Expand Up @@ -398,17 +398,25 @@ static void ah4_err(struct sk_buff *skb, u32 info)
struct ip_auth_hdr *ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2));
struct xfrm_state *x;

if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
switch (icmp_hdr(skb)->type) {
case ICMP_DEST_UNREACH:
if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
return;
case ICMP_REDIRECT:
break;
default:
return;
}

x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
ah->spi, IPPROTO_AH, AF_INET);
if (!x)
return;
pr_debug("pmtu discovery on SA AH/%08x/%08x\n",
ntohl(ah->spi), ntohl(iph->daddr));
ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0);

if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0);
else
ipv4_redirect(skb, net, 0, 0, IPPROTO_AH, 0);
xfrm_state_put(x);
}

Expand Down
18 changes: 13 additions & 5 deletions net/ipv4/esp4.c
Original file line number Diff line number Diff line change
Expand Up @@ -484,17 +484,25 @@ static void esp4_err(struct sk_buff *skb, u32 info)
struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
struct xfrm_state *x;

if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
switch (icmp_hdr(skb)->type) {
case ICMP_DEST_UNREACH:
if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
return;
case ICMP_REDIRECT:
break;
default:
return;
}

x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
esph->spi, IPPROTO_ESP, AF_INET);
if (!x)
return;
NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n",
ntohl(esph->spi), ntohl(iph->daddr));
ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0);

if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0);
else
ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0);
xfrm_state_put(x);
}

Expand Down
74 changes: 22 additions & 52 deletions net/ipv4/icmp.c
Original file line number Diff line number Diff line change
Expand Up @@ -634,18 +634,31 @@ out:;
EXPORT_SYMBOL(icmp_send);


/*
 * Deliver a received ICMP message to the upper layers.
 *
 * @skb:  packet whose data pointer is at the IP header embedded in the
 *        ICMP payload (the header of the packet that triggered it).
 * @info: value passed through unchanged to raw_icmp_error() and to the
 *        protocol's err_handler.
 *
 * The message is first offered to raw sockets via raw_icmp_error(), then
 * to the err_handler registered in inet_protos[] for the protocol named
 * in the embedded IP header, if there is one.
 */
static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	const struct net_protocol *ipprot;
	int protocol = iph->protocol;

	/*
	 * Check for the full embedded IP header plus 8 bytes of the
	 * protocol header here, once, so the individual protocol
	 * handlers do not each have to.  Not every caller guarantees
	 * this: icmp_redirect() only pulls sizeof(struct iphdr).
	 * 'protocol' was read above because a successful pull may move
	 * skb->data and leave 'iph' stale.
	 */
	if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) {
		ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS);
		return;
	}

	raw_icmp_error(skb, protocol, info);

	/* inet_protos[] entries are read under the RCU read lock. */
	rcu_read_lock();
	ipprot = rcu_dereference(inet_protos[protocol]);
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, info);
	rcu_read_unlock();
}

/*
* Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEEDED, and ICMP_SOURCE_QUENCH.
*/

static void icmp_unreach(struct sk_buff *skb)
{
const struct net_protocol *ipprot;
const struct iphdr *iph;
struct icmphdr *icmph;
struct net *net;
u32 info = 0;
int protocol;

net = dev_net(skb_dst(skb)->dev);

Expand Down Expand Up @@ -726,19 +739,7 @@ static void icmp_unreach(struct sk_buff *skb)
if (!pskb_may_pull(skb, iph->ihl * 4 + 8))
goto out;

iph = (const struct iphdr *)skb->data;
protocol = iph->protocol;

/*
* Deliver ICMP message to raw sockets. Pretty useless feature?
*/
raw_icmp_error(skb, protocol, info);

rcu_read_lock();
ipprot = rcu_dereference(inet_protos[protocol]);
if (ipprot && ipprot->err_handler)
ipprot->err_handler(skb, info);
rcu_read_unlock();
icmp_socket_deliver(skb, info);

out:
return;
Expand All @@ -754,46 +755,15 @@ static void icmp_unreach(struct sk_buff *skb)

static void icmp_redirect(struct sk_buff *skb)
{
const struct iphdr *iph;

if (skb->len < sizeof(struct iphdr))
goto out_err;

/*
* Get the copied header of the packet that caused the redirect
*/
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto out;

iph = (const struct iphdr *)skb->data;

switch (icmp_hdr(skb)->code & 7) {
case ICMP_REDIR_NET:
case ICMP_REDIR_NETTOS:
/*
* As per RFC recommendations now handle it as a host redirect.
*/
case ICMP_REDIR_HOST:
case ICMP_REDIR_HOSTTOS:
ip_rt_redirect(ip_hdr(skb)->saddr, iph->daddr,
icmp_hdr(skb)->un.gateway,
iph->saddr, skb->dev);
break;
if (skb->len < sizeof(struct iphdr)) {
ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS);
return;
}

/* Ping wants to see redirects.
* Let's pretend they are errors of sorts... */
if (iph->protocol == IPPROTO_ICMP &&
iph->ihl >= 5 &&
pskb_may_pull(skb, (iph->ihl<<2)+8)) {
ping_err(skb, icmp_hdr(skb)->un.gateway);
}
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
return;

out:
return;
out_err:
ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS);
goto out;
icmp_socket_deliver(skb, icmp_hdr(skb)->un.gateway);
}

/*
Expand Down
Loading

0 comments on commit 3ec5a26

Please sign in to comment.