Skip to content

Commit

Permalink
Merge branch 'nf_hook_netns'
Browse files Browse the repository at this point in the history
Eric W. Biederman says:

====================
Passing net through the netfilter hooks

My primary goal with this patchset and it's follow ups is to cleanup the
network routing paths so that we do not look at the output device to
derive the network namespace.  My plan is to pass the network namespace
of the transmitting socket through the output path, to replace code that
looks at the output network device today.  Once that is done we can have
routes with output devices outside of the current network namespace.
Which should allow reception and transmission of packets in network
namespaces to be as fast as normal packet reception and transmission
with early demux disabled, because it will same code path.

Once skb_dst(skb)->dev is a little better under control I think it will
also be possible to use rcu to cleanup the ancient hack that sets
dst->dev to loopback_dev when a network device is removed.

The work to get there is a series of code cleanups.  I am starting with
passing net into the netfilter hooks and into the functions that are
called after the netfilter hooks.  This removes from netfilter the
need to guess which network namespace it is working on.

To get there I perform a series of minor prep patches so the big changes
at the end are possible to audit without getting lost in the noise.  In
particular I have a lot of patches computing net into a local variable
and then using it through out the function.

So this patchset encompases removing dead code, sorting out the _sk
functions that were added last time someone pushed a prototype change
through the post netfilter functions.  Cleaning up individual functions
use of the network namespace.  Passing net into the netfilter hooks.
Passing net into the post netfilter functions.  Using state->net in
the netfilter code where it is available and trivially usable.

Pablo, Dave I don't know whose tree this makes more sense to go
through.  I am assuming at least initially Pablos as netfilter is
involved.  From what I have seen there will be a lot of back and forth
between the netfilter code paths and the routing code paths.

The patches are also available (against 4.3-rc1) at:
git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/net-next.git master
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Sep 18, 2015
2 parents a2f23e0 + be10de0 commit bbe8373
Show file tree
Hide file tree
Showing 67 changed files with 379 additions and 358 deletions.
9 changes: 5 additions & 4 deletions drivers/net/vrf.c
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
}

/* modelled after ip_finish_output2 */
static int vrf_finish_output(struct sock *sk, struct sk_buff *skb)
static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct rtable *rt = (struct rtable *)dst;
Expand Down Expand Up @@ -298,14 +298,15 @@ static int vrf_finish_output(struct sock *sk, struct sk_buff *skb)
static int vrf_output(struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev;
struct net *net = dev_net(dev);

IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len);
IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);

skb->dev = dev;
skb->protocol = htons(ETH_P_IP);

return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb,
NULL, dev,
return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
net, sk, skb, NULL, dev,
vrf_finish_output,
!(IPCB(skb)->flags & IPSKB_REROUTED));
}
Expand Down
14 changes: 3 additions & 11 deletions include/linux/netdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -2212,12 +2212,8 @@ int dev_open(struct net_device *dev);
int dev_close(struct net_device *dev);
int dev_close_many(struct list_head *head, bool unlink);
void dev_disable_lro(struct net_device *dev);
int dev_loopback_xmit(struct sock *sk, struct sk_buff *newskb);
int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb);
static inline int dev_queue_xmit(struct sk_buff *skb)
{
return dev_queue_xmit_sk(skb->sk, skb);
}
int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb);
int dev_queue_xmit(struct sk_buff *skb);
int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv);
int register_netdevice(struct net_device *dev);
void unregister_netdevice_queue(struct net_device *dev, struct list_head *head);
Expand Down Expand Up @@ -2989,11 +2985,7 @@ static inline void dev_consume_skb_any(struct sk_buff *skb)

int netif_rx(struct sk_buff *skb);
int netif_rx_ni(struct sk_buff *skb);
int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb);
static inline int netif_receive_skb(struct sk_buff *skb)
{
return netif_receive_skb_sk(skb->sk, skb);
}
int netif_receive_skb(struct sk_buff *skb);
gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
void napi_gro_flush(struct napi_struct *napi, bool flush_old);
struct sk_buff *napi_get_frags(struct napi_struct *napi);
Expand Down
68 changes: 32 additions & 36 deletions include/linux/netfilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,9 @@ struct nf_hook_state {
struct net_device *in;
struct net_device *out;
struct sock *sk;
struct net *net;
struct list_head *hook_list;
int (*okfn)(struct sock *, struct sk_buff *);
int (*okfn)(struct net *, struct sock *, struct sk_buff *);
};

static inline void nf_hook_state_init(struct nf_hook_state *p,
Expand All @@ -65,14 +66,16 @@ static inline void nf_hook_state_init(struct nf_hook_state *p,
struct net_device *indev,
struct net_device *outdev,
struct sock *sk,
int (*okfn)(struct sock *, struct sk_buff *))
struct net *net,
int (*okfn)(struct net *, struct sock *, struct sk_buff *))
{
p->hook = hook;
p->thresh = thresh;
p->pf = pf;
p->in = indev;
p->out = outdev;
p->sk = sk;
p->net = net;
p->hook_list = hook_list;
p->okfn = okfn;
}
Expand Down Expand Up @@ -167,32 +170,32 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state);
* value indicates the packet has been consumed by the hook.
*/
static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
struct net *net,
struct sock *sk,
struct sk_buff *skb,
struct net_device *indev,
struct net_device *outdev,
int (*okfn)(struct sock *, struct sk_buff *),
int (*okfn)(struct net *, struct sock *, struct sk_buff *),
int thresh)
{
struct net *net = dev_net(indev ? indev : outdev);
struct list_head *hook_list = &net->nf.hooks[pf][hook];

if (nf_hook_list_active(hook_list, pf, hook)) {
struct nf_hook_state state;

nf_hook_state_init(&state, hook_list, hook, thresh,
pf, indev, outdev, sk, okfn);
pf, indev, outdev, sk, net, okfn);
return nf_hook_slow(skb, &state);
}
return 1;
}

static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sock *sk,
struct sk_buff *skb, struct net_device *indev,
struct net_device *outdev,
int (*okfn)(struct sock *, struct sk_buff *))
static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
struct sock *sk, struct sk_buff *skb,
struct net_device *indev, struct net_device *outdev,
int (*okfn)(struct net *, struct sock *, struct sk_buff *))
{
return nf_hook_thresh(pf, hook, sk, skb, indev, outdev, okfn, INT_MIN);
return nf_hook_thresh(pf, hook, net, sk, skb, indev, outdev, okfn, INT_MIN);
}

/* Activate hook; either okfn or kfree_skb called, unless a hook
Expand All @@ -213,36 +216,38 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sock *sk,
*/

static inline int
NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct sock *sk,
NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
struct sk_buff *skb, struct net_device *in,
struct net_device *out,
int (*okfn)(struct sock *, struct sk_buff *), int thresh)
int (*okfn)(struct net *, struct sock *, struct sk_buff *),
int thresh)
{
int ret = nf_hook_thresh(pf, hook, sk, skb, in, out, okfn, thresh);
int ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, thresh);
if (ret == 1)
ret = okfn(sk, skb);
ret = okfn(net, sk, skb);
return ret;
}

static inline int
NF_HOOK_COND(uint8_t pf, unsigned int hook, struct sock *sk,
NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
struct sk_buff *skb, struct net_device *in, struct net_device *out,
int (*okfn)(struct sock *, struct sk_buff *), bool cond)
int (*okfn)(struct net *, struct sock *, struct sk_buff *),
bool cond)
{
int ret;

if (!cond ||
((ret = nf_hook_thresh(pf, hook, sk, skb, in, out, okfn, INT_MIN)) == 1))
ret = okfn(sk, skb);
((ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, INT_MIN)) == 1))
ret = okfn(net, sk, skb);
return ret;
}

static inline int
NF_HOOK(uint8_t pf, unsigned int hook, struct sock *sk, struct sk_buff *skb,
NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb,
struct net_device *in, struct net_device *out,
int (*okfn)(struct sock *, struct sk_buff *))
int (*okfn)(struct net *, struct sock *, struct sk_buff *))
{
return NF_HOOK_THRESH(pf, hook, sk, skb, in, out, okfn, INT_MIN);
return NF_HOOK_THRESH(pf, hook, net, sk, skb, in, out, okfn, INT_MIN);
}

/* Call setsockopt() */
Expand Down Expand Up @@ -342,21 +347,12 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
}

#else /* !CONFIG_NETFILTER */
#define NF_HOOK(pf, hook, sk, skb, indev, outdev, okfn) (okfn)(sk, skb)
#define NF_HOOK_COND(pf, hook, sk, skb, indev, outdev, okfn, cond) (okfn)(sk, skb)
static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
struct sock *sk,
struct sk_buff *skb,
struct net_device *indev,
struct net_device *outdev,
int (*okfn)(struct sock *sk, struct sk_buff *), int thresh)
{
return okfn(sk, skb);
}
static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sock *sk,
struct sk_buff *skb, struct net_device *indev,
struct net_device *outdev,
int (*okfn)(struct sock *, struct sk_buff *))
#define NF_HOOK(pf, hook, net, sk, skb, indev, outdev, okfn) (okfn)(net, sk, skb)
#define NF_HOOK_COND(pf, hook, net, sk, skb, indev, outdev, okfn, cond) (okfn)(net, sk, skb)
static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
struct sock *sk, struct sk_buff *skb,
struct net_device *indev, struct net_device *outdev,
int (*okfn)(struct net *, struct sock *, struct sk_buff *))
{
return 1;
}
Expand Down
2 changes: 1 addition & 1 deletion include/linux/netfilter_bridge.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ enum nf_br_hook_priorities {

#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)

int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb);
int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb);

static inline void br_drop_fake_rtable(struct sk_buff *skb)
{
Expand Down
2 changes: 1 addition & 1 deletion include/linux/netfilter_ingress.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ static inline int nf_hook_ingress(struct sk_buff *skb)

nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress,
NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, NULL,
skb->dev, NULL, NULL);
skb->dev, NULL, dev_net(skb->dev), NULL);
return nf_hook_slow(skb, &state);
}

Expand Down
6 changes: 3 additions & 3 deletions include/net/dn_neigh.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@ struct dn_neigh {

void dn_neigh_init(void);
void dn_neigh_cleanup(void);
int dn_neigh_router_hello(struct sock *sk, struct sk_buff *skb);
int dn_neigh_endnode_hello(struct sock *sk, struct sk_buff *skb);
int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb);
int dn_neigh_endnode_hello(struct net *net, struct sock *sk, struct sk_buff *skb);
void dn_neigh_pointopoint_hello(struct sk_buff *skb);
int dn_neigh_elist(struct net_device *dev, unsigned char *ptr, int n);
int dn_to_neigh_output(struct sock *sk, struct sk_buff *skb);
int dn_to_neigh_output(struct net *net, struct sock *sk, struct sk_buff *skb);

extern struct neigh_table dn_neigh_table;

Expand Down
6 changes: 3 additions & 3 deletions include/net/dst.h
Original file line number Diff line number Diff line change
Expand Up @@ -454,13 +454,13 @@ static inline void dst_set_expires(struct dst_entry *dst, int timeout)
}

/* Output packet to network from transport. */
static inline int dst_output_sk(struct sock *sk, struct sk_buff *skb)
static inline int dst_output(struct sock *sk, struct sk_buff *skb)
{
return skb_dst(skb)->output(sk, skb);
}
static inline int dst_output(struct sk_buff *skb)
static inline int dst_output_okfn(struct net *net, struct sock *sk, struct sk_buff *skb)
{
return dst_output_sk(skb->sk, skb);
return dst_output(sk, skb);
}

/* Input packet from network to transport. */
Expand Down
2 changes: 1 addition & 1 deletion include/net/ipv6.h
Original file line number Diff line number Diff line change
Expand Up @@ -807,7 +807,7 @@ static inline u8 ip6_tclass(__be32 flowinfo)
int ipv6_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev);

int ip6_rcv_finish(struct sock *sk, struct sk_buff *skb);
int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb);

/*
* upper-layer output functions
Expand Down
2 changes: 1 addition & 1 deletion include/net/netfilter/br_netfilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ static inline void nf_bridge_push_encap_header(struct sk_buff *skb)
skb->network_header -= len;
}

int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb);
int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_buff *skb);

static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
{
Expand Down
2 changes: 0 additions & 2 deletions include/net/xfrm.h
Original file line number Diff line number Diff line change
Expand Up @@ -296,8 +296,6 @@ struct xfrm_policy_afinfo {
struct flowi *fl,
int reverse);
int (*get_tos)(const struct flowi *fl);
void (*init_dst)(struct net *net,
struct xfrm_dst *dst);
int (*init_path)(struct xfrm_dst *path,
struct dst_entry *dst,
int nfheader_len);
Expand Down
16 changes: 8 additions & 8 deletions net/bridge/br_forward.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ static inline int should_deliver(const struct net_bridge_port *p,
p->state == BR_STATE_FORWARDING;
}

int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb)
int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
if (!is_skb_forwardable(skb->dev, skb))
goto drop;
Expand Down Expand Up @@ -65,10 +65,10 @@ int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit);

int br_forward_finish(struct sock *sk, struct sk_buff *skb)
int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, sk, skb,
NULL, skb->dev,
return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING,
net, sk, skb, NULL, skb->dev,
br_dev_queue_push_xmit);

}
Expand All @@ -92,8 +92,8 @@ static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
return;
}

NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb,
NULL, skb->dev,
NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT,
dev_net(skb->dev), NULL, skb,NULL, skb->dev,
br_forward_finish);
}

Expand All @@ -114,8 +114,8 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
skb->dev = to->dev;
skb_forward_csum(skb);

NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, NULL, skb,
indev, skb->dev,
NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD,
dev_net(indev), NULL, skb, indev, skb->dev,
br_forward_finish);
}

Expand Down
25 changes: 16 additions & 9 deletions net/bridge/br_input.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@
br_should_route_hook_t __rcu *br_should_route_hook __read_mostly;
EXPORT_SYMBOL(br_should_route_hook);

static int
br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb)
{
return netif_receive_skb(skb);
}

static int br_pass_frame_up(struct sk_buff *skb)
{
struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
Expand Down Expand Up @@ -55,9 +61,9 @@ static int br_pass_frame_up(struct sk_buff *skb)
if (!skb)
return NET_RX_DROP;

return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, NULL, skb,
indev, NULL,
netif_receive_skb_sk);
return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
dev_net(indev), NULL, skb, indev, NULL,
br_netif_receive_skb);
}

static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
Expand Down Expand Up @@ -120,7 +126,7 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
}

/* note: already called with rcu_read_lock */
int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb)
int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
const unsigned char *dest = eth_hdr(skb)->h_dest;
struct net_bridge_port *p = br_port_get_rcu(skb->dev);
Expand Down Expand Up @@ -208,7 +214,7 @@ int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb)
EXPORT_SYMBOL_GPL(br_handle_frame_finish);

/* note: already called with rcu_read_lock */
static int br_handle_local_finish(struct sock *sk, struct sk_buff *skb)
static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct net_bridge_port *p = br_port_get_rcu(skb->dev);
u16 vid = 0;
Expand Down Expand Up @@ -278,8 +284,9 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
}

/* Deliver packet to local host only */
if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, NULL, skb,
skb->dev, NULL, br_handle_local_finish)) {
if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
dev_net(skb->dev), NULL, skb, skb->dev, NULL,
br_handle_local_finish)) {
return RX_HANDLER_CONSUMED; /* consumed by filter */
} else {
*pskb = skb;
Expand All @@ -303,8 +310,8 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
if (ether_addr_equal(p->br->dev->dev_addr, dest))
skb->pkt_type = PACKET_HOST;

NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, NULL, skb,
skb->dev, NULL,
NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING,
dev_net(skb->dev), NULL, skb, skb->dev, NULL,
br_handle_frame_finish);
break;
default:
Expand Down
Loading

0 comments on commit bbe8373

Please sign in to comment.