Skip to content

Commit

Permalink
Merge branch 'master' of git://1984.lsi.us.es/nf-next
Browse files Browse the repository at this point in the history
Pablo Neira Ayuso says:

====================
The following patchset contains Netfilter and IPVS updates for
your net-next tree, most relevantly they are:

* Add net namespace support to NFLOG, ULOG and ebt_ulog and NFQUEUE.
  The LOG and ebt_log target has been also adapted, but they still
  depend on the syslog netnamespace that seems to be missing, from
  Gao Feng.

* Don't lose indications of congestion in IPv6 fragmentation handling,
  from Hannes Frederic Sowa.i

* IPVS conversion to use RCU, including some code consolidation patches
  and optimizations, also some from Julian Anastasov.

* cpu fanout support for NFQUEUE, from Holger Eitzenberger.

* Better error reporting to userspace when dropping packets from
  all our _*_[xfrm|route]_me_harder functions, from Patrick McHardy.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Apr 7, 2013
2 parents 19952cc + b8dd6a2 commit d166582
Show file tree
Hide file tree
Showing 54 changed files with 2,436 additions and 2,000 deletions.
5 changes: 0 additions & 5 deletions include/linux/netfilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -289,11 +289,6 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
#endif
}

#ifdef CONFIG_PROC_FS
#include <linux/proc_fs.h>
extern struct proc_dir_entry *proc_net_netfilter;
#endif

#else /* !CONFIG_NETFILTER */
#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb)
#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb)
Expand Down
35 changes: 34 additions & 1 deletion include/linux/skbuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -575,7 +575,40 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
skb->_skb_refdst = (unsigned long)dst;
}

extern void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst);
extern void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst,
bool force);

/**
* skb_dst_set_noref - sets skb dst, hopefully, without taking reference
* @skb: buffer
* @dst: dst entry
*
* Sets skb dst, assuming a reference was not taken on dst.
* If dst entry is cached, we do not take reference and dst_release
* will be avoided by refdst_drop. If dst entry is not cached, we take
* reference, so that last dst_release can destroy the dst immediately.
*/
static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
{
__skb_dst_set_noref(skb, dst, false);
}

/**
* skb_dst_set_noref_force - sets skb dst, without taking reference
* @skb: buffer
* @dst: dst entry
*
* Sets skb dst, assuming a reference was not taken on dst.
* No reference is taken and no dst_release will be called. While for
* cached dsts deferred reclaim is a basic feature, for entries that are
* not cached it is caller's job to guarantee that last dst_release for
* provided dst happens when nobody uses it, eg. after a RCU grace period.
*/
static inline void skb_dst_set_noref_force(struct sk_buff *skb,
struct dst_entry *dst)
{
__skb_dst_set_noref(skb, dst, true);
}

/**
* skb_dst_is_noref - Test if skb dst isn't refcounted
Expand Down
130 changes: 88 additions & 42 deletions include/net/ip_vs.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,21 @@ static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst,
dst->ip = src->ip;
}

static inline void ip_vs_addr_set(int af, union nf_inet_addr *dst,
const union nf_inet_addr *src)
{
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
dst->in6 = src->in6;
return;
}
#endif
dst->ip = src->ip;
dst->all[1] = 0;
dst->all[2] = 0;
dst->all[3] = 0;
}

static inline int ip_vs_addr_equal(int af, const union nf_inet_addr *a,
const union nf_inet_addr *b)
{
Expand Down Expand Up @@ -344,8 +359,6 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
#define LeaveFunction(level) do {} while (0)
#endif

#define IP_VS_WAIT_WHILE(expr) while (expr) { cpu_relax(); }


/*
* The port number of FTP service (in network order).
Expand Down Expand Up @@ -566,20 +579,19 @@ struct ip_vs_conn_param {
*/
struct ip_vs_conn {
struct hlist_node c_list; /* hashed list heads */
#ifdef CONFIG_NET_NS
struct net *net; /* Name space */
#endif
/* Protocol, addresses and port numbers */
u16 af; /* address family */
__be16 cport;
__be16 vport;
__be16 dport;
__u32 fwmark; /* Fire wall mark from skb */
__be16 vport;
u16 af; /* address family */
union nf_inet_addr caddr; /* client address */
union nf_inet_addr vaddr; /* virtual address */
union nf_inet_addr daddr; /* destination address */
volatile __u32 flags; /* status flags */
__u16 protocol; /* Which protocol (TCP/UDP) */
#ifdef CONFIG_NET_NS
struct net *net; /* Name space */
#endif

/* counter and timer */
atomic_t refcnt; /* reference count */
Expand All @@ -593,6 +605,7 @@ struct ip_vs_conn {
* state transition triggerd
* synchronization
*/
__u32 fwmark; /* Fire wall mark from skb */
unsigned long sync_endtime; /* jiffies + sent_retries */

/* Control members */
Expand Down Expand Up @@ -620,6 +633,8 @@ struct ip_vs_conn {
const struct ip_vs_pe *pe;
char *pe_data;
__u8 pe_data_len;

struct rcu_head rcu_head;
};

/*
Expand Down Expand Up @@ -695,10 +710,9 @@ struct ip_vs_dest_user_kern {
* and the forwarding entries
*/
struct ip_vs_service {
struct list_head s_list; /* for normal service table */
struct list_head f_list; /* for fwmark-based service table */
struct hlist_node s_list; /* for normal service table */
struct hlist_node f_list; /* for fwmark-based service table */
atomic_t refcnt; /* reference counter */
atomic_t usecnt; /* use counter */

u16 af; /* address family */
__u16 protocol; /* which protocol (TCP/UDP) */
Expand All @@ -713,25 +727,35 @@ struct ip_vs_service {
struct list_head destinations; /* real server d-linked list */
__u32 num_dests; /* number of servers */
struct ip_vs_stats stats; /* statistics for the service */
struct ip_vs_app *inc; /* bind conns to this app inc */

/* for scheduling */
struct ip_vs_scheduler *scheduler; /* bound scheduler object */
rwlock_t sched_lock; /* lock sched_data */
struct ip_vs_scheduler __rcu *scheduler; /* bound scheduler object */
spinlock_t sched_lock; /* lock sched_data */
void *sched_data; /* scheduler application data */

/* alternate persistence engine */
struct ip_vs_pe *pe;
struct ip_vs_pe __rcu *pe;

struct rcu_head rcu_head;
};

/* Information for cached dst */
struct ip_vs_dest_dst {
struct dst_entry *dst_cache; /* destination cache entry */
u32 dst_cookie;
union nf_inet_addr dst_saddr;
struct rcu_head rcu_head;
};

/* In grace period after removing */
#define IP_VS_DEST_STATE_REMOVING 0x01
/*
* The real server destination forwarding entry
* with ip address, port number, and so on.
*/
struct ip_vs_dest {
struct list_head n_list; /* for the dests in the service */
struct list_head d_list; /* for table with all the dests */
struct hlist_node d_list; /* for table with all the dests */

u16 af; /* address family */
__be16 port; /* port number of the server */
Expand All @@ -742,6 +766,7 @@ struct ip_vs_dest {

atomic_t refcnt; /* reference counter */
struct ip_vs_stats stats; /* statistics */
unsigned long state; /* state flags */

/* connection counters and thresholds */
atomic_t activeconns; /* active connections */
Expand All @@ -752,17 +777,18 @@ struct ip_vs_dest {

/* for destination cache */
spinlock_t dst_lock; /* lock of dst_cache */
struct dst_entry *dst_cache; /* destination cache entry */
u32 dst_rtos; /* RT_TOS(tos) for dst */
u32 dst_cookie;
union nf_inet_addr dst_saddr;
struct ip_vs_dest_dst __rcu *dest_dst; /* cached dst info */

/* for virtual service */
struct ip_vs_service *svc; /* service it belongs to */
__u16 protocol; /* which protocol (TCP/UDP) */
__be16 vport; /* virtual port number */
union nf_inet_addr vaddr; /* virtual IP address */
__u32 vfwmark; /* firewall mark of service */

struct list_head t_list; /* in dest_trash */
struct rcu_head rcu_head;
unsigned int in_rs_table:1; /* we are in rs_table */
};


Expand All @@ -778,9 +804,13 @@ struct ip_vs_scheduler {
/* scheduler initializing service */
int (*init_service)(struct ip_vs_service *svc);
/* scheduling service finish */
int (*done_service)(struct ip_vs_service *svc);
/* scheduler updating service */
int (*update_service)(struct ip_vs_service *svc);
void (*done_service)(struct ip_vs_service *svc);
/* dest is linked */
int (*add_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);
/* dest is unlinked */
int (*del_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);
/* dest is updated */
int (*upd_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);

/* selecting a server from the given service */
struct ip_vs_dest* (*schedule)(struct ip_vs_service *svc,
Expand Down Expand Up @@ -819,6 +849,7 @@ struct ip_vs_app {
struct ip_vs_app *app; /* its real application */
__be16 port; /* port number in net order */
atomic_t usecnt; /* usage counter */
struct rcu_head rcu_head;

/*
* output hook: Process packet in inout direction, diff set for TCP.
Expand Down Expand Up @@ -881,6 +912,9 @@ struct ipvs_master_sync_state {
struct netns_ipvs *ipvs;
};

/* How much time to keep dests in trash */
#define IP_VS_DEST_TRASH_PERIOD (120 * HZ)

/* IPVS in network namespace */
struct netns_ipvs {
int gen; /* Generation */
Expand All @@ -892,7 +926,7 @@ struct netns_ipvs {
#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)

struct list_head rs_table[IP_VS_RTAB_SIZE];
struct hlist_head rs_table[IP_VS_RTAB_SIZE];
/* ip_vs_app */
struct list_head app_list;
/* ip_vs_proto */
Expand All @@ -904,15 +938,13 @@ struct netns_ipvs {
#define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
#define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
struct list_head tcp_apps[TCP_APP_TAB_SIZE];
spinlock_t tcp_app_lock;
#endif
/* ip_vs_proto_udp */
#ifdef CONFIG_IP_VS_PROTO_UDP
#define UDP_APP_TAB_BITS 4
#define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
#define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
struct list_head udp_apps[UDP_APP_TAB_SIZE];
spinlock_t udp_app_lock;
#endif
/* ip_vs_proto_sctp */
#ifdef CONFIG_IP_VS_PROTO_SCTP
Expand All @@ -921,7 +953,6 @@ struct netns_ipvs {
#define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1)
/* Hash table for SCTP application incarnations */
struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
spinlock_t sctp_app_lock;
#endif
/* ip_vs_conn */
atomic_t conn_count; /* connection counter */
Expand All @@ -931,9 +962,10 @@ struct netns_ipvs {

int num_services; /* no of virtual services */

rwlock_t rs_lock; /* real services table */
/* Trash for destinations */
struct list_head dest_trash;
spinlock_t dest_trash_lock;
struct timer_list dest_trash_timer; /* expiration timer */
/* Service counters */
atomic_t ftpsvc_counter;
atomic_t nullsvc_counter;
Expand Down Expand Up @@ -1181,9 +1213,19 @@ struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
const struct ip_vs_iphdr *iph,
int inverse);

/* Get reference to gain full access to conn.
* By default, RCU read-side critical sections have access only to
* conn fields and its PE data, see ip_vs_conn_rcu_free() for reference.
*/
static inline bool __ip_vs_conn_get(struct ip_vs_conn *cp)
{
return atomic_inc_not_zero(&cp->refcnt);
}

/* put back the conn without restarting its timer */
static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
{
smp_mb__before_atomic_dec();
atomic_dec(&cp->refcnt);
}
extern void ip_vs_conn_put(struct ip_vs_conn *cp);
Expand Down Expand Up @@ -1298,8 +1340,6 @@ extern void ip_vs_app_inc_put(struct ip_vs_app *inc);
extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb);
extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb);

void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe);
void ip_vs_unbind_pe(struct ip_vs_service *svc);
int register_ip_vs_pe(struct ip_vs_pe *pe);
int unregister_ip_vs_pe(struct ip_vs_pe *pe);
struct ip_vs_pe *ip_vs_pe_getbyname(const char *name);
Expand Down Expand Up @@ -1346,7 +1386,8 @@ extern int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
extern int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
extern int ip_vs_bind_scheduler(struct ip_vs_service *svc,
struct ip_vs_scheduler *scheduler);
extern int ip_vs_unbind_scheduler(struct ip_vs_service *svc);
extern void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
struct ip_vs_scheduler *sched);
extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
extern struct ip_vs_conn *
Expand All @@ -1366,17 +1407,12 @@ extern struct ip_vs_stats ip_vs_stats;
extern int sysctl_ip_vs_sync_ver;

extern struct ip_vs_service *
ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport);

static inline void ip_vs_service_put(struct ip_vs_service *svc)
{
atomic_dec(&svc->usecnt);
}

extern struct ip_vs_dest *
ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
const union nf_inet_addr *daddr, __be16 dport);
extern bool
ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
const union nf_inet_addr *daddr, __be16 dport);

extern int ip_vs_use_count_inc(void);
extern void ip_vs_use_count_dec(void);
Expand All @@ -1388,8 +1424,18 @@ extern struct ip_vs_dest *
ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr,
__be16 dport, const union nf_inet_addr *vaddr, __be16 vport,
__u16 protocol, __u32 fwmark, __u32 flags);
extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
extern void ip_vs_try_bind_dest(struct ip_vs_conn *cp);

static inline void ip_vs_dest_hold(struct ip_vs_dest *dest)
{
atomic_inc(&dest->refcnt);
}

static inline void ip_vs_dest_put(struct ip_vs_dest *dest)
{
smp_mb__before_atomic_dec();
atomic_dec(&dest->refcnt);
}

/*
* IPVS sync daemon data and function prototypes
Expand Down Expand Up @@ -1428,7 +1474,7 @@ extern int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
extern int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, int offset,
unsigned int hooknum, struct ip_vs_iphdr *iph);
extern void ip_vs_dst_reset(struct ip_vs_dest *dest);
extern void ip_vs_dest_dst_rcu_free(struct rcu_head *head);

#ifdef CONFIG_IP_VS_IPV6
extern int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
Expand Down
2 changes: 2 additions & 0 deletions include/net/net_namespace.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <net/netns/ipv6.h>
#include <net/netns/sctp.h>
#include <net/netns/dccp.h>
#include <net/netns/netfilter.h>
#include <net/netns/x_tables.h>
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netns/conntrack.h>
Expand Down Expand Up @@ -94,6 +95,7 @@ struct net {
struct netns_dccp dccp;
#endif
#ifdef CONFIG_NETFILTER
struct netns_nf nf;
struct netns_xt xt;
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct netns_ct ct;
Expand Down
Loading

0 comments on commit d166582

Please sign in to comment.