Skip to content

Commit

Permalink
IB/hfi1: Add functions to receive accelerated ipoib packets
Browse files Browse the repository at this point in the history
The IPoIB netdev will share receive contexts with the existing VNIC netdev.
To achieve that, a dummy netdev is allocated with hfi1_devdata to
own the receive contexts, and the IPoIB and VNIC netdevs will be put
on top of it. Each receive context is associated with a single
NAPI object.

This patch adds the functions to receive incoming packets for
accelerated ipoib.

Link: https://lore.kernel.org/r/20200511160631.173205.54184.stgit@awfm-01.aw.intel.com
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Sadanand Warrier <sadanand.warrier@intel.com>
Signed-off-by: Grzegorz Andrejczuk <grzegorz.andrejczuk@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
  • Loading branch information
Kaike Wan authored and Jason Gunthorpe committed May 21, 2020
1 parent 89dcaa3 commit 6991abc
Show file tree
Hide file tree
Showing 7 changed files with 355 additions and 2 deletions.
2 changes: 2 additions & 0 deletions drivers/infiniband/hw/hfi1/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@ hfi1-y := \
intr.o \
iowait.o \
ipoib_main.o \
ipoib_rx.o \
ipoib_tx.o \
mad.o \
mmu_rb.o \
msix.o \
netdev_rx.o \
opfn.o \
pcie.o \
pio.o \
Expand Down
92 changes: 91 additions & 1 deletion drivers/infiniband/hw/hfi1/driver.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright(c) 2015-2018 Intel Corporation.
* Copyright(c) 2015-2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
Expand Down Expand Up @@ -54,6 +54,7 @@
#include <linux/module.h>
#include <linux/prefetch.h>
#include <rdma/ib_verbs.h>
#include <linux/etherdevice.h>

#include "hfi.h"
#include "trace.h"
Expand All @@ -63,6 +64,9 @@
#include "vnic.h"
#include "fault.h"

#include "ipoib.h"
#include "netdev.h"

#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt

Expand Down Expand Up @@ -1550,6 +1554,81 @@ void handle_eflags(struct hfi1_packet *packet)
show_eflags_errs(packet);
}

static void hfi1_ipoib_ib_rcv(struct hfi1_packet *packet)
{
struct hfi1_ibport *ibp;
struct net_device *netdev;
struct hfi1_ctxtdata *rcd = packet->rcd;
struct napi_struct *napi = rcd->napi;
struct sk_buff *skb;
struct hfi1_netdev_rxq *rxq = container_of(napi,
struct hfi1_netdev_rxq, napi);
u32 extra_bytes;
u32 tlen, qpnum;
bool do_work, do_cnp;
struct hfi1_ipoib_dev_priv *priv;

trace_hfi1_rcvhdr(packet);

hfi1_setup_ib_header(packet);

packet->ohdr = &((struct ib_header *)packet->hdr)->u.oth;
packet->grh = NULL;

if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
return;
}

qpnum = ib_bth_get_qpn(packet->ohdr);
netdev = hfi1_netdev_get_data(rcd->dd, qpnum);
if (!netdev)
goto drop_no_nd;

trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));

/* handle congestion notifications */
do_work = hfi1_may_ecn(packet);
if (unlikely(do_work)) {
do_cnp = (packet->opcode != IB_OPCODE_CNP);
(void)hfi1_process_ecn_slowpath(hfi1_ipoib_priv(netdev)->qp,
packet, do_cnp);
}

/*
* We have split point after last byte of DETH
* lets strip padding and CRC and ICRC.
* tlen is whole packet len so we need to
* subtract header size as well.
*/
tlen = packet->tlen;
extra_bytes = ib_bth_get_pad(packet->ohdr) + (SIZE_OF_CRC << 2) +
packet->hlen;
if (unlikely(tlen < extra_bytes))
goto drop;

tlen -= extra_bytes;

skb = hfi1_ipoib_prepare_skb(rxq, tlen, packet->ebuf);
if (unlikely(!skb))
goto drop;

priv = hfi1_ipoib_priv(netdev);
hfi1_ipoib_update_rx_netstats(priv, 1, skb->len);

skb->dev = netdev;
skb->pkt_type = PACKET_HOST;
netif_receive_skb(skb);

return;

drop:
++netdev->stats.rx_dropped;
drop_no_nd:
ibp = rcd_to_iport(packet->rcd);
++ibp->rvp.n_pkt_drops;
}

/*
* The following functions are called by the interrupt handler. They are type
* specific handlers for each packet type.
Expand Down Expand Up @@ -1757,3 +1836,14 @@ const rhf_rcv_function_ptr normal_rhf_rcv_functions[] = {
[RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
[RHF_RCV_TYPE_INVALID7] = process_receive_invalid,
};

/*
 * Receive dispatch table for netdev contexts, indexed by RHF receive type.
 * Entries are grouped by handler rather than listed in index order; with
 * designated initializers the resulting table is the same either way.
 */
const rhf_rcv_function_ptr netdev_rhf_rcv_functions[] = {
	/* Types with a dedicated handler. */
	[RHF_RCV_TYPE_IB]       = hfi1_ipoib_ib_rcv,
	[RHF_RCV_TYPE_BYPASS]   = hfi1_vnic_bypass_rcv,
	[RHF_RCV_TYPE_ERROR]    = process_receive_error,

	/* Every remaining type goes to process_receive_invalid. */
	[RHF_RCV_TYPE_EXPECTED] = process_receive_invalid,
	[RHF_RCV_TYPE_EAGER]    = process_receive_invalid,
	[RHF_RCV_TYPE_INVALID5] = process_receive_invalid,
	[RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
	[RHF_RCV_TYPE_INVALID7] = process_receive_invalid,
};
5 changes: 4 additions & 1 deletion drivers/infiniband/hw/hfi1/hfi.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,8 @@ struct hfi1_ctxtdata {
intr_handler fast_handler;
/** slow handler */
intr_handler slow_handler;
/* napi pointer associated with netdev */
struct napi_struct *napi;
/* verbs rx_stats per rcd */
struct hfi1_opcode_stats_perctx *opstats;
/* clear interrupt mask */
Expand Down Expand Up @@ -985,7 +987,7 @@ typedef void (*hfi1_make_req)(struct rvt_qp *qp,
struct hfi1_pkt_state *ps,
struct rvt_swqe *wqe);
extern const rhf_rcv_function_ptr normal_rhf_rcv_functions[];

extern const rhf_rcv_function_ptr netdev_rhf_rcv_functions[];

/* return values for the RHF receive functions */
#define RHF_RCV_CONTINUE 0 /* keep going */
Expand Down Expand Up @@ -1417,6 +1419,7 @@ struct hfi1_devdata {
struct hfi1_vnic_data vnic;
/* Lock to protect IRQ SRC register access */
spinlock_t irq_src_lock;
struct net_device *dummy_netdev;

/* Keeps track of IPoIB RSM rule users */
atomic_t ipoib_rsm_usr_num;
Expand Down
18 changes: 18 additions & 0 deletions drivers/infiniband/hw/hfi1/ipoib.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,15 @@

#include "hfi.h"
#include "iowait.h"
#include "netdev.h"

#include <rdma/ib_verbs.h>

#define HFI1_IPOIB_ENTROPY_SHIFT 24

#define HFI1_IPOIB_TXREQ_NAME_LEN 32

#define HFI1_IPOIB_PSEUDO_LEN 20
#define HFI1_IPOIB_ENCAP_LEN 4

struct hfi1_ipoib_dev_priv;
Expand Down Expand Up @@ -118,6 +120,19 @@ hfi1_ipoib_priv(const struct net_device *dev)
return &((struct hfi1_ipoib_rdma_netdev *)netdev_priv(dev))->dev_priv;
}

/*
 * hfi1_ipoib_update_rx_netstats - account received traffic
 * @priv: IPoIB private data whose ->netstats holds the per-CPU counters
 * @packets: amount added to rx_packets
 * @bytes: amount added to rx_bytes
 *
 * Both counters of the current CPU's pcpu_sw_netstats are updated inside
 * one u64_stats_update_begin()/u64_stats_update_end() section.
 */
static inline void
hfi1_ipoib_update_rx_netstats(struct hfi1_ipoib_dev_priv *priv,
			      u64 packets,
			      u64 bytes)
{
	struct pcpu_sw_netstats *cpu_stats = this_cpu_ptr(priv->netstats);

	u64_stats_update_begin(&cpu_stats->syncp);
	cpu_stats->rx_bytes += bytes;
	cpu_stats->rx_packets += packets;
	u64_stats_update_end(&cpu_stats->syncp);
}

static inline void
hfi1_ipoib_update_tx_netstats(struct hfi1_ipoib_dev_priv *priv,
u64 packets,
Expand All @@ -142,6 +157,9 @@ void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv);
void hfi1_ipoib_napi_tx_enable(struct net_device *dev);
void hfi1_ipoib_napi_tx_disable(struct net_device *dev);

struct sk_buff *hfi1_ipoib_prepare_skb(struct hfi1_netdev_rxq *rxq,
int size, void *data);

int hfi1_ipoib_rn_get_params(struct ib_device *device,
u8 port_num,
enum rdma_netdev_t type,
Expand Down
71 changes: 71 additions & 0 deletions drivers/infiniband/hw/hfi1/ipoib_rx.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
* Copyright(c) 2020 Intel Corporation.
*
*/

#include "netdev.h"
#include "ipoib.h"

/*
 * Headroom, in bytes, used for frag-backed skbs: prepare_frag_skb() adds it
 * to the buffer size and reserves it with skb_reserve().
 */
#define HFI1_IPOIB_SKB_PAD ((NET_SKB_PAD) + (NET_IP_ALIGN))

/*
 * copy_ipoib_buf - fill an skb with a received IPoIB buffer
 * @skb: destination skb; must have at least @size bytes of tailroom
 * @data: start of the received buffer
 * @size: number of bytes to copy from @data
 *
 * The first 16 bits of @data are taken as skb->protocol.  The whole buffer
 * is then appended to the skb, the mac header offset is set to
 * HFI1_IPOIB_PSEUDO_LEN, and the leading HFI1_IPOIB_ENCAP_LEN bytes are
 * pulled off the front of the skb data.
 */
static void copy_ipoib_buf(struct sk_buff *skb, void *data, int size)
{
	skb_checksum_none_assert(skb);
	skb->protocol = *((__be16 *)data);

	/* skb_put_data() is skb_put() followed by memcpy() into the new tail */
	skb_put_data(skb, data, size);
	skb->mac_header = HFI1_IPOIB_PSEUDO_LEN;
	skb_pull(skb, HFI1_IPOIB_ENCAP_LEN);
}

/*
 * prepare_frag_skb - build an skb on top of a napi page fragment
 * @napi: napi object, used only for the fallback allocation
 * @size: number of data bytes the skb must be able to hold
 *
 * The fragment is sized for @size plus HFI1_IPOIB_SKB_PAD of headroom plus
 * struct skb_shared_info, each rounded with SKB_DATA_ALIGN().  If no
 * fragment can be allocated, napi_alloc_skb() is tried instead.  If
 * build_skb() fails, the fragment is freed again.
 *
 * Return: the new skb, or NULL on failure.
 */
static struct sk_buff *prepare_frag_skb(struct napi_struct *napi, int size)
{
	struct sk_buff *skb;
	void *buf;
	int buf_len;

	buf_len = SKB_DATA_ALIGN(size + HFI1_IPOIB_SKB_PAD);
	buf_len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	buf_len = SKB_DATA_ALIGN(buf_len);

	buf = napi_alloc_frag(buf_len);
	if (unlikely(!buf))
		return napi_alloc_skb(napi, size);

	skb = build_skb(buf, buf_len);
	if (likely(skb)) {
		skb_reserve(skb, HFI1_IPOIB_SKB_PAD);
		return skb;
	}

	/* build_skb() did not take the fragment, so release it here. */
	skb_free_frag(buf);
	return NULL;
}

/*
 * hfi1_ipoib_prepare_skb - allocate an skb and copy a received buffer in
 * @rxq: receive queue whose napi object is used for the allocation
 * @size: number of bytes to copy from @data
 * @data: start of the received buffer
 *
 * The skb is allocated for @size + HFI1_IPOIB_ENCAP_LEN bytes.  Buffers of
 * up to SKB_WITH_OVERHEAD(PAGE_SIZE) bytes use napi_alloc_skb(); larger
 * ones go through prepare_frag_skb().
 *
 * Return: the filled skb, or NULL if allocation failed.
 */
struct sk_buff *hfi1_ipoib_prepare_skb(struct hfi1_netdev_rxq *rxq,
				       int size, void *data)
{
	const int alloc_len = size + HFI1_IPOIB_ENCAP_LEN;
	struct sk_buff *skb;

	if (size > SKB_WITH_OVERHEAD(PAGE_SIZE))
		skb = prepare_frag_skb(&rxq->napi, alloc_len);
	else
		skb = napi_alloc_skb(&rxq->napi, alloc_len);

	if (likely(skb))
		copy_ipoib_buf(skb, data, size);

	return skb;
}
90 changes: 90 additions & 0 deletions drivers/infiniband/hw/hfi1/netdev.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
* Copyright(c) 2020 Intel Corporation.
*
*/

#ifndef HFI1_NETDEV_H
#define HFI1_NETDEV_H

#include "hfi.h"

#include <linux/netdevice.h>
#include <linux/xarray.h>

/**
 * struct hfi1_netdev_rxq - Receive queue for the HFI dummy netdev
 * @napi: napi object (embedded in this structure, not a pointer)
 * @priv: ptr to netdev_priv
 * @rcd: ptr to receive context data
 *
 * Both IPoIB and VNIC netdevices will be working on top of the dummy
 * netdev that owns these queues.
 */
struct hfi1_netdev_rxq {
struct napi_struct napi;
struct hfi1_netdev_priv *priv;
struct hfi1_ctxtdata *rcd;
};

/*
 * Number of netdev contexts used. Ensure it is less than or equal to
 * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE).
 */
#define HFI1_MAX_NETDEV_CTXTS 8

/* Number of NETDEV RSM entries: defined equal to HFI1_MAX_NETDEV_CTXTS */
#define NUM_NETDEV_MAP_ENTRIES HFI1_MAX_NETDEV_CTXTS

/**
 * struct hfi1_netdev_priv - data required to setup and run HFI netdev.
 * @dd: hfi1_devdata
 * @rxq: pointer to dummy netdev receive queues.
 * @num_rx_q: number of receive queues
 * @rmt_start: first free index in RMT Array
 * @dev_tbl: netdev table for unique identifier VNIC and IPoIB VLANs.
 * @enabled: atomic counter of netdevs enabling receive queues.
 * When 0 NAPI will be disabled.
 * @netdevs: atomic counter of netdevs using dummy netdev.
 * When 0 receive queues will be freed.
 */
struct hfi1_netdev_priv {
struct hfi1_devdata *dd;
struct hfi1_netdev_rxq *rxq;
int num_rx_q;
int rmt_start;
struct xarray dev_tbl;
/* count of enabled napi polls */
atomic_t enabled;
/* count of netdevs on top */
atomic_t netdevs;
};

/*
 * hfi1_netdev_priv - private data stored behind a net_device
 * @dev: the net_device
 *
 * Return: the address immediately following *@dev, typed as
 * struct hfi1_netdev_priv.  The caller must pass a net_device that was
 * allocated with that trailing area; the allocation is not visible here.
 */
static inline
struct hfi1_netdev_priv *hfi1_netdev_priv(struct net_device *dev)
{
	struct net_device *past_dev = dev + 1;

	return (struct hfi1_netdev_priv *)past_dev;
}

/*
 * hfi1_netdev_ctxt_count - number of receive queues of the dummy netdev
 * @dd: hfi1 device data; dd->dummy_netdev is dereferenced unchecked
 *
 * Return: num_rx_q from the dummy netdev's private data.
 */
static inline
int hfi1_netdev_ctxt_count(struct hfi1_devdata *dd)
{
	return hfi1_netdev_priv(dd->dummy_netdev)->num_rx_q;
}

/*
 * hfi1_netdev_get_ctxt - receive context of one dummy netdev queue
 * @dd: hfi1 device data; dd->dummy_netdev is dereferenced unchecked
 * @ctxt: index into the receive queue array; not range-checked here
 *
 * Return: the rcd pointer stored in receive queue @ctxt.
 */
static inline
struct hfi1_ctxtdata *hfi1_netdev_get_ctxt(struct hfi1_devdata *dd, int ctxt)
{
	return hfi1_netdev_priv(dd->dummy_netdev)->rxq[ctxt].rcd;
}

int hfi1_netdev_add_data(struct hfi1_devdata *dd, int id, void *data);
void *hfi1_netdev_remove_data(struct hfi1_devdata *dd, int id);
void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id);
void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id);

#endif /* HFI1_NETDEV_H */
Loading

0 comments on commit 6991abc

Please sign in to comment.