Skip to content

Commit

Permalink
mlxsw: spectrum_acl_bloom_filter: Add support for Spectrum-4 calculation
Browse files Browse the repository at this point in the history
Spectrum-4 calculates the hash function for the Bloom filter differently
from the existing ASICs.

First, two hash functions are used to calculate a 16-bit result.
The final result is a combination of the two results - the 6 bits which
are the result of CRC-6 are used as MSB and the 10 bits which are the
result of CRC-10 are used as LSB.

Second, in Spectrum-{2,3} each chunk is padded, so the chunks use a
sequence of whole bytes, whereas in Spectrum-4 there is no padding and
each chunk uses 20 bytes minus 2 bits. It is therefore necessary to align
the chunks so that there are no holes between them.

Add dedicated 'mlxsw_sp_acl_bf_ops' for Spectrum-4 and add the required
tables for CRC calculations.

All the details are documented as part of the code for future use.

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Amit Cohen authored and Jakub Kicinski committed Jan 7, 2022
1 parent 58723d2 commit 852ee41
Show file tree
Hide file tree
Showing 2 changed files with 252 additions and 16 deletions.
1 change: 1 addition & 0 deletions drivers/net/ethernet/mellanox/mlxsw/spectrum.h
Original file line number Diff line number Diff line change
Expand Up @@ -1111,6 +1111,7 @@ extern const struct mlxsw_afk_ops mlxsw_sp4_afk_ops;

/* spectrum_acl_bloom_filter.c */
extern const struct mlxsw_sp_acl_bf_ops mlxsw_sp2_acl_bf_ops;
extern const struct mlxsw_sp_acl_bf_ops mlxsw_sp4_acl_bf_ops;

/* spectrum_matchall.c */
struct mlxsw_sp_mall_ops {
Expand Down
267 changes: 251 additions & 16 deletions drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,18 @@ struct mlxsw_sp_acl_bf {
};

/* Bloom filter uses a crc-16 hash over chunks of data which contain 4 key
* blocks, eRP ID and region ID. In Spectrum-2, region key is combined of up to
* 12 key blocks, so there can be up to 3 chunks in the Bloom filter key,
* depending on the actual number of key blocks used in the region.
* blocks, eRP ID and region ID. In Spectrum-2 and above, region key is combined
* of up to 12 key blocks, so there can be up to 3 chunks in the Bloom filter
* key, depending on the actual number of key blocks used in the region.
* The layout of the Bloom filter key is as follows:
*
* +-------------------------+------------------------+------------------------+
* | Chunk 2 Key blocks 11-8 | Chunk 1 Key blocks 7-4 | Chunk 0 Key blocks 3-0 |
* +-------------------------+------------------------+------------------------+
*/
#define MLXSW_BLOOM_KEY_CHUNKS 3

/* Spectrum-2 and Spectrum-3 chunks */
#define MLXSW_SP2_BLOOM_KEY_LEN 69

/* Each chunk size is 23 bytes. 18 bytes of it contain 4 key blocks, each is
Expand All @@ -51,19 +53,9 @@ struct mlxsw_sp_acl_bf {
*/
#define MLXSW_SP2_BLOOM_CHUNK_KEY_OFFSET 5

/* Each chunk contains 4 key blocks. Chunk 2 uses key blocks 11-8,
* and we need to populate it with 4 key blocks copied from the entry encoded
* key. Since the encoded key contains a padding, key block 11 starts at offset
* 2. block 7 that is used in chunk 1 starts at offset 20 as 4 key blocks take
* 18 bytes.
* This array defines key offsets for easy access when copying key blocks from
* entry key to Bloom filter chunk.
*/
static const u8 chunk_key_offsets[MLXSW_BLOOM_KEY_CHUNKS] = {2, 20, 38};

/* This table is just the CRC of each possible byte. It is
* computed, Msbit first, for the Bloom filter polynomial
* which is 0x8529 (1 + x^3 + x^5 + x^8 + x^10 + x^15 and
/* This table holds the CRC of each possible byte and is used for
 * Spectrum-{2,3}. It is computed, Msbit first, for the Bloom filter
 * polynomial which is 0x8529 (1 + x^3 + x^5 + x^8 + x^10 + x^15 and
 * the implicit x^16).
 */
static const u16 mlxsw_sp2_acl_bf_crc16_tab[256] = {
Expand Down Expand Up @@ -101,6 +93,127 @@ static const u16 mlxsw_sp2_acl_bf_crc16_tab[256] = {
0x0c4c, 0x8965, 0x8337, 0x061e, 0x9793, 0x12ba, 0x18e8, 0x9dc1,
};

/* Spectrum-4 chunks */
#define MLXSW_SP4_BLOOM_KEY_LEN 60

/* In Spectrum-4, there is no padding. Each chunk size is 20 bytes.
 * 18 bytes of it contain 4 key blocks, each is 36 bits, and 2 bytes which hold
 * eRP ID and region ID.
 * The layout of each chunk is as follows:
 *
 * +----------------------+-----------------------------------+
 * |        2 bytes       |              18 bytes             |
 * +-----------+----------+-----------------------------------+
 * |  157:148  | 147:144  |               143:0               |
 * +-----------+----------+-----------------------------------+
 * | region ID |  eRP ID  |      4 Key blocks (18 Bytes)      |
 * +-----------+----------+-----------------------------------+
 */

#define MLXSW_SP4_BLOOM_CHUNK_PAD_BYTES 0
#define MLXSW_SP4_BLOOM_CHUNK_KEY_BYTES 18
#define MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES 20

/* The offset of the key block within a chunk is 2 bytes as it comes after
* 16 bits of region ID and eRP ID.
*/
#define MLXSW_SP4_BLOOM_CHUNK_KEY_OFFSET 2

/* For Spectrum-4, two hash functions are used, CRC-10 and CRC-6 based.
* The result is combination of the two calculations -
* 6 bit column are MSB (result of CRC-6),
* 10 bit row are LSB (result of CRC-10).
*/

/* Byte-wise lookup table for the CRC-10 used by the Spectrum-4 Bloom filter.
 * Entry i is the CRC of the single byte i, computed Msbit first, for the
 * Bloom filter polynomial which is 0x1b (1 + x^1 + x^3 + x^4 and the implicit
 * x^10). Every entry therefore fits in 10 bits. The table is indexed by
 * mlxsw_sp4_acl_bf_crc10_byte().
 */
static const u16 mlxsw_sp4_acl_bf_crc10_tab[256] = {
0x0000, 0x001b, 0x0036, 0x002d, 0x006c, 0x0077, 0x005a, 0x0041,
0x00d8, 0x00c3, 0x00ee, 0x00f5, 0x00b4, 0x00af, 0x0082, 0x0099,
0x01b0, 0x01ab, 0x0186, 0x019d, 0x01dc, 0x01c7, 0x01ea, 0x01f1,
0x0168, 0x0173, 0x015e, 0x0145, 0x0104, 0x011f, 0x0132, 0x0129,
0x0360, 0x037b, 0x0356, 0x034d, 0x030c, 0x0317, 0x033a, 0x0321,
0x03b8, 0x03a3, 0x038e, 0x0395, 0x03d4, 0x03cf, 0x03e2, 0x03f9,
0x02d0, 0x02cb, 0x02e6, 0x02fd, 0x02bc, 0x02a7, 0x028a, 0x0291,
0x0208, 0x0213, 0x023e, 0x0225, 0x0264, 0x027f, 0x0252, 0x0249,
0x02db, 0x02c0, 0x02ed, 0x02f6, 0x02b7, 0x02ac, 0x0281, 0x029a,
0x0203, 0x0218, 0x0235, 0x022e, 0x026f, 0x0274, 0x0259, 0x0242,
0x036b, 0x0370, 0x035d, 0x0346, 0x0307, 0x031c, 0x0331, 0x032a,
0x03b3, 0x03a8, 0x0385, 0x039e, 0x03df, 0x03c4, 0x03e9, 0x03f2,
0x01bb, 0x01a0, 0x018d, 0x0196, 0x01d7, 0x01cc, 0x01e1, 0x01fa,
0x0163, 0x0178, 0x0155, 0x014e, 0x010f, 0x0114, 0x0139, 0x0122,
0x000b, 0x0010, 0x003d, 0x0026, 0x0067, 0x007c, 0x0051, 0x004a,
0x00d3, 0x00c8, 0x00e5, 0x00fe, 0x00bf, 0x00a4, 0x0089, 0x0092,
0x01ad, 0x01b6, 0x019b, 0x0180, 0x01c1, 0x01da, 0x01f7, 0x01ec,
0x0175, 0x016e, 0x0143, 0x0158, 0x0119, 0x0102, 0x012f, 0x0134,
0x001d, 0x0006, 0x002b, 0x0030, 0x0071, 0x006a, 0x0047, 0x005c,
0x00c5, 0x00de, 0x00f3, 0x00e8, 0x00a9, 0x00b2, 0x009f, 0x0084,
0x02cd, 0x02d6, 0x02fb, 0x02e0, 0x02a1, 0x02ba, 0x0297, 0x028c,
0x0215, 0x020e, 0x0223, 0x0238, 0x0279, 0x0262, 0x024f, 0x0254,
0x037d, 0x0366, 0x034b, 0x0350, 0x0311, 0x030a, 0x0327, 0x033c,
0x03a5, 0x03be, 0x0393, 0x0388, 0x03c9, 0x03d2, 0x03ff, 0x03e4,
0x0376, 0x036d, 0x0340, 0x035b, 0x031a, 0x0301, 0x032c, 0x0337,
0x03ae, 0x03b5, 0x0398, 0x0383, 0x03c2, 0x03d9, 0x03f4, 0x03ef,
0x02c6, 0x02dd, 0x02f0, 0x02eb, 0x02aa, 0x02b1, 0x029c, 0x0287,
0x021e, 0x0205, 0x0228, 0x0233, 0x0272, 0x0269, 0x0244, 0x025f,
0x0016, 0x000d, 0x0020, 0x003b, 0x007a, 0x0061, 0x004c, 0x0057,
0x00ce, 0x00d5, 0x00f8, 0x00e3, 0x00a2, 0x00b9, 0x0094, 0x008f,
0x01a6, 0x01bd, 0x0190, 0x018b, 0x01ca, 0x01d1, 0x01fc, 0x01e7,
0x017e, 0x0165, 0x0148, 0x0153, 0x0112, 0x0109, 0x0124, 0x013f,
};

/* Byte-wise lookup table for the CRC-6 used by the Spectrum-4 Bloom filter.
 * Entry i is the CRC of the single byte i, computed Msbit first, for the
 * Bloom filter polynomial which is 0x2d (1 + x^2 + x^3 + x^5 and the implicit
 * x^6). Every entry therefore fits in 6 bits. The table is indexed by
 * mlxsw_sp4_acl_bf_crc6_byte().
 */
static const u8 mlxsw_sp4_acl_bf_crc6_tab[256] = {
0x00, 0x2d, 0x37, 0x1a, 0x03, 0x2e, 0x34, 0x19,
0x06, 0x2b, 0x31, 0x1c, 0x05, 0x28, 0x32, 0x1f,
0x0c, 0x21, 0x3b, 0x16, 0x0f, 0x22, 0x38, 0x15,
0x0a, 0x27, 0x3d, 0x10, 0x09, 0x24, 0x3e, 0x13,
0x18, 0x35, 0x2f, 0x02, 0x1b, 0x36, 0x2c, 0x01,
0x1e, 0x33, 0x29, 0x04, 0x1d, 0x30, 0x2a, 0x07,
0x14, 0x39, 0x23, 0x0e, 0x17, 0x3a, 0x20, 0x0d,
0x12, 0x3f, 0x25, 0x08, 0x11, 0x3c, 0x26, 0x0b,
0x30, 0x1d, 0x07, 0x2a, 0x33, 0x1e, 0x04, 0x29,
0x36, 0x1b, 0x01, 0x2c, 0x35, 0x18, 0x02, 0x2f,
0x3c, 0x11, 0x0b, 0x26, 0x3f, 0x12, 0x08, 0x25,
0x3a, 0x17, 0x0d, 0x20, 0x39, 0x14, 0x0e, 0x23,
0x28, 0x05, 0x1f, 0x32, 0x2b, 0x06, 0x1c, 0x31,
0x2e, 0x03, 0x19, 0x34, 0x2d, 0x00, 0x1a, 0x37,
0x24, 0x09, 0x13, 0x3e, 0x27, 0x0a, 0x10, 0x3d,
0x22, 0x0f, 0x15, 0x38, 0x21, 0x0c, 0x16, 0x3b,
0x0d, 0x20, 0x3a, 0x17, 0x0e, 0x23, 0x39, 0x14,
0x0b, 0x26, 0x3c, 0x11, 0x08, 0x25, 0x3f, 0x12,
0x01, 0x2c, 0x36, 0x1b, 0x02, 0x2f, 0x35, 0x18,
0x07, 0x2a, 0x30, 0x1d, 0x04, 0x29, 0x33, 0x1e,
0x15, 0x38, 0x22, 0x0f, 0x16, 0x3b, 0x21, 0x0c,
0x13, 0x3e, 0x24, 0x09, 0x10, 0x3d, 0x27, 0x0a,
0x19, 0x34, 0x2e, 0x03, 0x1a, 0x37, 0x2d, 0x00,
0x1f, 0x32, 0x28, 0x05, 0x1c, 0x31, 0x2b, 0x06,
0x3d, 0x10, 0x0a, 0x27, 0x3e, 0x13, 0x09, 0x24,
0x3b, 0x16, 0x0c, 0x21, 0x38, 0x15, 0x0f, 0x22,
0x31, 0x1c, 0x06, 0x2b, 0x32, 0x1f, 0x05, 0x28,
0x37, 0x1a, 0x00, 0x2d, 0x34, 0x19, 0x03, 0x2e,
0x25, 0x08, 0x12, 0x3f, 0x26, 0x0b, 0x11, 0x3c,
0x23, 0x0e, 0x14, 0x39, 0x20, 0x0d, 0x17, 0x3a,
0x29, 0x04, 0x1e, 0x33, 0x2a, 0x07, 0x1d, 0x30,
0x2f, 0x02, 0x18, 0x35, 0x2c, 0x01, 0x1b, 0x36,
};

/* Each chunk contains 4 key blocks. Chunk 2 uses key blocks 11-8,
 * and we need to populate it with 4 key blocks copied from the entry encoded
 * key. The original keys layout is the same for Spectrum-{2,3,4}.
 * Since the encoded key contains 2 bytes of padding, key block 11 starts at
 * offset 2. Block 7, which is used in chunk 1, starts at offset 20 as 4 key
 * blocks take 18 bytes; the third offset, 38, follows the same 18-byte stride.
 * See 'MLXSW_SP2_AFK_BLOCK_LAYOUT' for more details.
 * This array defines key offsets for easy access when copying key blocks from
 * entry key to Bloom filter chunk.
 */
static const u8 chunk_key_offsets[MLXSW_BLOOM_KEY_CHUNKS] = {2, 20, 38};

static u16 mlxsw_sp2_acl_bf_crc16_byte(u16 crc, u8 c)
{
return (crc << 8) ^ mlxsw_sp2_acl_bf_crc16_tab[(crc >> 8) ^ c];
Expand Down Expand Up @@ -168,6 +281,124 @@ mlxsw_sp2_acl_bf_index_get(struct mlxsw_sp_acl_bf *bf,
return mlxsw_sp2_acl_bf_crc(bf_key, bf_size);
}

/* Fold one input byte into a running CRC-10 value.
 *
 * @crc: CRC accumulated so far.
 * @c: next input byte.
 *
 * Returns the updated CRC, masked to 10 bits.
 */
static u16 mlxsw_sp4_acl_bf_crc10_byte(u16 crc, u8 c)
{
	/* Bits 9:2 of the running value, mixed with the new byte, select the
	 * table entry; storing into a u8 keeps only the low 8 bits.
	 */
	u8 tab_idx = (crc >> 2) ^ c;
	u16 next = crc << 8;

	next ^= mlxsw_sp4_acl_bf_crc10_tab[tab_idx];

	return next & 0x3ff;
}

/* Fold one input byte into a running CRC-6 value.
 *
 * @crc: CRC accumulated so far.
 * @c: next input byte.
 *
 * Returns the updated CRC. The result is masked with 0xfc, i.e. the 6-bit
 * value is carried in bits 7:2 and the two low bits are always zero.
 */
static u16 mlxsw_sp4_acl_bf_crc6_byte(u16 crc, u8 c)
{
	/* Storing into a u8 keeps only the low 8 bits of the mix. */
	u8 tab_idx = crc ^ c;
	u16 shifted_crc = crc << 6;
	u16 tab_val = mlxsw_sp4_acl_bf_crc6_tab[tab_idx] << 2;

	return (shifted_crc ^ tab_val) & 0xfc;
}

/* Compute the Spectrum-4 Bloom filter hash of a buffer.
 *
 * @buffer: bytes to hash.
 * @len: number of bytes in @buffer.
 *
 * Two CRCs are run over the same bytes: a CRC-10 ("row") and a CRC-6
 * ("column"). Returns a 16-bit value with the column in the 6 most
 * significant bits and the row in the 10 least significant bits.
 */
static u16 mlxsw_sp4_acl_bf_crc(const u8 *buffer, size_t len)
{
	u16 row = 0;
	u16 col = 0;
	size_t i;

	for (i = 0; i < len; i++) {
		row = mlxsw_sp4_acl_bf_crc10_byte(row, buffer[i]);
		col = mlxsw_sp4_acl_bf_crc6_byte(col, buffer[i]);
	}

	/* The CRC-6 helper returns its value in bits 7:2; bring it down to
	 * bits 5:0 before placing it above the 10 row bits.
	 */
	col >>= 2;

	return (col << 10) | row;
}

/* Shift the bit string held in arr[0..len] right by @shift_bits bits,
 * in place.
 *
 * @arr: buffer to shift. Note that arr[len] is written as well: its top
 *       @shift_bits bits are replaced by the bits shifted out of
 *       arr[len - 1], so the caller must make sure that byte exists.
 * @len: number of bytes that are shifted.
 * @shift_bits: shift amount, must be in the range 1-7; any other value
 *              triggers a warning and leaves the buffer untouched.
 *
 * The bytes are handled as unsigned values so that zeros are shifted into
 * the top of arr[0] regardless of whether plain 'char' is signed, and so
 * that no negative value is ever left-shifted.
 */
static void right_shift_array(char *arr, u8 len, u8 shift_bits)
{
	u8 *bytes = (u8 *)arr;
	u8 carry_shift, byte_mask;
	int i;

	if (WARN_ON(!shift_bits || shift_bits >= 8))
		return;

	/* Derived only after shift_bits is known to be valid. */
	carry_shift = 8 - shift_bits;
	byte_mask = 0xff >> shift_bits;

	for (i = len - 1; i >= 0; i--) {
		/* The first iteration looks like an out-of-bounds access,
		 * but it actually references the buffer that the array is
		 * shifted into. This is legal as the last chunk is never
		 * passed to this function.
		 */
		bytes[i + 1] &= byte_mask;
		bytes[i + 1] |= bytes[i] << carry_shift;
		bytes[i] >>= shift_bits;
	}
}

/* Close the gaps between the chunks of a Spectrum-4 Bloom filter key.
 *
 * @chunk_count: number of chunks that were encoded into @output.
 * @output: the encoded key, chunk 0 first.
 *
 * The chunks are supposed to be continuous, with no padding. Region ID and
 * eRP ID use 14 bits and not a full 2 bytes, so every chunk starts with two
 * unused bits. Each chunk that is followed by another one is shifted right
 * so that it fills the unused bits of its successor. Counts other than 2
 * and 3 leave the key unchanged, and a count above MLXSW_BLOOM_KEY_CHUNKS
 * triggers a warning.
 */
static void mlxsw_sp4_bf_key_shift_chunks(u8 chunk_count, char *output)
{
	if (chunk_count == 2) {
		/* Layout before shifting:
		 * +-------------+-----------------+
		 * |   Chunk 0   |     Chunk 1     |
		 * | IDs | keys  |(**) IDs | keys  |
		 * +-------------+-----------------+
		 * (**) marks two unused bits, so chunk 0 moves two bits
		 * right.
		 */
		right_shift_array(output, MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES, 2);
		return;
	}

	if (chunk_count == 3) {
		char *chunk1 = output + MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES;

		/* Layout before shifting:
		 * +-------------+-----------------+-----------------+
		 * |   Chunk 0   |     Chunk 1     |     Chunk 2     |
		 * | IDs | keys  |(**) IDs | keys  |(**) IDs | keys  |
		 * +-------------+-----------------+-----------------+
		 * Each (**) marks two unused bits. Chunk 1 moves two bits
		 * right first, and then chunk 0 moves four bits right.
		 */
		right_shift_array(chunk1, MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES, 2);
		right_shift_array(output, MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES, 4);
		return;
	}

	WARN_ON(chunk_count > MLXSW_BLOOM_KEY_CHUNKS);
}

/* Build the Spectrum-4 Bloom filter key of an A-TCAM entry.
 *
 * @aregion: A-TCAM region the entry belongs to.
 * @aentry: entry whose key is encoded.
 * @output: buffer that receives the encoded key.
 * @len: passed through to the common encoder, which reports the key length
 *       there.
 *
 * The common encoder is invoked with the Spectrum-4 chunk geometry and the
 * resulting chunks are then shifted so that they are laid out back to back.
 */
static void
mlxsw_sp4_acl_bf_key_encode(struct mlxsw_sp_acl_atcam_region *aregion,
			    struct mlxsw_sp_acl_atcam_entry *aentry,
			    char *output, u8 *len)
{
	struct mlxsw_afk_key_info *info = aregion->region->key_info;
	u8 blocks, chunks;

	blocks = mlxsw_afk_key_info_blocks_count_get(info);
	/* Number of 4-block groups needed to hold all the key blocks. */
	chunks = 1 + ((blocks - 1) >> 2);

	__mlxsw_sp_acl_bf_key_encode(aregion, aentry, output, len,
				     MLXSW_BLOOM_KEY_CHUNKS,
				     MLXSW_SP4_BLOOM_CHUNK_PAD_BYTES,
				     MLXSW_SP4_BLOOM_CHUNK_KEY_OFFSET,
				     MLXSW_SP4_BLOOM_CHUNK_KEY_BYTES,
				     MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES);

	mlxsw_sp4_bf_key_shift_chunks(chunks, output);
}

/* Compute the Spectrum-4 Bloom filter index of an A-TCAM entry.
 *
 * @bf: Bloom filter instance (not used by this calculation).
 * @aregion: A-TCAM region the entry belongs to.
 * @aentry: entry to hash.
 *
 * Encodes the entry into a zero-initialized key buffer and returns the
 * Spectrum-4 hash of the encoded bytes.
 */
static unsigned int
mlxsw_sp4_acl_bf_index_get(struct mlxsw_sp_acl_bf *bf,
			   struct mlxsw_sp_acl_atcam_region *aregion,
			   struct mlxsw_sp_acl_atcam_entry *aentry)
{
	char key[MLXSW_SP4_BLOOM_KEY_LEN] = {};
	unsigned int index;
	u8 key_len;

	mlxsw_sp4_acl_bf_key_encode(aregion, aentry, key, &key_len);
	index = mlxsw_sp4_acl_bf_crc(key, key_len);

	return index;
}

static unsigned int
mlxsw_sp_acl_bf_rule_count_index_get(struct mlxsw_sp_acl_bf *bf,
unsigned int erp_bank,
Expand Down Expand Up @@ -285,3 +516,7 @@ void mlxsw_sp_acl_bf_fini(struct mlxsw_sp_acl_bf *bf)
/* Bloom filter operations whose index is computed by
 * mlxsw_sp2_acl_bf_index_get().
 */
const struct mlxsw_sp_acl_bf_ops mlxsw_sp2_acl_bf_ops = {
.index_get = mlxsw_sp2_acl_bf_index_get,
};

/* Bloom filter operations for Spectrum-4: the index is computed by
 * mlxsw_sp4_acl_bf_index_get().
 */
const struct mlxsw_sp_acl_bf_ops mlxsw_sp4_acl_bf_ops = {
.index_get = mlxsw_sp4_acl_bf_index_get,
};

0 comments on commit 852ee41

Please sign in to comment.