summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Mi <cmi@nvidia.com>2024-06-14 00:00:32 +0300
committerJakub Kicinski <kuba@kernel.org>2024-06-14 18:53:23 -0700
commit49d37d05f216ce026ebd297b76a2db5bff7a4b4d (patch)
treecf76a6448608a2b7503db7a41dbb2deb39631b9b
parente575d3a6dd22123888defb622b1742aa2d45b942 (diff)
net/mlx5: CT: Separate CT and CT-NAT tuple entries
Currently a ct entry is stored in both ct and ct-nat tables. ct action is directed to the ct table, while ct nat action is directed to the nat table. ct-nat entries perform the nat header rewrites, if required. The current design assures that a ct action will match in hardware even if the tuple has nat configured, it will just not execute it. However, storing each connection in two tables increases the system's memory consumption while reducing its insertion rate. Offload a connection to either ct or the ct-nat table. Add a miss fall-through rule from ct-nat table to the ct table allowing ct(nat) action on non-natted connections. ct action on natted connections, by default, will be handled by the software miss path. Signed-off-by: Paul Blakey <paulb@nvidia.com> Signed-off-by: Chris Mi <cmi@nvidia.com> Signed-off-by: Tariq Toukan <tariqt@nvidia.com> Link: https://lore.kernel.org/r/20240613210036.1125203-3-tariqt@nvidia.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c187
1 files changed, 143 insertions, 44 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index fadfa8b50beb..b49d87a51f21 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -69,6 +69,8 @@ struct mlx5_tc_ct_priv {
struct rhashtable ct_tuples_nat_ht;
struct mlx5_flow_table *ct;
struct mlx5_flow_table *ct_nat;
+ struct mlx5_flow_group *ct_nat_miss_group;
+ struct mlx5_flow_handle *ct_nat_miss_rule;
struct mlx5e_post_act *post_act;
struct mutex control_lock; /* guards parallel adds/dels */
struct mapping_ctx *zone_mapping;
@@ -141,6 +143,8 @@ struct mlx5_ct_counter {
enum {
MLX5_CT_ENTRY_FLAG_VALID,
+ MLX5_CT_ENTRY_IN_CT_TABLE,
+ MLX5_CT_ENTRY_IN_CT_NAT_TABLE,
};
struct mlx5_ct_entry {
@@ -198,9 +202,15 @@ static const struct rhashtable_params tuples_nat_ht_params = {
};
static bool
-mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry)
+mlx5_tc_ct_entry_in_ct_table(struct mlx5_ct_entry *entry)
{
- return !!(entry->tuple_nat_node.next);
+ return test_bit(MLX5_CT_ENTRY_IN_CT_TABLE, &entry->flags);
+}
+
+static bool
+mlx5_tc_ct_entry_in_ct_nat_table(struct mlx5_ct_entry *entry)
+{
+ return test_bit(MLX5_CT_ENTRY_IN_CT_NAT_TABLE, &entry->flags);
}
static int
@@ -526,8 +536,10 @@ static void
mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
struct mlx5_ct_entry *entry)
{
- mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
- mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
+ if (mlx5_tc_ct_entry_in_ct_nat_table(entry))
+ mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
+ if (mlx5_tc_ct_entry_in_ct_table(entry))
+ mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
atomic_dec(&ct_priv->debugfs.stats.offloaded);
}
@@ -814,7 +826,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
&zone_rule->mh,
zone_restore_id,
nat,
- mlx5_tc_ct_entry_has_nat(entry));
+ mlx5_tc_ct_entry_in_ct_nat_table(entry));
if (err) {
ct_dbg("Failed to create ct entry mod hdr");
goto err_mod_hdr;
@@ -888,7 +900,7 @@ mlx5_tc_ct_entry_replace_rule(struct mlx5_tc_ct_priv *ct_priv,
*old_attr = *attr;
err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, &mh, zone_restore_id,
- nat, mlx5_tc_ct_entry_has_nat(entry));
+ nat, mlx5_tc_ct_entry_in_ct_nat_table(entry));
if (err) {
ct_dbg("Failed to create ct entry mod hdr");
goto err_mod_hdr;
@@ -957,11 +969,13 @@ static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
{
struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
- rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
- &entry->tuple_nat_node,
- tuples_nat_ht_params);
- rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
- tuples_ht_params);
+ if (mlx5_tc_ct_entry_in_ct_nat_table(entry))
+ rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
+ &entry->tuple_nat_node,
+ tuples_nat_ht_params);
+ if (mlx5_tc_ct_entry_in_ct_table(entry))
+ rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
+ tuples_ht_params);
}
static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
@@ -1100,21 +1114,26 @@ mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
return err;
}
- err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
- zone_restore_id);
- if (err)
- goto err_orig;
+ if (mlx5_tc_ct_entry_in_ct_table(entry)) {
+ err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
+ zone_restore_id);
+ if (err)
+ goto err_orig;
+ }
- err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
- zone_restore_id);
- if (err)
- goto err_nat;
+ if (mlx5_tc_ct_entry_in_ct_nat_table(entry)) {
+ err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
+ zone_restore_id);
+ if (err)
+ goto err_nat;
+ }
atomic_inc(&ct_priv->debugfs.stats.offloaded);
return 0;
err_nat:
- mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
+ if (mlx5_tc_ct_entry_in_ct_table(entry))
+ mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
err_orig:
mlx5_tc_ct_counter_put(ct_priv, entry);
return err;
@@ -1128,15 +1147,19 @@ mlx5_tc_ct_entry_replace_rules(struct mlx5_tc_ct_priv *ct_priv,
{
int err;
- err = mlx5_tc_ct_entry_replace_rule(ct_priv, flow_rule, entry, false,
- zone_restore_id);
- if (err)
- return err;
+ if (mlx5_tc_ct_entry_in_ct_table(entry)) {
+ err = mlx5_tc_ct_entry_replace_rule(ct_priv, flow_rule, entry, false,
+ zone_restore_id);
+ if (err)
+ return err;
+ }
- err = mlx5_tc_ct_entry_replace_rule(ct_priv, flow_rule, entry, true,
- zone_restore_id);
- if (err)
- mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
+ if (mlx5_tc_ct_entry_in_ct_nat_table(entry)) {
+ err = mlx5_tc_ct_entry_replace_rule(ct_priv, flow_rule, entry, true,
+ zone_restore_id);
+ if (err && mlx5_tc_ct_entry_in_ct_table(entry))
+ mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
+ }
return err;
}
@@ -1224,18 +1247,24 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
if (err)
goto err_entries;
- err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
- &entry->tuple_node,
- tuples_ht_params);
- if (err)
- goto err_tuple;
-
if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht,
&entry->tuple_nat_node,
tuples_nat_ht_params);
if (err)
goto err_tuple_nat;
+
+ set_bit(MLX5_CT_ENTRY_IN_CT_NAT_TABLE, &entry->flags);
+ }
+
+ if (!mlx5_tc_ct_entry_in_ct_nat_table(entry)) {
+ err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
+ &entry->tuple_node,
+ tuples_ht_params);
+ if (err)
+ goto err_tuple;
+
+ set_bit(MLX5_CT_ENTRY_IN_CT_TABLE, &entry->flags);
}
spin_unlock_bh(&ct_priv->ht_lock);
@@ -1251,17 +1280,10 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
err_rules:
spin_lock_bh(&ct_priv->ht_lock);
- if (mlx5_tc_ct_entry_has_nat(entry))
- rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
- &entry->tuple_nat_node, tuples_nat_ht_params);
-err_tuple_nat:
- rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
- &entry->tuple_node,
- tuples_ht_params);
err_tuple:
- rhashtable_remove_fast(&ft->ct_entries_ht,
- &entry->node,
- cts_ht_params);
+ mlx5_tc_ct_entry_remove_from_tuples(entry);
+err_tuple_nat:
+ rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
err_entries:
spin_unlock_bh(&ct_priv->ht_lock);
err_set:
@@ -2149,6 +2171,76 @@ mlx5_ct_tc_remove_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
debugfs_remove_recursive(ct_priv->debugfs.root);
}
+static struct mlx5_flow_handle *
+tc_ct_add_miss_rule(struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft)
+{
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act act = {};
+
+ act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND;
+ act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = next_ft;
+
+ return mlx5_add_flow_rules(ft, NULL, &act, &dest, 1);
+}
+
+static int
+tc_ct_add_ct_table_miss_rule(struct mlx5_flow_table *from,
+ struct mlx5_flow_table *to,
+ struct mlx5_flow_group **miss_group,
+ struct mlx5_flow_handle **miss_rule)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *group;
+ struct mlx5_flow_handle *rule;
+ unsigned int max_fte = from->max_fte;
+ u32 *flow_group_in;
+ int err = 0;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ /* create miss group */
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index,
+ max_fte - 2);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+ max_fte - 1);
+ group = mlx5_create_flow_group(from, flow_group_in);
+ if (IS_ERR(group)) {
+ err = PTR_ERR(group);
+ goto err_miss_grp;
+ }
+
+ /* add miss rule to next fdb */
+ rule = tc_ct_add_miss_rule(from, to);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ goto err_miss_rule;
+ }
+
+ *miss_group = group;
+ *miss_rule = rule;
+ kvfree(flow_group_in);
+ return 0;
+
+err_miss_rule:
+ mlx5_destroy_flow_group(group);
+err_miss_grp:
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void
+tc_ct_del_ct_table_miss_rule(struct mlx5_flow_group *miss_group,
+ struct mlx5_flow_handle *miss_rule)
+{
+ mlx5_del_flow_rules(miss_rule);
+ mlx5_destroy_flow_group(miss_group);
+}
+
#define INIT_ERR_PREFIX "tc ct offload init failed"
struct mlx5_tc_ct_priv *
@@ -2212,6 +2304,12 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
goto err_ct_nat_tbl;
}
+ err = tc_ct_add_ct_table_miss_rule(ct_priv->ct_nat, ct_priv->ct,
+ &ct_priv->ct_nat_miss_group,
+ &ct_priv->ct_nat_miss_rule);
+ if (err)
+ goto err_ct_zone_ht;
+
ct_priv->post_act = post_act;
mutex_init(&ct_priv->control_lock);
if (rhashtable_init(&ct_priv->zone_ht, &zone_params))
@@ -2273,6 +2371,7 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
ct_priv->fs_ops->destroy(ct_priv->fs);
kfree(ct_priv->fs);
+ tc_ct_del_ct_table_miss_rule(ct_priv->ct_nat_miss_group, ct_priv->ct_nat_miss_rule);
mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
mlx5_chains_destroy_global_table(chains, ct_priv->ct);
mapping_destroy(ct_priv->zone_mapping);