diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 7e97cb55a6bf..b0ec175cc6ba 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -43,6 +43,9 @@ #include <linux/mlx5/vport.h> #include <rdma/ib_smi.h> #include <rdma/ib_umem.h> +#include <linux/in.h> +#include <linux/etherdevice.h> +#include <linux/mlx5/fs.h> #include "user.h" #include "mlx5_ib.h" @@ -835,6 +838,457 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd) return 0; } +static bool outer_header_zero(u32 *match_criteria) +{ + int size = MLX5_ST_SZ_BYTES(fte_match_param); + char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers); + + return outer_headers_c[0] == 0 && !memcmp(outer_headers_c, + outer_headers_c + 1, + size - 1); +} + +static int parse_flow_attr(u32 *match_c, u32 *match_v, + union ib_flow_spec *ib_spec) +{ + void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers); + void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers); + switch (ib_spec->type) { + case IB_FLOW_SPEC_ETH: + if (ib_spec->size != sizeof(ib_spec->eth)) + return -EINVAL; + + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + dmac_47_16), + ib_spec->eth.mask.dst_mac); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + dmac_47_16), + ib_spec->eth.val.dst_mac); + + if (ib_spec->eth.mask.vlan_tag) { + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + vlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + vlan_tag, 1); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + first_vid, ntohs(ib_spec->eth.mask.vlan_tag)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + first_vid, ntohs(ib_spec->eth.val.vlan_tag)); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + first_cfi, + ntohs(ib_spec->eth.mask.vlan_tag) >> 12); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + first_cfi, + ntohs(ib_spec->eth.val.vlan_tag) >> 12); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + first_prio, + ntohs(ib_spec->eth.mask.vlan_tag) >> 13); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + first_prio, + ntohs(ib_spec->eth.val.vlan_tag) >> 13); + } + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + ethertype, ntohs(ib_spec->eth.mask.ether_type)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + ethertype, ntohs(ib_spec->eth.val.ether_type)); + break; + case IB_FLOW_SPEC_IPV4: + if (ib_spec->size != sizeof(ib_spec->ipv4)) + return -EINVAL; + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + ethertype, 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + ethertype, ETH_P_IP); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.mask.src_ip, + sizeof(ib_spec->ipv4.mask.src_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.val.src_ip, + sizeof(ib_spec->ipv4.val.src_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.mask.dst_ip, + sizeof(ib_spec->ipv4.mask.dst_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.val.dst_ip, + sizeof(ib_spec->ipv4.val.dst_ip)); + break; + case IB_FLOW_SPEC_TCP: + if (ib_spec->size != sizeof(ib_spec->tcp_udp)) + return -EINVAL; + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, + 0xff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol, + IPPROTO_TCP); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
tcp_sport, + ntohs(ib_spec->tcp_udp.mask.src_port)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_sport, + ntohs(ib_spec->tcp_udp.val.src_port)); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport, + ntohs(ib_spec->tcp_udp.mask.dst_port)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_dport, + ntohs(ib_spec->tcp_udp.val.dst_port)); + break; + case IB_FLOW_SPEC_UDP: + if (ib_spec->size != sizeof(ib_spec->tcp_udp)) + return -EINVAL; + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, + 0xff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol, + IPPROTO_UDP); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_sport, + ntohs(ib_spec->tcp_udp.mask.src_port)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_sport, + ntohs(ib_spec->tcp_udp.val.src_port)); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_dport, + ntohs(ib_spec->tcp_udp.mask.dst_port)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport, + ntohs(ib_spec->tcp_udp.val.dst_port)); + break; + default: + return -EINVAL; + } + + return 0; +} + +/* If a flow could catch both multicast and unicast packets, + * it won't fall into the multicast flow steering table and this rule + * could steal other multicast packets. + */ +static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr) +{ + struct ib_flow_spec_eth *eth_spec; + + if (ib_attr->type != IB_FLOW_ATTR_NORMAL || + ib_attr->size < sizeof(struct ib_flow_attr) + + sizeof(struct ib_flow_spec_eth) || + ib_attr->num_of_specs < 1) + return false; + + eth_spec = (struct ib_flow_spec_eth *)(ib_attr + 1); + if (eth_spec->type != IB_FLOW_SPEC_ETH || + eth_spec->size != sizeof(*eth_spec)) + return false; + + return is_multicast_ether_addr(eth_spec->mask.dst_mac) && + is_multicast_ether_addr(eth_spec->val.dst_mac); +} + +static bool is_valid_attr(struct ib_flow_attr *flow_attr) +{ + union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1); + bool has_ipv4_spec = false; + bool eth_type_ipv4 = true; + unsigned int spec_index; + + /* Validate that ethertype is correct */ + for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { + if (ib_spec->type == IB_FLOW_SPEC_ETH && + ib_spec->eth.mask.ether_type) { + if (!((ib_spec->eth.mask.ether_type == htons(0xffff)) && + ib_spec->eth.val.ether_type == htons(ETH_P_IP))) + eth_type_ipv4 = false; + } else if (ib_spec->type == IB_FLOW_SPEC_IPV4) { + has_ipv4_spec = true; + } + ib_spec = (void *)ib_spec + ib_spec->size; + } + return !has_ipv4_spec || eth_type_ipv4; +} + +static void put_flow_table(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *prio, bool ft_added) +{ + prio->refcount -= !!ft_added; + if (!prio->refcount) { + mlx5_destroy_flow_table(prio->flow_table); + prio->flow_table = NULL; + } +} + +static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) +{ + struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device); + struct mlx5_ib_flow_handler *handler = container_of(flow_id, + struct mlx5_ib_flow_handler, + ibflow); + struct mlx5_ib_flow_handler *iter, *tmp; + + mutex_lock(&dev->flow_db.lock); + + list_for_each_entry_safe(iter, tmp, &handler->list, list) { + mlx5_del_flow_rule(iter->rule); + list_del(&iter->list); + kfree(iter); + } + + mlx5_del_flow_rule(handler->rule); + put_flow_table(dev, &dev->flow_db.prios[handler->prio], true); + mutex_unlock(&dev->flow_db.lock); + + kfree(handler); + + return 0; +} + +#define MLX5_FS_MAX_TYPES 10 +#define MLX5_FS_MAX_ENTRIES 32000UL +static struct mlx5_ib_flow_prio *get_flow_table(struct 
mlx5_ib_dev *dev, + struct ib_flow_attr *flow_attr) +{ + struct mlx5_flow_namespace *ns = NULL; + struct mlx5_ib_flow_prio *prio; + struct mlx5_flow_table *ft; + int num_entries; + int num_groups; + int priority; + int err = 0; + + if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { + if (flow_is_multicast_only(flow_attr)) + priority = MLX5_IB_FLOW_MCAST_PRIO; + else + priority = flow_attr->priority; + ns = mlx5_get_flow_namespace(dev->mdev, + MLX5_FLOW_NAMESPACE_BYPASS); + num_entries = MLX5_FS_MAX_ENTRIES; + num_groups = MLX5_FS_MAX_TYPES; + prio = &dev->flow_db.prios[priority]; + } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || + flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { + ns = mlx5_get_flow_namespace(dev->mdev, + MLX5_FLOW_NAMESPACE_LEFTOVERS); + build_leftovers_ft_param(&priority, + &num_entries, + &num_groups); + prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO]; + } + + if (!ns) + return ERR_PTR(-ENOTSUPP); + + ft = prio->flow_table; + if (!ft) { + ft = mlx5_create_auto_grouped_flow_table(ns, priority, + num_entries, + num_groups); + + if (!IS_ERR(ft)) { + prio->refcount = 0; + prio->flow_table = ft; + } else { + err = PTR_ERR(ft); + } + } + + return err ? ERR_PTR(err) : prio; +} + +static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst) +{ + struct mlx5_flow_table *ft = ft_prio->flow_table; + struct mlx5_ib_flow_handler *handler; + void *ib_flow = flow_attr + 1; + u8 match_criteria_enable = 0; + unsigned int spec_index; + u32 *match_c; + u32 *match_v; + int err = 0; + + if (!is_valid_attr(flow_attr)) + return ERR_PTR(-EINVAL); + + match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + handler = kzalloc(sizeof(*handler), GFP_KERNEL); + if (!handler || !match_c || !match_v) { + err = -ENOMEM; + goto free; + } + + INIT_LIST_HEAD(&handler->list); + + for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { + err = parse_flow_attr(match_c, match_v, ib_flow); + if (err < 0) + goto free; + + ib_flow += ((union ib_flow_spec *)ib_flow)->size; + } + + /* Outer header support only */ + match_criteria_enable = (!outer_header_zero(match_c)) << 0; + handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable, + match_c, match_v, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, + dst); + + if (IS_ERR(handler->rule)) { + err = PTR_ERR(handler->rule); + goto free; + } + + handler->prio = ft_prio - dev->flow_db.prios; + + ft_prio->flow_table = ft; +free: + if (err) + kfree(handler); + kfree(match_c); + kfree(match_v); + return err ? 
ERR_PTR(err) : handler; +} + +enum { + LEFTOVERS_MC, + LEFTOVERS_UC, +}; + +static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst) +{ + struct mlx5_ib_flow_handler *handler_ucast = NULL; + struct mlx5_ib_flow_handler *handler = NULL; + + static struct { + struct ib_flow_attr flow_attr; + struct ib_flow_spec_eth eth_flow; + } leftovers_specs[] = { + [LEFTOVERS_MC] = { + .flow_attr = { + .num_of_specs = 1, + .size = sizeof(leftovers_specs[0]) + }, + .eth_flow = { + .type = IB_FLOW_SPEC_ETH, + .size = sizeof(struct ib_flow_spec_eth), + .mask = {.dst_mac = {0x1} }, + .val = {.dst_mac = {0x1} } + } + }, + [LEFTOVERS_UC] = { + .flow_attr = { + .num_of_specs = 1, + .size = sizeof(leftovers_specs[0]) + }, + .eth_flow = { + .type = IB_FLOW_SPEC_ETH, + .size = sizeof(struct ib_flow_spec_eth), + .mask = {.dst_mac = {0x1} }, + .val = {.dst_mac = {} } + } + } + }; + + handler = create_flow_rule(dev, ft_prio, + &leftovers_specs[LEFTOVERS_MC].flow_attr, + dst); + if (!IS_ERR(handler) && + flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) { + handler_ucast = create_flow_rule(dev, ft_prio, + &leftovers_specs[LEFTOVERS_UC].flow_attr, + dst); + if (IS_ERR(handler_ucast)) { + kfree(handler); + handler = handler_ucast; + } else { + list_add(&handler_ucast->list, &handler->list); + } + } + + return handler; +} + +static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, + struct ib_flow_attr *flow_attr, + int domain) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_ib_flow_handler *handler = NULL; + struct mlx5_flow_destination *dst = NULL; + struct mlx5_ib_flow_prio *ft_prio; + int err; + + if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) + return ERR_PTR(-ENOSPC); + + if (domain != IB_FLOW_DOMAIN_USER || + flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) || + flow_attr->flags) + return ERR_PTR(-EINVAL); + + dst = kzalloc(sizeof(*dst), GFP_KERNEL); + if (!dst) + return ERR_PTR(-ENOMEM); + + mutex_lock(&dev->flow_db.lock); + + ft_prio = get_flow_table(dev, flow_attr); + if (IS_ERR(ft_prio)) { + err = PTR_ERR(ft_prio); + goto unlock; + } + + dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn; + + if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { + handler = create_flow_rule(dev, ft_prio, flow_attr, + dst); + } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || + flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { + handler = create_leftovers_rule(dev, ft_prio, flow_attr, + dst); + } else { + err = -EINVAL; + goto destroy_ft; + } + + if (IS_ERR(handler)) { + err = PTR_ERR(handler); + handler = NULL; + goto destroy_ft; + } + + ft_prio->refcount++; + mutex_unlock(&dev->flow_db.lock); + kfree(dst); + + return &handler->ibflow; + +destroy_ft: + put_flow_table(dev, ft_prio, false); +unlock: + mutex_unlock(&dev->flow_db.lock); + kfree(dst); + kfree(handler); + return ERR_PTR(err); +} + static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); @@ -1439,10 +1893,19 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); } + if (mlx5_ib_port_link_layer(&dev->ib_dev) == + IB_LINK_LAYER_ETHERNET) { + dev->ib_dev.create_flow = mlx5_ib_create_flow; + dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow; + dev->ib_dev.uverbs_ex_cmd_mask |= + (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); 
+ } err = init_node_data(dev); if (err) goto err_dealloc; + mutex_init(&dev->flow_db.lock); mutex_init(&dev->cap_mask_mutex); err = create_dev_resources(&dev->devr); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 633347260b79..1474cccd1e0f 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -105,6 +105,36 @@ struct mlx5_ib_pd { u32 pdn; }; +#define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1) +#define MLX5_IB_FLOW_LAST_PRIO (MLX5_IB_FLOW_MCAST_PRIO - 1) +#if (MLX5_IB_FLOW_LAST_PRIO <= 0) +#error "Invalid number of bypass priorities" +#endif +#define MLX5_IB_FLOW_LEFTOVERS_PRIO (MLX5_IB_FLOW_MCAST_PRIO + 1) + +#define MLX5_IB_NUM_FLOW_FT (MLX5_IB_FLOW_LEFTOVERS_PRIO + 1) +struct mlx5_ib_flow_prio { + struct mlx5_flow_table *flow_table; + unsigned int refcount; +}; + +struct mlx5_ib_flow_handler { + struct list_head list; + struct ib_flow ibflow; + unsigned int prio; + struct mlx5_flow_rule *rule; +}; + +struct mlx5_ib_flow_db { + struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT]; + /* Protect flow steering bypass flow tables + * when add/del flow rules. + * only single add/removal of flow steering rule could be done + * simultaneously. + */ + struct mutex lock; +}; + /* Use macros here so that don't have to duplicate * enum ib_send_flags and enum ib_qp_type for low-level driver */ @@ -171,9 +201,21 @@ struct mlx5_ib_pfault { struct mlx5_pagefault mpfault; }; +struct mlx5_ib_rq { + u32 tirn; +}; + +struct mlx5_ib_raw_packet_qp { + struct mlx5_ib_rq rq; +}; + struct mlx5_ib_qp { struct ib_qp ibqp; - struct mlx5_core_qp mqp; + union { + struct mlx5_core_qp mqp; + struct mlx5_ib_raw_packet_qp raw_packet_qp; + }; + struct mlx5_buf buf; struct mlx5_db db; @@ -431,6 +473,7 @@ struct mlx5_ib_dev { */ struct srcu_struct mr_srcu; #endif + struct mlx5_ib_flow_db flow_db; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 5096f4f336bd..a9894d2e8e26 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -38,9 +38,28 @@ #include "fs_cmd.h" #include "mlx5_core.h" +int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft) +{ + u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)]; + u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(set_flow_table_root_in, in, opcode, + MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); + MLX5_SET(set_flow_table_root_in, in, table_type, ft->type); + MLX5_SET(set_flow_table_root_in, in, table_id, ft->id); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} + int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, enum fs_flow_table_type type, unsigned int level, - unsigned int log_size, unsigned int *table_id) + unsigned int log_size, struct mlx5_flow_table + *next_ft, unsigned int *table_id) { u32 out[MLX5_ST_SZ_DW(create_flow_table_out)]; u32 in[MLX5_ST_SZ_DW(create_flow_table_in)]; @@ -51,6 +70,10 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE); + if (next_ft) { + MLX5_SET(create_flow_table_in, in, table_miss_mode, 1); + MLX5_SET(create_flow_table_in, in, table_miss_id, next_ft->id); + } MLX5_SET(create_flow_table_in, in, table_type, type); MLX5_SET(create_flow_table_in, in, 
level, level); MLX5_SET(create_flow_table_in, in, log_size, log_size); @@ -83,6 +106,33 @@ int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, sizeof(out)); } +int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) +{ + u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)]; + u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(modify_flow_table_in, in, opcode, + MLX5_CMD_OP_MODIFY_FLOW_TABLE); + MLX5_SET(modify_flow_table_in, in, table_type, ft->type); + MLX5_SET(modify_flow_table_in, in, table_id, ft->id); + MLX5_SET(modify_flow_table_in, in, modify_field_select, + MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID); + if (next_ft) { + MLX5_SET(modify_flow_table_in, in, table_miss_mode, 1); + MLX5_SET(modify_flow_table_in, in, table_miss_id, next_ft->id); + } else { + MLX5_SET(modify_flow_table_in, in, table_miss_mode, 0); + } + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} + int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, u32 *in, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index f39304ede186..9814d4784803 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -35,11 +35,16 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, enum fs_flow_table_type type, unsigned int level, - unsigned int log_size, unsigned int *table_id); + unsigned int log_size, struct mlx5_flow_table + *next_ft, unsigned int *table_id); int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft); +int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft); + int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, u32 *in, unsigned int *group_id); @@ -62,4 +67,6 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, unsigned int index); +int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index f7d62fe595f6..6f68dba8d7ed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -40,51 +40,83 @@ #define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ sizeof(struct init_tree_node)) -#define INIT_PRIO(min_level_val, max_ft_val,\ - start_level_val, ...) {.type = FS_TYPE_PRIO,\ +#define ADD_PRIO(num_prios_val, min_level_val, max_ft_val, caps_val,\ + ...) {.type = FS_TYPE_PRIO,\ .min_ft_level = min_level_val,\ - .start_level = start_level_val,\ .max_ft = max_ft_val,\ + .num_leaf_prios = num_prios_val,\ + .caps = caps_val,\ .children = (struct init_tree_node[]) {__VA_ARGS__},\ .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \ } -#define ADD_PRIO(min_level_val, max_ft_val, start_level_val, ...)\ - INIT_PRIO(min_level_val, max_ft_val, start_level_val,\ - __VA_ARGS__)\ - -#define ADD_FT_PRIO(max_ft_val, start_level_val, ...)\ - INIT_PRIO(0, max_ft_val, start_level_val,\ - __VA_ARGS__)\ +#define ADD_MULTIPLE_PRIO(num_prios_val, max_ft_val, ...)\ + ADD_PRIO(num_prios_val, 0, max_ft_val, {},\ + __VA_ARGS__)\ #define ADD_NS(...) 
{.type = FS_TYPE_NAMESPACE,\ .children = (struct init_tree_node[]) {__VA_ARGS__},\ .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \ } -#define KERNEL_START_LEVEL 0 -#define KERNEL_P0_START_LEVEL KERNEL_START_LEVEL +#define INIT_CAPS_ARRAY_SIZE(...) (sizeof((long[]){__VA_ARGS__}) /\ + sizeof(long)) + +#define FS_CAP(cap) (__mlx5_bit_off(flow_table_nic_cap, cap)) + +#define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \ + .caps = (long[]) {__VA_ARGS__} } + +#define LEFTOVERS_MAX_FT 1 +#define LEFTOVERS_NUM_PRIOS 1 +#define BY_PASS_PRIO_MAX_FT 1 +#define BY_PASS_MIN_LEVEL (KENREL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\ + LEFTOVERS_MAX_FT) + #define KERNEL_MAX_FT 2 +#define KERNEL_NUM_PRIOS 1 #define KENREL_MIN_LEVEL 2 + +struct node_caps { + size_t arr_sz; + long *caps; +}; static struct init_tree_node { enum fs_node_type type; struct init_tree_node *children; int ar_size; + struct node_caps caps; int min_ft_level; + int num_leaf_prios; int prio; int max_ft; - int start_level; } root_fs = { .type = FS_TYPE_NAMESPACE, - .ar_size = 1, + .ar_size = 3, .children = (struct init_tree_node[]) { - ADD_PRIO(KENREL_MIN_LEVEL, KERNEL_MAX_FT, - KERNEL_START_LEVEL, - ADD_NS(ADD_FT_PRIO(KERNEL_MAX_FT, - KERNEL_P0_START_LEVEL))), + ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, + FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), + FS_CAP(flow_table_properties_nic_receive.modify_root), + FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), + FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), + ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, BY_PASS_PRIO_MAX_FT))), + ADD_PRIO(0, KENREL_MIN_LEVEL, 0, {}, + ADD_NS(ADD_MULTIPLE_PRIO(KERNEL_NUM_PRIOS, KERNEL_MAX_FT))), + ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, + FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), + FS_CAP(flow_table_properties_nic_receive.modify_root), + FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), + FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), + ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_MAX_FT))), } }; +enum fs_i_mutex_lock_class { + FS_MUTEX_GRANDPARENT, + FS_MUTEX_PARENT, + FS_MUTEX_CHILD +}; + static void del_rule(struct fs_node *node); static void del_flow_table(struct fs_node *node); static void del_flow_group(struct fs_node *node); @@ -119,10 +151,11 @@ static void tree_get_node(struct fs_node *node) atomic_inc(&node->refcount); } -static void nested_lock_ref_node(struct fs_node *node) +static void nested_lock_ref_node(struct fs_node *node, + enum fs_i_mutex_lock_class class) { if (node) { - mutex_lock_nested(&node->lock, SINGLE_DEPTH_NESTING); + mutex_lock_nested(&node->lock, class); atomic_inc(&node->refcount); } } @@ -436,10 +469,162 @@ static struct mlx5_flow_table *alloc_flow_table(int level, int max_fte, return ft; } +/* If reverse is false, then we search for the first flow table in the + * root sub-tree from start(closest from right), else we search for the + * last flow table in the root sub-tree till start(closest from left). + */ +static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root, + struct list_head *start, + bool reverse) +{ +#define list_advance_entry(pos, reverse) \ + ((reverse) ? 
list_prev_entry(pos, list) : list_next_entry(pos, list)) + +#define list_for_each_advance_continue(pos, head, reverse) \ + for (pos = list_advance_entry(pos, reverse); \ + &pos->list != (head); \ + pos = list_advance_entry(pos, reverse)) + + struct fs_node *iter = list_entry(start, struct fs_node, list); + struct mlx5_flow_table *ft = NULL; + + if (!root) + return NULL; + + list_for_each_advance_continue(iter, &root->children, reverse) { + if (iter->type == FS_TYPE_FLOW_TABLE) { + fs_get_obj(ft, iter); + return ft; + } + ft = find_closest_ft_recursive(iter, &iter->children, reverse); + if (ft) + return ft; + } + + return ft; +} + +/* If reverse is false then return the first flow table in next priority of + * prio in the tree, else return the last flow table in the previous priority + * of prio in the tree. + */ +static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool reverse) +{ + struct mlx5_flow_table *ft = NULL; + struct fs_node *curr_node; + struct fs_node *parent; + + parent = prio->node.parent; + curr_node = &prio->node; + while (!ft && parent) { + ft = find_closest_ft_recursive(parent, &curr_node->list, reverse); + curr_node = parent; + parent = curr_node->parent; + } + return ft; +} + +/* Assuming all the tree is locked by mutex chain lock */ +static struct mlx5_flow_table *find_next_chained_ft(struct fs_prio *prio) +{ + return find_closest_ft(prio, false); +} + +/* Assuming all the tree is locked by mutex chain lock */ +static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio) +{ + return find_closest_ft(prio, true); +} + +static int connect_fts_in_prio(struct mlx5_core_dev *dev, + struct fs_prio *prio, + struct mlx5_flow_table *ft) +{ + struct mlx5_flow_table *iter; + int i = 0; + int err; + + fs_for_each_ft(iter, prio) { + i++; + err = mlx5_cmd_modify_flow_table(dev, + iter, + ft); + if (err) { + mlx5_core_warn(dev, "Failed to modify flow table %d\n", + iter->id); + /* The driver is out of sync with the FW */ + if (i > 1) + WARN_ON(true); + return err; + } + } + return 0; +} + +/* Connect flow tables from previous priority of prio to ft */ +static int connect_prev_fts(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct fs_prio *prio) +{ + struct mlx5_flow_table *prev_ft; + + prev_ft = find_prev_chained_ft(prio); + if (prev_ft) { + struct fs_prio *prev_prio; + + fs_get_obj(prev_prio, prev_ft->node.parent); + return connect_fts_in_prio(dev, prev_prio, ft); + } + return 0; +} + +static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio + *prio) +{ + struct mlx5_flow_root_namespace *root = find_root(&prio->node); + int min_level = INT_MAX; + int err; + + if (root->root_ft) + min_level = root->root_ft->level; + + if (ft->level >= min_level) + return 0; + + err = mlx5_cmd_update_root_ft(root->dev, ft); + if (err) + mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n", + ft->id); + else + root->root_ft = ft; + + return err; +} + +static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, + struct fs_prio *prio) +{ + int err = 0; + + /* Connect_prev_fts and update_root_ft_create are mutually exclusive */ + + if (list_empty(&prio->node.children)) { + err = connect_prev_fts(dev, ft, prio); + if (err) + return err; + } + + if (MLX5_CAP_FLOWTABLE(dev, + flow_table_properties_nic_receive.modify_root)) + err = update_root_ft_create(ft, prio); + return err; +} + struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, int prio, int max_fte) { + struct
mlx5_flow_table *next_ft = NULL; struct mlx5_flow_table *ft; int err; int log_table_sz; @@ -452,14 +637,15 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, return ERR_PTR(-ENODEV); } + mutex_lock(&root->chain_lock); fs_prio = find_prio(ns, prio); - if (!fs_prio) - return ERR_PTR(-EINVAL); - - lock_ref_node(&fs_prio->node); + if (!fs_prio) { + err = -EINVAL; + goto unlock_root; + } if (fs_prio->num_ft == fs_prio->max_ft) { err = -ENOSPC; - goto unlock_prio; + goto unlock_root; } ft = alloc_flow_table(find_next_free_level(fs_prio), @@ -467,32 +653,63 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, root->table_type); if (!ft) { err = -ENOMEM; - goto unlock_prio; + goto unlock_root; } tree_init_node(&ft->node, 1, del_flow_table); log_table_sz = ilog2(ft->max_fte); + next_ft = find_next_chained_ft(fs_prio); err = mlx5_cmd_create_flow_table(root->dev, ft->type, ft->level, - log_table_sz, &ft->id); + log_table_sz, next_ft, &ft->id); if (err) goto free_ft; + err = connect_flow_table(root->dev, ft, fs_prio); + if (err) + goto destroy_ft; + lock_ref_node(&fs_prio->node); tree_add_node(&ft->node, &fs_prio->node); list_add_tail(&ft->node.list, &fs_prio->node.children); fs_prio->num_ft++; unlock_ref_node(&fs_prio->node); - + mutex_unlock(&root->chain_lock); return ft; - +destroy_ft: + mlx5_cmd_destroy_flow_table(root->dev, ft); free_ft: kfree(ft); -unlock_prio: - unlock_ref_node(&fs_prio->node); +unlock_root: + mutex_unlock(&root->chain_lock); return ERR_PTR(err); } -struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, - u32 *fg_in) +struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, + int prio, + int num_flow_table_entries, + int max_num_groups) +{ + struct mlx5_flow_table *ft; + + if (max_num_groups > num_flow_table_entries) + return ERR_PTR(-EINVAL); + + ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries); + if (IS_ERR(ft)) + return ft; + + ft->autogroup.active = true; + ft->autogroup.required_groups = max_num_groups; + + return ft; +} +EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table); + +/* Flow table should be locked */ +static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table *ft, + u32 *fg_in, + struct list_head + *prev_fg, + bool is_auto_fg) { struct mlx5_flow_group *fg; struct mlx5_core_dev *dev = get_dev(&ft->node); @@ -505,18 +722,33 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, if (IS_ERR(fg)) return fg; - lock_ref_node(&ft->node); err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id); if (err) { kfree(fg); - unlock_ref_node(&ft->node); return ERR_PTR(err); } + + if (ft->autogroup.active) + ft->autogroup.num_groups++; /* Add node to tree */ - tree_init_node(&fg->node, 1, del_flow_group); + tree_init_node(&fg->node, !is_auto_fg, del_flow_group); tree_add_node(&fg->node, &ft->node); /* Add node to group list */ list_add(&fg->node.list, ft->node.children.prev); + + return fg; +} + +struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, + u32 *fg_in) +{ + struct mlx5_flow_group *fg; + + if (ft->autogroup.active) + return ERR_PTR(-EPERM); + + lock_ref_node(&ft->node); + fg = create_flow_group_common(ft, fg_in, &ft->node.children, false); unlock_ref_node(&ft->node); return fg; @@ -614,7 +846,63 @@ static struct fs_fte *create_fte(struct mlx5_flow_group *fg, return fte; } -/* Assuming parent fg(flow table) is locked */ +static struct mlx5_flow_group *create_autogroup(struct 
mlx5_flow_table *ft, + u8 match_criteria_enable, + u32 *match_criteria) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct list_head *prev = &ft->node.children; + unsigned int candidate_index = 0; + struct mlx5_flow_group *fg; + void *match_criteria_addr; + unsigned int group_size = 0; + u32 *in; + + if (!ft->autogroup.active) + return ERR_PTR(-ENOENT); + + in = mlx5_vzalloc(inlen); + if (!in) + return ERR_PTR(-ENOMEM); + + if (ft->autogroup.num_groups < ft->autogroup.required_groups) + /* We save place for flow groups in addition to max types */ + group_size = ft->max_fte / (ft->autogroup.required_groups + 1); + + /* ft->max_fte == ft->autogroup.max_types */ + if (group_size == 0) + group_size = 1; + + /* sorted by start_index */ + fs_for_each_fg(fg, ft) { + if (candidate_index + group_size > fg->start_index) + candidate_index = fg->start_index + fg->max_ftes; + else + break; + prev = &fg->node.list; + } + + if (candidate_index + group_size > ft->max_fte) { + fg = ERR_PTR(-ENOSPC); + goto out; + } + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, + match_criteria_enable); + MLX5_SET(create_flow_group_in, in, start_flow_index, candidate_index); + MLX5_SET(create_flow_group_in, in, end_flow_index, candidate_index + + group_size - 1); + match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in, + in, match_criteria); + memcpy(match_criteria_addr, match_criteria, + MLX5_ST_SZ_BYTES(fte_match_param)); + + fg = create_flow_group_common(ft, in, prev, true); +out: + kvfree(in); + return fg; +} + static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg, u32 *match_value, u8 action, @@ -626,9 +914,9 @@ static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg, struct mlx5_flow_table *ft; struct list_head *prev; - lock_ref_node(&fg->node); + nested_lock_ref_node(&fg->node, FS_MUTEX_PARENT); fs_for_each_fte(fte, fg) { - nested_lock_ref_node(&fte->node); + nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); if (compare_match_value(&fg->mask, match_value, &fte->val) && action == fte->action && flow_tag == fte->flow_tag) { rule = add_rule_fte(fte, fg, dest); @@ -669,6 +957,33 @@ unlock_fg: return rule; } +static struct mlx5_flow_rule *add_rule_to_auto_fg(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u8 action, + u32 flow_tag, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_rule *rule; + struct mlx5_flow_group *g; + + g = create_autogroup(ft, match_criteria_enable, match_criteria); + if (IS_ERR(g)) + return (void *)g; + + rule = add_rule_fg(g, match_value, + action, flow_tag, dest); + if (IS_ERR(rule)) { + /* Remove assumes refcount > 0 and autogroup creates a group + * with a refcount = 0. 
+ */ + tree_get_node(&g->node); + tree_remove_node(&g->node); + } + return rule; +} + struct mlx5_flow_rule * mlx5_add_flow_rule(struct mlx5_flow_table *ft, u8 match_criteria_enable, @@ -679,39 +994,115 @@ mlx5_add_flow_rule(struct mlx5_flow_table *ft, struct mlx5_flow_destination *dest) { struct mlx5_flow_group *g; - struct mlx5_flow_rule *rule = ERR_PTR(-EINVAL); + struct mlx5_flow_rule *rule; - tree_get_node(&ft->node); - lock_ref_node(&ft->node); + nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT); fs_for_each_fg(g, ft) if (compare_match_criteria(g->mask.match_criteria_enable, match_criteria_enable, g->mask.match_criteria, match_criteria)) { - unlock_ref_node(&ft->node); rule = add_rule_fg(g, match_value, action, flow_tag, dest); - goto put; + if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) + goto unlock; } + + rule = add_rule_to_auto_fg(ft, match_criteria_enable, match_criteria, + match_value, action, flow_tag, dest); +unlock: unlock_ref_node(&ft->node); -put: - tree_put_node(&ft->node); return rule; } +EXPORT_SYMBOL(mlx5_add_flow_rule); void mlx5_del_flow_rule(struct mlx5_flow_rule *rule) { tree_remove_node(&rule->node); } +EXPORT_SYMBOL(mlx5_del_flow_rule); + +/* Assuming prio->node.children(flow tables) is sorted by level */ +static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft) +{ + struct fs_prio *prio; + + fs_get_obj(prio, ft->node.parent); + + if (!list_is_last(&ft->node.list, &prio->node.children)) + return list_next_entry(ft, node.list); + return find_next_chained_ft(prio); +} + +static int update_root_ft_destroy(struct mlx5_flow_table *ft) +{ + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + struct mlx5_flow_table *new_root_ft = NULL; + + if (root->root_ft != ft) + return 0; + + new_root_ft = find_next_ft(ft); + if (new_root_ft) { + int err = mlx5_cmd_update_root_ft(root->dev, new_root_ft); + + if (err) { + mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n", + ft->id); + return err; + } + root->root_ft = new_root_ft; + } + return 0; +} + +/* Connect flow table from previous priority to + * the next flow table. 
+ */ +static int disconnect_flow_table(struct mlx5_flow_table *ft) +{ + struct mlx5_core_dev *dev = get_dev(&ft->node); + struct mlx5_flow_table *next_ft; + struct fs_prio *prio; + int err = 0; + + err = update_root_ft_destroy(ft); + if (err) + return err; + + fs_get_obj(prio, ft->node.parent); + if (!(list_first_entry(&prio->node.children, + struct mlx5_flow_table, + node.list) == ft)) + return 0; + + next_ft = find_next_chained_ft(prio); + err = connect_prev_fts(dev, next_ft, prio); + if (err) + mlx5_core_warn(dev, "Failed to disconnect flow table %d\n", + ft->id); + return err; +} int mlx5_destroy_flow_table(struct mlx5_flow_table *ft) { + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + int err = 0; + + mutex_lock(&root->chain_lock); + err = disconnect_flow_table(ft); + if (err) { + mutex_unlock(&root->chain_lock); + return err; + } if (tree_remove_node(&ft->node)) mlx5_core_warn(get_dev(&ft->node), "Flow table %d wasn't destroyed, refcount > 1\n", ft->id); + mutex_unlock(&root->chain_lock); - return 0; + return err; } +EXPORT_SYMBOL(mlx5_destroy_flow_table); void mlx5_destroy_flow_group(struct mlx5_flow_group *fg) { @@ -732,8 +1123,10 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, return NULL; switch (type) { + case MLX5_FLOW_NAMESPACE_BYPASS: case MLX5_FLOW_NAMESPACE_KERNEL: - prio = 0; + case MLX5_FLOW_NAMESPACE_LEFTOVERS: + prio = type; break; case MLX5_FLOW_NAMESPACE_FDB: if (dev->priv.fdb_root_ns) @@ -754,10 +1147,10 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, return ns; } +EXPORT_SYMBOL(mlx5_get_flow_namespace); static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, - unsigned prio, int max_ft, - int start_level) + unsigned prio, int max_ft) { struct fs_prio *fs_prio; @@ -770,7 +1163,6 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, tree_add_node(&fs_prio->node, &ns->node); fs_prio->max_ft = max_ft; fs_prio->prio = prio; - fs_prio->start_level = start_level; list_add_tail(&fs_prio->node.list, &ns->node.children); return fs_prio; @@ -800,11 +1192,45 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio) return ns; } -static int init_root_tree_recursive(int max_ft_level, struct init_tree_node *init_node, +static int create_leaf_prios(struct mlx5_flow_namespace *ns, struct init_tree_node + *prio_metadata) +{ + struct fs_prio *fs_prio; + int i; + + for (i = 0; i < prio_metadata->num_leaf_prios; i++) { + fs_prio = fs_create_prio(ns, i, prio_metadata->max_ft); + if (IS_ERR(fs_prio)) + return PTR_ERR(fs_prio); + } + return 0; +} + +#define FLOW_TABLE_BIT_SZ 1 +#define GET_FLOW_TABLE_CAP(dev, offset) \ + ((be32_to_cpu(*((__be32 *)(dev->hca_caps_cur[MLX5_CAP_FLOW_TABLE]) + \ + offset / 32)) >> \ + (32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ) +static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps) +{ + int i; + + for (i = 0; i < caps->arr_sz; i++) { + if (!GET_FLOW_TABLE_CAP(dev, caps->caps[i])) + return false; + } + return true; +} + +static int init_root_tree_recursive(struct mlx5_core_dev *dev, + struct init_tree_node *init_node, struct fs_node *fs_parent_node, struct init_tree_node *init_parent_node, int index) { + int max_ft_level = MLX5_CAP_FLOWTABLE(dev, + flow_table_properties_nic_receive. 
+ max_ft_level); struct mlx5_flow_namespace *fs_ns; struct fs_prio *fs_prio; struct fs_node *base; @@ -812,12 +1238,14 @@ static int init_root_tree_recursive(int max_ft_level, struct init_tree_node *ini int err; if (init_node->type == FS_TYPE_PRIO) { - if (init_node->min_ft_level > max_ft_level) - return -ENOTSUPP; + if ((init_node->min_ft_level > max_ft_level) || + !has_required_caps(dev, &init_node->caps)) + return 0; fs_get_obj(fs_ns, fs_parent_node); - fs_prio = fs_create_prio(fs_ns, index, init_node->max_ft, - init_node->start_level); + if (init_node->num_leaf_prios) + return create_leaf_prios(fs_ns, init_node); + fs_prio = fs_create_prio(fs_ns, index, init_node->max_ft); if (IS_ERR(fs_prio)) return PTR_ERR(fs_prio); base = &fs_prio->node; @@ -831,9 +1259,8 @@ static int init_root_tree_recursive(int max_ft_level, struct init_tree_node *ini return -EINVAL; } for (i = 0; i < init_node->ar_size; i++) { - err = init_root_tree_recursive(max_ft_level, - &init_node->children[i], base, - init_node, i); + err = init_root_tree_recursive(dev, &init_node->children[i], + base, init_node, i); if (err) return err; } @@ -841,7 +1268,8 @@ static int init_root_tree_recursive(int max_ft_level, struct init_tree_node *ini return 0; } -static int init_root_tree(int max_ft_level, struct init_tree_node *init_node, +static int init_root_tree(struct mlx5_core_dev *dev, + struct init_tree_node *init_node, struct fs_node *fs_parent_node) { int i; @@ -850,8 +1278,7 @@ static int init_root_tree(int max_ft_level, struct init_tree_node *init_node, fs_get_obj(fs_ns, fs_parent_node); for (i = 0; i < init_node->ar_size; i++) { - err = init_root_tree_recursive(max_ft_level, - &init_node->children[i], + err = init_root_tree_recursive(dev, &init_node->children[i], &fs_ns->node, init_node, i); if (err) @@ -877,25 +1304,65 @@ static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_core_dev *dev ns = &root_ns->ns; fs_init_namespace(ns); + mutex_init(&root_ns->chain_lock); tree_init_node(&ns->node, 1, NULL); tree_add_node(&ns->node, NULL); return root_ns; } +static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level); + +static int set_prio_attrs_in_ns(struct mlx5_flow_namespace *ns, int acc_level) +{ + struct fs_prio *prio; + + fs_for_each_prio(prio, ns) { + /* This updates prio start_level and max_ft */ + set_prio_attrs_in_prio(prio, acc_level); + acc_level += prio->max_ft; + } + return acc_level; +} + +static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level) +{ + struct mlx5_flow_namespace *ns; + int acc_level_ns = acc_level; + + prio->start_level = acc_level; + fs_for_each_ns(ns, prio) + /* This updates start_level and max_ft of ns's priority descendants */ + acc_level_ns = set_prio_attrs_in_ns(ns, acc_level); + if (!prio->max_ft) + prio->max_ft = acc_level_ns - prio->start_level; + WARN_ON(prio->max_ft < acc_level_ns - prio->start_level); +} + +static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns) +{ + struct mlx5_flow_namespace *ns = &root_ns->ns; + struct fs_prio *prio; + int start_level = 0; + + fs_for_each_prio(prio, ns) { + set_prio_attrs_in_prio(prio, start_level); + start_level += prio->max_ft; + } +} + static int init_root_ns(struct mlx5_core_dev *dev) { - int max_ft_level = MLX5_CAP_FLOWTABLE(dev, - flow_table_properties_nic_receive. 
- max_ft_level); dev->priv.root_ns = create_root_ns(dev, FS_FT_NIC_RX); if (IS_ERR_OR_NULL(dev->priv.root_ns)) goto cleanup; - if (init_root_tree(max_ft_level, &root_fs, &dev->priv.root_ns->ns.node)) + if (init_root_tree(dev, &root_fs, &dev->priv.root_ns->ns.node)) goto cleanup; + set_prio_attrs(dev->priv.root_ns); + return 0; cleanup: @@ -1019,7 +1486,7 @@ static int init_fdb_root_ns(struct mlx5_core_dev *dev) return -ENOMEM; /* Create single prio */ - prio = fs_create_prio(&dev->priv.fdb_root_ns->ns, 0, 1, 0); + prio = fs_create_prio(&dev->priv.fdb_root_ns->ns, 0, 1); if (IS_ERR(prio)) { cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns); return PTR_ERR(prio); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 4ebb97fd5544..00245fd7e4bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -77,6 +77,11 @@ struct mlx5_flow_table { unsigned int max_fte; unsigned int level; enum fs_flow_table_type type; + struct { + bool active; + unsigned int required_groups; + unsigned int num_groups; + } autogroup; }; /* Type of children is mlx5_flow_rule */ @@ -124,6 +129,9 @@ struct mlx5_flow_root_namespace { struct mlx5_flow_namespace ns; enum fs_flow_table_type table_type; struct mlx5_core_dev *dev; + struct mlx5_flow_table *root_ft; + /* Should be held when chaining flow tables */ + struct mutex chain_lock; }; int mlx5_init_fs(struct mlx5_core_dev *dev); @@ -143,6 +151,12 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev); #define fs_for_each_prio(pos, ns) \ fs_list_for_each_entry(pos, &(ns)->node.children) +#define fs_for_each_ns(pos, prio) \ + fs_list_for_each_entry(pos, &(prio)->node.children) + +#define fs_for_each_ft(pos, prio) \ + fs_list_for_each_entry(pos, &(prio)->node.children) + #define fs_for_each_fg(pos, ft) \ fs_list_for_each_entry(pos, &(ft)->node.children) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index df2f79ef3cac..7be845e30689 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1258,4 +1258,6 @@ static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz; } +#define MLX5_BY_PASS_NUM_PRIOS 9 + #endif /* MLX5_DEVICE_H */ diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index bc7ad019afde..8230caa3fb6e 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -38,8 +38,20 @@ #define MLX5_FS_DEFAULT_FLOW_TAG 0x0 +#define LEFTOVERS_RULE_NUM 2 +static inline void build_leftovers_ft_param(int *priority, + int *n_ent, + int *n_grp) +{ + *priority = 0; /* Priority of leftovers_prio-0 */ + *n_ent = LEFTOVERS_RULE_NUM; + *n_grp = LEFTOVERS_RULE_NUM; +} + enum mlx5_flow_namespace_type { + MLX5_FLOW_NAMESPACE_BYPASS, MLX5_FLOW_NAMESPACE_KERNEL, + MLX5_FLOW_NAMESPACE_LEFTOVERS, MLX5_FLOW_NAMESPACE_FDB, }; @@ -61,6 +73,12 @@ struct mlx5_flow_namespace * mlx5_get_flow_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type); +struct mlx5_flow_table * +mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, + int prio, + int num_flow_table_entries, + int max_num_groups); + struct mlx5_flow_table * mlx5_create_flow_table(struct mlx5_flow_namespace *ns, int prio, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 1780a85a8797..68d73f82e009 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -185,6 +185,7 @@ enum { MLX5_CMD_OP_MODIFY_RQT = 0x917, 
MLX5_CMD_OP_DESTROY_RQT = 0x918, MLX5_CMD_OP_QUERY_RQT = 0x919, + MLX5_CMD_OP_SET_FLOW_TABLE_ROOT = 0x92f, MLX5_CMD_OP_CREATE_FLOW_TABLE = 0x930, MLX5_CMD_OP_DESTROY_FLOW_TABLE = 0x931, MLX5_CMD_OP_QUERY_FLOW_TABLE = 0x932, @@ -193,7 +194,8 @@ enum { MLX5_CMD_OP_QUERY_FLOW_GROUP = 0x935, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY = 0x936, MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY = 0x937, - MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY = 0x938 + MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY = 0x938, + MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c }; struct mlx5_ifc_flow_table_fields_supported_bits { @@ -258,7 +260,10 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 ft_support[0x1]; u8 reserved_0[0x2]; u8 flow_modify_en[0x1]; - u8 reserved_1[0x1c]; + u8 modify_root[0x1]; + u8 identified_miss_table_mode[0x1]; + u8 flow_table_modify[0x1]; + u8 reserved_1[0x19]; u8 reserved_2[0x2]; u8 log_max_ft_size[0x6]; @@ -293,6 +298,22 @@ struct mlx5_ifc_odp_per_transport_service_cap_bits { u8 reserved_1[0x1a]; }; +struct mlx5_ifc_ipv4_layout_bits { + u8 reserved_0[0x60]; + + u8 ipv4[0x20]; +}; + +struct mlx5_ifc_ipv6_layout_bits { + u8 ipv6[16][0x8]; +}; + +union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits { + struct mlx5_ifc_ipv6_layout_bits ipv6_layout; + struct mlx5_ifc_ipv4_layout_bits ipv4_layout; + u8 reserved_0[0x80]; +}; + struct mlx5_ifc_fte_match_set_lyr_2_4_bits { u8 smac_47_16[0x20]; @@ -323,9 +344,9 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits { u8 udp_sport[0x10]; u8 udp_dport[0x10]; - u8 src_ip[4][0x20]; + union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6; - u8 dst_ip[4][0x20]; + union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6; }; struct mlx5_ifc_fte_match_set_misc_bits { @@ -5667,12 +5688,16 @@ struct mlx5_ifc_create_flow_table_in_bits { u8 reserved_4[0x20]; - u8 reserved_5[0x8]; + u8 reserved_5[0x4]; + u8 table_miss_mode[0x4]; u8 level[0x8]; u8 reserved_6[0x8]; u8 log_size[0x8]; - u8 reserved_7[0x120]; + u8 reserved_7[0x8]; + u8 table_miss_id[0x18]; + + u8 reserved_8[0x100]; }; struct mlx5_ifc_create_flow_group_out_bits { @@ -6946,4 +6971,72 @@ union mlx5_ifc_uplink_pci_interface_document_bits { u8 reserved_0[0x20060]; }; +struct mlx5_ifc_set_flow_table_root_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_set_flow_table_root_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 reserved_5[0x140]; +}; + +enum { + MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID = 0x1, +}; + +struct mlx5_ifc_modify_flow_table_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_flow_table_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x20]; + + u8 reserved_3[0x10]; + u8 modify_field_select[0x10]; + + u8 table_type[0x8]; + u8 reserved_4[0x18]; + + u8 reserved_5[0x8]; + u8 table_id[0x18]; + + u8 reserved_6[0x4]; + u8 table_miss_mode[0x4]; + u8 reserved_7[0x18]; + + u8 reserved_8[0x8]; + u8 table_miss_id[0x18]; + + u8 reserved_9[0x100]; +}; + #endif /* MLX5_IFC_H */
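
Note: in the IB_FLOW_SPEC_ETH case, parse_flow_attr() programs first_vid, first_cfi and first_prio by shifting the whole ntohs()'d VLAN TCI and letting MLX5_SET() truncate the value to each field's width (12, 1 and 3 bits respectively). A minimal sketch of the equivalent explicit masking; tci_split() is a hypothetical helper, not code from this patch:

#include <linux/types.h>

/* Split a host-order 802.1Q TCI (PCP:3 | DEI/CFI:1 | VID:12) the way
 * the MLX5_SET() calls above do implicitly, since MLX5_SET() masks its
 * value down to the destination field's width.
 */
static inline void tci_split(u16 tci, u16 *vid, u8 *cfi, u8 *prio)
{
	*vid  = tci & 0x0fff;      /* MLX5_SET(..., first_vid, tci) keeps 12 bits */
	*cfi  = (tci >> 12) & 0x1; /* MLX5_SET(..., first_cfi, tci >> 12) keeps 1 bit */
	*prio = (tci >> 13) & 0x7; /* MLX5_SET(..., first_prio, tci >> 13) keeps 3 bits */
}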
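
Note: with MLX5_BY_PASS_NUM_PRIOS defined as 9 in device.h, the priority constants added to mlx5_ib.h work out as below, so user-requested priorities 0..7 map one-to-one onto bypass priorities and mlx5_ib_create_flow() rejects anything above MLX5_IB_FLOW_LAST_PRIO with -ENOSPC:

/* MLX5_IB_FLOW_MCAST_PRIO     = 9 - 1 = 8   last bypass slot, reserved
 *                                           for multicast-only rules
 * MLX5_IB_FLOW_LAST_PRIO      = 8 - 1 = 7   highest priority a user rule
 *                                           may request
 * MLX5_IB_FLOW_LEFTOVERS_PRIO = 8 + 1 = 9   catch-all for packets that
 *                                           miss every bypass table
 * MLX5_IB_NUM_FLOW_FT         = 9 + 1 = 10  size of mlx5_ib_flow_db.prios[]
 */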
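
Note: a minimal sketch of how an in-kernel consumer could drive the API this patch exports (mlx5_get_flow_namespace(), mlx5_create_auto_grouped_flow_table(), mlx5_add_flow_rule(), mlx5_destroy_flow_table()). The function name, the table sizing and the TCP destination-port match are illustrative, not taken from the patch:

#include <linux/err.h>
#include <linux/in.h>
#include <linux/slab.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/mlx5_ifc.h>
#include <linux/mlx5/fs.h>

/* dport is in host byte order; tirn names an already-created TIR */
static struct mlx5_flow_rule *example_steer_tcp_dport(struct mlx5_core_dev *mdev,
						      u32 tirn, u16 dport)
{
	struct mlx5_flow_destination dest = {
		.type	 = MLX5_FLOW_DESTINATION_TYPE_TIR,
		.tir_num = tirn,
	};
	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_table *ft;
	struct mlx5_flow_rule *rule;
	void *hdrs;
	u32 *mc, *mv;

	ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_BYPASS);
	if (!ns)
		return ERR_PTR(-ENOTSUPP);

	/* bypass priority 0, room for 1024 rules, at most 4 autogroups */
	ft = mlx5_create_auto_grouped_flow_table(ns, 0, 1024, 4);
	if (IS_ERR(ft))
		return ERR_CAST(ft);

	rule = ERR_PTR(-ENOMEM);
	mc = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
	mv = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
	if (!mc || !mv)
		goto out;

	/* criteria: match the full ip_protocol and tcp_dport fields */
	hdrs = MLX5_ADDR_OF(fte_match_param, mc, outer_headers);
	MLX5_SET(fte_match_set_lyr_2_4, hdrs, ip_protocol, 0xff);
	MLX5_SET(fte_match_set_lyr_2_4, hdrs, tcp_dport, 0xffff);

	/* values: TCP segments aimed at dport */
	hdrs = MLX5_ADDR_OF(fte_match_param, mv, outer_headers);
	MLX5_SET(fte_match_set_lyr_2_4, hdrs, ip_protocol, IPPROTO_TCP);
	MLX5_SET(fte_match_set_lyr_2_4, hdrs, tcp_dport, dport);

	/* bit 0 of match_criteria_enable selects the outer headers,
	 * exactly as create_flow_rule() computes it above
	 */
	rule = mlx5_add_flow_rule(ft, 1 << 0, mc, mv,
				  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
				  MLX5_FS_DEFAULT_FLOW_TAG, &dest);
out:
	kfree(mc);
	kfree(mv);
	if (IS_ERR(rule))
		mlx5_destroy_flow_table(ft);
	return rule;
}

The rule is released with mlx5_del_flow_rule() and the table with mlx5_destroy_flow_table(), in that order, mirroring the teardown mlx5_ib_destroy_flow() performs on the IB side.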
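
Note: userspace reaches mlx5_ib_create_flow() through the extended uverbs call ibv_create_flow(). A minimal sketch against libibverbs (assumes a library version that ships the flow steering verbs and an initialized raw packet QP; the MAC, port number and priority below are illustrative):

#include <arpa/inet.h>
#include <infiniband/verbs.h>

/* Flow attribute followed by its specs, laid out contiguously as the
 * verbs API expects; sizes cover the whole blob.
 */
struct raw_eth_flow_attr {
	struct ibv_flow_attr         attr;
	struct ibv_flow_spec_eth     eth;
	struct ibv_flow_spec_tcp_udp tcp;
} __attribute__((packed));

static struct ibv_flow *example_create_flow(struct ibv_qp *qp)
{
	struct raw_eth_flow_attr fa = {
		.attr = {
			.type         = IBV_FLOW_ATTR_NORMAL,
			.size         = sizeof(fa),
			.priority     = 0,  /* 0..7, per the driver checks above */
			.num_of_specs = 2,
			.port         = 1,
		},
		.eth = {
			.type = IBV_FLOW_SPEC_ETH,
			.size = sizeof(struct ibv_flow_spec_eth),
			.val.dst_mac  = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55},
			.mask.dst_mac = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
		},
		.tcp = {
			.type = IBV_FLOW_SPEC_TCP,
			.size = sizeof(struct ibv_flow_spec_tcp_udp),
			.val.dst_port  = htons(80),
			.mask.dst_port = 0xffff, /* match the whole port */
		},
	};

	/* Steers matching packets to qp; undone with ibv_destroy_flow() */
	return ibv_create_flow(qp, &fa.attr);
}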