diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 7e97cb55a6bf..b0ec175cc6ba 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -43,6 +43,9 @@ #include <linux/mlx5/vport.h> #include <rdma/ib_smi.h> #include <rdma/ib_umem.h> +#include <linux/in.h> +#include <linux/etherdevice.h> +#include <linux/mlx5/fs.h> #include "user.h" #include "mlx5_ib.h" @@ -835,6 +838,457 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd) return 0; } +static bool outer_header_zero(u32 *match_criteria) +{ + int size = MLX5_ST_SZ_BYTES(fte_match_param); + char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers); + + return outer_headers_c[0] == 0 && !memcmp(outer_headers_c, + outer_headers_c + 1, + size - 1); +} + +static int parse_flow_attr(u32 *match_c, u32 *match_v, + union ib_flow_spec *ib_spec) +{ + void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers); + void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers); + switch (ib_spec->type) { + case IB_FLOW_SPEC_ETH: + if (ib_spec->size != sizeof(ib_spec->eth)) + return -EINVAL; + + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + dmac_47_16), + ib_spec->eth.mask.dst_mac); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + dmac_47_16), + ib_spec->eth.val.dst_mac); + + if (ib_spec->eth.mask.vlan_tag) { + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + vlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + vlan_tag, 1); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + first_vid, ntohs(ib_spec->eth.mask.vlan_tag)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + first_vid, ntohs(ib_spec->eth.val.vlan_tag)); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + first_cfi, + ntohs(ib_spec->eth.mask.vlan_tag) >> 12); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + first_cfi, + ntohs(ib_spec->eth.val.vlan_tag) >> 12); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + first_prio, + ntohs(ib_spec->eth.mask.vlan_tag) >> 13); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + first_prio, + ntohs(ib_spec->eth.val.vlan_tag) >> 13); + } + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + ethertype, ntohs(ib_spec->eth.mask.ether_type)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + ethertype, ntohs(ib_spec->eth.val.ether_type)); + break; + case IB_FLOW_SPEC_IPV4: + if (ib_spec->size != sizeof(ib_spec->ipv4)) + return -EINVAL; + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + ethertype, 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + ethertype, ETH_P_IP); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.mask.src_ip, + sizeof(ib_spec->ipv4.mask.src_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.val.src_ip, + sizeof(ib_spec->ipv4.val.src_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.mask.dst_ip, + sizeof(ib_spec->ipv4.mask.dst_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.val.dst_ip, + sizeof(ib_spec->ipv4.val.dst_ip)); + break; + case IB_FLOW_SPEC_TCP: + if (ib_spec->size != sizeof(ib_spec->tcp_udp)) + return -EINVAL; + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, + 0xff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol, + IPPROTO_TCP); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
tcp_sport, + ntohs(ib_spec->tcp_udp.mask.src_port)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_sport, + ntohs(ib_spec->tcp_udp.val.src_port)); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport, + ntohs(ib_spec->tcp_udp.mask.dst_port)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_dport, + ntohs(ib_spec->tcp_udp.val.dst_port)); + break; + case IB_FLOW_SPEC_UDP: + if (ib_spec->size != sizeof(ib_spec->tcp_udp)) + return -EINVAL; + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, + 0xff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol, + IPPROTO_UDP); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_sport, + ntohs(ib_spec->tcp_udp.mask.src_port)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_sport, + ntohs(ib_spec->tcp_udp.val.src_port)); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_dport, + ntohs(ib_spec->tcp_udp.mask.dst_port)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport, + ntohs(ib_spec->tcp_udp.val.dst_port)); + break; + default: + return -EINVAL; + } + + return 0; +} + +/* If a flow could catch both multicast and unicast packets, + * it won't fall into the multicast flow steering table and this rule + * could steal other multicast packets. + */ +static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr) +{ + struct ib_flow_spec_eth *eth_spec; + + if (ib_attr->type != IB_FLOW_ATTR_NORMAL || + ib_attr->size < sizeof(struct ib_flow_attr) + + sizeof(struct ib_flow_spec_eth) || + ib_attr->num_of_specs < 1) + return false; + + eth_spec = (struct ib_flow_spec_eth *)(ib_attr + 1); + if (eth_spec->type != IB_FLOW_SPEC_ETH || + eth_spec->size != sizeof(*eth_spec)) + return false; + + return is_multicast_ether_addr(eth_spec->mask.dst_mac) && + is_multicast_ether_addr(eth_spec->val.dst_mac); +} + +static bool is_valid_attr(struct ib_flow_attr *flow_attr) +{ + union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1); + bool has_ipv4_spec = false; + bool eth_type_ipv4 = true; + unsigned int spec_index; + + /* Validate that ethertype is correct */ + for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { + if (ib_spec->type == IB_FLOW_SPEC_ETH && + ib_spec->eth.mask.ether_type) { + if (!((ib_spec->eth.mask.ether_type == htons(0xffff)) && + ib_spec->eth.val.ether_type == htons(ETH_P_IP))) + eth_type_ipv4 = false; + } else if (ib_spec->type == IB_FLOW_SPEC_IPV4) { + has_ipv4_spec = true; + } + ib_spec = (void *)ib_spec + ib_spec->size; + } + return !has_ipv4_spec || eth_type_ipv4; +} + +static void put_flow_table(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *prio, bool ft_added) +{ + prio->refcount -= !!ft_added; + if (!prio->refcount) { + mlx5_destroy_flow_table(prio->flow_table); + prio->flow_table = NULL; + } +} + +static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) +{ + struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device); + struct mlx5_ib_flow_handler *handler = container_of(flow_id, + struct mlx5_ib_flow_handler, + ibflow); + struct mlx5_ib_flow_handler *iter, *tmp; + + mutex_lock(&dev->flow_db.lock); + + list_for_each_entry_safe(iter, tmp, &handler->list, list) { + mlx5_del_flow_rule(iter->rule); + list_del(&iter->list); + kfree(iter); + } + + mlx5_del_flow_rule(handler->rule); + put_flow_table(dev, &dev->flow_db.prios[handler->prio], true); + mutex_unlock(&dev->flow_db.lock); + + kfree(handler); + + return 0; +} + +#define MLX5_FS_MAX_TYPES 10 +#define MLX5_FS_MAX_ENTRIES 32000UL +static struct mlx5_ib_flow_prio *get_flow_table(struct 
mlx5_ib_dev *dev, + struct ib_flow_attr *flow_attr) +{ + struct mlx5_flow_namespace *ns = NULL; + struct mlx5_ib_flow_prio *prio; + struct mlx5_flow_table *ft; + int num_entries; + int num_groups; + int priority; + int err = 0; + + if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { + if (flow_is_multicast_only(flow_attr)) + priority = MLX5_IB_FLOW_MCAST_PRIO; + else + priority = flow_attr->priority; + ns = mlx5_get_flow_namespace(dev->mdev, + MLX5_FLOW_NAMESPACE_BYPASS); + num_entries = MLX5_FS_MAX_ENTRIES; + num_groups = MLX5_FS_MAX_TYPES; + prio = &dev->flow_db.prios[priority]; + } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || + flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { + ns = mlx5_get_flow_namespace(dev->mdev, + MLX5_FLOW_NAMESPACE_LEFTOVERS); + build_leftovers_ft_param(&priority, + &num_entries, + &num_groups); + prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO]; + } + + if (!ns) + return ERR_PTR(-ENOTSUPP); + + ft = prio->flow_table; + if (!ft) { + ft = mlx5_create_auto_grouped_flow_table(ns, priority, + num_entries, + num_groups); + + if (!IS_ERR(ft)) { + prio->refcount = 0; + prio->flow_table = ft; + } else { + err = PTR_ERR(ft); + } + } + + return err ? ERR_PTR(err) : prio; +} + +static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst) +{ + struct mlx5_flow_table *ft = ft_prio->flow_table; + struct mlx5_ib_flow_handler *handler; + void *ib_flow = flow_attr + 1; + u8 match_criteria_enable = 0; + unsigned int spec_index; + u32 *match_c; + u32 *match_v; + int err = 0; + + if (!is_valid_attr(flow_attr)) + return ERR_PTR(-EINVAL); + + match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + handler = kzalloc(sizeof(*handler), GFP_KERNEL); + if (!handler || !match_c || !match_v) { + err = -ENOMEM; + goto free; + } + + INIT_LIST_HEAD(&handler->list); + + for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { + err = parse_flow_attr(match_c, match_v, ib_flow); + if (err < 0) + goto free; + + ib_flow += ((union ib_flow_spec *)ib_flow)->size; + } + + /* Outer header support only */ + match_criteria_enable = (!outer_header_zero(match_c)) << 0; + handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable, + match_c, match_v, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, + dst); + + if (IS_ERR(handler->rule)) { + err = PTR_ERR(handler->rule); + goto free; + } + + handler->prio = ft_prio - dev->flow_db.prios; + + ft_prio->flow_table = ft; +free: + if (err) + kfree(handler); + kfree(match_c); + kfree(match_v); + return err ? 
ERR_PTR(err) : handler; +} + +enum { + LEFTOVERS_MC, + LEFTOVERS_UC, +}; + +static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst) +{ + struct mlx5_ib_flow_handler *handler_ucast = NULL; + struct mlx5_ib_flow_handler *handler = NULL; + + static struct { + struct ib_flow_attr flow_attr; + struct ib_flow_spec_eth eth_flow; + } leftovers_specs[] = { + [LEFTOVERS_MC] = { + .flow_attr = { + .num_of_specs = 1, + .size = sizeof(leftovers_specs[0]) + }, + .eth_flow = { + .type = IB_FLOW_SPEC_ETH, + .size = sizeof(struct ib_flow_spec_eth), + .mask = {.dst_mac = {0x1} }, + .val = {.dst_mac = {0x1} } + } + }, + [LEFTOVERS_UC] = { + .flow_attr = { + .num_of_specs = 1, + .size = sizeof(leftovers_specs[0]) + }, + .eth_flow = { + .type = IB_FLOW_SPEC_ETH, + .size = sizeof(struct ib_flow_spec_eth), + .mask = {.dst_mac = {0x1} }, + .val = {.dst_mac = {} } + } + } + }; + + handler = create_flow_rule(dev, ft_prio, + &leftovers_specs[LEFTOVERS_MC].flow_attr, + dst); + if (!IS_ERR(handler) && + flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) { + handler_ucast = create_flow_rule(dev, ft_prio, + &leftovers_specs[LEFTOVERS_UC].flow_attr, + dst); + if (IS_ERR(handler_ucast)) { + kfree(handler); + handler = handler_ucast; + } else { + list_add(&handler_ucast->list, &handler->list); + } + } + + return handler; +} + +static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, + struct ib_flow_attr *flow_attr, + int domain) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_ib_flow_handler *handler = NULL; + struct mlx5_flow_destination *dst = NULL; + struct mlx5_ib_flow_prio *ft_prio; + int err; + + if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) + return ERR_PTR(-ENOSPC); + + if (domain != IB_FLOW_DOMAIN_USER || + flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) || + flow_attr->flags) + return ERR_PTR(-EINVAL); + + dst = kzalloc(sizeof(*dst), GFP_KERNEL); + if (!dst) + return ERR_PTR(-ENOMEM); + + mutex_lock(&dev->flow_db.lock); + + ft_prio = get_flow_table(dev, flow_attr); + if (IS_ERR(ft_prio)) { + err = PTR_ERR(ft_prio); + goto unlock; + } + + dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn; + + if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { + handler = create_flow_rule(dev, ft_prio, flow_attr, + dst); + } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || + flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { + handler = create_leftovers_rule(dev, ft_prio, flow_attr, + dst); + } else { + err = -EINVAL; + goto destroy_ft; + } + + if (IS_ERR(handler)) { + err = PTR_ERR(handler); + handler = NULL; + goto destroy_ft; + } + + ft_prio->refcount++; + mutex_unlock(&dev->flow_db.lock); + kfree(dst); + + return &handler->ibflow; + +destroy_ft: + put_flow_table(dev, ft_prio, false); +unlock: + mutex_unlock(&dev->flow_db.lock); + kfree(dst); + kfree(handler); + return ERR_PTR(err); +} + static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); @@ -1439,10 +1893,19 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); } + if (mlx5_ib_port_link_layer(&dev->ib_dev) == + IB_LINK_LAYER_ETHERNET) { + dev->ib_dev.create_flow = mlx5_ib_create_flow; + dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow; + dev->ib_dev.uverbs_ex_cmd_mask |= + (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); 
+ } err = init_node_data(dev); if (err) goto err_dealloc; + mutex_init(&dev->flow_db.lock); mutex_init(&dev->cap_mask_mutex); err = create_dev_resources(&dev->devr); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 633347260b79..1474cccd1e0f 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -105,6 +105,36 @@ struct mlx5_ib_pd { u32 pdn; }; +#define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1) +#define MLX5_IB_FLOW_LAST_PRIO (MLX5_IB_FLOW_MCAST_PRIO - 1) +#if (MLX5_IB_FLOW_LAST_PRIO <= 0) +#error "Invalid number of bypass priorities" +#endif +#define MLX5_IB_FLOW_LEFTOVERS_PRIO (MLX5_IB_FLOW_MCAST_PRIO + 1) + +#define MLX5_IB_NUM_FLOW_FT (MLX5_IB_FLOW_LEFTOVERS_PRIO + 1) +struct mlx5_ib_flow_prio { + struct mlx5_flow_table *flow_table; + unsigned int refcount; +}; + +struct mlx5_ib_flow_handler { + struct list_head list; + struct ib_flow ibflow; + unsigned int prio; + struct mlx5_flow_rule *rule; +}; + +struct mlx5_ib_flow_db { + struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT]; + /* Protect flow steering bypass flow tables + * when add/del flow rules. + * only single add/removal of flow steering rule could be done + * simultaneously. + */ + struct mutex lock; +}; + /* Use macros here so that don't have to duplicate * enum ib_send_flags and enum ib_qp_type for low-level driver */ @@ -171,9 +201,21 @@ struct mlx5_ib_pfault { struct mlx5_pagefault mpfault; }; +struct mlx5_ib_rq { + u32 tirn; +}; + +struct mlx5_ib_raw_packet_qp { + struct mlx5_ib_rq rq; +}; + struct mlx5_ib_qp { struct ib_qp ibqp; - struct mlx5_core_qp mqp; + union { + struct mlx5_core_qp mqp; + struct mlx5_ib_raw_packet_qp raw_packet_qp; + }; + struct mlx5_buf buf; struct mlx5_db db; @@ -431,6 +473,7 @@ struct mlx5_ib_dev { */ struct srcu_struct mr_srcu; #endif + struct mlx5_ib_flow_db flow_db; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 5096f4f336bd..a9894d2e8e26 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -38,9 +38,28 @@ #include "fs_cmd.h" #include "mlx5_core.h" +int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft) +{ + u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)]; + u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(set_flow_table_root_in, in, opcode, + MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); + MLX5_SET(set_flow_table_root_in, in, table_type, ft->type); + MLX5_SET(set_flow_table_root_in, in, table_id, ft->id); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} + int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, enum fs_flow_table_type type, unsigned int level, - unsigned int log_size, unsigned int *table_id) + unsigned int log_size, struct mlx5_flow_table + *next_ft, unsigned int *table_id) { u32 out[MLX5_ST_SZ_DW(create_flow_table_out)]; u32 in[MLX5_ST_SZ_DW(create_flow_table_in)]; @@ -51,6 +70,10 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE); + if (next_ft) { + MLX5_SET(create_flow_table_in, in, table_miss_mode, 1); + MLX5_SET(create_flow_table_in, in, table_miss_id, next_ft->id); + } MLX5_SET(create_flow_table_in, in, table_type, type); MLX5_SET(create_flow_table_in, in, 
level, level); MLX5_SET(create_flow_table_in, in, log_size, log_size); @@ -83,6 +106,33 @@ int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, sizeof(out)); } +int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) +{ + u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)]; + u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(modify_flow_table_in, in, opcode, + MLX5_CMD_OP_MODIFY_FLOW_TABLE); + MLX5_SET(modify_flow_table_in, in, table_type, ft->type); + MLX5_SET(modify_flow_table_in, in, table_id, ft->id); + MLX5_SET(modify_flow_table_in, in, modify_field_select, + MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID); + if (next_ft) { + MLX5_SET(modify_flow_table_in, in, table_miss_mode, 1); + MLX5_SET(modify_flow_table_in, in, table_miss_id, next_ft->id); + } else { + MLX5_SET(modify_flow_table_in, in, table_miss_mode, 0); + } + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} + int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, u32 *in, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index f39304ede186..9814d4784803 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -35,11 +35,16 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, enum fs_flow_table_type type, unsigned int level, - unsigned int log_size, unsigned int *table_id); + unsigned int log_size, struct mlx5_flow_table + *next_ft, unsigned int *table_id); int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft); +int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft); + int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, u32 *in, unsigned int *group_id); @@ -62,4 +67,6 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, unsigned int index); +int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index f7d62fe595f6..6f68dba8d7ed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -40,51 +40,83 @@ #define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ sizeof(struct init_tree_node)) -#define INIT_PRIO(min_level_val, max_ft_val,\ - start_level_val, ...) {.type = FS_TYPE_PRIO,\ +#define ADD_PRIO(num_prios_val, min_level_val, max_ft_val, caps_val,\ + ...) {.type = FS_TYPE_PRIO,\ .min_ft_level = min_level_val,\ - .start_level = start_level_val,\ .max_ft = max_ft_val,\ + .num_leaf_prios = num_prios_val,\ + .caps = caps_val,\ .children = (struct init_tree_node[]) {__VA_ARGS__},\ .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \ } -#define ADD_PRIO(min_level_val, max_ft_val, start_level_val, ...)\ - INIT_PRIO(min_level_val, max_ft_val, start_level_val,\ - __VA_ARGS__)\ - -#define ADD_FT_PRIO(max_ft_val, start_level_val, ...)\ - INIT_PRIO(0, max_ft_val, start_level_val,\ - __VA_ARGS__)\ +#define ADD_MULTIPLE_PRIO(num_prios_val, max_ft_val, ...)\ + ADD_PRIO(num_prios_val, 0, max_ft_val, {},\ + __VA_ARGS__)\ #define ADD_NS(...) 
{.type = FS_TYPE_NAMESPACE,\ .children = (struct init_tree_node[]) {__VA_ARGS__},\ .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \ } -#define KERNEL_START_LEVEL 0 -#define KERNEL_P0_START_LEVEL KERNEL_START_LEVEL +#define INIT_CAPS_ARRAY_SIZE(...) (sizeof((long[]){__VA_ARGS__}) /\ + sizeof(long)) + +#define FS_CAP(cap) (__mlx5_bit_off(flow_table_nic_cap, cap)) + +#define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \ + .caps = (long[]) {__VA_ARGS__} } + +#define LEFTOVERS_MAX_FT 1 +#define LEFTOVERS_NUM_PRIOS 1 +#define BY_PASS_PRIO_MAX_FT 1 +#define BY_PASS_MIN_LEVEL (KENREL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\ + LEFTOVERS_MAX_FT) + #define KERNEL_MAX_FT 2 +#define KERNEL_NUM_PRIOS 1 #define KENREL_MIN_LEVEL 2 + +struct node_caps { + size_t arr_sz; + long *caps; +}; static struct init_tree_node { enum fs_node_type type; struct init_tree_node *children; int ar_size; + struct node_caps caps; int min_ft_level; + int num_leaf_prios; int prio; int max_ft; - int start_level; } root_fs = { .type = FS_TYPE_NAMESPACE, - .ar_size = 1, + .ar_size = 3, .children = (struct init_tree_node[]) { - ADD_PRIO(KENREL_MIN_LEVEL, KERNEL_MAX_FT, - KERNEL_START_LEVEL, - ADD_NS(ADD_FT_PRIO(KERNEL_MAX_FT, - KERNEL_P0_START_LEVEL))), + ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, + FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), + FS_CAP(flow_table_properties_nic_receive.modify_root), + FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), + FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), + ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, BY_PASS_PRIO_MAX_FT))), + ADD_PRIO(0, KENREL_MIN_LEVEL, 0, {}, + ADD_NS(ADD_MULTIPLE_PRIO(KERNEL_NUM_PRIOS, KERNEL_MAX_FT))), + ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, + FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), + FS_CAP(flow_table_properties_nic_receive.modify_root), + FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), + FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), + ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_MAX_FT))), } }; +enum fs_i_mutex_lock_class { + FS_MUTEX_GRANDPARENT, + FS_MUTEX_PARENT, + FS_MUTEX_CHILD +}; + static void del_rule(struct fs_node *node); static void del_flow_table(struct fs_node *node); static void del_flow_group(struct fs_node *node); @@ -119,10 +151,11 @@ static void tree_get_node(struct fs_node *node) atomic_inc(&node->refcount); } -static void nested_lock_ref_node(struct fs_node *node) +static void nested_lock_ref_node(struct fs_node *node, + enum fs_i_mutex_lock_class class) { if (node) { - mutex_lock_nested(&node->lock, SINGLE_DEPTH_NESTING); + mutex_lock_nested(&node->lock, class); atomic_inc(&node->refcount); } } @@ -436,10 +469,162 @@ static struct mlx5_flow_table *alloc_flow_table(int level, int max_fte, return ft; } +/* If reverse is false, then we search for the first flow table in the + * root sub-tree from start(closest from right), else we search for the + * last flow table in the root sub-tree till start(closest from left). + */ +static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root, + struct list_head *start, + bool reverse) +{ +#define list_advance_entry(pos, reverse) \ + ((reverse) ? 
list_prev_entry(pos, list) : list_next_entry(pos, list)) + +#define list_for_each_advance_continue(pos, head, reverse) \ + for (pos = list_advance_entry(pos, reverse); \ + &pos->list != (head); \ + pos = list_advance_entry(pos, reverse)) + + struct fs_node *iter = list_entry(start, struct fs_node, list); + struct mlx5_flow_table *ft = NULL; + + if (!root) + return NULL; + + list_for_each_advance_continue(iter, &root->children, reverse) { + if (iter->type == FS_TYPE_FLOW_TABLE) { + fs_get_obj(ft, iter); + return ft; + } + ft = find_closest_ft_recursive(iter, &iter->children, reverse); + if (ft) + return ft; + } + + return ft; +} + +/* If reverse is false then return the first flow table in next priority of + * prio in the tree, else return the last flow table in the previous priority + * of prio in the tree. + */ +static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool reverse) +{ + struct mlx5_flow_table *ft = NULL; + struct fs_node *curr_node; + struct fs_node *parent; + + parent = prio->node.parent; + curr_node = &prio->node; + while (!ft && parent) { + ft = find_closest_ft_recursive(parent, &curr_node->list, reverse); + curr_node = parent; + parent = curr_node->parent; + } + return ft; +} + +/* Assuming all the tree is locked by mutex chain lock */ +static struct mlx5_flow_table *find_next_chained_ft(struct fs_prio *prio) +{ + return find_closest_ft(prio, false); +} + +/* Assuming all the tree is locked by mutex chain lock */ +static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio) +{ + return find_closest_ft(prio, true); +} + +static int connect_fts_in_prio(struct mlx5_core_dev *dev, + struct fs_prio *prio, + struct mlx5_flow_table *ft) +{ + struct mlx5_flow_table *iter; + int i = 0; + int err; + + fs_for_each_ft(iter, prio) { + i++; + err = mlx5_cmd_modify_flow_table(dev, + iter, + ft); + if (err) { + mlx5_core_warn(dev, "Failed to modify flow table %d\n", + iter->id); + /* The driver is out of sync with the FW */ + if (i > 1) + WARN_ON(true); + return err; + } + } + return 0; +} + +/* Connect flow tables from previous priority of prio to ft */ +static int connect_prev_fts(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct fs_prio *prio) +{ + struct mlx5_flow_table *prev_ft; + + prev_ft = find_prev_chained_ft(prio); + if (prev_ft) { + struct fs_prio *prev_prio; + + fs_get_obj(prev_prio, prev_ft->node.parent); + return connect_fts_in_prio(dev, prev_prio, ft); + } + return 0; +} + +static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio + *prio) +{ + struct mlx5_flow_root_namespace *root = find_root(&prio->node); + int min_level = INT_MAX; + int err; + + if (root->root_ft) + min_level = root->root_ft->level; + + if (ft->level >= min_level) + return 0; + + err = mlx5_cmd_update_root_ft(root->dev, ft); + if (err) + mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n", + ft->id); + else + root->root_ft = ft; + + return err; +} + +static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, + struct fs_prio *prio) +{ + int err = 0; + + /* Connect_prev_fts and update_root_ft_create are mutually exclusive */ + + if (list_empty(&prio->node.children)) { + err = connect_prev_fts(dev, ft, prio); + if (err) + return err; + } + + if (MLX5_CAP_FLOWTABLE(dev, + flow_table_properties_nic_receive.modify_root)) + err = update_root_ft_create(ft, prio); + return err; +} + struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, int prio, int max_fte) { + struct
mlx5_flow_table *next_ft = NULL; struct mlx5_flow_table *ft; int err; int log_table_sz; @@ -452,14 +637,15 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, return ERR_PTR(-ENODEV); } + mutex_lock(&root->chain_lock); fs_prio = find_prio(ns, prio); - if (!fs_prio) - return ERR_PTR(-EINVAL); - - lock_ref_node(&fs_prio->node); + if (!fs_prio) { + err = -EINVAL; + goto unlock_root; + } if (fs_prio->num_ft == fs_prio->max_ft) { err = -ENOSPC; - goto unlock_prio; + goto unlock_root; } ft = alloc_flow_table(find_next_free_level(fs_prio), @@ -467,32 +653,63 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, root->table_type); if (!ft) { err = -ENOMEM; - goto unlock_prio; + goto unlock_root; } tree_init_node(&ft->node, 1, del_flow_table); log_table_sz = ilog2(ft->max_fte); + next_ft = find_next_chained_ft(fs_prio); err = mlx5_cmd_create_flow_table(root->dev, ft->type, ft->level, - log_table_sz, &ft->id); + log_table_sz, next_ft, &ft->id); if (err) goto free_ft; + err = connect_flow_table(root->dev, ft, fs_prio); + if (err) + goto destroy_ft; + lock_ref_node(&fs_prio->node); tree_add_node(&ft->node, &fs_prio->node); list_add_tail(&ft->node.list, &fs_prio->node.children); fs_prio->num_ft++; unlock_ref_node(&fs_prio->node); - + mutex_unlock(&root->chain_lock); return ft; - +destroy_ft: + mlx5_cmd_destroy_flow_table(root->dev, ft); free_ft: kfree(ft); -unlock_prio: - unlock_ref_node(&fs_prio->node); +unlock_root: + mutex_unlock(&root->chain_lock); return ERR_PTR(err); } -struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, - u32 *fg_in) +struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, + int prio, + int num_flow_table_entries, + int max_num_groups) +{ + struct mlx5_flow_table *ft; + + if (max_num_groups > num_flow_table_entries) + return ERR_PTR(-EINVAL); + + ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries); + if (IS_ERR(ft)) + return ft; + + ft->autogroup.active = true; + ft->autogroup.required_groups = max_num_groups; + + return ft; +} +EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table); + +/* Flow table should be locked */ +static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table *ft, + u32 *fg_in, + struct list_head + *prev_fg, + bool is_auto_fg) { struct mlx5_flow_group *fg; struct mlx5_core_dev *dev = get_dev(&ft->node); @@ -505,18 +722,33 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, if (IS_ERR(fg)) return fg; - lock_ref_node(&ft->node); err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id); if (err) { kfree(fg); - unlock_ref_node(&ft->node); return ERR_PTR(err); } + + if (ft->autogroup.active) + ft->autogroup.num_groups++; /* Add node to tree */ - tree_init_node(&fg->node, 1, del_flow_group); + tree_init_node(&fg->node, !is_auto_fg, del_flow_group); tree_add_node(&fg->node, &ft->node); /* Add node to group list */ list_add(&fg->node.list, ft->node.children.prev); + + return fg; +} + +struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, + u32 *fg_in) +{ + struct mlx5_flow_group *fg; + + if (ft->autogroup.active) + return ERR_PTR(-EPERM); + + lock_ref_node(&ft->node); + fg = create_flow_group_common(ft, fg_in, &ft->node.children, false); unlock_ref_node(&ft->node); return fg; @@ -614,7 +846,63 @@ static struct fs_fte *create_fte(struct mlx5_flow_group *fg, return fte; } -/* Assuming parent fg(flow table) is locked */ +static struct mlx5_flow_group *create_autogroup(struct 
mlx5_flow_table *ft, + u8 match_criteria_enable, + u32 *match_criteria) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct list_head *prev = &ft->node.children; + unsigned int candidate_index = 0; + struct mlx5_flow_group *fg; + void *match_criteria_addr; + unsigned int group_size = 0; + u32 *in; + + if (!ft->autogroup.active) + return ERR_PTR(-ENOENT); + + in = mlx5_vzalloc(inlen); + if (!in) + return ERR_PTR(-ENOMEM); + + if (ft->autogroup.num_groups < ft->autogroup.required_groups) + /* We save place for flow groups in addition to max types */ + group_size = ft->max_fte / (ft->autogroup.required_groups + 1); + + /* ft->max_fte == ft->autogroup.max_types */ + if (group_size == 0) + group_size = 1; + + /* sorted by start_index */ + fs_for_each_fg(fg, ft) { + if (candidate_index + group_size > fg->start_index) + candidate_index = fg->start_index + fg->max_ftes; + else + break; + prev = &fg->node.list; + } + + if (candidate_index + group_size > ft->max_fte) { + fg = ERR_PTR(-ENOSPC); + goto out; + } + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, + match_criteria_enable); + MLX5_SET(create_flow_group_in, in, start_flow_index, candidate_index); + MLX5_SET(create_flow_group_in, in, end_flow_index, candidate_index + + group_size - 1); + match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in, + in, match_criteria); + memcpy(match_criteria_addr, match_criteria, + MLX5_ST_SZ_BYTES(fte_match_param)); + + fg = create_flow_group_common(ft, in, prev, true); +out: + kvfree(in); + return fg; +} + static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg, u32 *match_value, u8 action, @@ -626,9 +914,9 @@ static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg, struct mlx5_flow_table *ft; struct list_head *prev; - lock_ref_node(&fg->node); + nested_lock_ref_node(&fg->node, FS_MUTEX_PARENT); fs_for_each_fte(fte, fg) { - nested_lock_ref_node(&fte->node); + nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); if (compare_match_value(&fg->mask, match_value, &fte->val) && action == fte->action && flow_tag == fte->flow_tag) { rule = add_rule_fte(fte, fg, dest); @@ -669,6 +957,33 @@ unlock_fg: return rule; } +static struct mlx5_flow_rule *add_rule_to_auto_fg(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u8 action, + u32 flow_tag, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_rule *rule; + struct mlx5_flow_group *g; + + g = create_autogroup(ft, match_criteria_enable, match_criteria); + if (IS_ERR(g)) + return (void *)g; + + rule = add_rule_fg(g, match_value, + action, flow_tag, dest); + if (IS_ERR(rule)) { + /* Remove assumes refcount > 0 and autogroup creates a group + * with a refcount = 0. 
+ */ + tree_get_node(&g->node); + tree_remove_node(&g->node); + } + return rule; +} + struct mlx5_flow_rule * mlx5_add_flow_rule(struct mlx5_flow_table *ft, u8 match_criteria_enable, @@ -679,39 +994,115 @@ mlx5_add_flow_rule(struct mlx5_flow_table *ft, struct mlx5_flow_destination *dest) { struct mlx5_flow_group *g; - struct mlx5_flow_rule *rule = ERR_PTR(-EINVAL); + struct mlx5_flow_rule *rule; - tree_get_node(&ft->node); - lock_ref_node(&ft->node); + nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT); fs_for_each_fg(g, ft) if (compare_match_criteria(g->mask.match_criteria_enable, match_criteria_enable, g->mask.match_criteria, match_criteria)) { - unlock_ref_node(&ft->node); rule = add_rule_fg(g, match_value, action, flow_tag, dest); - goto put; + if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) + goto unlock; } + + rule = add_rule_to_auto_fg(ft, match_criteria_enable, match_criteria, + match_value, action, flow_tag, dest); +unlock: unlock_ref_node(&ft->node); -put: - tree_put_node(&ft->node); return rule; } +EXPORT_SYMBOL(mlx5_add_flow_rule); void mlx5_del_flow_rule(struct mlx5_flow_rule *rule) { tree_remove_node(&rule->node); } +EXPORT_SYMBOL(mlx5_del_flow_rule); + +/* Assuming prio->node.children(flow tables) is sorted by level */ +static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft) +{ + struct fs_prio *prio; + + fs_get_obj(prio, ft->node.parent); + + if (!list_is_last(&ft->node.list, &prio->node.children)) + return list_next_entry(ft, node.list); + return find_next_chained_ft(prio); +} + +static int update_root_ft_destroy(struct mlx5_flow_table *ft) +{ + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + struct mlx5_flow_table *new_root_ft = NULL; + + if (root->root_ft != ft) + return 0; + + new_root_ft = find_next_ft(ft); + if (new_root_ft) { + int err = mlx5_cmd_update_root_ft(root->dev, new_root_ft); + + if (err) { + mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n", + ft->id); + return err; + } + root->root_ft = new_root_ft; + } + return 0; +} + +/* Connect flow table from previous priority to + * the next flow table. 
+ */ +static int disconnect_flow_table(struct mlx5_flow_table *ft) +{ + struct mlx5_core_dev *dev = get_dev(&ft->node); + struct mlx5_flow_table *next_ft; + struct fs_prio *prio; + int err = 0; + + err = update_root_ft_destroy(ft); + if (err) + return err; + + fs_get_obj(prio, ft->node.parent); + if (!(list_first_entry(&prio->node.children, + struct mlx5_flow_table, + node.list) == ft)) + return 0; + + next_ft = find_next_chained_ft(prio); + err = connect_prev_fts(dev, next_ft, prio); + if (err) + mlx5_core_warn(dev, "Failed to disconnect flow table %d\n", + ft->id); + return err; +} int mlx5_destroy_flow_table(struct mlx5_flow_table *ft) { + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + int err = 0; + + mutex_lock(&root->chain_lock); + err = disconnect_flow_table(ft); + if (err) { + mutex_unlock(&root->chain_lock); + return err; + } if (tree_remove_node(&ft->node)) mlx5_core_warn(get_dev(&ft->node), "Flow table %d wasn't destroyed, refcount > 1\n", ft->id); + mutex_unlock(&root->chain_lock); - return 0; + return err; } +EXPORT_SYMBOL(mlx5_destroy_flow_table); void mlx5_destroy_flow_group(struct mlx5_flow_group *fg) { @@ -732,8 +1123,10 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, return NULL; switch (type) { + case MLX5_FLOW_NAMESPACE_BYPASS: case MLX5_FLOW_NAMESPACE_KERNEL: - prio = 0; + case MLX5_FLOW_NAMESPACE_LEFTOVERS: + prio = type; break; case MLX5_FLOW_NAMESPACE_FDB: if (dev->priv.fdb_root_ns) @@ -754,10 +1147,10 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, return ns; } +EXPORT_SYMBOL(mlx5_get_flow_namespace); static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, - unsigned prio, int max_ft, - int start_level) + unsigned prio, int max_ft) { struct fs_prio *fs_prio; @@ -770,7 +1163,6 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, tree_add_node(&fs_prio->node, &ns->node); fs_prio->max_ft = max_ft; fs_prio->prio = prio; - fs_prio->start_level = start_level; list_add_tail(&fs_prio->node.list, &ns->node.children); return fs_prio; @@ -800,11 +1192,45 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio) return ns; } -static int init_root_tree_recursive(int max_ft_level, struct init_tree_node *init_node, +static int create_leaf_prios(struct mlx5_flow_namespace *ns, struct init_tree_node + *prio_metadata) +{ + struct fs_prio *fs_prio; + int i; + + for (i = 0; i < prio_metadata->num_leaf_prios; i++) { + fs_prio = fs_create_prio(ns, i, prio_metadata->max_ft); + if (IS_ERR(fs_prio)) + return PTR_ERR(fs_prio); + } + return 0; +} + +#define FLOW_TABLE_BIT_SZ 1 +#define GET_FLOW_TABLE_CAP(dev, offset) \ + ((be32_to_cpu(*((__be32 *)(dev->hca_caps_cur[MLX5_CAP_FLOW_TABLE]) + \ + offset / 32)) >> \ + (32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ) +static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps) +{ + int i; + + for (i = 0; i < caps->arr_sz; i++) { + if (!GET_FLOW_TABLE_CAP(dev, caps->caps[i])) + return false; + } + return true; +} + +static int init_root_tree_recursive(struct mlx5_core_dev *dev, + struct init_tree_node *init_node, struct fs_node *fs_parent_node, struct init_tree_node *init_parent_node, int index) { + int max_ft_level = MLX5_CAP_FLOWTABLE(dev, + flow_table_properties_nic_receive. 
+ max_ft_level); struct mlx5_flow_namespace *fs_ns; struct fs_prio *fs_prio; struct fs_node *base; @@ -812,12 +1238,14 @@ static int init_root_tree_recursive(int max_ft_level, struct init_tree_node *ini int err; if (init_node->type == FS_TYPE_PRIO) { - if (init_node->min_ft_level > max_ft_level) - return -ENOTSUPP; + if ((init_node->min_ft_level > max_ft_level) || + !has_required_caps(dev, &init_node->caps)) + return 0; fs_get_obj(fs_ns, fs_parent_node); - fs_prio = fs_create_prio(fs_ns, index, init_node->max_ft, - init_node->start_level); + if (init_node->num_leaf_prios) + return create_leaf_prios(fs_ns, init_node); + fs_prio = fs_create_prio(fs_ns, index, init_node->max_ft); if (IS_ERR(fs_prio)) return PTR_ERR(fs_prio); base = &fs_prio->node; @@ -831,9 +1259,8 @@ static int init_root_tree_recursive(int max_ft_level, struct init_tree_node *ini return -EINVAL; } for (i = 0; i < init_node->ar_size; i++) { - err = init_root_tree_recursive(max_ft_level, - &init_node->children[i], base, - init_node, i); + err = init_root_tree_recursive(dev, &init_node->children[i], + base, init_node, i); if (err) return err; } @@ -841,7 +1268,8 @@ static int init_root_tree_recursive(int max_ft_level, struct init_tree_node *ini return 0; } -static int init_root_tree(int max_ft_level, struct init_tree_node *init_node, +static int init_root_tree(struct mlx5_core_dev *dev, + struct init_tree_node *init_node, struct fs_node *fs_parent_node) { int i; @@ -850,8 +1278,7 @@ static int init_root_tree(int max_ft_level, struct init_tree_node *init_node, fs_get_obj(fs_ns, fs_parent_node); for (i = 0; i < init_node->ar_size; i++) { - err = init_root_tree_recursive(max_ft_level, - &init_node->children[i], + err = init_root_tree_recursive(dev, &init_node->children[i], &fs_ns->node, init_node, i); if (err) @@ -877,25 +1304,65 @@ static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_core_dev *dev ns = &root_ns->ns; fs_init_namespace(ns); + mutex_init(&root_ns->chain_lock); tree_init_node(&ns->node, 1, NULL); tree_add_node(&ns->node, NULL); return root_ns; } +static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level); + +static int set_prio_attrs_in_ns(struct mlx5_flow_namespace *ns, int acc_level) +{ + struct fs_prio *prio; + + fs_for_each_prio(prio, ns) { + /* This updates prio start_level and max_ft */ + set_prio_attrs_in_prio(prio, acc_level); + acc_level += prio->max_ft; + } + return acc_level; +} + +static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level) +{ + struct mlx5_flow_namespace *ns; + int acc_level_ns = acc_level; + + prio->start_level = acc_level; + fs_for_each_ns(ns, prio) + /* This updates start_level and max_ft of ns's priority descendants */ + acc_level_ns = set_prio_attrs_in_ns(ns, acc_level); + if (!prio->max_ft) + prio->max_ft = acc_level_ns - prio->start_level; + WARN_ON(prio->max_ft < acc_level_ns - prio->start_level); +} + +static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns) +{ + struct mlx5_flow_namespace *ns = &root_ns->ns; + struct fs_prio *prio; + int start_level = 0; + + fs_for_each_prio(prio, ns) { + set_prio_attrs_in_prio(prio, start_level); + start_level += prio->max_ft; + } +} + static int init_root_ns(struct mlx5_core_dev *dev) { - int max_ft_level = MLX5_CAP_FLOWTABLE(dev, - flow_table_properties_nic_receive. 
- max_ft_level); dev->priv.root_ns = create_root_ns(dev, FS_FT_NIC_RX); if (IS_ERR_OR_NULL(dev->priv.root_ns)) goto cleanup; - if (init_root_tree(max_ft_level, &root_fs, &dev->priv.root_ns->ns.node)) + if (init_root_tree(dev, &root_fs, &dev->priv.root_ns->ns.node)) goto cleanup; + set_prio_attrs(dev->priv.root_ns); + return 0; cleanup: @@ -1019,7 +1486,7 @@ static int init_fdb_root_ns(struct mlx5_core_dev *dev) return -ENOMEM; /* Create single prio */ - prio = fs_create_prio(&dev->priv.fdb_root_ns->ns, 0, 1, 0); + prio = fs_create_prio(&dev->priv.fdb_root_ns->ns, 0, 1); if (IS_ERR(prio)) { cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns); return PTR_ERR(prio); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 4ebb97fd5544..00245fd7e4bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -77,6 +77,11 @@ struct mlx5_flow_table { unsigned int max_fte; unsigned int level; enum fs_flow_table_type type; + struct { + bool active; + unsigned int required_groups; + unsigned int num_groups; + } autogroup; }; /* Type of children is mlx5_flow_rule */ @@ -124,6 +129,9 @@ struct mlx5_flow_root_namespace { struct mlx5_flow_namespace ns; enum fs_flow_table_type table_type; struct mlx5_core_dev *dev; + struct mlx5_flow_table *root_ft; + /* Should be held when chaining flow tables */ + struct mutex chain_lock; }; int mlx5_init_fs(struct mlx5_core_dev *dev); @@ -143,6 +151,12 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev); #define fs_for_each_prio(pos, ns) \ fs_list_for_each_entry(pos, &(ns)->node.children) +#define fs_for_each_ns(pos, prio) \ + fs_list_for_each_entry(pos, &(prio)->node.children) + +#define fs_for_each_ft(pos, prio) \ + fs_list_for_each_entry(pos, &(prio)->node.children) + #define fs_for_each_fg(pos, ft) \ fs_list_for_each_entry(pos, &(ft)->node.children) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index df2f79ef3cac..7be845e30689 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1258,4 +1258,6 @@ static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz; } +#define MLX5_BY_PASS_NUM_PRIOS 9 + #endif /* MLX5_DEVICE_H */ diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index bc7ad019afde..8230caa3fb6e 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -38,8 +38,20 @@ #define MLX5_FS_DEFAULT_FLOW_TAG 0x0 +#define LEFTOVERS_RULE_NUM 2 +static inline void build_leftovers_ft_param(int *priority, + int *n_ent, + int *n_grp) +{ + *priority = 0; /* Priority of leftovers_prio-0 */ + *n_ent = LEFTOVERS_RULE_NUM; + *n_grp = LEFTOVERS_RULE_NUM; +} + enum mlx5_flow_namespace_type { + MLX5_FLOW_NAMESPACE_BYPASS, MLX5_FLOW_NAMESPACE_KERNEL, + MLX5_FLOW_NAMESPACE_LEFTOVERS, MLX5_FLOW_NAMESPACE_FDB, }; @@ -61,6 +73,12 @@ struct mlx5_flow_namespace * mlx5_get_flow_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type); +struct mlx5_flow_table * +mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, + int prio, + int num_flow_table_entries, + int max_num_groups); + struct mlx5_flow_table * mlx5_create_flow_table(struct mlx5_flow_namespace *ns, int prio, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 1780a85a8797..68d73f82e009 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -185,6 +185,7 @@ enum { MLX5_CMD_OP_MODIFY_RQT = 0x917, 
MLX5_CMD_OP_DESTROY_RQT = 0x918, MLX5_CMD_OP_QUERY_RQT = 0x919, + MLX5_CMD_OP_SET_FLOW_TABLE_ROOT = 0x92f, MLX5_CMD_OP_CREATE_FLOW_TABLE = 0x930, MLX5_CMD_OP_DESTROY_FLOW_TABLE = 0x931, MLX5_CMD_OP_QUERY_FLOW_TABLE = 0x932, @@ -193,7 +194,8 @@ enum { MLX5_CMD_OP_QUERY_FLOW_GROUP = 0x935, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY = 0x936, MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY = 0x937, - MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY = 0x938 + MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY = 0x938, + MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c }; struct mlx5_ifc_flow_table_fields_supported_bits { @@ -258,7 +260,10 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 ft_support[0x1]; u8 reserved_0[0x2]; u8 flow_modify_en[0x1]; - u8 reserved_1[0x1c]; + u8 modify_root[0x1]; + u8 identified_miss_table_mode[0x1]; + u8 flow_table_modify[0x1]; + u8 reserved_1[0x19]; u8 reserved_2[0x2]; u8 log_max_ft_size[0x6]; @@ -293,6 +298,22 @@ struct mlx5_ifc_odp_per_transport_service_cap_bits { u8 reserved_1[0x1a]; }; +struct mlx5_ifc_ipv4_layout_bits { + u8 reserved_0[0x60]; + + u8 ipv4[0x20]; +}; + +struct mlx5_ifc_ipv6_layout_bits { + u8 ipv6[16][0x8]; +}; + +union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits { + struct mlx5_ifc_ipv6_layout_bits ipv6_layout; + struct mlx5_ifc_ipv4_layout_bits ipv4_layout; + u8 reserved_0[0x80]; +}; + struct mlx5_ifc_fte_match_set_lyr_2_4_bits { u8 smac_47_16[0x20]; @@ -323,9 +344,9 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits { u8 udp_sport[0x10]; u8 udp_dport[0x10]; - u8 src_ip[4][0x20]; + union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6; - u8 dst_ip[4][0x20]; + union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6; }; struct mlx5_ifc_fte_match_set_misc_bits { @@ -5667,12 +5688,16 @@ struct mlx5_ifc_create_flow_table_in_bits { u8 reserved_4[0x20]; - u8 reserved_5[0x8]; + u8 reserved_5[0x4]; + u8 table_miss_mode[0x4]; u8 level[0x8]; u8 reserved_6[0x8]; u8 log_size[0x8]; - u8 reserved_7[0x120]; + u8 reserved_7[0x8]; + u8 table_miss_id[0x18]; + + u8 reserved_8[0x100]; }; struct mlx5_ifc_create_flow_group_out_bits { @@ -6946,4 +6971,72 @@ union mlx5_ifc_uplink_pci_interface_document_bits { u8 reserved_0[0x20060]; }; +struct mlx5_ifc_set_flow_table_root_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_set_flow_table_root_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 reserved_5[0x140]; +}; + +enum { + MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID = 0x1, +}; + +struct mlx5_ifc_modify_flow_table_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_flow_table_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x20]; + + u8 reserved_3[0x10]; + u8 modify_field_select[0x10]; + + u8 table_type[0x8]; + u8 reserved_4[0x18]; + + u8 reserved_5[0x8]; + u8 table_id[0x18]; + + u8 reserved_6[0x4]; + u8 table_miss_mode[0x4]; + u8 reserved_7[0x18]; + + u8 reserved_8[0x8]; + u8 table_miss_id[0x18]; + + u8 reserved_9[0x100]; +}; + #endif /* MLX5_IFC_H */
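
Note: in the IB_FLOW_SPEC_ETH case, parse_flow_attr() programs first_vid, first_cfi and first_prio by shifting the whole ntohs()'d VLAN TCI and letting MLX5_SET() truncate the value to each field's width (12, 1 and 3 bits respectively). A minimal sketch of the equivalent explicit masking; tci_split() is a hypothetical helper, not code from this patch:

#include <linux/types.h>

/* Split a host-order 802.1Q TCI (PCP:3 | DEI/CFI:1 | VID:12) the way
 * the MLX5_SET() calls above do implicitly, since MLX5_SET() masks its
 * value down to the destination field's width.
 */
static inline void tci_split(u16 tci, u16 *vid, u8 *cfi, u8 *prio)
{
	*vid  = tci & 0x0fff;      /* MLX5_SET(..., first_vid, tci) keeps 12 bits */
	*cfi  = (tci >> 12) & 0x1; /* MLX5_SET(..., first_cfi, tci >> 12) keeps 1 bit */
	*prio = (tci >> 13) & 0x7; /* MLX5_SET(..., first_prio, tci >> 13) keeps 3 bits */
}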
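
Note: with MLX5_BY_PASS_NUM_PRIOS defined as 9 in device.h, the priority constants added to mlx5_ib.h work out as below, so user-requested priorities 0..7 map one-to-one onto bypass priorities and mlx5_ib_create_flow() rejects anything above MLX5_IB_FLOW_LAST_PRIO with -ENOSPC:

/* MLX5_IB_FLOW_MCAST_PRIO     = 9 - 1 = 8   last bypass slot, reserved
 *                                           for multicast-only rules
 * MLX5_IB_FLOW_LAST_PRIO      = 8 - 1 = 7   highest priority a user rule
 *                                           may request
 * MLX5_IB_FLOW_LEFTOVERS_PRIO = 8 + 1 = 9   catch-all for packets that
 *                                           miss every bypass table
 * MLX5_IB_NUM_FLOW_FT         = 9 + 1 = 10  size of mlx5_ib_flow_db.prios[]
 */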
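
Note: a minimal sketch of how an in-kernel consumer could drive the API this patch exports (mlx5_get_flow_namespace(), mlx5_create_auto_grouped_flow_table(), mlx5_add_flow_rule(), mlx5_destroy_flow_table()). The function name, the table sizing and the TCP destination-port match are illustrative, not taken from the patch:

#include <linux/err.h>
#include <linux/in.h>
#include <linux/slab.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/mlx5_ifc.h>
#include <linux/mlx5/fs.h>

/* dport is in host byte order; tirn names an already-created TIR */
static struct mlx5_flow_rule *example_steer_tcp_dport(struct mlx5_core_dev *mdev,
						      u32 tirn, u16 dport)
{
	struct mlx5_flow_destination dest = {
		.type	 = MLX5_FLOW_DESTINATION_TYPE_TIR,
		.tir_num = tirn,
	};
	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_table *ft;
	struct mlx5_flow_rule *rule;
	void *hdrs;
	u32 *mc, *mv;

	ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_BYPASS);
	if (!ns)
		return ERR_PTR(-ENOTSUPP);

	/* bypass priority 0, room for 1024 rules, at most 4 autogroups */
	ft = mlx5_create_auto_grouped_flow_table(ns, 0, 1024, 4);
	if (IS_ERR(ft))
		return ERR_CAST(ft);

	rule = ERR_PTR(-ENOMEM);
	mc = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
	mv = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
	if (!mc || !mv)
		goto out;

	/* criteria: match the full ip_protocol and tcp_dport fields */
	hdrs = MLX5_ADDR_OF(fte_match_param, mc, outer_headers);
	MLX5_SET(fte_match_set_lyr_2_4, hdrs, ip_protocol, 0xff);
	MLX5_SET(fte_match_set_lyr_2_4, hdrs, tcp_dport, 0xffff);

	/* values: TCP segments aimed at dport */
	hdrs = MLX5_ADDR_OF(fte_match_param, mv, outer_headers);
	MLX5_SET(fte_match_set_lyr_2_4, hdrs, ip_protocol, IPPROTO_TCP);
	MLX5_SET(fte_match_set_lyr_2_4, hdrs, tcp_dport, dport);

	/* bit 0 of match_criteria_enable selects the outer headers,
	 * exactly as create_flow_rule() computes it above
	 */
	rule = mlx5_add_flow_rule(ft, 1 << 0, mc, mv,
				  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
				  MLX5_FS_DEFAULT_FLOW_TAG, &dest);
out:
	kfree(mc);
	kfree(mv);
	if (IS_ERR(rule))
		mlx5_destroy_flow_table(ft);
	return rule;
}

The rule is released with mlx5_del_flow_rule() and the table with mlx5_destroy_flow_table(), in that order, mirroring the teardown mlx5_ib_destroy_flow() performs on the IB side.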
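
Note: userspace reaches mlx5_ib_create_flow() through the extended uverbs call ibv_create_flow(). A minimal sketch against libibverbs (assumes a library version that ships the flow steering verbs and an initialized raw packet QP; the MAC, port number and priority below are illustrative):

#include <arpa/inet.h>
#include <infiniband/verbs.h>

/* Flow attribute followed by its specs, laid out contiguously as the
 * verbs API expects; sizes cover the whole blob.
 */
struct raw_eth_flow_attr {
	struct ibv_flow_attr         attr;
	struct ibv_flow_spec_eth     eth;
	struct ibv_flow_spec_tcp_udp tcp;
} __attribute__((packed));

static struct ibv_flow *example_create_flow(struct ibv_qp *qp)
{
	struct raw_eth_flow_attr fa = {
		.attr = {
			.type         = IBV_FLOW_ATTR_NORMAL,
			.size         = sizeof(fa),
			.priority     = 0,  /* 0..7, per the driver checks above */
			.num_of_specs = 2,
			.port         = 1,
		},
		.eth = {
			.type = IBV_FLOW_SPEC_ETH,
			.size = sizeof(struct ibv_flow_spec_eth),
			.val.dst_mac  = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55},
			.mask.dst_mac = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
		},
		.tcp = {
			.type = IBV_FLOW_SPEC_TCP,
			.size = sizeof(struct ibv_flow_spec_tcp_udp),
			.val.dst_port  = htons(80),
			.mask.dst_port = 0xffff, /* match the whole port */
		},
	};

	/* Steers matching packets to qp; undone with ibv_destroy_flow() */
	return ibv_create_flow(qp, &fa.attr);
}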