netfilter pull request 24-11-15

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEjF9xRqF1emXiQiqU1w0aZmrPKyEFAmc3S9AACgkQ1w0aZmrP
 KyF7Sg/9GBfCiuuxUqrbigUitY8dJFuCTt+fKxMDfTb6sqU7FgQK/ylqwuW2zikz
 MgyVRXTAMbgD1KU5U+v1VEf5kq8iCU/rpdCC1xMOK9GvbaYQ9l/0cw8PR1jGgmSZ
 P1NWgmpv30IbZ/bQblU9/SbP8sFWg3DLC9lFrqYlLkJjijhfSDTflI6uVVWwt+rn
 9jWqgzf6mUYKAKJ56gFfUW/09jYPkQ5OLYz9CLqvIZLhdYNPGy2GEgldzXkHaVPv
 O65lMjrNojVYfITcinjkVfVVTlcLtQPNG9novclXrsf+qSsov5h/583n0c+7Xh3N
 r+EY1NBzZEcxLloTowJ/iq7xtDHHDG6Rv3BGTMS2JWFxhUDOV3Ks2qj/bIZUkzh5
 /Kl8n4NFbE+f1F3TGOoivZ0CFK1s3jcdIu3RTMXwwa41eiOAt8dPvhckfxTW20kT
 GdIYMNpUC1UVw2a1bxPEw27omB2UF2VADK5vHm97WJ8FBjA1HwPA9afF+PmyNMZ6
 cCOKT225DpXkt2WAMX+bgDyqQN150B05/JrBRdiT5hT5++xJn+heZLvx56L4mPA2
 8Y8NnnXLsyx5pwtE6HKgBOZNXXno2xpE/OrafF5n2zHwiMnF5qeF1+Jwerm8SxUa
 ZTuUS1mAi922IJzksnjRtiVggEA4X9Arq4NRwlIMWunTxybmHnE=
 =Tp49
 -----END PGP SIGNATURE-----

Merge tag 'nf-next-24-11-15' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for net-next:

1) Extended netlink error reporting if nfnetlink attribute parser fails,
   from Donald Hunter.

2) Fix incorrect request_module() usage: pass a string literal as the
   format argument, from Simon Horman.

3) A series of patches to reduce memory consumption for set element
   transactions.
   Florian Westphal says:

"When doing a flush on a set or mass adding/removing elements from a
set, each element needs to allocate 96 bytes to hold the transactional
state.

In such cases, virtually all the information in struct nft_trans_elem
is the same.

Change nft_trans_elem to a flex-array, i.e. a single nft_trans_elem
can hold multiple set element pointers.

The number of elements that can be stored in one nft_trans_elem is limited
by the slab allocator, this series limits the compaction to at most 62
elements as it caps the reallocation to 2048 bytes of memory."

4) A series of patches to prepare the transition to dscp_t in .flowi_tos.
   From Guillaume Nault.

5) Support for bitwise operations with two source registers,
   from Jeremy Sowden.

* tag 'nf-next-24-11-15' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next:
  netfilter: bitwise: add support for doing AND, OR and XOR directly
  netfilter: bitwise: rename some boolean operation functions
  netfilter: nf_dup4: Convert nf_dup_ipv4_route() to dscp_t.
  netfilter: nft_fib: Convert nft_fib4_eval() to dscp_t.
  netfilter: rpfilter: Convert rpfilter_mt() to dscp_t.
  netfilter: flow_offload: Convert nft_flow_route() to dscp_t.
  netfilter: ipv4: Convert ip_route_me_harder() to dscp_t.
  netfilter: nf_tables: allocate element update information dynamically
  netfilter: nf_tables: switch trans_elem to real flex array
  netfilter: nf_tables: prepare nft audit for set element compaction
  netfilter: nf_tables: prepare for multiple elements in nft_trans_elem structure
  netfilter: nf_tables: add nft_trans_commit_list_add_elem helper
  netfilter: bpf: Pass string literal as format argument of request_module()
  netfilter: nfnetlink: Report extack policy errors for batched ops
====================

Link: https://patch.msgid.link/20241115133207.8907-1-pablo@netfilter.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2024-11-15 14:09:20 -08:00
commit 26a3beee24
11 changed files with 485 additions and 128 deletions

View File

@ -1759,28 +1759,29 @@ enum nft_trans_elem_flags {
NFT_TRANS_UPD_EXPIRATION = (1 << 1),
};
/* Pending timeout/expiration change for one already-existing set element.
 * Allocated separately from the transaction and referenced through
 * struct nft_trans_one_elem::update.
 */
struct nft_elem_update {
/* new timeout value; applied when NFT_TRANS_UPD_TIMEOUT is set in @flags */
u64 timeout;
/* relative expiration; get_jiffies_64() is added when it is applied,
 * used when NFT_TRANS_UPD_EXPIRATION is set in @flags
 */
u64 expiration;
/* bitmask of NFT_TRANS_UPD_* values selecting which fields are valid */
u8 flags;
};
/* One slot of the nft_trans_elem flexible array: a set element plus, for
 * update requests only, the timeout/expiration change to apply to it.
 */
struct nft_trans_one_elem {
/* the set element this slot refers to */
struct nft_elem_priv *priv;
/* NULL for plain add/delete; non-NULL marks an update of an existing element */
struct nft_elem_update *update;
};
struct nft_trans_elem {
struct nft_trans nft_trans;
struct nft_set *set;
struct nft_elem_priv *elem_priv;
u64 timeout;
u64 expiration;
u8 update_flags;
bool bound;
unsigned int nelems;
struct nft_trans_one_elem elems[] __counted_by(nelems);
};
#define nft_trans_container_elem(t) \
container_of(t, struct nft_trans_elem, nft_trans)
#define nft_trans_elem_set(trans) \
nft_trans_container_elem(trans)->set
#define nft_trans_elem_priv(trans) \
nft_trans_container_elem(trans)->elem_priv
#define nft_trans_elem_update_flags(trans) \
nft_trans_container_elem(trans)->update_flags
#define nft_trans_elem_timeout(trans) \
nft_trans_container_elem(trans)->timeout
#define nft_trans_elem_expiration(trans) \
nft_trans_container_elem(trans)->expiration
#define nft_trans_elem_set_bound(trans) \
nft_trans_container_elem(trans)->bound

View File

@ -564,16 +564,26 @@ enum nft_immediate_attributes {
/**
* enum nft_bitwise_ops - nf_tables bitwise operations
*
* @NFT_BITWISE_BOOL: mask-and-xor operation used to implement NOT, AND, OR and
* XOR boolean operations
* @NFT_BITWISE_MASK_XOR: mask-and-xor operation used to implement NOT, AND, OR
* and XOR boolean operations
* @NFT_BITWISE_LSHIFT: left-shift operation
* @NFT_BITWISE_RSHIFT: right-shift operation
* @NFT_BITWISE_AND: and operation
* @NFT_BITWISE_OR: or operation
* @NFT_BITWISE_XOR: xor operation
*/
enum nft_bitwise_ops {
NFT_BITWISE_BOOL,
NFT_BITWISE_MASK_XOR,
NFT_BITWISE_LSHIFT,
NFT_BITWISE_RSHIFT,
NFT_BITWISE_AND,
NFT_BITWISE_OR,
NFT_BITWISE_XOR,
};
/*
* Old name for NFT_BITWISE_MASK_XOR. Retained for backwards-compatibility.
*/
#define NFT_BITWISE_BOOL NFT_BITWISE_MASK_XOR
/**
* enum nft_bitwise_attributes - nf_tables bitwise expression netlink attributes
@ -586,6 +596,7 @@ enum nft_bitwise_ops {
* @NFTA_BITWISE_OP: type of operation (NLA_U32: nft_bitwise_ops)
* @NFTA_BITWISE_DATA: argument for non-boolean operations
* (NLA_NESTED: nft_data_attributes)
* @NFTA_BITWISE_SREG2: second source register (NLA_U32: nft_registers)
*
* The bitwise expression supports boolean and shift operations. It implements
* the boolean operations by performing the following operation:
@ -609,6 +620,7 @@ enum nft_bitwise_attributes {
NFTA_BITWISE_XOR,
NFTA_BITWISE_OP,
NFTA_BITWISE_DATA,
NFTA_BITWISE_SREG2,
__NFTA_BITWISE_MAX
};
#define NFTA_BITWISE_MAX (__NFTA_BITWISE_MAX - 1)

View File

@ -44,7 +44,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un
*/
fl4.daddr = iph->daddr;
fl4.saddr = saddr;
fl4.flowi4_tos = iph->tos & INET_DSCP_MASK;
fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph));
fl4.flowi4_oif = sk ? sk->sk_bound_dev_if : 0;
fl4.flowi4_l3mdev = l3mdev_master_ifindex(dev);
fl4.flowi4_mark = skb->mark;

View File

@ -76,7 +76,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
flow.daddr = iph->saddr;
flow.saddr = rpfilter_get_saddr(iph->daddr);
flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
flow.flowi4_tos = iph->tos & INET_DSCP_MASK;
flow.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph));
flow.flowi4_scope = RT_SCOPE_UNIVERSE;
flow.flowi4_l3mdev = l3mdev_master_ifindex_rcu(xt_in(par));
flow.flowi4_uid = sock_net_uid(xt_net(par), NULL);

View File

@ -33,7 +33,7 @@ static bool nf_dup_ipv4_route(struct net *net, struct sk_buff *skb,
fl4.flowi4_oif = oif;
fl4.daddr = gw->s_addr;
fl4.flowi4_tos = iph->tos & INET_DSCP_MASK;
fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph));
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
fl4.flowi4_flags = FLOWI_FLAG_KNOWN_NH;
rt = ip_route_output_key(net, &fl4);

View File

@ -11,6 +11,7 @@
#include <net/netfilter/nft_fib.h>
#include <net/inet_dscp.h>
#include <net/ip.h>
#include <net/ip_fib.h>
#include <net/route.h>
@ -107,7 +108,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (priv->flags & NFTA_FIB_F_MARK)
fl4.flowi4_mark = pkt->skb->mark;
fl4.flowi4_tos = iph->tos & INET_DSCP_MASK;
fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph));
if (priv->flags & NFTA_FIB_F_DADDR) {
fl4.daddr = iph->daddr;

View File

@ -43,7 +43,7 @@ get_proto_defrag_hook(struct bpf_nf_link *link,
hook = rcu_dereference(*ptr_global_hook);
if (!hook) {
rcu_read_unlock();
err = request_module(mod);
err = request_module("%s", mod);
if (err)
return ERR_PTR(err < 0 ? err : -EINVAL);

View File

@ -26,6 +26,9 @@
#define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-"))
#define NFT_SET_MAX_ANONLEN 16
/* limit compaction to avoid huge kmalloc/krealloc sizes. */
#define NFT_MAX_SET_NELEMS ((2048 - sizeof(struct nft_trans_elem)) / sizeof(struct nft_trans_one_elem))
unsigned int nf_tables_net_id __read_mostly;
static LIST_HEAD(nf_tables_expressions);
@ -391,6 +394,86 @@ static void nf_tables_unregister_hook(struct net *net,
return __nf_tables_unregister_hook(net, table, chain, false);
}
static bool nft_trans_collapse_set_elem_allowed(const struct nft_trans_elem *a, const struct nft_trans_elem *b)
{
/* NB: the ->bound equality check is defensive, at this time we only merge
* a new nft_trans_elem transaction request with the transaction tail
* element, but a->bound != b->bound would imply a NEWRULE transaction
* is queued in-between.
*
* The set check is mandatory, the NFT_MAX_SET_NELEMS check prevents
* huge krealloc() requests.
*/
return a->set == b->set && a->bound == b->bound && a->nelems < NFT_MAX_SET_NELEMS;
}
/* Fold the single element of @trans into @tail by growing tail's elems[]
 * array with krealloc().
 *
 * @nft_net: per-netns state owning the commit list
 * @tail:    transaction currently queued on the commit list
 * @trans:   new, not yet queued transaction holding exactly one element
 * @gfp:     allocation flags for the krealloc() call
 *
 * Returns true if the element was copied into the (possibly relocated) tail
 * transaction; @trans itself is not freed here. Returns false if merging is
 * not allowed or the reallocation failed; in that case @tail is still queued
 * at the end of the commit list and @trans is untouched.
 */
static bool nft_trans_collapse_set_elem(struct nftables_pernet *nft_net,
struct nft_trans_elem *tail,
struct nft_trans_elem *trans,
gfp_t gfp)
{
unsigned int nelems, old_nelems = tail->nelems;
struct nft_trans_elem *new_trans;
if (!nft_trans_collapse_set_elem_allowed(tail, trans))
return false;
/* "cannot happen", at this time userspace element add
* requests always allocate a new transaction element.
*
* This serves as a reminder to adjust the list_add_tail
* logic below in case this ever changes.
*/
if (WARN_ON_ONCE(trans->nelems != 1))
return false;
/* refuse to merge if the new element count would wrap around */
if (check_add_overflow(old_nelems, trans->nelems, &nelems))
return false;
/* krealloc might free tail which invalidates list pointers */
list_del_init(&tail->nft_trans.list);
new_trans = krealloc(tail, struct_size(tail, elems, nelems), gfp);
if (!new_trans) {
/* reallocation failed: put the unchanged tail back on the list */
list_add_tail(&tail->nft_trans.list, &nft_net->commit_list);
return false;
}
/*
* new_trans->nft_trans.list contains garbage, but
* list_add_tail() doesn't care.
*/
new_trans->nelems = nelems;
/* only elems[0] is copied, which is why trans->nelems must be 1 */
new_trans->elems[old_nelems] = trans->elems[0];
list_add_tail(&new_trans->nft_trans.list, &nft_net->commit_list);
return true;
}
/* Try to merge @trans into the transaction at the end of the commit list.
 *
 * Only NFT_MSG_NEWSETELEM and NFT_MSG_DELSETELEM requests whose message type
 * equals that of the list tail are candidates. Returns true if @trans was
 * merged, false if it still has to be queued by the caller.
 */
static bool nft_trans_try_collapse(struct nftables_pernet *nft_net,
				   struct nft_trans *trans, gfp_t gfp)
{
	struct nft_trans *last;

	if (list_empty(&nft_net->commit_list))
		return false;

	last = list_last_entry(&nft_net->commit_list, struct nft_trans, list);
	if (last->msg_type != trans->msg_type)
		return false;

	if (trans->msg_type != NFT_MSG_NEWSETELEM &&
	    trans->msg_type != NFT_MSG_DELSETELEM)
		return false;

	return nft_trans_collapse_set_elem(nft_net,
					   nft_trans_container_elem(last),
					   nft_trans_container_elem(trans), gfp);
}
static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans)
{
struct nftables_pernet *nft_net = nft_pernet(net);
@ -421,6 +504,24 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr
}
}
/* Queue a set element transaction, merging it into the commit list tail
 * when possible.
 *
 * @net:   network namespace whose commit list is used
 * @trans: NFT_MSG_NEWSETELEM or NFT_MSG_DELSETELEM transaction to queue
 * @gfp:   allocation flags passed on to the merge attempt
 *
 * Takes over @trans: it is either freed (after a successful merge) or
 * appended to the commit list.
 */
static void nft_trans_commit_list_add_elem(struct net *net, struct nft_trans *trans,
gfp_t gfp)
{
struct nftables_pernet *nft_net = nft_pernet(net);
/* this helper is only meant for set element transactions */
WARN_ON_ONCE(trans->msg_type != NFT_MSG_NEWSETELEM &&
trans->msg_type != NFT_MSG_DELSETELEM);
might_alloc(gfp);
if (nft_trans_try_collapse(nft_net, trans, gfp)) {
/* element now lives in the tail transaction, @trans is redundant */
kfree(trans);
return;
}
/* no merge took place: queue @trans as a transaction of its own */
nft_trans_commit_list_add_tail(net, trans);
}
static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
{
struct nft_trans *trans;
@ -6435,13 +6536,17 @@ static struct nft_trans *nft_trans_elem_alloc(const struct nft_ctx *ctx,
int msg_type,
struct nft_set *set)
{
struct nft_trans_elem *te;
struct nft_trans *trans;
trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_elem));
trans = nft_trans_alloc(ctx, msg_type, struct_size(te, elems, 1));
if (trans == NULL)
return NULL;
nft_trans_elem_set(trans) = set;
te = nft_trans_container_elem(trans);
te->nelems = 1;
te->set = set;
return trans;
}
@ -6563,28 +6668,52 @@ static void nft_set_elem_expr_destroy(const struct nft_ctx *ctx,
}
/* Drop references and destroy. Called from gc, dynset and abort path. */
void nft_set_elem_destroy(const struct nft_set *set,
const struct nft_elem_priv *elem_priv,
bool destroy_expr)
static void __nft_set_elem_destroy(const struct nft_ctx *ctx,
const struct nft_set *set,
const struct nft_elem_priv *elem_priv,
bool destroy_expr)
{
struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
struct nft_ctx ctx = {
.net = read_pnet(&set->net),
.family = set->table->family,
};
nft_data_release(nft_set_ext_key(ext), NFT_DATA_VALUE);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_release(nft_set_ext_data(ext), set->dtype);
if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS))
nft_set_elem_expr_destroy(&ctx, nft_set_ext_expr(ext));
nft_set_elem_expr_destroy(ctx, nft_set_ext_expr(ext));
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
nft_use_dec(&(*nft_set_ext_obj(ext))->use);
kfree(elem_priv);
}
/* Drop references and destroy. Called from gc and dynset.
 *
 * Builds a temporary nft_ctx from the set's netns and table family and
 * hands the element to __nft_set_elem_destroy().
 *
 * @set:          set the element belongs to
 * @elem_priv:    element to release
 * @destroy_expr: passed through unchanged to __nft_set_elem_destroy()
 */
void nft_set_elem_destroy(const struct nft_set *set,
const struct nft_elem_priv *elem_priv,
bool destroy_expr)
{
/* only .net and .family are filled in, the remaining fields stay zeroed */
struct nft_ctx ctx = {
.net = read_pnet(&set->net),
.family = set->table->family,
};
__nft_set_elem_destroy(&ctx, set, elem_priv, destroy_expr);
}
EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
/* Abort path: drop references and destroy every element still attached to
 * the transaction @te.
 */
static void nft_trans_set_elem_destroy(const struct nft_ctx *ctx, struct nft_trans_elem *te)
{
	int idx;

	for (idx = 0; idx < te->nelems; idx++) {
		struct nft_elem_priv *priv = te->elems[idx].priv;

		/* a NULL priv marks an update request, which must be
		 * skipped, see nft_trans_elems_new_abort()
		 */
		if (priv)
			__nft_set_elem_destroy(ctx, te->set, priv, true);
	}
}
/* Destroy element. References have been already dropped in the preparation
* path via nft_setelem_data_deactivate().
*/
@ -6600,6 +6729,15 @@ void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
kfree(elem_priv);
}
static void nft_trans_elems_destroy(const struct nft_ctx *ctx,
const struct nft_trans_elem *te)
{
int i;
for (i = 0; i < te->nelems; i++)
nf_tables_set_elem_destroy(ctx, te->set, te->elems[i].priv);
}
int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_expr *expr_array[])
{
@ -6756,6 +6894,38 @@ static void nft_setelem_activate(struct net *net, struct nft_set *set,
}
}
/* Apply a pending timeout/expiration update to an existing set element.
 *
 * @set:  set the element belongs to
 * @elem: transaction slot; elem->update must be non-NULL, it is
 *        dereferenced unconditionally
 */
static void nft_trans_elem_update(const struct nft_set *set,
const struct nft_trans_one_elem *elem)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
const struct nft_elem_update *update = elem->update;
/* each field is only written when its NFT_TRANS_UPD_* bit is set */
if (update->flags & NFT_TRANS_UPD_TIMEOUT)
WRITE_ONCE(nft_set_ext_timeout(ext)->timeout, update->timeout);
/* the stored expiration is relative, convert it to an absolute jiffies value */
if (update->flags & NFT_TRANS_UPD_EXPIRATION)
WRITE_ONCE(nft_set_ext_timeout(ext)->expiration, get_jiffies_64() + update->expiration);
}
/* Process all elements of a NEWSETELEM transaction: update or activate
 * each one, then send the NFT_MSG_NEWSETELEM notification for it.
 *
 * @ctx: context used for activation (ctx->net) and notification
 * @te:  transaction holding te->nelems elements of te->set
 */
static void nft_trans_elems_add(const struct nft_ctx *ctx,
struct nft_trans_elem *te)
{
int i;
for (i = 0; i < te->nelems; i++) {
struct nft_trans_one_elem *elem = &te->elems[i];
/* a non-NULL update marks a change to an already-existing element */
if (elem->update)
nft_trans_elem_update(te->set, elem);
else
nft_setelem_activate(ctx->net, te->set, elem->priv);
nf_tables_setelem_notify(ctx, te->set, elem->priv,
NFT_MSG_NEWSETELEM);
/* update data is no longer needed; a no-op when update is NULL */
kfree(elem->update);
}
}
static int nft_setelem_catchall_deactivate(const struct net *net,
struct nft_set *set,
struct nft_set_elem *elem)
@ -6838,6 +7008,26 @@ static void nft_setelem_remove(const struct net *net,
set->ops->remove(net, set, elem_priv);
}
/* Process all elements of a delete transaction: notify, remove the element
 * from the set and adjust the set counters.
 *
 * @ctx: context used for notification and removal (ctx->net)
 * @te:  transaction holding te->nelems elements of te->set
 */
static void nft_trans_elems_remove(const struct nft_ctx *ctx,
const struct nft_trans_elem *te)
{
int i;
for (i = 0; i < te->nelems; i++) {
/* update requests are not expected in a delete transaction */
WARN_ON_ONCE(te->elems[i].update);
/* the notification carries the transaction's own message type */
nf_tables_setelem_notify(ctx, te->set,
te->elems[i].priv,
te->nft_trans.msg_type);
nft_setelem_remove(ctx->net, te->set, te->elems[i].priv);
/* the nelems and ndeact counters are not adjusted for catchall elements */
if (!nft_setelem_is_catchall(te->set, te->elems[i].priv)) {
atomic_dec(&te->set->nelems);
te->set->ndeact--;
}
}
}
static bool nft_setelem_valid_key_end(const struct nft_set *set,
struct nlattr **nla, u32 flags)
{
@ -6874,7 +7064,6 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
struct nft_data_desc desc;
enum nft_registers dreg;
struct nft_trans *trans;
u8 update_flags;
u64 expiration;
u64 timeout;
int err, i;
@ -7189,23 +7378,33 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
else if (!(nlmsg_flags & NLM_F_EXCL)) {
err = 0;
if (nft_set_ext_exists(ext2, NFT_SET_EXT_TIMEOUT)) {
update_flags = 0;
struct nft_elem_update update = { };
if (timeout != nft_set_ext_timeout(ext2)->timeout) {
nft_trans_elem_timeout(trans) = timeout;
update.timeout = timeout;
if (expiration == 0)
expiration = timeout;
update_flags |= NFT_TRANS_UPD_TIMEOUT;
update.flags |= NFT_TRANS_UPD_TIMEOUT;
}
if (expiration) {
nft_trans_elem_expiration(trans) = expiration;
update_flags |= NFT_TRANS_UPD_EXPIRATION;
update.expiration = expiration;
update.flags |= NFT_TRANS_UPD_EXPIRATION;
}
if (update_flags) {
nft_trans_elem_priv(trans) = elem_priv;
nft_trans_elem_update_flags(trans) = update_flags;
nft_trans_commit_list_add_tail(ctx->net, trans);
if (update.flags) {
struct nft_trans_one_elem *ue;
ue = &nft_trans_container_elem(trans)->elems[0];
ue->update = kmemdup(&update, sizeof(update), GFP_KERNEL);
if (!ue->update) {
err = -ENOMEM;
goto err_element_clash;
}
ue->priv = elem_priv;
nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
goto err_elem_free;
}
}
@ -7228,8 +7427,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
}
}
nft_trans_elem_priv(trans) = elem.priv;
nft_trans_commit_list_add_tail(ctx->net, trans);
nft_trans_container_elem(trans)->elems[0].priv = elem.priv;
nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
return 0;
err_set_full:
@ -7366,6 +7565,55 @@ void nft_setelem_data_deactivate(const struct net *net,
nft_use_dec(&(*nft_set_ext_obj(ext))->use);
}
/* similar to nft_trans_elems_remove, but called from abort path to undo newsetelem.
 * No notifications and no ndeact changes.
 *
 * Update requests are not undone here: their update data is freed and their
 * priv pointer is cleared.
 *
 * Returns true if set had been added to (i.e., elements need to be removed again).
 */
static bool nft_trans_elems_new_abort(const struct nft_ctx *ctx,
struct nft_trans_elem *te)
{
bool removed = false;
int i;
for (i = 0; i < te->nelems; i++) {
if (te->elems[i].update) {
kfree(te->elems[i].update);
te->elems[i].update = NULL;
/* Update request, so do not release this element */
te->elems[i].priv = NULL;
continue;
}
/* explicit removal happens only when the set has no ->abort op or the
 * element is a catchall one; presumably ->abort undoes the others
 * (not visible here, confirm against the set backends)
 */
if (!te->set->ops->abort || nft_setelem_is_catchall(te->set, te->elems[i].priv))
nft_setelem_remove(ctx->net, te->set, te->elems[i].priv);
if (!nft_setelem_is_catchall(te->set, te->elems[i].priv))
atomic_dec(&te->set->nelems);
/* at least one slot was a real addition rather than an update */
removed = true;
}
return removed;
}
/* Called from abort path to undo DELSETELEM/DESTROYSETELEM.
 *
 * @ctx: context providing the network namespace (ctx->net)
 * @te:  delete transaction whose te->nelems elements are handled
 */
static void nft_trans_elems_destroy_abort(const struct nft_ctx *ctx,
const struct nft_trans_elem *te)
{
int i;
for (i = 0; i < te->nelems; i++) {
/* re-activate only elements that are not active in the next generation */
if (!nft_setelem_active_next(ctx->net, te->set, te->elems[i].priv)) {
nft_setelem_data_activate(ctx->net, te->set, te->elems[i].priv);
nft_setelem_activate(ctx->net, te->set, te->elems[i].priv);
}
/* ndeact is only decremented for non-catchall elements */
if (!nft_setelem_is_catchall(te->set, te->elems[i].priv))
te->set->ndeact--;
}
}
static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr)
{
@ -7445,8 +7693,8 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
nft_setelem_data_deactivate(ctx->net, set, elem.priv);
nft_trans_elem_priv(trans) = elem.priv;
nft_trans_commit_list_add_tail(ctx->net, trans);
nft_trans_container_elem(trans)->elems[0].priv = elem.priv;
nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
return 0;
fail_ops:
@ -7472,7 +7720,8 @@ static int nft_setelem_flush(const struct nft_ctx *ctx,
return 0;
trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM,
sizeof(struct nft_trans_elem), GFP_ATOMIC);
struct_size_t(struct nft_trans_elem, elems, 1),
GFP_ATOMIC);
if (!trans)
return -ENOMEM;
@ -7481,8 +7730,9 @@ static int nft_setelem_flush(const struct nft_ctx *ctx,
nft_setelem_data_deactivate(ctx->net, set, elem_priv);
nft_trans_elem_set(trans) = set;
nft_trans_elem_priv(trans) = elem_priv;
nft_trans_commit_list_add_tail(ctx->net, trans);
nft_trans_container_elem(trans)->nelems = 1;
nft_trans_container_elem(trans)->elems[0].priv = elem_priv;
nft_trans_commit_list_add_elem(ctx->net, trans, GFP_ATOMIC);
return 0;
}
@ -7498,8 +7748,8 @@ static int __nft_set_catchall_flush(const struct nft_ctx *ctx,
return -ENOMEM;
nft_setelem_data_deactivate(ctx->net, set, elem_priv);
nft_trans_elem_priv(trans) = elem_priv;
nft_trans_commit_list_add_tail(ctx->net, trans);
nft_trans_container_elem(trans)->elems[0].priv = elem_priv;
nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
return 0;
}
@ -9680,9 +9930,7 @@ static void nft_commit_release(struct nft_trans *trans)
break;
case NFT_MSG_DELSETELEM:
case NFT_MSG_DESTROYSETELEM:
nf_tables_set_elem_destroy(&ctx,
nft_trans_elem_set(trans),
nft_trans_elem_priv(trans));
nft_trans_elems_destroy(&ctx, nft_trans_container_elem(trans));
break;
case NFT_MSG_DELOBJ:
case NFT_MSG_DESTROYOBJ:
@ -10255,9 +10503,24 @@ static void nf_tables_commit_audit_free(struct list_head *adl)
}
}
static void nf_tables_commit_audit_collect(struct list_head *adl,
struct nft_table *table, u32 op)
/* Number of audit entries represented by @trans.
 *
 * nft audit reports how many elements were added/removed/updated, hence
 * NEWSETELEM/DELSETELEM transactions count once per element they carry.
 * Any other transaction counts as a single entry.
 */
static unsigned int nf_tables_commit_audit_entrycount(const struct nft_trans *trans)
{
	if (trans->msg_type == NFT_MSG_NEWSETELEM ||
	    trans->msg_type == NFT_MSG_DELSETELEM)
		return nft_trans_container_elem(trans)->nelems;

	return 1;
}
static void nf_tables_commit_audit_collect(struct list_head *adl,
const struct nft_trans *trans, u32 op)
{
const struct nft_table *table = trans->table;
struct nft_audit_data *adp;
list_for_each_entry(adp, adl, list) {
@ -10267,7 +10530,7 @@ static void nf_tables_commit_audit_collect(struct list_head *adl,
WARN_ONCE(1, "table=%s not expected in commit list", table->name);
return;
found:
adp->entries++;
adp->entries += nf_tables_commit_audit_entrycount(trans);
if (!adp->op || adp->op > op)
adp->op = op;
}
@ -10426,7 +10689,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_ctx_update(&ctx, trans);
nf_tables_commit_audit_collect(&adl, table, trans->msg_type);
nf_tables_commit_audit_collect(&adl, trans, trans->msg_type);
switch (trans->msg_type) {
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
@ -10535,25 +10798,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_NEWSETELEM:
te = nft_trans_container_elem(trans);
if (te->update_flags) {
const struct nft_set_ext *ext =
nft_set_elem_ext(te->set, te->elem_priv);
nft_trans_elems_add(&ctx, te);
if (te->update_flags & NFT_TRANS_UPD_TIMEOUT) {
WRITE_ONCE(nft_set_ext_timeout(ext)->timeout,
te->timeout);
}
if (te->update_flags & NFT_TRANS_UPD_EXPIRATION) {
WRITE_ONCE(nft_set_ext_timeout(ext)->expiration,
get_jiffies_64() + te->expiration);
}
} else {
nft_setelem_activate(net, te->set, te->elem_priv);
}
nf_tables_setelem_notify(&ctx, te->set,
te->elem_priv,
NFT_MSG_NEWSETELEM);
if (te->set->ops->commit &&
list_empty(&te->set->pending_update)) {
list_add_tail(&te->set->pending_update,
@ -10565,14 +10811,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_DESTROYSETELEM:
te = nft_trans_container_elem(trans);
nf_tables_setelem_notify(&ctx, te->set,
te->elem_priv,
trans->msg_type);
nft_setelem_remove(net, te->set, te->elem_priv);
if (!nft_setelem_is_catchall(te->set, te->elem_priv)) {
atomic_dec(&te->set->nelems);
te->set->ndeact--;
}
nft_trans_elems_remove(&ctx, te);
if (te->set->ops->commit &&
list_empty(&te->set->pending_update)) {
list_add_tail(&te->set->pending_update,
@ -10692,8 +10932,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
nft_set_destroy(&ctx, nft_trans_set(trans));
break;
case NFT_MSG_NEWSETELEM:
nft_set_elem_destroy(nft_trans_elem_set(trans),
nft_trans_elem_priv(trans), true);
nft_trans_set_elem_destroy(&ctx, nft_trans_container_elem(trans));
break;
case NFT_MSG_NEWOBJ:
nft_obj_destroy(&ctx, nft_trans_obj(trans));
@ -10850,18 +11089,15 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSETELEM:
if (nft_trans_elem_update_flags(trans) ||
nft_trans_elem_set_bound(trans)) {
if (nft_trans_elem_set_bound(trans)) {
nft_trans_destroy(trans);
break;
}
te = nft_trans_container_elem(trans);
if (!te->set->ops->abort ||
nft_setelem_is_catchall(te->set, te->elem_priv))
nft_setelem_remove(net, te->set, te->elem_priv);
if (!nft_setelem_is_catchall(te->set, te->elem_priv))
atomic_dec(&te->set->nelems);
if (!nft_trans_elems_new_abort(&ctx, te)) {
nft_trans_destroy(trans);
break;
}
if (te->set->ops->abort &&
list_empty(&te->set->pending_update)) {
@ -10873,12 +11109,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
case NFT_MSG_DESTROYSETELEM:
te = nft_trans_container_elem(trans);
if (!nft_setelem_active_next(net, te->set, te->elem_priv)) {
nft_setelem_data_activate(net, te->set, te->elem_priv);
nft_setelem_activate(net, te->set, te->elem_priv);
}
if (!nft_setelem_is_catchall(te->set, te->elem_priv))
te->set->ndeact--;
nft_trans_elems_destroy_abort(&ctx, te);
if (te->set->ops->abort &&
list_empty(&te->set->pending_update)) {

View File

@ -517,7 +517,7 @@ replay_abort:
err = nla_parse_deprecated(cda,
ss->cb[cb_id].attr_count,
attr, attrlen,
ss->cb[cb_id].policy, NULL);
ss->cb[cb_id].policy, &extack);
if (err < 0)
goto ack;

View File

@ -17,6 +17,7 @@
struct nft_bitwise {
u8 sreg;
u8 sreg2;
u8 dreg;
enum nft_bitwise_ops op:8;
u8 len;
@ -25,8 +26,8 @@ struct nft_bitwise {
struct nft_data data;
};
static void nft_bitwise_eval_bool(u32 *dst, const u32 *src,
const struct nft_bitwise *priv)
static void nft_bitwise_eval_mask_xor(u32 *dst, const u32 *src,
const struct nft_bitwise *priv)
{
unsigned int i;
@ -60,28 +61,72 @@ static void nft_bitwise_eval_rshift(u32 *dst, const u32 *src,
}
}
/* dst = src & src2, computed one u32 word at a time over priv->len bytes
 * (rounded up to whole words).
 */
static void nft_bitwise_eval_and(u32 *dst, const u32 *src, const u32 *src2,
				 const struct nft_bitwise *priv)
{
	const unsigned int words = DIV_ROUND_UP(priv->len, sizeof(u32));
	unsigned int w;

	for (w = 0; w < words; w++)
		dst[w] = src[w] & src2[w];
}
/* dst = src | src2, computed one u32 word at a time over priv->len bytes
 * (rounded up to whole words).
 */
static void nft_bitwise_eval_or(u32 *dst, const u32 *src, const u32 *src2,
				const struct nft_bitwise *priv)
{
	const unsigned int words = DIV_ROUND_UP(priv->len, sizeof(u32));
	unsigned int w;

	for (w = 0; w < words; w++)
		dst[w] = src[w] | src2[w];
}
/* dst = src ^ src2, computed one u32 word at a time over priv->len bytes
 * (rounded up to whole words).
 */
static void nft_bitwise_eval_xor(u32 *dst, const u32 *src, const u32 *src2,
				 const struct nft_bitwise *priv)
{
	const unsigned int words = DIV_ROUND_UP(priv->len, sizeof(u32));
	unsigned int w;

	for (w = 0; w < words; w++)
		dst[w] = src[w] ^ src2[w];
}
void nft_bitwise_eval(const struct nft_expr *expr,
struct nft_regs *regs, const struct nft_pktinfo *pkt)
{
const struct nft_bitwise *priv = nft_expr_priv(expr);
const u32 *src = &regs->data[priv->sreg];
const u32 *src = &regs->data[priv->sreg], *src2;
u32 *dst = &regs->data[priv->dreg];
switch (priv->op) {
case NFT_BITWISE_BOOL:
nft_bitwise_eval_bool(dst, src, priv);
break;
case NFT_BITWISE_LSHIFT:
if (priv->op == NFT_BITWISE_MASK_XOR) {
nft_bitwise_eval_mask_xor(dst, src, priv);
return;
}
if (priv->op == NFT_BITWISE_LSHIFT) {
nft_bitwise_eval_lshift(dst, src, priv);
break;
case NFT_BITWISE_RSHIFT:
return;
}
if (priv->op == NFT_BITWISE_RSHIFT) {
nft_bitwise_eval_rshift(dst, src, priv);
break;
return;
}
src2 = priv->sreg2 ? &regs->data[priv->sreg2] : priv->data.data;
if (priv->op == NFT_BITWISE_AND) {
nft_bitwise_eval_and(dst, src, src2, priv);
return;
}
if (priv->op == NFT_BITWISE_OR) {
nft_bitwise_eval_or(dst, src, src2, priv);
return;
}
if (priv->op == NFT_BITWISE_XOR) {
nft_bitwise_eval_xor(dst, src, src2, priv);
return;
}
}
static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = {
[NFTA_BITWISE_SREG] = { .type = NLA_U32 },
[NFTA_BITWISE_SREG2] = { .type = NLA_U32 },
[NFTA_BITWISE_DREG] = { .type = NLA_U32 },
[NFTA_BITWISE_LEN] = { .type = NLA_U32 },
[NFTA_BITWISE_MASK] = { .type = NLA_NESTED },
@ -90,8 +135,8 @@ static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = {
[NFTA_BITWISE_DATA] = { .type = NLA_NESTED },
};
static int nft_bitwise_init_bool(struct nft_bitwise *priv,
const struct nlattr *const tb[])
static int nft_bitwise_init_mask_xor(struct nft_bitwise *priv,
const struct nlattr *const tb[])
{
struct nft_data_desc mask = {
.type = NFT_DATA_VALUE,
@ -105,7 +150,8 @@ static int nft_bitwise_init_bool(struct nft_bitwise *priv,
};
int err;
if (tb[NFTA_BITWISE_DATA])
if (tb[NFTA_BITWISE_DATA] ||
tb[NFTA_BITWISE_SREG2])
return -EINVAL;
if (!tb[NFTA_BITWISE_MASK] ||
@ -139,7 +185,8 @@ static int nft_bitwise_init_shift(struct nft_bitwise *priv,
int err;
if (tb[NFTA_BITWISE_MASK] ||
tb[NFTA_BITWISE_XOR])
tb[NFTA_BITWISE_XOR] ||
tb[NFTA_BITWISE_SREG2])
return -EINVAL;
if (!tb[NFTA_BITWISE_DATA])
@ -157,6 +204,41 @@ static int nft_bitwise_init_shift(struct nft_bitwise *priv,
return 0;
}
/* Parse the second operand of an AND/OR/XOR bitwise expression.
 *
 * The operand is either immediate data (NFTA_BITWISE_DATA) or a second
 * source register (NFTA_BITWISE_SREG2); exactly one of the two must be
 * given. NFTA_BITWISE_MASK and NFTA_BITWISE_XOR are rejected.
 *
 * @ctx:  context passed to the register validation
 * @priv: expression state; priv->len is read here, so it must already be set
 * @tb:   parsed netlink attributes
 *
 * Returns 0 on success, -EINVAL for an invalid attribute combination, or the
 * negative error returned by nft_data_init()/nft_parse_register_load().
 */
static int nft_bitwise_init_bool(const struct nft_ctx *ctx,
struct nft_bitwise *priv,
const struct nlattr *const tb[])
{
int err;
if (tb[NFTA_BITWISE_MASK] ||
tb[NFTA_BITWISE_XOR])
return -EINVAL;
/* neither operand or both operands given: reject */
if ((!tb[NFTA_BITWISE_DATA] && !tb[NFTA_BITWISE_SREG2]) ||
(tb[NFTA_BITWISE_DATA] && tb[NFTA_BITWISE_SREG2]))
return -EINVAL;
if (tb[NFTA_BITWISE_DATA]) {
/* immediate operand, described with the expression length priv->len */
struct nft_data_desc desc = {
.type = NFT_DATA_VALUE,
.size = sizeof(priv->data),
.len = priv->len,
};
err = nft_data_init(NULL, &priv->data, &desc,
tb[NFTA_BITWISE_DATA]);
if (err < 0)
return err;
} else {
/* register operand, loaded with the same length priv->len */
err = nft_parse_register_load(ctx, tb[NFTA_BITWISE_SREG2],
&priv->sreg2, priv->len);
if (err < 0)
return err;
}
return 0;
}
static int nft_bitwise_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
@ -185,32 +267,40 @@ static int nft_bitwise_init(const struct nft_ctx *ctx,
if (tb[NFTA_BITWISE_OP]) {
priv->op = ntohl(nla_get_be32(tb[NFTA_BITWISE_OP]));
switch (priv->op) {
case NFT_BITWISE_BOOL:
case NFT_BITWISE_MASK_XOR:
case NFT_BITWISE_LSHIFT:
case NFT_BITWISE_RSHIFT:
case NFT_BITWISE_AND:
case NFT_BITWISE_OR:
case NFT_BITWISE_XOR:
break;
default:
return -EOPNOTSUPP;
}
} else {
priv->op = NFT_BITWISE_BOOL;
priv->op = NFT_BITWISE_MASK_XOR;
}
switch(priv->op) {
case NFT_BITWISE_BOOL:
err = nft_bitwise_init_bool(priv, tb);
case NFT_BITWISE_MASK_XOR:
err = nft_bitwise_init_mask_xor(priv, tb);
break;
case NFT_BITWISE_LSHIFT:
case NFT_BITWISE_RSHIFT:
err = nft_bitwise_init_shift(priv, tb);
break;
case NFT_BITWISE_AND:
case NFT_BITWISE_OR:
case NFT_BITWISE_XOR:
err = nft_bitwise_init_bool(ctx, priv, tb);
break;
}
return err;
}
static int nft_bitwise_dump_bool(struct sk_buff *skb,
const struct nft_bitwise *priv)
static int nft_bitwise_dump_mask_xor(struct sk_buff *skb,
const struct nft_bitwise *priv)
{
if (nft_data_dump(skb, NFTA_BITWISE_MASK, &priv->mask,
NFT_DATA_VALUE, priv->len) < 0)
@ -232,6 +322,21 @@ static int nft_bitwise_dump_shift(struct sk_buff *skb,
return 0;
}
/* Dump the second operand of an AND/OR/XOR bitwise expression.
 *
 * Emits NFTA_BITWISE_SREG2 when a second source register is in use
 * (priv->sreg2 != 0), otherwise the immediate operand as NFTA_BITWISE_DATA.
 *
 * The immediate operand is initialised and evaluated with a length of
 * priv->len bytes, so the same length is dumped here; a fixed sizeof(u32)
 * would truncate operands wider than 4 bytes.
 *
 * Returns 0 on success, -1 if the attribute could not be added to @skb.
 */
static int nft_bitwise_dump_bool(struct sk_buff *skb,
				 const struct nft_bitwise *priv)
{
	if (priv->sreg2) {
		if (nft_dump_register(skb, NFTA_BITWISE_SREG2, priv->sreg2))
			return -1;
	} else {
		if (nft_data_dump(skb, NFTA_BITWISE_DATA, &priv->data,
				  NFT_DATA_VALUE, priv->len) < 0)
			return -1;
	}

	return 0;
}
static int nft_bitwise_dump(struct sk_buff *skb,
const struct nft_expr *expr, bool reset)
{
@ -248,13 +353,18 @@ static int nft_bitwise_dump(struct sk_buff *skb,
return -1;
switch (priv->op) {
case NFT_BITWISE_BOOL:
err = nft_bitwise_dump_bool(skb, priv);
case NFT_BITWISE_MASK_XOR:
err = nft_bitwise_dump_mask_xor(skb, priv);
break;
case NFT_BITWISE_LSHIFT:
case NFT_BITWISE_RSHIFT:
err = nft_bitwise_dump_shift(skb, priv);
break;
case NFT_BITWISE_AND:
case NFT_BITWISE_OR:
case NFT_BITWISE_XOR:
err = nft_bitwise_dump_bool(skb, priv);
break;
}
return err;
@ -269,7 +379,7 @@ static int nft_bitwise_offload(struct nft_offload_ctx *ctx,
const struct nft_bitwise *priv = nft_expr_priv(expr);
struct nft_offload_reg *reg = &ctx->regs[priv->dreg];
if (priv->op != NFT_BITWISE_BOOL)
if (priv->op != NFT_BITWISE_MASK_XOR)
return -EOPNOTSUPP;
if (memcmp(&priv->xor, &zero, sizeof(priv->xor)) ||
@ -299,6 +409,7 @@ static bool nft_bitwise_reduce(struct nft_regs_track *track,
track->regs[priv->dreg].bitwise &&
track->regs[priv->dreg].bitwise->ops == expr->ops &&
priv->sreg == bitwise->sreg &&
priv->sreg2 == bitwise->sreg2 &&
priv->dreg == bitwise->dreg &&
priv->op == bitwise->op &&
priv->len == bitwise->len &&
@ -375,7 +486,8 @@ static int nft_bitwise_fast_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
if (tb[NFTA_BITWISE_DATA])
if (tb[NFTA_BITWISE_DATA] ||
tb[NFTA_BITWISE_SREG2])
return -EINVAL;
if (!tb[NFTA_BITWISE_MASK] ||
@ -406,7 +518,7 @@ nft_bitwise_fast_dump(struct sk_buff *skb,
return -1;
if (nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(sizeof(u32))))
return -1;
if (nla_put_be32(skb, NFTA_BITWISE_OP, htonl(NFT_BITWISE_BOOL)))
if (nla_put_be32(skb, NFTA_BITWISE_OP, htonl(NFT_BITWISE_MASK_XOR)))
return -1;
data.data[0] = priv->mask;
@ -501,7 +613,7 @@ nft_bitwise_select_ops(const struct nft_ctx *ctx,
return &nft_bitwise_ops;
if (tb[NFTA_BITWISE_OP] &&
ntohl(nla_get_be32(tb[NFTA_BITWISE_OP])) != NFT_BITWISE_BOOL)
ntohl(nla_get_be32(tb[NFTA_BITWISE_OP])) != NFT_BITWISE_MASK_XOR)
return &nft_bitwise_ops;
return &nft_bitwise_fast_ops;

View File

@ -8,7 +8,7 @@
#include <linux/spinlock.h>
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/netfilter/nf_tables.h>
#include <net/ip.h> /* for ipv4 options. */
#include <net/ip.h>
#include <net/inet_dscp.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
@ -236,7 +236,7 @@ static int nft_flow_route(const struct nft_pktinfo *pkt,
fl.u.ip4.saddr = ct->tuplehash[!dir].tuple.src.u3.ip;
fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex;
fl.u.ip4.flowi4_iif = this_dst->dev->ifindex;
fl.u.ip4.flowi4_tos = ip_hdr(pkt->skb)->tos & INET_DSCP_MASK;
fl.u.ip4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip_hdr(pkt->skb)));
fl.u.ip4.flowi4_mark = pkt->skb->mark;
fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC;
break;