diff --git a/MAINTAINERS b/MAINTAINERS index 4c37285a4747..9b899a1327a0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -24464,6 +24464,7 @@ F: include/linux/vdpa.h F: include/linux/virtio*.h F: include/linux/vringh.h F: include/uapi/linux/virtio_*.h +F: net/vmw_vsock/virtio* F: tools/virtio/ F: tools/testing/selftests/drivers/net/virtio_net/ diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c index 5f43dfa22f79..85c525745b31 100644 --- a/drivers/firmware/qemu_fw_cfg.c +++ b/drivers/firmware/qemu_fw_cfg.c @@ -452,7 +452,7 @@ static void fw_cfg_sysfs_release_entry(struct kobject *kobj) } /* kobj_type: ties together all properties required to register an entry */ -static struct kobj_type fw_cfg_sysfs_entry_ktype = { +static const struct kobj_type fw_cfg_sysfs_entry_ktype = { .default_groups = fw_cfg_sysfs_entry_groups, .sysfs_ops = &fw_cfg_sysfs_attr_ops, .release = fw_cfg_sysfs_release_entry, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 9af8ddb4a78f..a64d96effb9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1887,10 +1887,12 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, throttle_op = mlx5_cmd_is_throttle_opcode(opcode); if (throttle_op) { - /* atomic context may not sleep */ - if (callback) - return -EINVAL; - down(&dev->cmd.vars.throttle_sem); + if (callback) { + if (down_trylock(&dev->cmd.vars.throttle_sem)) + return -EBUSY; + } else { + down(&dev->cmd.vars.throttle_sem); + } } pages_queue = is_manage_pages(in); @@ -2096,10 +2098,19 @@ static void mlx5_cmd_exec_cb_handler(int status, void *_work) { struct mlx5_async_work *work = _work; struct mlx5_async_ctx *ctx; + struct mlx5_core_dev *dev; + u16 opcode; ctx = work->ctx; - status = cmd_status_err(ctx->dev, status, work->opcode, work->op_mod, work->out); + dev = ctx->dev; + opcode = work->opcode; + status = 
cmd_status_err(dev, status, work->opcode, work->op_mod, work->out); work->user_callback(status, work); + /* Can't access "work" from this point on. It could have been freed in + * the callback. + */ + if (mlx5_cmd_is_throttle_opcode(opcode)) + up(&dev->cmd.vars.throttle_sem); if (atomic_dec_and_test(&ctx->num_inflight)) complete(&ctx->inflight_done); } diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c index 35c8fbbba10e..f55d60922b87 100644 --- a/drivers/nvdimm/nd_virtio.c +++ b/drivers/nvdimm/nd_virtio.c @@ -44,6 +44,15 @@ static int virtio_pmem_flush(struct nd_region *nd_region) unsigned long flags; int err, err1; + /* + * Don't bother to submit the request to the device if the device is + * not activated. + */ + if (vdev->config->get_status(vdev) & VIRTIO_CONFIG_S_NEEDS_RESET) { + dev_info(&vdev->dev, "virtio pmem device needs a reset\n"); + return -EIO; + } + might_sleep(); req_data = kmalloc(sizeof(*req_data), GFP_KERNEL); if (!req_data) diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h index 0f347717021a..aa36de361c10 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.h +++ b/drivers/vdpa/ifcvf/ifcvf_base.h @@ -112,15 +112,12 @@ void ifcvf_write_dev_config(struct ifcvf_hw *hw, u64 offset, const void *src, int length); u8 ifcvf_get_status(struct ifcvf_hw *hw); void ifcvf_set_status(struct ifcvf_hw *hw, u8 status); -void io_write64_twopart(u64 val, u32 *lo, u32 *hi); void ifcvf_reset(struct ifcvf_hw *hw); u64 ifcvf_get_dev_features(struct ifcvf_hw *hw); u64 ifcvf_get_hw_features(struct ifcvf_hw *hw); int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features); u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid); int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num); -struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw); -int ifcvf_probed_virtio_net(struct ifcvf_hw *hw); u32 ifcvf_get_config_size(struct ifcvf_hw *hw); u16 ifcvf_set_vq_vector(struct ifcvf_hw *hw, u16 qid, int vector); u16 
ifcvf_set_config_vector(struct ifcvf_hw *hw, int vector); diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h index 50aac8fe57ef..2cedf7e2dbc4 100644 --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -83,10 +83,28 @@ enum { MLX5_VDPA_NUM_AS = 2 }; +struct mlx5_vdpa_mr_resources { + struct mlx5_vdpa_mr *mr[MLX5_VDPA_NUM_AS]; + unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS]; + + /* Pre-deletion mr list */ + struct list_head mr_list_head; + + /* Deferred mr list */ + struct list_head mr_gc_list_head; + struct workqueue_struct *wq_gc; + struct delayed_work gc_dwork_ent; + + struct mutex lock; + + atomic_t shutdown; +}; + struct mlx5_vdpa_dev { struct vdpa_device vdev; struct mlx5_core_dev *mdev; struct mlx5_vdpa_resources res; + struct mlx5_vdpa_mr_resources mres; u64 mlx_features; u64 actual_features; @@ -95,14 +113,23 @@ struct mlx5_vdpa_dev { u16 max_idx; u32 generation; - struct mlx5_vdpa_mr *mr[MLX5_VDPA_NUM_AS]; - struct list_head mr_list_head; - /* serialize mr access */ - struct mutex mr_mtx; struct mlx5_control_vq cvq; struct workqueue_struct *wq; - unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS]; bool suspended; + + struct mlx5_async_ctx async_ctx; +}; + +struct mlx5_vdpa_async_cmd { + int err; + struct mlx5_async_work cb_work; + struct completion cmd_done; + + void *in; + size_t inlen; + + void *out; + size_t outlen; }; int mlx5_vdpa_create_tis(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tisn); @@ -121,7 +148,9 @@ int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, u32 *mkey, u32 *in, int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, u32 mkey); struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb); +int mlx5_vdpa_init_mr_resources(struct mlx5_vdpa_dev *mvdev); void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev); +void mlx5_vdpa_clean_mrs(struct mlx5_vdpa_dev *mvdev); void mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev, struct 
mlx5_vdpa_mr *mr); void mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev, @@ -134,6 +163,14 @@ int mlx5_vdpa_update_cvq_iotlb(struct mlx5_vdpa_dev *mvdev, unsigned int asid); int mlx5_vdpa_create_dma_mr(struct mlx5_vdpa_dev *mvdev); int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid); +int mlx5_vdpa_exec_async_cmds(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_async_cmd *cmds, + int num_cmds); + +#define mlx5_vdpa_err(__dev, format, ...) \ + dev_err((__dev)->mdev->device, "%s:%d:(pid %d) error: " format, __func__, __LINE__, \ + current->pid, ##__VA_ARGS__) + #define mlx5_vdpa_warn(__dev, format, ...) \ dev_warn((__dev)->mdev->device, "%s:%d:(pid %d) warning: " format, __func__, __LINE__, \ diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index 4758914ccf86..2dd21e0b399e 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -49,17 +49,23 @@ static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt) } } -static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) -{ - int inlen; - void *mkc; - void *in; - int err; +struct mlx5_create_mkey_mem { + u8 out[MLX5_ST_SZ_BYTES(create_mkey_out)]; + u8 in[MLX5_ST_SZ_BYTES(create_mkey_in)]; + __be64 mtt[]; +}; - inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16); - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) - return -ENOMEM; +struct mlx5_destroy_mkey_mem { + u8 out[MLX5_ST_SZ_BYTES(destroy_mkey_out)]; + u8 in[MLX5_ST_SZ_BYTES(destroy_mkey_in)]; +}; + +static void fill_create_direct_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_direct_mr *mr, + struct mlx5_create_mkey_mem *mem) +{ + void *in = &mem->in; + void *mkc; MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); @@ -76,18 +82,36 @@ static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct MLX5_SET(create_mkey_in, in, 
translations_octword_actual_size, get_octo_len(mr->end - mr->start, mr->log_size)); populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt)); - err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen); - kvfree(in); - if (err) { - mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n"); - return err; - } - return 0; + MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); + MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); +} + +static void create_direct_mr_end(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_direct_mr *mr, + struct mlx5_create_mkey_mem *mem) +{ + u32 mkey_index = MLX5_GET(create_mkey_out, mem->out, mkey_index); + + mr->mr = mlx5_idx_to_mkey(mkey_index); +} + +static void fill_destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_direct_mr *mr, + struct mlx5_destroy_mkey_mem *mem) +{ + void *in = &mem->in; + + MLX5_SET(destroy_mkey_in, in, uid, mvdev->res.uid); + MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY); + MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mr->mr)); } static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) { + if (!mr->mr) + return; + mlx5_vdpa_destroy_mkey(mvdev, mr->mr); } @@ -179,6 +203,123 @@ static int klm_byte_size(int nklms) return 16 * ALIGN(nklms, 4); } +#define MLX5_VDPA_MTT_ALIGN 16 + +static int create_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) +{ + struct mlx5_vdpa_async_cmd *cmds; + struct mlx5_vdpa_direct_mr *dmr; + int err = 0; + int i = 0; + + cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL); + if (!cmds) + return -ENOMEM; + + list_for_each_entry(dmr, &mr->head, list) { + struct mlx5_create_mkey_mem *cmd_mem; + int mttlen, mttcount; + + mttlen = roundup(MLX5_ST_SZ_BYTES(mtt) * dmr->nsg, MLX5_VDPA_MTT_ALIGN); + mttcount = mttlen / sizeof(cmd_mem->mtt[0]); + cmd_mem = kvcalloc(1, struct_size(cmd_mem, mtt, mttcount), GFP_KERNEL); + if (!cmd_mem) { + err = -ENOMEM; + goto done; + } + + cmds[i].out 
= cmd_mem->out; + cmds[i].outlen = sizeof(cmd_mem->out); + cmds[i].in = cmd_mem->in; + cmds[i].inlen = struct_size(cmd_mem, mtt, mttcount); + + fill_create_direct_mr(mvdev, dmr, cmd_mem); + + i++; + } + + err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs); + if (err) { + + mlx5_vdpa_err(mvdev, "error issuing MTT mkey creation for direct mrs: %d\n", err); + goto done; + } + + i = 0; + list_for_each_entry(dmr, &mr->head, list) { + struct mlx5_vdpa_async_cmd *cmd = &cmds[i++]; + struct mlx5_create_mkey_mem *cmd_mem; + + cmd_mem = container_of(cmd->out, struct mlx5_create_mkey_mem, out); + + if (!cmd->err) { + create_direct_mr_end(mvdev, dmr, cmd_mem); + } else { + err = err ? err : cmd->err; + mlx5_vdpa_err(mvdev, "error creating MTT mkey [0x%llx, 0x%llx]: %d\n", + dmr->start, dmr->end, cmd->err); + } + } + +done: + for (i = i-1; i >= 0; i--) { + struct mlx5_create_mkey_mem *cmd_mem; + + cmd_mem = container_of(cmds[i].out, struct mlx5_create_mkey_mem, out); + kvfree(cmd_mem); + } + + kvfree(cmds); + return err; +} + +DEFINE_FREE(free_cmds, struct mlx5_vdpa_async_cmd *, kvfree(_T)) +DEFINE_FREE(free_cmd_mem, struct mlx5_destroy_mkey_mem *, kvfree(_T)) + +static int destroy_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) +{ + struct mlx5_destroy_mkey_mem *cmd_mem __free(free_cmd_mem) = NULL; + struct mlx5_vdpa_async_cmd *cmds __free(free_cmds) = NULL; + struct mlx5_vdpa_direct_mr *dmr; + int err = 0; + int i = 0; + + cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL); + cmd_mem = kvcalloc(mr->num_directs, sizeof(*cmd_mem), GFP_KERNEL); + if (!cmds || !cmd_mem) + return -ENOMEM; + + list_for_each_entry(dmr, &mr->head, list) { + cmds[i].out = cmd_mem[i].out; + cmds[i].outlen = sizeof(cmd_mem[i].out); + cmds[i].in = cmd_mem[i].in; + cmds[i].inlen = sizeof(cmd_mem[i].in); + fill_destroy_direct_mr(mvdev, dmr, &cmd_mem[i]); + i++; + } + + err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs); + if (err) { + + mlx5_vdpa_err(mvdev, 
"error issuing MTT mkey deletion for direct mrs: %d\n", err); + return err; + } + + i = 0; + list_for_each_entry(dmr, &mr->head, list) { + struct mlx5_vdpa_async_cmd *cmd = &cmds[i++]; + + dmr->mr = 0; + if (cmd->err) { + err = err ? err : cmd->err; + mlx5_vdpa_err(mvdev, "error deleting MTT mkey [0x%llx, 0x%llx]: %d\n", + dmr->start, dmr->end, cmd->err); + } + } + + return err; +} + static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) { int inlen; @@ -279,14 +420,8 @@ done: goto err_map; } - err = create_direct_mr(mvdev, mr); - if (err) - goto err_direct; - return 0; -err_direct: - dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); err_map: sg_free_table(&mr->sg_head); return err; @@ -401,6 +536,10 @@ static int create_user_mr(struct mlx5_vdpa_dev *mvdev, if (err) goto err_chain; + err = create_direct_keys(mvdev, mr); + if (err) + goto err_chain; + /* Create the memory key that defines the guests's address space. This * memory key refers to the direct keys that contain the MTT * translations @@ -489,6 +628,7 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr struct mlx5_vdpa_direct_mr *n; destroy_indirect_key(mvdev, mr); + destroy_direct_keys(mvdev, mr); list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) { list_del_init(&dmr->list); unmap_direct_mr(mvdev, dmr); @@ -513,22 +653,58 @@ static void _mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_ kfree(mr); } +/* There can be multiple .set_map() operations in quick succession. + * This large delay is a simple way to prevent the MR cleanup from blocking + * .set_map() MR creation in this scenario. 
+ */ +#define MLX5_VDPA_MR_GC_TRIGGER_MS 2000 + +static void mlx5_vdpa_mr_gc_handler(struct work_struct *work) +{ + struct mlx5_vdpa_mr_resources *mres; + struct mlx5_vdpa_mr *mr, *tmp; + struct mlx5_vdpa_dev *mvdev; + + mres = container_of(work, struct mlx5_vdpa_mr_resources, gc_dwork_ent.work); + + if (atomic_read(&mres->shutdown)) { + mutex_lock(&mres->lock); + } else if (!mutex_trylock(&mres->lock)) { + queue_delayed_work(mres->wq_gc, &mres->gc_dwork_ent, + msecs_to_jiffies(MLX5_VDPA_MR_GC_TRIGGER_MS)); + return; + } + + mvdev = container_of(mres, struct mlx5_vdpa_dev, mres); + + list_for_each_entry_safe(mr, tmp, &mres->mr_gc_list_head, mr_list) { + _mlx5_vdpa_destroy_mr(mvdev, mr); + } + + mutex_unlock(&mres->lock); +} + static void _mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) { + struct mlx5_vdpa_mr_resources *mres = &mvdev->mres; + if (!mr) return; - if (refcount_dec_and_test(&mr->refcount)) - _mlx5_vdpa_destroy_mr(mvdev, mr); + if (refcount_dec_and_test(&mr->refcount)) { + list_move_tail(&mr->mr_list, &mres->mr_gc_list_head); + queue_delayed_work(mres->wq_gc, &mres->gc_dwork_ent, + msecs_to_jiffies(MLX5_VDPA_MR_GC_TRIGGER_MS)); + } } void mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) { - mutex_lock(&mvdev->mr_mtx); + mutex_lock(&mvdev->mres.lock); _mlx5_vdpa_put_mr(mvdev, mr); - mutex_unlock(&mvdev->mr_mtx); + mutex_unlock(&mvdev->mres.lock); } static void _mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev, @@ -543,44 +719,47 @@ static void _mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev, void mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) { - mutex_lock(&mvdev->mr_mtx); + mutex_lock(&mvdev->mres.lock); _mlx5_vdpa_get_mr(mvdev, mr); - mutex_unlock(&mvdev->mr_mtx); + mutex_unlock(&mvdev->mres.lock); } void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *new_mr, unsigned int asid) { - struct mlx5_vdpa_mr *old_mr = mvdev->mr[asid]; + struct mlx5_vdpa_mr *old_mr = 
mvdev->mres.mr[asid]; - mutex_lock(&mvdev->mr_mtx); + mutex_lock(&mvdev->mres.lock); _mlx5_vdpa_put_mr(mvdev, old_mr); - mvdev->mr[asid] = new_mr; + mvdev->mres.mr[asid] = new_mr; - mutex_unlock(&mvdev->mr_mtx); + mutex_unlock(&mvdev->mres.lock); } static void mlx5_vdpa_show_mr_leaks(struct mlx5_vdpa_dev *mvdev) { struct mlx5_vdpa_mr *mr; - mutex_lock(&mvdev->mr_mtx); + mutex_lock(&mvdev->mres.lock); - list_for_each_entry(mr, &mvdev->mr_list_head, mr_list) { + list_for_each_entry(mr, &mvdev->mres.mr_list_head, mr_list) { mlx5_vdpa_warn(mvdev, "mkey still alive after resource delete: " "mr: %p, mkey: 0x%x, refcount: %u\n", mr, mr->mkey, refcount_read(&mr->refcount)); } - mutex_unlock(&mvdev->mr_mtx); + mutex_unlock(&mvdev->mres.lock); } -void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev) +void mlx5_vdpa_clean_mrs(struct mlx5_vdpa_dev *mvdev) { + if (!mvdev->res.valid) + return; + for (int i = 0; i < MLX5_VDPA_NUM_AS; i++) mlx5_vdpa_update_mr(mvdev, NULL, i); @@ -613,7 +792,7 @@ static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, if (err) goto err_iotlb; - list_add_tail(&mr->mr_list, &mvdev->mr_list_head); + list_add_tail(&mr->mr_list, &mvdev->mres.mr_list_head); return 0; @@ -639,9 +818,9 @@ struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, if (!mr) return ERR_PTR(-ENOMEM); - mutex_lock(&mvdev->mr_mtx); + mutex_lock(&mvdev->mres.lock); err = _mlx5_vdpa_create_mr(mvdev, mr, iotlb); - mutex_unlock(&mvdev->mr_mtx); + mutex_unlock(&mvdev->mres.lock); if (err) goto out_err; @@ -661,7 +840,7 @@ int mlx5_vdpa_update_cvq_iotlb(struct mlx5_vdpa_dev *mvdev, { int err; - if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid) + if (mvdev->mres.group2asid[MLX5_VDPA_CVQ_GROUP] != asid) return 0; spin_lock(&mvdev->cvq.iommu_lock); @@ -703,3 +882,33 @@ int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid) return 0; } + +int mlx5_vdpa_init_mr_resources(struct mlx5_vdpa_dev *mvdev) +{ + struct mlx5_vdpa_mr_resources *mres = 
&mvdev->mres; + + mres->wq_gc = create_singlethread_workqueue("mlx5_vdpa_mr_gc"); + if (!mres->wq_gc) + return -ENOMEM; + + INIT_DELAYED_WORK(&mres->gc_dwork_ent, mlx5_vdpa_mr_gc_handler); + + mutex_init(&mres->lock); + + INIT_LIST_HEAD(&mres->mr_list_head); + INIT_LIST_HEAD(&mres->mr_gc_list_head); + + return 0; +} + +void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev) +{ + struct mlx5_vdpa_mr_resources *mres = &mvdev->mres; + + atomic_set(&mres->shutdown, 1); + + flush_delayed_work(&mres->gc_dwork_ent); + destroy_workqueue(mres->wq_gc); + mres->wq_gc = NULL; + mutex_destroy(&mres->lock); +} diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c index 5c5a41b64bfc..aeae31d0cefa 100644 --- a/drivers/vdpa/mlx5/core/resources.c +++ b/drivers/vdpa/mlx5/core/resources.c @@ -256,7 +256,6 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev) mlx5_vdpa_warn(mvdev, "resources already allocated\n"); return -EINVAL; } - mutex_init(&mvdev->mr_mtx); res->uar = mlx5_get_uars_page(mdev); if (IS_ERR(res->uar)) { err = PTR_ERR(res->uar); @@ -301,7 +300,6 @@ err_pd: err_uctx: mlx5_put_uars_page(mdev, res->uar); err_uars: - mutex_destroy(&mvdev->mr_mtx); return err; } @@ -318,6 +316,78 @@ void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev) dealloc_pd(mvdev, res->pdn, res->uid); destroy_uctx(mvdev, res->uid); mlx5_put_uars_page(mvdev->mdev, res->uar); - mutex_destroy(&mvdev->mr_mtx); res->valid = false; } + +static void virtqueue_cmd_callback(int status, struct mlx5_async_work *context) +{ + struct mlx5_vdpa_async_cmd *cmd = + container_of(context, struct mlx5_vdpa_async_cmd, cb_work); + + cmd->err = mlx5_cmd_check(context->ctx->dev, status, cmd->in, cmd->out); + complete(&cmd->cmd_done); +} + +static int issue_async_cmd(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_async_cmd *cmds, + int issued, + int *completed) + +{ + struct mlx5_vdpa_async_cmd *cmd = &cmds[issued]; + int err; + +retry: + err = 
mlx5_cmd_exec_cb(&mvdev->async_ctx, + cmd->in, cmd->inlen, + cmd->out, cmd->outlen, + virtqueue_cmd_callback, + &cmd->cb_work); + if (err == -EBUSY) { + if (*completed < issued) { + /* Throttled by own commands: wait for oldest completion. */ + wait_for_completion(&cmds[*completed].cmd_done); + (*completed)++; + + goto retry; + } else { + /* Throttled by external commands: switch to sync api. */ + err = mlx5_cmd_exec(mvdev->mdev, + cmd->in, cmd->inlen, + cmd->out, cmd->outlen); + if (!err) + (*completed)++; + } + } + + return err; +} + +int mlx5_vdpa_exec_async_cmds(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_async_cmd *cmds, + int num_cmds) +{ + int completed = 0; + int issued = 0; + int err = 0; + + for (int i = 0; i < num_cmds; i++) + init_completion(&cmds[i].cmd_done); + + while (issued < num_cmds) { + + err = issue_async_cmd(mvdev, cmds, issued, &completed); + if (err) { + mlx5_vdpa_err(mvdev, "error issuing command %d of %d: %d\n", + issued, num_cmds, err); + break; + } + + issued++; + } + + while (completed < issued) + wait_for_completion(&cmds[completed++].cmd_done); + + return err; +} diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index fa78e8288ebb..dee019977716 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -941,11 +941,11 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr); MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr); - vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]]; + vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]]; if (vq_mr) MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey); - vq_desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]]; + vq_desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]]; if (vq_desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) MLX5_SET(virtio_q, vq_ctx, 
desc_group_mkey, vq_desc_mr->mkey); @@ -953,11 +953,11 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, /* If there is no mr update, make sure that the existing ones are set * modify to ready. */ - vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]]; + vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]]; if (vq_mr) mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY; - vq_desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]]; + vq_desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]]; if (vq_desc_mr) mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY; } @@ -1184,40 +1184,92 @@ struct mlx5_virtq_attr { u16 used_index; }; -static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, - struct mlx5_virtq_attr *attr) +struct mlx5_virtqueue_query_mem { + u8 in[MLX5_ST_SZ_BYTES(query_virtio_net_q_in)]; + u8 out[MLX5_ST_SZ_BYTES(query_virtio_net_q_out)]; +}; + +struct mlx5_virtqueue_modify_mem { + u8 in[MLX5_ST_SZ_BYTES(modify_virtio_net_q_in)]; + u8 out[MLX5_ST_SZ_BYTES(modify_virtio_net_q_out)]; +}; + +static void fill_query_virtqueue_cmd(struct mlx5_vdpa_net *ndev, + struct mlx5_vdpa_virtqueue *mvq, + struct mlx5_virtqueue_query_mem *cmd) { - int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out); - u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {}; - void *out; - void *obj_context; - void *cmd_hdr; - int err; - - out = kzalloc(outlen, GFP_KERNEL); - if (!out) - return -ENOMEM; - - cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr); + void *cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, cmd->in, general_obj_in_cmd_hdr); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); - err = 
mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen); - if (err) - goto err_cmd; +} + +static void query_virtqueue_end(struct mlx5_vdpa_net *ndev, + struct mlx5_virtqueue_query_mem *cmd, + struct mlx5_virtq_attr *attr) +{ + void *obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, cmd->out, obj_context); - obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context); memset(attr, 0, sizeof(*attr)); attr->state = MLX5_GET(virtio_net_q_object, obj_context, state); attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index); attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index); - kfree(out); - return 0; +} -err_cmd: - kfree(out); +static int query_virtqueues(struct mlx5_vdpa_net *ndev, + int start_vq, + int num_vqs, + struct mlx5_virtq_attr *attrs) +{ + struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; + struct mlx5_virtqueue_query_mem *cmd_mem; + struct mlx5_vdpa_async_cmd *cmds; + int err = 0; + + WARN(start_vq + num_vqs > mvdev->max_vqs, "query vq range invalid [%d, %d), max_vqs: %u\n", + start_vq, start_vq + num_vqs, mvdev->max_vqs); + + cmds = kvcalloc(num_vqs, sizeof(*cmds), GFP_KERNEL); + cmd_mem = kvcalloc(num_vqs, sizeof(*cmd_mem), GFP_KERNEL); + if (!cmds || !cmd_mem) { + err = -ENOMEM; + goto done; + } + + for (int i = 0; i < num_vqs; i++) { + cmds[i].in = &cmd_mem[i].in; + cmds[i].inlen = sizeof(cmd_mem[i].in); + cmds[i].out = &cmd_mem[i].out; + cmds[i].outlen = sizeof(cmd_mem[i].out); + fill_query_virtqueue_cmd(ndev, &ndev->vqs[start_vq + i], &cmd_mem[i]); + } + + err = mlx5_vdpa_exec_async_cmds(&ndev->mvdev, cmds, num_vqs); + if (err) { + mlx5_vdpa_err(mvdev, "error issuing query cmd for vq range [%d, %d): %d\n", + start_vq, start_vq + num_vqs, err); + goto done; + } + + for (int i = 0; i < num_vqs; i++) { + struct mlx5_vdpa_async_cmd *cmd = &cmds[i]; + int vq_idx = start_vq + i; + + if (cmd->err) { + mlx5_vdpa_err(mvdev, "query vq %d failed, err: %d\n", vq_idx, cmd->err); + if (!err) + err = 
cmd->err; + continue; + } + + query_virtqueue_end(ndev, &cmd_mem[i], &attrs[i]); + } + +done: + kvfree(cmd_mem); + kvfree(cmds); return err; } @@ -1251,51 +1303,30 @@ static bool modifiable_virtqueue_fields(struct mlx5_vdpa_virtqueue *mvq) return true; } -static int modify_virtqueue(struct mlx5_vdpa_net *ndev, - struct mlx5_vdpa_virtqueue *mvq, - int state) +static void fill_modify_virtqueue_cmd(struct mlx5_vdpa_net *ndev, + struct mlx5_vdpa_virtqueue *mvq, + int state, + struct mlx5_virtqueue_modify_mem *cmd) { - int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in); - u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {}; struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; struct mlx5_vdpa_mr *desc_mr = NULL; struct mlx5_vdpa_mr *vq_mr = NULL; - bool state_change = false; void *obj_context; void *cmd_hdr; void *vq_ctx; - void *in; - int err; - if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE) - return 0; - - if (!modifiable_virtqueue_fields(mvq)) - return -EINVAL; - - in = kzalloc(inlen, GFP_KERNEL); - if (!in) - return -ENOMEM; - - cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr); + cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, cmd->in, general_obj_in_cmd_hdr); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); - obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context); + obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, cmd->in, obj_context); vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context); - if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE) { - if (!is_valid_state_change(mvq->fw_state, state, is_resumable(ndev))) { - err = -EINVAL; - goto done; - } - + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE) MLX5_SET(virtio_net_q_object, obj_context, 
state, state); - state_change = true; - } if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS) { MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr); @@ -1323,7 +1354,7 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, } if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) { - vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]]; + vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]]; if (vq_mr) MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey); @@ -1332,7 +1363,7 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, } if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) { - desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]]; + desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]]; if (desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, desc_mr->mkey); @@ -1341,38 +1372,36 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, } MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, mvq->modified_fields); - err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); - if (err) - goto done; +} - if (state_change) - mvq->fw_state = state; +static void modify_virtqueue_end(struct mlx5_vdpa_net *ndev, + struct mlx5_vdpa_virtqueue *mvq, + int state) +{ + struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) { + unsigned int asid = mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]; + struct mlx5_vdpa_mr *vq_mr = mvdev->mres.mr[asid]; + mlx5_vdpa_put_mr(mvdev, mvq->vq_mr); mlx5_vdpa_get_mr(mvdev, vq_mr); mvq->vq_mr = vq_mr; } if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) { + unsigned int asid = mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]; + struct mlx5_vdpa_mr *desc_mr = mvdev->mres.mr[asid]; + mlx5_vdpa_put_mr(mvdev, mvq->desc_mr); mlx5_vdpa_get_mr(mvdev, desc_mr); 
mvq->desc_mr = desc_mr; } + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE) + mvq->fw_state = state; + mvq->modified_fields = 0; - -done: - kfree(in); - return err; -} - -static int modify_virtqueue_state(struct mlx5_vdpa_net *ndev, - struct mlx5_vdpa_virtqueue *mvq, - unsigned int state) -{ - mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_STATE; - return modify_virtqueue(ndev, mvq, state); } static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) @@ -1525,53 +1554,136 @@ err_fwqp: return err; } -static int suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +static int modify_virtqueues(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs, int state) { - struct mlx5_virtq_attr attr; + struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; + struct mlx5_virtqueue_modify_mem *cmd_mem; + struct mlx5_vdpa_async_cmd *cmds; + int err = 0; + + WARN(start_vq + num_vqs > mvdev->max_vqs, "modify vq range invalid [%d, %d), max_vqs: %u\n", + start_vq, start_vq + num_vqs, mvdev->max_vqs); + + cmds = kvcalloc(num_vqs, sizeof(*cmds), GFP_KERNEL); + cmd_mem = kvcalloc(num_vqs, sizeof(*cmd_mem), GFP_KERNEL); + if (!cmds || !cmd_mem) { + err = -ENOMEM; + goto done; + } + + for (int i = 0; i < num_vqs; i++) { + struct mlx5_vdpa_async_cmd *cmd = &cmds[i]; + struct mlx5_vdpa_virtqueue *mvq; + int vq_idx = start_vq + i; + + mvq = &ndev->vqs[vq_idx]; + + if (!modifiable_virtqueue_fields(mvq)) { + err = -EINVAL; + goto done; + } + + if (mvq->fw_state != state) { + if (!is_valid_state_change(mvq->fw_state, state, is_resumable(ndev))) { + err = -EINVAL; + goto done; + } + + mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_STATE; + } + + cmd->in = &cmd_mem[i].in; + cmd->inlen = sizeof(cmd_mem[i].in); + cmd->out = &cmd_mem[i].out; + cmd->outlen = sizeof(cmd_mem[i].out); + fill_modify_virtqueue_cmd(ndev, mvq, state, &cmd_mem[i]); + } + + err = mlx5_vdpa_exec_async_cmds(&ndev->mvdev, cmds, num_vqs); + if (err) { + mlx5_vdpa_err(mvdev, "error 
issuing modify cmd for vq range [%d, %d)\n", + start_vq, start_vq + num_vqs); + goto done; + } + + for (int i = 0; i < num_vqs; i++) { + struct mlx5_vdpa_async_cmd *cmd = &cmds[i]; + struct mlx5_vdpa_virtqueue *mvq; + int vq_idx = start_vq + i; + + mvq = &ndev->vqs[vq_idx]; + + if (cmd->err) { + mlx5_vdpa_err(mvdev, "modify vq %d failed, state: %d -> %d, err: %d\n", + vq_idx, mvq->fw_state, state, cmd->err); + if (!err) + err = cmd->err; + continue; + } + + modify_virtqueue_end(ndev, mvq, state); + } + +done: + kvfree(cmd_mem); + kvfree(cmds); + return err; +} + +static int suspend_vqs(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs) +{ + struct mlx5_vdpa_virtqueue *mvq; + struct mlx5_virtq_attr *attrs; + int vq_idx, i; int err; + if (start_vq >= ndev->cur_num_vqs) + return -EINVAL; + + mvq = &ndev->vqs[start_vq]; if (!mvq->initialized) return 0; if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) return 0; - err = modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND); - if (err) { - mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed, err: %d\n", err); + err = modify_virtqueues(ndev, start_vq, num_vqs, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND); + if (err) return err; + + attrs = kcalloc(num_vqs, sizeof(struct mlx5_virtq_attr), GFP_KERNEL); + if (!attrs) + return -ENOMEM; + + err = query_virtqueues(ndev, start_vq, num_vqs, attrs); + if (err) + goto done; + + for (i = 0, vq_idx = start_vq; i < num_vqs; i++, vq_idx++) { + mvq = &ndev->vqs[vq_idx]; + mvq->avail_idx = attrs[i].available_index; + mvq->used_idx = attrs[i].used_index; } - err = query_virtqueue(ndev, mvq, &attr); - if (err) { - mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue, err: %d\n", err); - return err; - } - - mvq->avail_idx = attr.available_index; - mvq->used_idx = attr.used_index; - - return 0; -} - -static int suspend_vqs(struct mlx5_vdpa_net *ndev) -{ - int err = 0; - int i; - - for (i = 0; i < ndev->cur_num_vqs; i++) { - int local_err = suspend_vq(ndev, 
&ndev->vqs[i]); - - err = local_err ? local_err : err; - } - +done: + kfree(attrs); return err; } -static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +static int suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) { + return suspend_vqs(ndev, mvq->index, 1); +} + +static int resume_vqs(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs) +{ + struct mlx5_vdpa_virtqueue *mvq; int err; + if (start_vq >= ndev->mvdev.max_vqs) + return -EINVAL; + + mvq = &ndev->vqs[start_vq]; if (!mvq->initialized) return 0; @@ -1583,13 +1695,9 @@ static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq /* Due to a FW quirk we need to modify the VQ fields first then change state. * This should be fixed soon. After that, a single command can be used. */ - err = modify_virtqueue(ndev, mvq, 0); - if (err) { - mlx5_vdpa_warn(&ndev->mvdev, - "modify vq properties failed for vq %u, err: %d\n", - mvq->index, err); + err = modify_virtqueues(ndev, start_vq, num_vqs, mvq->fw_state); + if (err) return err; - } break; case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND: if (!is_resumable(ndev)) { @@ -1600,30 +1708,17 @@ static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY: return 0; default: - mlx5_vdpa_warn(&ndev->mvdev, "resume vq %u called from bad state %d\n", + mlx5_vdpa_err(&ndev->mvdev, "resume vq %u called from bad state %d\n", mvq->index, mvq->fw_state); return -EINVAL; } - err = modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); - if (err) - mlx5_vdpa_warn(&ndev->mvdev, "modify to resume failed for vq %u, err: %d\n", - mvq->index, err); - - return err; + return modify_virtqueues(ndev, start_vq, num_vqs, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); } -static int resume_vqs(struct mlx5_vdpa_net *ndev) +static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) { - int err = 0; - - for (int i = 0; i < ndev->cur_num_vqs; 
i++) { - int local_err = resume_vq(ndev, &ndev->vqs[i]); - - err = local_err ? local_err : err; - } - - return err; + return resume_vqs(ndev, mvq->index, 1); } static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) @@ -2002,13 +2097,13 @@ static int setup_steering(struct mlx5_vdpa_net *ndev) ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS); if (!ns) { - mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n"); + mlx5_vdpa_err(&ndev->mvdev, "failed to get flow namespace\n"); return -EOPNOTSUPP; } ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); if (IS_ERR(ndev->rxft)) { - mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n"); + mlx5_vdpa_err(&ndev->mvdev, "failed to create flow table\n"); return PTR_ERR(ndev->rxft); } mlx5_vdpa_add_rx_flow_table(ndev); @@ -2124,45 +2219,48 @@ static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd) static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps) { struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); - int cur_qps = ndev->cur_num_vqs / 2; + int cur_vqs = ndev->cur_num_vqs; + int new_vqs = newqps * 2; int err; int i; - if (cur_qps > newqps) { - err = modify_rqt(ndev, 2 * newqps); + if (cur_vqs > new_vqs) { + err = modify_rqt(ndev, new_vqs); if (err) return err; - for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--) { - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[i]; - - if (is_resumable(ndev)) - suspend_vq(ndev, mvq); - else - teardown_vq(ndev, mvq); + if (is_resumable(ndev)) { + suspend_vqs(ndev, new_vqs, cur_vqs - new_vqs); + } else { + for (i = new_vqs; i < cur_vqs; i++) + teardown_vq(ndev, &ndev->vqs[i]); } - ndev->cur_num_vqs = 2 * newqps; + ndev->cur_num_vqs = new_vqs; } else { - ndev->cur_num_vqs = 2 * newqps; - for (i = cur_qps * 2; i < 2 * newqps; i++) { - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[i]; + ndev->cur_num_vqs = new_vqs; - err = mvq->initialized ? 
resume_vq(ndev, mvq) : setup_vq(ndev, mvq, true); + for (i = cur_vqs; i < new_vqs; i++) { + err = setup_vq(ndev, &ndev->vqs[i], false); if (err) goto clean_added; } - err = modify_rqt(ndev, 2 * newqps); + + err = resume_vqs(ndev, cur_vqs, new_vqs - cur_vqs); + if (err) + goto clean_added; + + err = modify_rqt(ndev, new_vqs); if (err) goto clean_added; } return 0; clean_added: - for (--i; i >= 2 * cur_qps; --i) + for (--i; i >= cur_vqs; --i) teardown_vq(ndev, &ndev->vqs[i]); - ndev->cur_num_vqs = 2 * cur_qps; + ndev->cur_num_vqs = cur_vqs; return err; } @@ -2528,9 +2626,9 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa return 0; } - err = query_virtqueue(ndev, mvq, &attr); + err = query_virtqueues(ndev, mvq->index, 1, &attr); if (err) { - mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n"); + mlx5_vdpa_err(mvdev, "failed to query virtqueue\n"); return err; } state->split.avail_index = attr.used_index; @@ -2755,6 +2853,9 @@ static int event_handler(struct notifier_block *nb, unsigned long event, void *p struct mlx5_eqe *eqe = param; int ret = NOTIFY_DONE; + if (ndev->mvdev.suspended) + return NOTIFY_DONE; + if (event == MLX5_EVENT_TYPE_PORT_CHANGE) { switch (eqe->sub_type) { case MLX5_PORT_CHANGE_SUBTYPE_DOWN: @@ -2879,7 +2980,7 @@ static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqu int err; if (mvq->initialized) { - err = query_virtqueue(ndev, mvq, &attr); + err = query_virtqueues(ndev, mvq->index, 1, &attr); if (err) return err; } @@ -2948,7 +3049,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, bool teardown = !is_resumable(ndev); int err; - suspend_vqs(ndev); + suspend_vqs(ndev, 0, ndev->cur_num_vqs); if (teardown) { err = save_channels_info(ndev); if (err) @@ -2973,7 +3074,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, return err; } - resume_vqs(ndev); + resume_vqs(ndev, 0, ndev->cur_num_vqs); return 0; } @@ -3097,7 +3198,7 @@ static void 
mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) teardown_vq_resources(ndev); if (ndev->setup) { - err = resume_vqs(ndev); + err = resume_vqs(ndev, 0, ndev->cur_num_vqs); if (err) { mlx5_vdpa_warn(mvdev, "failed to resume VQs\n"); goto err_driver; @@ -3122,7 +3223,7 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) err_driver: unregister_link_notifier(ndev); err_setup: - mlx5_vdpa_destroy_mr_resources(&ndev->mvdev); + mlx5_vdpa_clean_mrs(&ndev->mvdev); ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED; err_clear: up_write(&ndev->reslock); @@ -3134,7 +3235,7 @@ static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev) /* default mapping all groups are mapped to asid 0 */ for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++) - mvdev->group2asid[i] = 0; + mvdev->mres.group2asid[i] = 0; } static bool needs_vqs_reset(const struct mlx5_vdpa_dev *mvdev) @@ -3174,7 +3275,7 @@ static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags) } if (flags & VDPA_RESET_F_CLEAN_MAP) - mlx5_vdpa_destroy_mr_resources(&ndev->mvdev); + mlx5_vdpa_clean_mrs(&ndev->mvdev); ndev->mvdev.status = 0; ndev->mvdev.suspended = false; ndev->cur_num_vqs = MLX5V_DEFAULT_VQ_COUNT; @@ -3189,7 +3290,7 @@ static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags) if ((flags & VDPA_RESET_F_CLEAN_MAP) && MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { if (mlx5_vdpa_create_dma_mr(mvdev)) - mlx5_vdpa_warn(mvdev, "create MR failed\n"); + mlx5_vdpa_err(mvdev, "create MR failed\n"); } if (vq_reset) setup_vq_resources(ndev, false); @@ -3244,7 +3345,7 @@ static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, new_mr = mlx5_vdpa_create_mr(mvdev, iotlb); if (IS_ERR(new_mr)) { err = PTR_ERR(new_mr); - mlx5_vdpa_warn(mvdev, "create map failed(%d)\n", err); + mlx5_vdpa_err(mvdev, "create map failed(%d)\n", err); return err; } } else { @@ -3252,12 +3353,12 @@ static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, new_mr = NULL; } - 
if (!mvdev->mr[asid]) { + if (!mvdev->mres.mr[asid]) { mlx5_vdpa_update_mr(mvdev, new_mr, asid); } else { err = mlx5_vdpa_change_map(mvdev, new_mr, asid); if (err) { - mlx5_vdpa_warn(mvdev, "change map failed(%d)\n", err); + mlx5_vdpa_err(mvdev, "change map failed(%d)\n", err); goto out_err; } } @@ -3332,7 +3433,10 @@ static void mlx5_vdpa_free(struct vdpa_device *vdev) ndev = to_mlx5_vdpa_ndev(mvdev); free_fixed_resources(ndev); - mlx5_vdpa_destroy_mr_resources(mvdev); + mlx5_vdpa_clean_mrs(mvdev); + mlx5_vdpa_destroy_mr_resources(&ndev->mvdev); + mlx5_cmd_cleanup_async_ctx(&mvdev->async_ctx); + if (!is_zero_ether_addr(ndev->config.mac)) { pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev)); mlx5_mpfs_del_mac(pfmdev, ndev->config.mac); @@ -3500,8 +3604,7 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev) mlx5_vdpa_info(mvdev, "suspending device\n"); down_write(&ndev->reslock); - unregister_link_notifier(ndev); - err = suspend_vqs(ndev); + err = suspend_vqs(ndev, 0, ndev->cur_num_vqs); mlx5_vdpa_cvq_suspend(mvdev); mvdev->suspended = true; up_write(&ndev->reslock); @@ -3521,8 +3624,8 @@ static int mlx5_vdpa_resume(struct vdpa_device *vdev) down_write(&ndev->reslock); mvdev->suspended = false; - err = resume_vqs(ndev); - register_link_notifier(ndev); + err = resume_vqs(ndev, 0, ndev->cur_num_vqs); + queue_link_work(ndev); up_write(&ndev->reslock); return err; @@ -3537,12 +3640,12 @@ static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group, if (group >= MLX5_VDPA_NUMVQ_GROUPS) return -EINVAL; - mvdev->group2asid[group] = asid; + mvdev->mres.group2asid[group] = asid; - mutex_lock(&mvdev->mr_mtx); - if (group == MLX5_VDPA_CVQ_GROUP && mvdev->mr[asid]) - err = mlx5_vdpa_update_cvq_iotlb(mvdev, mvdev->mr[asid]->iotlb, asid); - mutex_unlock(&mvdev->mr_mtx); + mutex_lock(&mvdev->mres.lock); + if (group == MLX5_VDPA_CVQ_GROUP && mvdev->mres.mr[asid]) + err = mlx5_vdpa_update_cvq_iotlb(mvdev, mvdev->mres.mr[asid]->iotlb, asid); + 
mutex_unlock(&mvdev->mres.lock); return err; } @@ -3854,18 +3957,22 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, ndev->rqt_size = 1; } + mlx5_cmd_init_async_ctx(mdev, &mvdev->async_ctx); + ndev->mvdev.mlx_features = device_features; mvdev->vdev.dma_dev = &mdev->pdev->dev; err = mlx5_vdpa_alloc_resources(&ndev->mvdev); if (err) goto err_mpfs; - INIT_LIST_HEAD(&mvdev->mr_list_head); + err = mlx5_vdpa_init_mr_resources(mvdev); + if (err) + goto err_res; if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { err = mlx5_vdpa_create_dma_mr(mvdev); if (err) - goto err_res; + goto err_mr_res; } err = alloc_fixed_resources(ndev); @@ -3906,6 +4013,8 @@ err_reg: err_res2: free_fixed_resources(ndev); err_mr: + mlx5_vdpa_clean_mrs(mvdev); +err_mr_res: mlx5_vdpa_destroy_mr_resources(mvdev); err_res: mlx5_vdpa_free_resources(&ndev->mvdev); @@ -3937,9 +4046,37 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device * mgtdev->ndev = NULL; } +static int mlx5_vdpa_set_attr(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev, + const struct vdpa_dev_set_config *add_config) +{ + struct virtio_net_config *config; + struct mlx5_core_dev *pfmdev; + struct mlx5_vdpa_dev *mvdev; + struct mlx5_vdpa_net *ndev; + struct mlx5_core_dev *mdev; + int err = -EOPNOTSUPP; + + mvdev = to_mvdev(dev); + ndev = to_mlx5_vdpa_ndev(mvdev); + mdev = mvdev->mdev; + config = &ndev->config; + + down_write(&ndev->reslock); + if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) { + pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev)); + err = mlx5_mpfs_add_mac(pfmdev, config->mac); + if (!err) + ether_addr_copy(config->mac, add_config->net.mac); + } + + up_write(&ndev->reslock); + return err; +} + static const struct vdpa_mgmtdev_ops mdev_ops = { .dev_add = mlx5_vdpa_dev_add, .dev_del = mlx5_vdpa_dev_del, + .dev_set_attr = mlx5_vdpa_set_attr, }; static struct virtio_device_id id_table[] = { diff --git a/drivers/vdpa/pds/cmds.h b/drivers/vdpa/pds/cmds.h 
index e24d85cb8f1c..6b1bc33356b0 100644 --- a/drivers/vdpa/pds/cmds.h +++ b/drivers/vdpa/pds/cmds.h @@ -14,5 +14,4 @@ int pds_vdpa_cmd_init_vq(struct pds_vdpa_device *pdsv, u16 qid, u16 invert_idx, struct pds_vdpa_vq_info *vq_info); int pds_vdpa_cmd_reset_vq(struct pds_vdpa_device *pdsv, u16 qid, u16 invert_idx, struct pds_vdpa_vq_info *vq_info); -int pds_vdpa_cmd_set_features(struct pds_vdpa_device *pdsv, u64 features); #endif /* _VDPA_CMDS_H_ */ diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 4dbd2e55a288..8a372b51c21a 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -1361,6 +1361,80 @@ dev_err: return err; } +static int vdpa_dev_net_device_attr_set(struct vdpa_device *vdev, + struct genl_info *info) +{ + struct vdpa_dev_set_config set_config = {}; + struct vdpa_mgmt_dev *mdev = vdev->mdev; + struct nlattr **nl_attrs = info->attrs; + const u8 *macaddr; + int err = -EOPNOTSUPP; + + down_write(&vdev->cf_lock); + if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]) { + set_config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR); + macaddr = nla_data(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]); + + if (is_valid_ether_addr(macaddr)) { + ether_addr_copy(set_config.net.mac, macaddr); + if (mdev->ops->dev_set_attr) { + err = mdev->ops->dev_set_attr(mdev, vdev, + &set_config); + } else { + NL_SET_ERR_MSG_FMT_MOD(info->extack, + "Operation not supported by the device."); + } + } else { + NL_SET_ERR_MSG_FMT_MOD(info->extack, + "Invalid MAC address"); + } + } + up_write(&vdev->cf_lock); + return err; +} + +static int vdpa_nl_cmd_dev_attr_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct vdpa_device *vdev; + struct device *dev; + const char *name; + u64 classes; + int err = 0; + + if (!info->attrs[VDPA_ATTR_DEV_NAME]) + return -EINVAL; + + name = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]); + + down_write(&vdpa_dev_lock); + dev = bus_find_device(&vdpa_bus, NULL, name, vdpa_name_match); + if (!dev) { + NL_SET_ERR_MSG_MOD(info->extack, "device not 
found"); + err = -ENODEV; + goto dev_err; + } + vdev = container_of(dev, struct vdpa_device, dev); + if (!vdev->mdev) { + NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa device"); + err = -EINVAL; + goto mdev_err; + } + classes = vdpa_mgmtdev_get_classes(vdev->mdev, NULL); + if (classes & BIT_ULL(VIRTIO_ID_NET)) { + err = vdpa_dev_net_device_attr_set(vdev, info); + } else { + NL_SET_ERR_MSG_FMT_MOD(info->extack, "%s device not supported", + name); + } + +mdev_err: + put_device(dev); +dev_err: + up_write(&vdpa_dev_lock); + return err; +} + static int vdpa_dev_config_dump(struct device *dev, void *data) { struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev); @@ -1497,6 +1571,11 @@ static const struct genl_ops vdpa_nl_ops[] = { .doit = vdpa_nl_cmd_dev_stats_get_doit, .flags = GENL_ADMIN_PERM, }, + { + .cmd = VDPA_CMD_DEV_ATTR_SET, + .doit = vdpa_nl_cmd_dev_attr_set_doit, + .flags = GENL_ADMIN_PERM, + }, }; static struct genl_family vdpa_nl_family __ro_after_init = { diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c index cfe962911804..6caf09a1907b 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c @@ -414,6 +414,24 @@ static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config) net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP); } +static int vdpasim_net_set_attr(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev, + const struct vdpa_dev_set_config *config) +{ + struct vdpasim *vdpasim = container_of(dev, struct vdpasim, vdpa); + struct virtio_net_config *vio_config = vdpasim->config; + + mutex_lock(&vdpasim->mutex); + + if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) { + ether_addr_copy(vio_config->mac, config->net.mac); + mutex_unlock(&vdpasim->mutex); + return 0; + } + + mutex_unlock(&vdpasim->mutex); + return -EOPNOTSUPP; +} + static void vdpasim_net_setup_config(struct vdpasim *vdpasim, const struct vdpa_dev_set_config 
*config) { @@ -510,7 +528,8 @@ static void vdpasim_net_dev_del(struct vdpa_mgmt_dev *mdev, static const struct vdpa_mgmtdev_ops vdpasim_net_mgmtdev_ops = { .dev_add = vdpasim_net_dev_add, - .dev_del = vdpasim_net_dev_del + .dev_del = vdpasim_net_dev_del, + .dev_set_attr = vdpasim_net_set_attr }; static struct virtio_device_id id_table[] = { diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 478cd46a49ed..5a49b5a6d496 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -209,11 +209,9 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid) if (irq < 0) return; - irq_bypass_unregister_producer(&vq->call_ctx.producer); if (!vq->call_ctx.ctx) return; - vq->call_ctx.producer.token = vq->call_ctx.ctx; vq->call_ctx.producer.irq = irq; ret = irq_bypass_register_producer(&vq->call_ctx.producer); if (unlikely(ret)) @@ -709,6 +707,14 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, vq->last_avail_idx = vq_state.split.avail_index; } break; + case VHOST_SET_VRING_CALL: + if (vq->call_ctx.ctx) { + if (ops->get_status(vdpa) & + VIRTIO_CONFIG_S_DRIVER_OK) + vhost_vdpa_unsetup_vq_irq(v, idx); + vq->call_ctx.producer.token = NULL; + } + break; } r = vhost_vring_ioctl(&v->vdev, cmd, argp); @@ -747,13 +753,16 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, cb.callback = vhost_vdpa_virtqueue_cb; cb.private = vq; cb.trigger = vq->call_ctx.ctx; + vq->call_ctx.producer.token = vq->call_ctx.ctx; + if (ops->get_status(vdpa) & + VIRTIO_CONFIG_S_DRIVER_OK) + vhost_vdpa_setup_vq_irq(v, idx); } else { cb.callback = NULL; cb.private = NULL; cb.trigger = NULL; } ops->set_vq_cb(vdpa, idx, &cb); - vhost_vdpa_setup_vq_irq(v, idx); break; case VHOST_SET_VRING_NUM: @@ -1419,6 +1428,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) for (i = 0; i < nvqs; i++) { vqs[i] = &v->vqs[i]; vqs[i]->handle_kick = handle_vq_kick; + vqs[i]->call_ctx.ctx = NULL; } vhost_dev_init(dev, vqs, nvqs, 0, 
0, 0, false, vhost_vdpa_process_iotlb_msg); diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 54469277ca30..b36d2803674e 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -355,6 +355,8 @@ static inline unsigned int update_balloon_vm_stats(struct virtio_balloon *vb) { unsigned long events[NR_VM_EVENT_ITEMS]; unsigned int idx = 0; + unsigned int zid; + unsigned long stall = 0; all_vm_events(events); update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN, @@ -363,6 +365,22 @@ static inline unsigned int update_balloon_vm_stats(struct virtio_balloon *vb) pages_to_bytes(events[PSWPOUT])); update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]); update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]); + update_stat(vb, idx++, VIRTIO_BALLOON_S_OOM_KILL, events[OOM_KILL]); + + /* sum all the stall events */ + for (zid = 0; zid < MAX_NR_ZONES; zid++) + stall += events[ALLOCSTALL_NORMAL - ZONE_NORMAL + zid]; + + update_stat(vb, idx++, VIRTIO_BALLOON_S_ALLOC_STALL, stall); + + update_stat(vb, idx++, VIRTIO_BALLOON_S_ASYNC_SCAN, + pages_to_bytes(events[PGSCAN_KSWAPD])); + update_stat(vb, idx++, VIRTIO_BALLOON_S_DIRECT_SCAN, + pages_to_bytes(events[PGSCAN_DIRECT])); + update_stat(vb, idx++, VIRTIO_BALLOON_S_ASYNC_RECLAIM, + pages_to_bytes(events[PGSTEAL_KSWAPD])); + update_stat(vb, idx++, VIRTIO_BALLOON_S_DIRECT_RECLAIM, + pages_to_bytes(events[PGSTEAL_DIRECT])); #ifdef CONFIG_HUGETLB_PAGE update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGALLOC, diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index b6bd6ab71f5d..6404a189e989 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -56,12 +56,14 @@ struct virtio_fs_vq { bool connected; long in_flight; struct completion in_flight_zero; /* No inflight requests */ + struct kobject *kobj; char name[VQ_NAME_LEN]; } ____cacheline_aligned_in_smp; /* A virtio-fs device instance */ struct virtio_fs { struct kobject kobj; + struct kobject 
*mqs_kobj; struct list_head list; /* on virtio_fs_instances */ char *tag; struct virtio_fs_vq *vqs; @@ -200,19 +202,94 @@ static const struct kobj_type virtio_fs_ktype = { .default_groups = virtio_fs_groups, }; +static struct virtio_fs_vq *virtio_fs_kobj_to_vq(struct virtio_fs *fs, + struct kobject *kobj) +{ + int i; + + for (i = 0; i < fs->nvqs; i++) { + if (kobj == fs->vqs[i].kobj) + return &fs->vqs[i]; + } + return NULL; +} + +static ssize_t name_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct virtio_fs *fs = container_of(kobj->parent->parent, struct virtio_fs, kobj); + struct virtio_fs_vq *fsvq = virtio_fs_kobj_to_vq(fs, kobj); + + if (!fsvq) + return -EINVAL; + return sysfs_emit(buf, "%s\n", fsvq->name); +} + +static struct kobj_attribute virtio_fs_vq_name_attr = __ATTR_RO(name); + +static ssize_t cpu_list_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct virtio_fs *fs = container_of(kobj->parent->parent, struct virtio_fs, kobj); + struct virtio_fs_vq *fsvq = virtio_fs_kobj_to_vq(fs, kobj); + unsigned int cpu, qid; + const size_t size = PAGE_SIZE - 1; + bool first = true; + int ret = 0, pos = 0; + + if (!fsvq) + return -EINVAL; + + qid = fsvq->vq->index; + for (cpu = 0; cpu < nr_cpu_ids; cpu++) { + if (qid < VQ_REQUEST || (fs->mq_map[cpu] == qid - VQ_REQUEST)) { + if (first) + ret = snprintf(buf + pos, size - pos, "%u", cpu); + else + ret = snprintf(buf + pos, size - pos, ", %u", cpu); + + if (ret >= size - pos) + break; + first = false; + pos += ret; + } + } + ret = snprintf(buf + pos, size + 1 - pos, "\n"); + return pos + ret; +} + +static struct kobj_attribute virtio_fs_vq_cpu_list_attr = __ATTR_RO(cpu_list); + +static struct attribute *virtio_fs_vq_attrs[] = { + &virtio_fs_vq_name_attr.attr, + &virtio_fs_vq_cpu_list_attr.attr, + NULL +}; + +static struct attribute_group virtio_fs_vq_attr_group = { + .attrs = virtio_fs_vq_attrs, +}; + /* Make sure virtiofs_mutex is held */ +static void 
virtio_fs_put_locked(struct virtio_fs *fs) +{ + lockdep_assert_held(&virtio_fs_mutex); + + kobject_put(&fs->kobj); +} + static void virtio_fs_put(struct virtio_fs *fs) { - kobject_put(&fs->kobj); + mutex_lock(&virtio_fs_mutex); + virtio_fs_put_locked(fs); + mutex_unlock(&virtio_fs_mutex); } static void virtio_fs_fiq_release(struct fuse_iqueue *fiq) { struct virtio_fs *vfs = fiq->priv; - mutex_lock(&virtio_fs_mutex); virtio_fs_put(vfs); - mutex_unlock(&virtio_fs_mutex); } static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq) @@ -273,6 +350,50 @@ static void virtio_fs_start_all_queues(struct virtio_fs *fs) } } +static void virtio_fs_delete_queues_sysfs(struct virtio_fs *fs) +{ + struct virtio_fs_vq *fsvq; + int i; + + for (i = 0; i < fs->nvqs; i++) { + fsvq = &fs->vqs[i]; + kobject_put(fsvq->kobj); + } +} + +static int virtio_fs_add_queues_sysfs(struct virtio_fs *fs) +{ + struct virtio_fs_vq *fsvq; + char buff[12]; + int i, j, ret; + + for (i = 0; i < fs->nvqs; i++) { + fsvq = &fs->vqs[i]; + + sprintf(buff, "%d", i); + fsvq->kobj = kobject_create_and_add(buff, fs->mqs_kobj); + if (!fs->mqs_kobj) { + ret = -ENOMEM; + goto out_del; + } + + ret = sysfs_create_group(fsvq->kobj, &virtio_fs_vq_attr_group); + if (ret) { + kobject_put(fsvq->kobj); + goto out_del; + } + } + + return 0; + +out_del: + for (j = 0; j < i; j++) { + fsvq = &fs->vqs[j]; + kobject_put(fsvq->kobj); + } + return ret; +} + /* Add a new instance to the list or return -EEXIST if tag name exists*/ static int virtio_fs_add_instance(struct virtio_device *vdev, struct virtio_fs *fs) @@ -296,17 +417,22 @@ static int virtio_fs_add_instance(struct virtio_device *vdev, */ fs->kobj.kset = virtio_fs_kset; ret = kobject_add(&fs->kobj, NULL, "%d", vdev->index); - if (ret < 0) { - mutex_unlock(&virtio_fs_mutex); - return ret; + if (ret < 0) + goto out_unlock; + + fs->mqs_kobj = kobject_create_and_add("mqs", &fs->kobj); + if (!fs->mqs_kobj) { + ret = -ENOMEM; + goto out_del; } ret = sysfs_create_link(&fs->kobj, 
&vdev->dev.kobj, "device"); - if (ret < 0) { - kobject_del(&fs->kobj); - mutex_unlock(&virtio_fs_mutex); - return ret; - } + if (ret < 0) + goto out_put; + + ret = virtio_fs_add_queues_sysfs(fs); + if (ret) + goto out_remove; list_add_tail(&fs->list, &virtio_fs_instances); @@ -315,6 +441,16 @@ static int virtio_fs_add_instance(struct virtio_device *vdev, kobject_uevent(&fs->kobj, KOBJ_ADD); return 0; + +out_remove: + sysfs_remove_link(&fs->kobj, "device"); +out_put: + kobject_put(fs->mqs_kobj); +out_del: + kobject_del(&fs->kobj); +out_unlock: + mutex_unlock(&virtio_fs_mutex); + return ret; } /* Return the virtio_fs with a given tag, or NULL */ @@ -1043,7 +1179,9 @@ static void virtio_fs_remove(struct virtio_device *vdev) mutex_lock(&virtio_fs_mutex); /* This device is going away. No one should get new reference */ list_del_init(&fs->list); + virtio_fs_delete_queues_sysfs(fs); sysfs_remove_link(&fs->kobj, "device"); + kobject_put(fs->mqs_kobj); kobject_del(&fs->kobj); virtio_fs_stop_all_queues(fs); virtio_fs_drain_all_queues_locked(fs); @@ -1052,7 +1190,7 @@ static void virtio_fs_remove(struct virtio_device *vdev) vdev->priv = NULL; /* Put device reference on virtio_fs object */ - virtio_fs_put(fs); + virtio_fs_put_locked(fs); mutex_unlock(&virtio_fs_mutex); } @@ -1581,9 +1719,7 @@ static int virtio_fs_get_tree(struct fs_context *fsc) out_err: kfree(fc); - mutex_lock(&virtio_fs_mutex); virtio_fs_put(fs); - mutex_unlock(&virtio_fs_mutex); return err; } diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 7977ca03ac7a..2e7a30fe6b92 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -582,11 +582,20 @@ void vdpa_set_status(struct vdpa_device *vdev, u8 status); * @dev: vdpa device to remove * Driver need to remove the specified device by calling * _vdpa_unregister_device(). 
+ * @dev_set_attr: change a vdpa device's attr after it was created + * @mdev: parent device to use for device + * @dev: vdpa device structure + * @config: Attributes to be set for the device. + * The driver needs to check the mask of the structure and then set + * the related information to the vdpa device. The driver must return 0 + * if set successfully. */ struct vdpa_mgmtdev_ops { int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name, const struct vdpa_dev_set_config *config); void (*dev_del)(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev); + int (*dev_set_attr)(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev, + const struct vdpa_dev_set_config *config); }; /** diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index 842bf1201ac4..71edf2c70cc3 100644 --- a/include/uapi/linux/vdpa.h +++ b/include/uapi/linux/vdpa.h @@ -19,6 +19,7 @@ enum vdpa_command { VDPA_CMD_DEV_GET, /* can dump */ VDPA_CMD_DEV_CONFIG_GET, /* can dump */ VDPA_CMD_DEV_VSTATS_GET, + VDPA_CMD_DEV_ATTR_SET, }; enum vdpa_attr { diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index ddaa45e723c4..ee35a372805d 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -71,7 +71,13 @@ struct virtio_balloon_config { #define VIRTIO_BALLOON_S_CACHES 7 /* Disk caches */ #define VIRTIO_BALLOON_S_HTLB_PGALLOC 8 /* Hugetlb page allocations */ #define VIRTIO_BALLOON_S_HTLB_PGFAIL 9 /* Hugetlb page allocation failures */ -#define VIRTIO_BALLOON_S_NR 10 +#define VIRTIO_BALLOON_S_OOM_KILL 10 /* OOM killer invocations */ +#define VIRTIO_BALLOON_S_ALLOC_STALL 11 /* Stall count of memory allocation */ +#define VIRTIO_BALLOON_S_ASYNC_SCAN 12 /* Amount of memory scanned asynchronously */ +#define VIRTIO_BALLOON_S_DIRECT_SCAN 13 /* Amount of memory scanned directly */ +#define VIRTIO_BALLOON_S_ASYNC_RECLAIM 14 /* Amount of memory reclaimed asynchronously */ +#define VIRTIO_BALLOON_S_DIRECT_RECLAIM 15 /* Amount of
memory reclaimed directly */ +#define VIRTIO_BALLOON_S_NR 16 #define VIRTIO_BALLOON_S_NAMES_WITH_PREFIX(VIRTIO_BALLOON_S_NAMES_prefix) { \ VIRTIO_BALLOON_S_NAMES_prefix "swap-in", \ @@ -83,7 +89,13 @@ struct virtio_balloon_config { VIRTIO_BALLOON_S_NAMES_prefix "available-memory", \ VIRTIO_BALLOON_S_NAMES_prefix "disk-caches", \ VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-allocations", \ - VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures" \ + VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures", \ + VIRTIO_BALLOON_S_NAMES_prefix "oom-kills", \ + VIRTIO_BALLOON_S_NAMES_prefix "alloc-stalls", \ + VIRTIO_BALLOON_S_NAMES_prefix "async-scans", \ + VIRTIO_BALLOON_S_NAMES_prefix "direct-scans", \ + VIRTIO_BALLOON_S_NAMES_prefix "async-reclaims", \ + VIRTIO_BALLOON_S_NAMES_prefix "direct-reclaims" \ } #define VIRTIO_BALLOON_S_NAMES VIRTIO_BALLOON_S_NAMES_WITH_PREFIX("") diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index e0160da4ef43..85e423921734 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -94,6 +94,63 @@ out_rcu: return ret; } +/* Caller need to hold vsock->tx_lock on vq */ +static int virtio_transport_send_skb(struct sk_buff *skb, struct virtqueue *vq, + struct virtio_vsock *vsock) +{ + int ret, in_sg = 0, out_sg = 0; + struct scatterlist **sgs; + + sgs = vsock->out_sgs; + sg_init_one(sgs[out_sg], virtio_vsock_hdr(skb), + sizeof(*virtio_vsock_hdr(skb))); + out_sg++; + + if (!skb_is_nonlinear(skb)) { + if (skb->len > 0) { + sg_init_one(sgs[out_sg], skb->data, skb->len); + out_sg++; + } + } else { + struct skb_shared_info *si; + int i; + + /* If skb is nonlinear, then its buffer must contain + * only header and nothing more. Data is stored in + * the fragged part. 
+ */ + WARN_ON_ONCE(skb_headroom(skb) != sizeof(*virtio_vsock_hdr(skb))); + + si = skb_shinfo(skb); + + for (i = 0; i < si->nr_frags; i++) { + skb_frag_t *skb_frag = &si->frags[i]; + void *va; + + /* We will use 'page_to_virt()' for the userspace page + * here, because virtio or dma-mapping layers will call + * 'virt_to_phys()' later to fill the buffer descriptor. + * We don't touch memory at "virtual" address of this page. + */ + va = page_to_virt(skb_frag_page(skb_frag)); + sg_init_one(sgs[out_sg], + va + skb_frag_off(skb_frag), + skb_frag_size(skb_frag)); + out_sg++; + } + } + + ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL); + /* Usually this means that there is no more space available in + * the vq + */ + if (ret < 0) + return ret; + + virtio_transport_deliver_tap_pkt(skb); + return 0; +} + static void virtio_transport_send_pkt_work(struct work_struct *work) { @@ -111,66 +168,22 @@ virtio_transport_send_pkt_work(struct work_struct *work) vq = vsock->vqs[VSOCK_VQ_TX]; for (;;) { - int ret, in_sg = 0, out_sg = 0; - struct scatterlist **sgs; struct sk_buff *skb; bool reply; + int ret; skb = virtio_vsock_skb_dequeue(&vsock->send_pkt_queue); if (!skb) break; reply = virtio_vsock_skb_reply(skb); - sgs = vsock->out_sgs; - sg_init_one(sgs[out_sg], virtio_vsock_hdr(skb), - sizeof(*virtio_vsock_hdr(skb))); - out_sg++; - if (!skb_is_nonlinear(skb)) { - if (skb->len > 0) { - sg_init_one(sgs[out_sg], skb->data, skb->len); - out_sg++; - } - } else { - struct skb_shared_info *si; - int i; - - /* If skb is nonlinear, then its buffer must contain - * only header and nothing more. Data is stored in - * the fragged part. 
- */ - WARN_ON_ONCE(skb_headroom(skb) != sizeof(*virtio_vsock_hdr(skb))); - - si = skb_shinfo(skb); - - for (i = 0; i < si->nr_frags; i++) { - skb_frag_t *skb_frag = &si->frags[i]; - void *va; - - /* We will use 'page_to_virt()' for the userspace page - * here, because virtio or dma-mapping layers will call - * 'virt_to_phys()' later to fill the buffer descriptor. - * We don't touch memory at "virtual" address of this page. - */ - va = page_to_virt(skb_frag_page(skb_frag)); - sg_init_one(sgs[out_sg], - va + skb_frag_off(skb_frag), - skb_frag_size(skb_frag)); - out_sg++; - } - } - - ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL); - /* Usually this means that there is no more space available in - * the vq - */ + ret = virtio_transport_send_skb(skb, vq, vsock); if (ret < 0) { virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb); break; } - virtio_transport_deliver_tap_pkt(skb); - if (reply) { struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX]; int val; @@ -195,6 +208,28 @@ out: queue_work(virtio_vsock_workqueue, &vsock->rx_work); } +/* Caller need to hold RCU for vsock. + * Returns 0 if the packet is successfully put on the vq. + */ +static int virtio_transport_send_skb_fast_path(struct virtio_vsock *vsock, struct sk_buff *skb) +{ + struct virtqueue *vq = vsock->vqs[VSOCK_VQ_TX]; + int ret; + + /* Inside RCU, can't sleep! 
*/ + ret = mutex_trylock(&vsock->tx_lock); + if (unlikely(ret == 0)) + return -EBUSY; + + ret = virtio_transport_send_skb(skb, vq, vsock); + if (ret == 0) + virtqueue_kick(vq); + + mutex_unlock(&vsock->tx_lock); + + return ret; +} + static int virtio_transport_send_pkt(struct sk_buff *skb) { @@ -218,11 +253,20 @@ virtio_transport_send_pkt(struct sk_buff *skb) goto out_rcu; } - if (virtio_vsock_skb_reply(skb)) - atomic_inc(&vsock->queued_replies); + /* If send_pkt_queue is empty, we can safely bypass this queue + * because packet order is maintained and (try) to put the packet + * on the virtqueue using virtio_transport_send_skb_fast_path. + * If this fails we simply put the packet on the intermediate + * queue and schedule the worker. + */ + if (!skb_queue_empty_lockless(&vsock->send_pkt_queue) || + virtio_transport_send_skb_fast_path(vsock, skb)) { + if (virtio_vsock_skb_reply(skb)) + atomic_inc(&vsock->queued_replies); - virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb); - queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work); + virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb); + queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work); + } out_rcu: rcu_read_unlock(); diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c index 5a18b2301a63..e471d8e7cfaa 100644 --- a/tools/virtio/ringtest/main.c +++ b/tools/virtio/ringtest/main.c @@ -276,7 +276,7 @@ static void help(void) fprintf(stderr, "Usage: [--help]" " [--host-affinity H]" " [--guest-affinity G]" - " [--ring-size R (default: %d)]" + " [--ring-size R (default: %u)]" " [--run-cycles C (default: %d)]" " [--batch b]" " [--outstanding o]"