Merge tag 'gvt-next-2018-07-11' of https://github.com/intel/gvt-linux into drm-intel-next-queued

gvt-next-2018-07-11

- vGPU huge page support (Changbin)
- BXT display irq warning fix (Colin)
- Handle GVT dependency well (Henry)

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180711023353.GU1267@zhen-hp.sh.intel.com
Rodrigo Vivi 2018-07-12 00:23:44 -07:00
commit 91045034d5
8 changed files with 518 additions and 95 deletions


@ -216,16 +216,22 @@ static struct gtt_type_table_entry gtt_type_table[] = {
GTT_TYPE_PPGTT_PDE_PT,
GTT_TYPE_PPGTT_PTE_PT,
GTT_TYPE_PPGTT_PTE_2M_ENTRY),
/* We take IPS bit as 'PSE' for PTE level. */
GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT,
GTT_TYPE_PPGTT_PTE_4K_ENTRY,
GTT_TYPE_PPGTT_PTE_PT,
GTT_TYPE_INVALID,
GTT_TYPE_INVALID),
GTT_TYPE_PPGTT_PTE_64K_ENTRY),
GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY,
GTT_TYPE_PPGTT_PTE_4K_ENTRY,
GTT_TYPE_PPGTT_PTE_PT,
GTT_TYPE_INVALID,
GTT_TYPE_INVALID),
GTT_TYPE_PPGTT_PTE_64K_ENTRY),
GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_64K_ENTRY,
GTT_TYPE_PPGTT_PTE_4K_ENTRY,
GTT_TYPE_PPGTT_PTE_PT,
GTT_TYPE_INVALID,
GTT_TYPE_PPGTT_PTE_64K_ENTRY),
GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY,
GTT_TYPE_PPGTT_PDE_ENTRY,
GTT_TYPE_PPGTT_PDE_PT,
@ -339,8 +345,14 @@ static inline int gtt_set_entry64(void *pt,
#define ADDR_1G_MASK GENMASK_ULL(GTT_HAW - 1, 30)
#define ADDR_2M_MASK GENMASK_ULL(GTT_HAW - 1, 21)
#define ADDR_64K_MASK GENMASK_ULL(GTT_HAW - 1, 16)
#define ADDR_4K_MASK GENMASK_ULL(GTT_HAW - 1, 12)
#define GTT_SPTE_FLAG_MASK GENMASK_ULL(62, 52)
#define GTT_SPTE_FLAG_64K_SPLITED BIT(52) /* splited 64K gtt entry */
#define GTT_64K_PTE_STRIDE 16
static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e)
{
unsigned long pfn;
@ -349,6 +361,8 @@ static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e)
pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT;
else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY)
pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT;
else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY)
pfn = (e->val64 & ADDR_64K_MASK) >> PAGE_SHIFT;
else
pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT;
return pfn;
@ -362,6 +376,9 @@ static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn)
} else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) {
e->val64 &= ~ADDR_2M_MASK;
pfn &= (ADDR_2M_MASK >> PAGE_SHIFT);
} else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) {
e->val64 &= ~ADDR_64K_MASK;
pfn &= (ADDR_64K_MASK >> PAGE_SHIFT);
} else {
e->val64 &= ~ADDR_4K_MASK;
pfn &= (ADDR_4K_MASK >> PAGE_SHIFT);
@ -372,16 +389,41 @@ static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn)
static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e)
{
/* Entry doesn't have PSE bit. */
if (get_pse_type(e->type) == GTT_TYPE_INVALID)
return !!(e->val64 & _PAGE_PSE);
}
static void gen8_gtt_clear_pse(struct intel_gvt_gtt_entry *e)
{
if (gen8_gtt_test_pse(e)) {
switch (e->type) {
case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
e->val64 &= ~_PAGE_PSE;
e->type = GTT_TYPE_PPGTT_PDE_ENTRY;
break;
case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
e->type = GTT_TYPE_PPGTT_PDP_ENTRY;
e->val64 &= ~_PAGE_PSE;
break;
default:
WARN_ON(1);
}
}
}
static bool gen8_gtt_test_ips(struct intel_gvt_gtt_entry *e)
{
if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY))
return false;
e->type = get_entry_type(e->type);
if (!(e->val64 & _PAGE_PSE))
return false;
return !!(e->val64 & GEN8_PDE_IPS_64K);
}
e->type = get_pse_type(e->type);
return true;
static void gen8_gtt_clear_ips(struct intel_gvt_gtt_entry *e)
{
if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY))
return;
e->val64 &= ~GEN8_PDE_IPS_64K;
}
static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e)
@ -408,6 +450,21 @@ static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e)
e->val64 |= _PAGE_PRESENT;
}
static bool gen8_gtt_test_64k_splited(struct intel_gvt_gtt_entry *e)
{
return !!(e->val64 & GTT_SPTE_FLAG_64K_SPLITED);
}
static void gen8_gtt_set_64k_splited(struct intel_gvt_gtt_entry *e)
{
e->val64 |= GTT_SPTE_FLAG_64K_SPLITED;
}
static void gen8_gtt_clear_64k_splited(struct intel_gvt_gtt_entry *e)
{
e->val64 &= ~GTT_SPTE_FLAG_64K_SPLITED;
}
/*
* Per-platform GMA routines.
*/
@ -440,6 +497,12 @@ static struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = {
.set_present = gtt_entry_set_present,
.test_present = gen8_gtt_test_present,
.test_pse = gen8_gtt_test_pse,
.clear_pse = gen8_gtt_clear_pse,
.clear_ips = gen8_gtt_clear_ips,
.test_ips = gen8_gtt_test_ips,
.clear_64k_splited = gen8_gtt_clear_64k_splited,
.set_64k_splited = gen8_gtt_set_64k_splited,
.test_64k_splited = gen8_gtt_test_64k_splited,
.get_pfn = gen8_gtt_get_pfn,
.set_pfn = gen8_gtt_set_pfn,
};
@ -453,6 +516,27 @@ static struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = {
.gma_to_pml4_index = gen8_gma_to_pml4_index,
};
/* Update entry type per pse and ips bit. */
static void update_entry_type_for_real(struct intel_gvt_gtt_pte_ops *pte_ops,
struct intel_gvt_gtt_entry *entry, bool ips)
{
switch (entry->type) {
case GTT_TYPE_PPGTT_PDE_ENTRY:
case GTT_TYPE_PPGTT_PDP_ENTRY:
if (pte_ops->test_pse(entry))
entry->type = get_pse_type(entry->type);
break;
case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
if (ips)
entry->type = get_pse_type(entry->type);
break;
default:
GEM_BUG_ON(!gtt_type_is_entry(entry->type));
}
GEM_BUG_ON(entry->type == GTT_TYPE_INVALID);
}
/*
* MM helpers.
*/
@ -468,8 +552,7 @@ static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm,
pte_ops->get_entry(guest ? mm->ppgtt_mm.guest_pdps :
mm->ppgtt_mm.shadow_pdps,
entry, index, false, 0, mm->vgpu);
pte_ops->test_pse(entry);
update_entry_type_for_real(pte_ops, entry, false);
}
static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm,
@ -574,7 +657,8 @@ static inline int ppgtt_spt_get_entry(
if (ret)
return ret;
ops->test_pse(e);
update_entry_type_for_real(ops, e, guest ?
spt->guest_page.pde_ips : false);
gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
type, e->type, index, e->val64);
@ -653,10 +737,12 @@ static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt)
radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn);
if (spt->guest_page.oos_page)
detach_oos_page(spt->vgpu, spt->guest_page.oos_page);
if (spt->guest_page.gfn) {
if (spt->guest_page.oos_page)
detach_oos_page(spt->vgpu, spt->guest_page.oos_page);
intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn);
intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn);
}
list_del_init(&spt->post_shadow_list);
free_spt(spt);
@ -717,8 +803,9 @@ static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn(
static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);
/* Allocate shadow page table without guest page. */
static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt(
struct intel_vgpu *vgpu, int type, unsigned long gfn)
struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type)
{
struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev;
struct intel_vgpu_ppgtt_spt *spt = NULL;
@ -753,26 +840,12 @@ retry:
spt->shadow_page.vaddr = page_address(spt->shadow_page.page);
spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT;
/*
* Init guest_page.
*/
spt->guest_page.type = type;
spt->guest_page.gfn = gfn;
ret = intel_vgpu_register_page_track(vgpu, spt->guest_page.gfn,
ppgtt_write_protection_handler, spt);
ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt);
if (ret)
goto err_unmap_dma;
ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt);
if (ret)
goto err_unreg_page_track;
trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);
return spt;
err_unreg_page_track:
intel_vgpu_unregister_page_track(vgpu, spt->guest_page.gfn);
err_unmap_dma:
dma_unmap_page(kdev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
err_free_spt:
@ -780,6 +853,37 @@ err_free_spt:
return ERR_PTR(ret);
}
/* Allocate shadow page table associated with specific gfn. */
static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn(
struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type,
unsigned long gfn, bool guest_pde_ips)
{
struct intel_vgpu_ppgtt_spt *spt;
int ret;
spt = ppgtt_alloc_spt(vgpu, type);
if (IS_ERR(spt))
return spt;
/*
* Init guest_page.
*/
ret = intel_vgpu_register_page_track(vgpu, gfn,
ppgtt_write_protection_handler, spt);
if (ret) {
ppgtt_free_spt(spt);
return ERR_PTR(ret);
}
spt->guest_page.type = type;
spt->guest_page.gfn = gfn;
spt->guest_page.pde_ips = guest_pde_ips;
trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);
return spt;
}
#define pt_entry_size_shift(spt) \
((spt)->vgpu->gvt->device_info.gtt_entry_size_shift)
@ -787,24 +891,38 @@ err_free_spt:
(I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt))
#define for_each_present_guest_entry(spt, e, i) \
for (i = 0; i < pt_entries(spt); i++) \
for (i = 0; i < pt_entries(spt); \
i += spt->guest_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
if (!ppgtt_get_guest_entry(spt, e, i) && \
spt->vgpu->gvt->gtt.pte_ops->test_present(e))
#define for_each_present_shadow_entry(spt, e, i) \
for (i = 0; i < pt_entries(spt); i++) \
for (i = 0; i < pt_entries(spt); \
i += spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
if (!ppgtt_get_shadow_entry(spt, e, i) && \
spt->vgpu->gvt->gtt.pte_ops->test_present(e))
static void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt)
#define for_each_shadow_entry(spt, e, i) \
for (i = 0; i < pt_entries(spt); \
i += (spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1)) \
if (!ppgtt_get_shadow_entry(spt, e, i))
static inline void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt)
{
int v = atomic_read(&spt->refcount);
trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1));
atomic_inc(&spt->refcount);
}
static inline int ppgtt_put_spt(struct intel_vgpu_ppgtt_spt *spt)
{
int v = atomic_read(&spt->refcount);
trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));
return atomic_dec_return(&spt->refcount);
}
static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt);
static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
@ -843,7 +961,8 @@ static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt,
pfn = ops->get_pfn(entry);
type = spt->shadow_page.type;
if (pfn == vgpu->gtt.scratch_pt[type].page_mfn)
/* Uninitialized spte or unshadowed spte. */
if (!pfn || pfn == vgpu->gtt.scratch_pt[type].page_mfn)
return;
intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
@ -855,14 +974,11 @@ static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
struct intel_gvt_gtt_entry e;
unsigned long index;
int ret;
int v = atomic_read(&spt->refcount);
trace_spt_change(spt->vgpu->id, "die", spt,
spt->guest_page.gfn, spt->shadow_page.type);
trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));
if (atomic_dec_return(&spt->refcount) > 0)
if (ppgtt_put_spt(spt) > 0)
return 0;
for_each_present_shadow_entry(spt, &e, index) {
@ -871,9 +987,15 @@ static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
gvt_vdbg_mm("invalidate 4K entry\n");
ppgtt_invalidate_pte(spt, &e);
break;
case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
/* We don't set up 64K shadow entries so far. */
WARN(1, "suspicious 64K gtt entry\n");
continue;
case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
gvt_vdbg_mm("invalidate 2M entry\n");
continue;
case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
WARN(1, "GVT doesn't support 2M/1GB page\n");
WARN(1, "GVT doesn't support 1GB page\n");
continue;
case GTT_TYPE_PPGTT_PML4_ENTRY:
case GTT_TYPE_PPGTT_PDP_ENTRY:
@ -899,6 +1021,22 @@ fail:
return ret;
}
static bool vgpu_ips_enabled(struct intel_vgpu *vgpu)
{
struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
if (INTEL_GEN(dev_priv) == 9 || INTEL_GEN(dev_priv) == 10) {
u32 ips = vgpu_vreg_t(vgpu, GEN8_GAMW_ECO_DEV_RW_IA) &
GAMW_ECO_ENABLE_64K_IPS_FIELD;
return ips == GAMW_ECO_ENABLE_64K_IPS_FIELD;
} else if (INTEL_GEN(dev_priv) >= 11) {
/* 64K paging is controlled only by the IPS bit in the PTE now. */
return true;
} else
return false;
}
static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt);
static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
@ -906,35 +1044,54 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
{
struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
struct intel_vgpu_ppgtt_spt *spt = NULL;
bool ips = false;
int ret;
GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type)));
if (we->type == GTT_TYPE_PPGTT_PDE_ENTRY)
ips = vgpu_ips_enabled(vgpu) && ops->test_ips(we);
spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we));
if (spt)
if (spt) {
ppgtt_get_spt(spt);
else {
if (ips != spt->guest_page.pde_ips) {
spt->guest_page.pde_ips = ips;
gvt_dbg_mm("reshadow PDE since ips changed\n");
clear_page(spt->shadow_page.vaddr);
ret = ppgtt_populate_spt(spt);
if (ret) {
ppgtt_put_spt(spt);
goto err;
}
}
} else {
int type = get_next_pt_type(we->type);
spt = ppgtt_alloc_spt(vgpu, type, ops->get_pfn(we));
spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips);
if (IS_ERR(spt)) {
ret = PTR_ERR(spt);
goto fail;
goto err;
}
ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn);
if (ret)
goto fail;
goto err_free_spt;
ret = ppgtt_populate_spt(spt);
if (ret)
goto fail;
goto err_free_spt;
trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn,
spt->shadow_page.type);
}
return spt;
fail:
err_free_spt:
ppgtt_free_spt(spt);
err:
gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
spt, we->val64, we->type);
return ERR_PTR(ret);
@ -948,16 +1105,118 @@ static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
se->type = ge->type;
se->val64 = ge->val64;
/* Because we always split 64KB pages, clear IPS in the shadow PDE. */
if (se->type == GTT_TYPE_PPGTT_PDE_ENTRY)
ops->clear_ips(se);
ops->set_pfn(se, s->shadow_page.mfn);
}
/**
* Return 1 if 2MB huge gtt shadowing is possible, 0 if the conditions
* are not met, negative if an error is found.
*/
static int is_2MB_gtt_possible(struct intel_vgpu *vgpu,
struct intel_gvt_gtt_entry *entry)
{
struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
unsigned long pfn;
if (!HAS_PAGE_SIZES(vgpu->gvt->dev_priv, I915_GTT_PAGE_SIZE_2M))
return 0;
pfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, ops->get_pfn(entry));
if (pfn == INTEL_GVT_INVALID_ADDR)
return -EINVAL;
return PageTransHuge(pfn_to_page(pfn));
}
static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
struct intel_gvt_gtt_entry *se)
{
struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
struct intel_vgpu_ppgtt_spt *sub_spt;
struct intel_gvt_gtt_entry sub_se;
unsigned long start_gfn;
dma_addr_t dma_addr;
unsigned long sub_index;
int ret;
gvt_dbg_mm("Split 2M gtt entry, index %lu\n", index);
start_gfn = ops->get_pfn(se);
sub_spt = ppgtt_alloc_spt(vgpu, GTT_TYPE_PPGTT_PTE_PT);
if (IS_ERR(sub_spt))
return PTR_ERR(sub_spt);
for_each_shadow_entry(sub_spt, &sub_se, sub_index) {
ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu,
start_gfn + sub_index, PAGE_SIZE, &dma_addr);
if (ret) {
ppgtt_invalidate_spt(spt);
return ret;
}
sub_se.val64 = se->val64;
/* Copy the PAT field from PDE. */
sub_se.val64 &= ~_PAGE_PAT;
sub_se.val64 |= (se->val64 & _PAGE_PAT_LARGE) >> 5;
ops->set_pfn(&sub_se, dma_addr >> PAGE_SHIFT);
ppgtt_set_shadow_entry(sub_spt, &sub_se, sub_index);
}
/* Clear dirty field. */
se->val64 &= ~_PAGE_DIRTY;
ops->clear_pse(se);
ops->clear_ips(se);
ops->set_pfn(se, sub_spt->shadow_page.mfn);
ppgtt_set_shadow_entry(spt, se, index);
return 0;
}
static int split_64KB_gtt_entry(struct intel_vgpu *vgpu,
struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
struct intel_gvt_gtt_entry *se)
{
struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
struct intel_gvt_gtt_entry entry = *se;
unsigned long start_gfn;
dma_addr_t dma_addr;
int i, ret;
gvt_vdbg_mm("Split 64K gtt entry, index %lu\n", index);
GEM_BUG_ON(index % GTT_64K_PTE_STRIDE);
start_gfn = ops->get_pfn(se);
entry.type = GTT_TYPE_PPGTT_PTE_4K_ENTRY;
ops->set_64k_splited(&entry);
for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu,
start_gfn + i, PAGE_SIZE, &dma_addr);
if (ret)
return ret;
ops->set_pfn(&entry, dma_addr >> PAGE_SHIFT);
ppgtt_set_shadow_entry(spt, &entry, index + i);
}
return 0;
}
static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
struct intel_gvt_gtt_entry *ge)
{
struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
struct intel_gvt_gtt_entry se = *ge;
unsigned long gfn;
unsigned long gfn, page_size = PAGE_SIZE;
dma_addr_t dma_addr;
int ret;
@ -970,16 +1229,33 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
gvt_vdbg_mm("shadow 4K gtt entry\n");
break;
case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
gvt_vdbg_mm("shadow 64K gtt entry\n");
/*
* The layout of a 64K page is special: the page size is
* controlled by the upper PDE. To keep it simple, we always
* split the 64K page into smaller 4K pages in the shadow PT.
*/
return split_64KB_gtt_entry(vgpu, spt, index, &se);
case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
gvt_vdbg_mm("shadow 2M gtt entry\n");
ret = is_2MB_gtt_possible(vgpu, ge);
if (ret == 0)
return split_2MB_gtt_entry(vgpu, spt, index, &se);
else if (ret < 0)
return ret;
page_size = I915_GTT_PAGE_SIZE_2M;
break;
case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
gvt_vgpu_err("GVT doesn't support 2M/1GB entry\n");
gvt_vgpu_err("GVT doesn't support 1GB entry\n");
return -EINVAL;
default:
GEM_BUG_ON(1);
};
/* direct shadow */
ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, &dma_addr);
ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, page_size,
&dma_addr);
if (ret)
return -ENXIO;
@ -1062,8 +1338,12 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt,
ret = ppgtt_invalidate_spt(s);
if (ret)
goto fail;
} else
} else {
/* We don't set up 64K shadow entries so far. */
WARN(se->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY,
"suspicious 64K entry\n");
ppgtt_invalidate_pte(spt, se);
}
return 0;
fail:
@ -1286,7 +1566,7 @@ static int ppgtt_handle_guest_write_page_table(
struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
struct intel_gvt_gtt_entry old_se;
int new_present;
int ret;
int i, ret;
new_present = ops->test_present(we);
@ -1308,8 +1588,27 @@ static int ppgtt_handle_guest_write_page_table(
goto fail;
if (!new_present) {
ops->set_pfn(&old_se, vgpu->gtt.scratch_pt[type].page_mfn);
ppgtt_set_shadow_entry(spt, &old_se, index);
/* For 64KB split entries, we need to clear them all. */
if (ops->test_64k_splited(&old_se) &&
!(index % GTT_64K_PTE_STRIDE)) {
gvt_vdbg_mm("remove splited 64K shadow entries\n");
for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
ops->clear_64k_splited(&old_se);
ops->set_pfn(&old_se,
vgpu->gtt.scratch_pt[type].page_mfn);
ppgtt_set_shadow_entry(spt, &old_se, index + i);
}
} else if (old_se.type == GTT_TYPE_PPGTT_PTE_2M_ENTRY ||
old_se.type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
ops->clear_pse(&old_se);
ops->set_pfn(&old_se,
vgpu->gtt.scratch_pt[type].page_mfn);
ppgtt_set_shadow_entry(spt, &old_se, index);
} else {
ops->set_pfn(&old_se,
vgpu->gtt.scratch_pt[type].page_mfn);
ppgtt_set_shadow_entry(spt, &old_se, index);
}
}
return 0;
@ -1391,7 +1690,17 @@ static int ppgtt_handle_guest_write_page_table_bytes(
ppgtt_get_guest_entry(spt, &we, index);
ops->test_pse(&we);
/*
* For a page table that has 64K gtt entries, only PTE#0, PTE#16,
* PTE#32, ... PTE#496 are used. Updates to the unused PTEs should
* be ignored.
*/
if (we.type == GTT_TYPE_PPGTT_PTE_64K_ENTRY &&
(index % GTT_64K_PTE_STRIDE)) {
gvt_vdbg_mm("Ignore write to unused PTE entry, index %lu\n",
index);
return 0;
}
if (bytes == info->gtt_entry_size) {
ret = ppgtt_handle_guest_write_page_table(spt, &we, index);
@ -1880,7 +2189,7 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
}
ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn,
&dma_addr);
PAGE_SIZE, &dma_addr);
if (ret) {
gvt_vgpu_err("fail to populate guest ggtt entry\n");
/* guest driver may read/write the entry when partial


@ -63,6 +63,12 @@ struct intel_gvt_gtt_pte_ops {
void (*clear_present)(struct intel_gvt_gtt_entry *e);
void (*set_present)(struct intel_gvt_gtt_entry *e);
bool (*test_pse)(struct intel_gvt_gtt_entry *e);
void (*clear_pse)(struct intel_gvt_gtt_entry *e);
bool (*test_ips)(struct intel_gvt_gtt_entry *e);
void (*clear_ips)(struct intel_gvt_gtt_entry *e);
bool (*test_64k_splited)(struct intel_gvt_gtt_entry *e);
void (*clear_64k_splited)(struct intel_gvt_gtt_entry *e);
void (*set_64k_splited)(struct intel_gvt_gtt_entry *e);
void (*set_pfn)(struct intel_gvt_gtt_entry *e, unsigned long pfn);
unsigned long (*get_pfn)(struct intel_gvt_gtt_entry *e);
};
@ -95,6 +101,7 @@ typedef enum {
GTT_TYPE_GGTT_PTE,
GTT_TYPE_PPGTT_PTE_4K_ENTRY,
GTT_TYPE_PPGTT_PTE_64K_ENTRY,
GTT_TYPE_PPGTT_PTE_2M_ENTRY,
GTT_TYPE_PPGTT_PTE_1G_ENTRY,
@ -220,6 +227,7 @@ struct intel_vgpu_ppgtt_spt {
struct {
intel_gvt_gtt_type_t type;
bool pde_ips; /* for 64KB PTEs */
void *vaddr;
struct page *page;
unsigned long mfn;
@ -227,6 +235,7 @@ struct intel_vgpu_ppgtt_spt {
struct {
intel_gvt_gtt_type_t type;
bool pde_ips; /* for 64KB PTEs */
unsigned long gfn;
unsigned long write_cnt;
struct intel_vgpu_oos_page *oos_page;


@ -468,3 +468,7 @@ out_clean_idr:
kfree(gvt);
return ret;
}
#if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT)
MODULE_SOFTDEP("pre: kvmgt");
#endif


@ -210,6 +210,31 @@ static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu,
return 0;
}
static int gamw_echo_dev_rw_ia_write(struct intel_vgpu *vgpu,
unsigned int offset, void *p_data, unsigned int bytes)
{
u32 ips = (*(u32 *)p_data) & GAMW_ECO_ENABLE_64K_IPS_FIELD;
if (INTEL_GEN(vgpu->gvt->dev_priv) <= 10) {
if (ips == GAMW_ECO_ENABLE_64K_IPS_FIELD)
gvt_dbg_core("vgpu%d: ips enabled\n", vgpu->id);
else if (!ips)
gvt_dbg_core("vgpu%d: ips disabled\n", vgpu->id);
else {
/* All engines must be enabled together for vGPU,
* since we don't know which engine the ppgtt will
* bind to when shadowing.
*/
gvt_vgpu_err("Unsupported IPS setting %x, cannot enable 64K gtt.\n",
ips);
return -EINVAL;
}
}
write_vreg(vgpu, offset, p_data, bytes);
return 0;
}
static int fence_mmio_read(struct intel_vgpu *vgpu, unsigned int off,
void *p_data, unsigned int bytes)
{
@ -1559,6 +1584,13 @@ static int bxt_gt_disp_pwron_write(struct intel_vgpu *vgpu,
return 0;
}
static int bxt_edp_psr_imr_iir_write(struct intel_vgpu *vgpu,
unsigned int offset, void *p_data, unsigned int bytes)
{
vgpu_vreg(vgpu, offset) = 0;
return 0;
}
static int mmio_read_from_hw(struct intel_vgpu *vgpu,
unsigned int offset, void *p_data, unsigned int bytes)
{
@ -1769,7 +1801,9 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
MMIO_RING_DFH(RING_HWSTAM, D_ALL, F_CMD_ACCESS, NULL, NULL);
MMIO_GM_RDR(RENDER_HWS_PGA_GEN7, D_ALL, NULL, NULL);
MMIO_DH(GEN8_GAMW_ECO_DEV_RW_IA, D_BDW_PLUS, NULL,
gamw_echo_dev_rw_ia_write);
MMIO_GM_RDR(BSD_HWS_PGA_GEN7, D_ALL, NULL, NULL);
MMIO_GM_RDR(BLT_HWS_PGA_GEN7, D_ALL, NULL, NULL);
MMIO_GM_RDR(VEBOX_HWS_PGA_GEN7, D_ALL, NULL, NULL);
@ -3155,6 +3189,9 @@ static int init_bxt_mmio_info(struct intel_gvt *gvt)
MMIO_D(HSW_TVIDEO_DIP_GCP(TRANSCODER_B), D_BXT);
MMIO_D(HSW_TVIDEO_DIP_GCP(TRANSCODER_C), D_BXT);
MMIO_DH(EDP_PSR_IMR, D_BXT, NULL, bxt_edp_psr_imr_iir_write);
MMIO_DH(EDP_PSR_IIR, D_BXT, NULL, bxt_edp_psr_imr_iir_write);
MMIO_D(RC6_CTX_BASE, D_BXT);
MMIO_D(GEN8_PUSHBUS_CONTROL, D_BXT);


@ -53,7 +53,7 @@ struct intel_gvt_mpt {
unsigned long (*gfn_to_mfn)(unsigned long handle, unsigned long gfn);
int (*dma_map_guest_page)(unsigned long handle, unsigned long gfn,
dma_addr_t *dma_addr);
unsigned long size, dma_addr_t *dma_addr);
void (*dma_unmap_guest_page)(unsigned long handle, dma_addr_t dma_addr);
int (*map_gfn_to_mfn)(unsigned long handle, unsigned long gfn,


@ -94,6 +94,7 @@ struct gvt_dma {
struct rb_node dma_addr_node;
gfn_t gfn;
dma_addr_t dma_addr;
unsigned long size;
struct kref ref;
};
@ -106,45 +107,103 @@ static int kvmgt_guest_init(struct mdev_device *mdev);
static void intel_vgpu_release_work(struct work_struct *work);
static bool kvmgt_guest_exit(struct kvmgt_guest_info *info);
static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn,
dma_addr_t *dma_addr)
static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
unsigned long size)
{
struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
struct page *page;
unsigned long pfn;
int total_pages;
int npage;
int ret;
/* Pin the page first. */
ret = vfio_pin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1,
IOMMU_READ | IOMMU_WRITE, &pfn);
if (ret != 1) {
gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n",
gfn, ret);
return -EINVAL;
total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE;
for (npage = 0; npage < total_pages; npage++) {
unsigned long cur_gfn = gfn + npage;
ret = vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &cur_gfn, 1);
WARN_ON(ret != 1);
}
}
/* Pin a normal or compound guest page for dma. */
static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
unsigned long size, struct page **page)
{
unsigned long base_pfn = 0;
int total_pages;
int npage;
int ret;
total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE;
/*
* We pin the pages one by one to avoid allocating a big array
* on the stack to hold the pfns.
*/
for (npage = 0; npage < total_pages; npage++) {
unsigned long cur_gfn = gfn + npage;
unsigned long pfn;
ret = vfio_pin_pages(mdev_dev(vgpu->vdev.mdev), &cur_gfn, 1,
IOMMU_READ | IOMMU_WRITE, &pfn);
if (ret != 1) {
gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n",
cur_gfn, ret);
goto err;
}
if (!pfn_valid(pfn)) {
gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn);
npage++;
ret = -EFAULT;
goto err;
}
if (npage == 0)
base_pfn = pfn;
else if (base_pfn + npage != pfn) {
gvt_vgpu_err("The pages are not continuous\n");
ret = -EINVAL;
npage++;
goto err;
}
}
*page = pfn_to_page(base_pfn);
return 0;
err:
gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE);
return ret;
}
static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn,
dma_addr_t *dma_addr, unsigned long size)
{
struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
struct page *page = NULL;
int ret;
ret = gvt_pin_guest_page(vgpu, gfn, size, &page);
if (ret)
return ret;
/* Setup DMA mapping. */
page = pfn_to_page(pfn);
*dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE,
PCI_DMA_BIDIRECTIONAL);
if (dma_mapping_error(dev, *dma_addr)) {
gvt_vgpu_err("DMA mapping failed for gfn 0x%lx\n", gfn);
vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1);
return -ENOMEM;
*dma_addr = dma_map_page(dev, page, 0, size, PCI_DMA_BIDIRECTIONAL);
ret = dma_mapping_error(dev, *dma_addr);
if (ret) {
gvt_vgpu_err("DMA mapping failed for pfn 0x%lx, ret %d\n",
page_to_pfn(page), ret);
gvt_unpin_guest_page(vgpu, gfn, size);
}
return 0;
return ret;
}
static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn,
dma_addr_t dma_addr)
dma_addr_t dma_addr, unsigned long size)
{
struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
int ret;
dma_unmap_page(dev, dma_addr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
ret = vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1);
WARN_ON(ret != 1);
dma_unmap_page(dev, dma_addr, size, PCI_DMA_BIDIRECTIONAL);
gvt_unpin_guest_page(vgpu, gfn, size);
}
static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu,
@ -185,7 +244,7 @@ static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn)
}
static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
dma_addr_t dma_addr)
dma_addr_t dma_addr, unsigned long size)
{
struct gvt_dma *new, *itr;
struct rb_node **link, *parent = NULL;
@ -197,6 +256,7 @@ static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
new->vgpu = vgpu;
new->gfn = gfn;
new->dma_addr = dma_addr;
new->size = size;
kref_init(&new->ref);
/* gfn_cache maps gfn to struct gvt_dma. */
@ -254,7 +314,7 @@ static void gvt_cache_destroy(struct intel_vgpu *vgpu)
break;
}
dma = rb_entry(node, struct gvt_dma, gfn_node);
gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr);
gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr, dma->size);
__gvt_cache_remove_entry(vgpu, dma);
mutex_unlock(&vgpu->vdev.cache_lock);
}
@ -509,7 +569,8 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
if (!entry)
continue;
gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr);
gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
entry->size);
__gvt_cache_remove_entry(vgpu, entry);
}
mutex_unlock(&vgpu->vdev.cache_lock);
@ -1616,7 +1677,7 @@ static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
}
int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn,
dma_addr_t *dma_addr)
unsigned long size, dma_addr_t *dma_addr)
{
struct kvmgt_guest_info *info;
struct intel_vgpu *vgpu;
@ -1633,11 +1694,11 @@ int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn,
entry = __gvt_cache_find_gfn(info->vgpu, gfn);
if (!entry) {
ret = gvt_dma_map_page(vgpu, gfn, dma_addr);
ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
if (ret)
goto err_unlock;
ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr);
ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr, size);
if (ret)
goto err_unmap;
} else {
@ -1649,7 +1710,7 @@ int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn,
return 0;
err_unmap:
gvt_dma_unmap_page(vgpu, gfn, *dma_addr);
gvt_dma_unmap_page(vgpu, gfn, *dma_addr, size);
err_unlock:
mutex_unlock(&info->vgpu->vdev.cache_lock);
return ret;
@ -1659,7 +1720,8 @@ static void __gvt_dma_release(struct kref *ref)
{
struct gvt_dma *entry = container_of(ref, typeof(*entry), ref);
gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr);
gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr,
entry->size);
__gvt_cache_remove_entry(entry->vgpu, entry);
}


@ -230,17 +230,18 @@ static inline unsigned long intel_gvt_hypervisor_gfn_to_mfn(
/**
* intel_gvt_hypervisor_dma_map_guest_page - set up DMA mapping for a guest page
* @vgpu: a vGPU
* @gpfn: guest pfn
* @gfn: guest pfn
* @size: page size
* @dma_addr: retrieve allocated dma addr
*
* Returns:
* 0 on success, negative error code if failed.
*/
static inline int intel_gvt_hypervisor_dma_map_guest_page(
struct intel_vgpu *vgpu, unsigned long gfn,
struct intel_vgpu *vgpu, unsigned long gfn, unsigned long size,
dma_addr_t *dma_addr)
{
return intel_gvt_host.mpt->dma_map_guest_page(vgpu->handle, gfn,
return intel_gvt_host.mpt->dma_map_guest_page(vgpu->handle, gfn, size,
dma_addr);
}


@ -46,6 +46,7 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu)
vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) = VGT_CAPS_FULL_48BIT_PPGTT;
vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) |= VGT_CAPS_HWSP_EMULATION;
vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) |= VGT_CAPS_HUGE_GTT;
vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.mappable_gmadr.base)) =
vgpu_aperture_gmadr_base(vgpu);