1f106ff0ea
IOTLB device presence, iommu coherency and snooping are boolean capabilities. Use them as bits and keep them adjacent. Structure layout before the reorg. $ pahole -C dmar_domain drivers/iommu/intel/dmar.o struct dmar_domain { int nid; /* 0 4 */ unsigned int iommu_refcnt[128]; /* 4 512 */ /* --- cacheline 8 boundary (512 bytes) was 4 bytes ago --- */ u16 iommu_did[128]; /* 516 256 */ /* --- cacheline 12 boundary (768 bytes) was 4 bytes ago --- */ bool has_iotlb_device; /* 772 1 */ /* XXX 3 bytes hole, try to pack */ struct list_head devices; /* 776 16 */ struct list_head subdevices; /* 792 16 */ struct iova_domain iovad __attribute__((__aligned__(8))); /* 808 2320 */ /* --- cacheline 48 boundary (3072 bytes) was 56 bytes ago --- */ struct dma_pte * pgd; /* 3128 8 */ /* --- cacheline 49 boundary (3136 bytes) --- */ int gaw; /* 3136 4 */ int agaw; /* 3140 4 */ int flags; /* 3144 4 */ int iommu_coherency; /* 3148 4 */ int iommu_snooping; /* 3152 4 */ int iommu_count; /* 3156 4 */ int iommu_superpage; /* 3160 4 */ /* XXX 4 bytes hole, try to pack */ u64 max_addr; /* 3168 8 */ u32 default_pasid; /* 3176 4 */ /* XXX 4 bytes hole, try to pack */ struct iommu_domain domain; /* 3184 72 */ /* size: 3256, cachelines: 51, members: 18 */ /* sum members: 3245, holes: 3, sum holes: 11 */ /* forced alignments: 1 */ /* last cacheline: 56 bytes */ } __attribute__((__aligned__(8))); After arranging it for natural padding and to make flags as u8 bits, it saves 8 bytes for the struct. struct dmar_domain { int nid; /* 0 4 */ unsigned int iommu_refcnt[128]; /* 4 512 */ /* --- cacheline 8 boundary (512 bytes) was 4 bytes ago --- */ u16 iommu_did[128]; /* 516 256 */ /* --- cacheline 12 boundary (768 bytes) was 4 bytes ago --- */ u8 has_iotlb_device:1; /* 772: 0 1 */ u8 iommu_coherency:1; /* 772: 1 1 */ u8 iommu_snooping:1; /* 772: 2 1 */ /* XXX 5 bits hole, try to pack */ /* XXX 3 bytes hole, try to pack */ struct list_head devices; /* 776 16 */ struct list_head subdevices; /* 792 16 */ struct iova_domain iovad __attribute__((__aligned__(8))); /* 808 2320 */ /* --- cacheline 48 boundary (3072 bytes) was 56 bytes ago --- */ struct dma_pte * pgd; /* 3128 8 */ /* --- cacheline 49 boundary (3136 bytes) --- */ int gaw; /* 3136 4 */ int agaw; /* 3140 4 */ int flags; /* 3144 4 */ int iommu_count; /* 3148 4 */ int iommu_superpage; /* 3152 4 */ /* XXX 4 bytes hole, try to pack */ u64 max_addr; /* 3160 8 */ u32 default_pasid; /* 3168 4 */ /* XXX 4 bytes hole, try to pack */ struct iommu_domain domain; /* 3176 72 */ /* size: 3248, cachelines: 51, members: 18 */ /* sum members: 3236, holes: 3, sum holes: 11 */ /* sum bitfield members: 3 bits, bit holes: 1, sum bit holes: 5 bits */ /* forced alignments: 1 */ /* last cacheline: 48 bytes */ } __attribute__((__aligned__(8))); Signed-off-by: Parav Pandit <parav@nvidia.com> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com> Link: https://lore.kernel.org/r/20210530075053.264218-1-parav@nvidia.com Link: https://lore.kernel.org/r/20210610020115.1637656-20-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel <jroedel@suse.de> |
||
---|---|---|
.. | ||
amd | ||
arm | ||
intel | ||
dma-iommu.c | ||
exynos-iommu.c | ||
fsl_pamu_domain.c | ||
fsl_pamu_domain.h | ||
fsl_pamu.c | ||
fsl_pamu.h | ||
hyperv-iommu.c | ||
io-pgfault.c | ||
io-pgtable-arm-v7s.c | ||
io-pgtable-arm.c | ||
io-pgtable-arm.h | ||
io-pgtable.c | ||
ioasid.c | ||
iommu-debugfs.c | ||
iommu-sva-lib.c | ||
iommu-sva-lib.h | ||
iommu-sysfs.c | ||
iommu-traces.c | ||
iommu.c | ||
iova.c | ||
ipmmu-vmsa.c | ||
irq_remapping.c | ||
irq_remapping.h | ||
Kconfig | ||
Makefile | ||
msm_iommu_hw-8xxx.h | ||
msm_iommu.c | ||
msm_iommu.h | ||
mtk_iommu_v1.c | ||
mtk_iommu.c | ||
mtk_iommu.h | ||
of_iommu.c | ||
omap-iommu-debug.c | ||
omap-iommu.c | ||
omap-iommu.h | ||
omap-iopgtable.h | ||
rockchip-iommu.c | ||
s390-iommu.c | ||
sprd-iommu.c | ||
sun50i-iommu.c | ||
tegra-gart.c | ||
tegra-smmu.c | ||
virtio-iommu.c |