forked from Minki/linux
IB/hfi1: Optimize devdata cachelines
Profiling shows hot path struct members that need to be in a minimum set of cachelines. Group these struct members in the same cacheline: sc2vl_lock, sc2vl, rhf_rcv_function_map, rcv_limit, rhf_offset. Group these struct members in the same cacheline: process_pio_send, process_dma_send, pport, rcd, int_counter, flags, num_pports, first_user_ctxt. Fill holes in struct hfi1_devdata revealed by pahole. Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com>
This commit is contained in:
parent
a6cd5f08e0
commit
6e768f0682
@ -852,32 +852,29 @@ struct hfi1_devdata {
|
||||
u8 __iomem *kregend;
|
||||
/* physical address of chip for io_remap, etc. */
|
||||
resource_size_t physaddr;
|
||||
/* receive context data */
|
||||
struct hfi1_ctxtdata **rcd;
|
||||
/* Per VL data. Enough for all VLs but not all elements are set/used. */
|
||||
struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
|
||||
/* send context data */
|
||||
struct send_context_info *send_contexts;
|
||||
/* map hardware send contexts to software index */
|
||||
u8 *hw_to_sw;
|
||||
/* spinlock for allocating and releasing send context resources */
|
||||
spinlock_t sc_lock;
|
||||
/* Per VL data. Enough for all VLs but not all elements are set/used. */
|
||||
struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
|
||||
/* lock for pio_map */
|
||||
spinlock_t pio_map_lock;
|
||||
/* Send Context initialization lock. */
|
||||
spinlock_t sc_init_lock;
|
||||
/* lock for sdma_map */
|
||||
spinlock_t sde_map_lock;
|
||||
/* array of kernel send contexts */
|
||||
struct send_context **kernel_send_context;
|
||||
/* array of vl maps */
|
||||
struct pio_vl_map __rcu *pio_map;
|
||||
/* seqlock for sc2vl */
|
||||
seqlock_t sc2vl_lock;
|
||||
u64 sc2vl[4];
|
||||
/* Send Context initialization lock. */
|
||||
spinlock_t sc_init_lock;
|
||||
/* default flags to last descriptor */
|
||||
u64 default_desc1;
|
||||
|
||||
/* fields common to all SDMA engines */
|
||||
|
||||
/* default flags to last descriptor */
|
||||
u64 default_desc1;
|
||||
volatile __le64 *sdma_heads_dma; /* DMA'ed by chip */
|
||||
dma_addr_t sdma_heads_phys;
|
||||
void *sdma_pad_dma; /* DMA'ed by chip */
|
||||
@ -888,8 +885,6 @@ struct hfi1_devdata {
|
||||
u32 chip_sdma_engines;
|
||||
/* num used */
|
||||
u32 num_sdma;
|
||||
/* lock for sdma_map */
|
||||
spinlock_t sde_map_lock;
|
||||
/* array of engines sized by num_sdma */
|
||||
struct sdma_engine *per_sdma;
|
||||
/* array of vl maps */
|
||||
@ -898,14 +893,11 @@ struct hfi1_devdata {
|
||||
wait_queue_head_t sdma_unfreeze_wq;
|
||||
atomic_t sdma_unfreeze_count;
|
||||
|
||||
u32 lcb_access_count; /* count of LCB users */
|
||||
|
||||
/* common data between shared ASIC HFIs in this OS */
|
||||
struct hfi1_asic_data *asic_data;
|
||||
|
||||
/* hfi1_pportdata, points to array of (physical) port-specific
|
||||
* data structs, indexed by pidx (0..n-1)
|
||||
*/
|
||||
struct hfi1_pportdata *pport;
|
||||
|
||||
/* mem-mapped pointer to base of PIO buffers */
|
||||
void __iomem *piobase;
|
||||
/*
|
||||
@ -922,20 +914,13 @@ struct hfi1_devdata {
|
||||
/* send context numbers and sizes for each type */
|
||||
struct sc_config_sizes sc_sizes[SC_MAX];
|
||||
|
||||
u32 lcb_access_count; /* count of LCB users */
|
||||
|
||||
char *boardname; /* human readable board info */
|
||||
|
||||
/* device (not port) flags, basically device capabilities */
|
||||
u32 flags;
|
||||
|
||||
/* reset value */
|
||||
u64 z_int_counter;
|
||||
u64 z_rcv_limit;
|
||||
u64 z_send_schedule;
|
||||
/* percpu int_counter */
|
||||
u64 __percpu *int_counter;
|
||||
u64 __percpu *rcv_limit;
|
||||
|
||||
u64 __percpu *send_schedule;
|
||||
/* number of receive contexts in use by the driver */
|
||||
u32 num_rcv_contexts;
|
||||
@ -950,6 +935,7 @@ struct hfi1_devdata {
|
||||
/* base receive interrupt timeout, in CSR units */
|
||||
u32 rcv_intr_timeout_csr;
|
||||
|
||||
u32 freezelen; /* max length of freezemsg */
|
||||
u64 __iomem *egrtidbase;
|
||||
spinlock_t sendctrl_lock; /* protect changes to SendCtrl */
|
||||
spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */
|
||||
@ -971,7 +957,6 @@ struct hfi1_devdata {
|
||||
* IB link status cheaply
|
||||
*/
|
||||
struct hfi1_status *status;
|
||||
u32 freezelen; /* max length of freezemsg */
|
||||
|
||||
/* revision register shadow */
|
||||
u64 revision;
|
||||
@ -999,6 +984,8 @@ struct hfi1_devdata {
|
||||
u16 rcvegrbufsize_shift;
|
||||
/* both sides of the PCIe link are gen3 capable */
|
||||
u8 link_gen3_capable;
|
||||
/* default link down value (poll/sleep) */
|
||||
u8 link_default;
|
||||
/* localbus width (1, 2,4,8,16,32) from config space */
|
||||
u32 lbus_width;
|
||||
/* localbus speed in MHz */
|
||||
@ -1034,8 +1021,6 @@ struct hfi1_devdata {
|
||||
u8 hfi1_id;
|
||||
/* implementation code */
|
||||
u8 icode;
|
||||
/* default link down value (poll/sleep) */
|
||||
u8 link_default;
|
||||
/* vAU of this device */
|
||||
u8 vau;
|
||||
/* vCU of this device */
|
||||
@ -1046,27 +1031,17 @@ struct hfi1_devdata {
|
||||
u16 vl15_init;
|
||||
|
||||
/* Misc small ints */
|
||||
/* Number of physical ports available */
|
||||
u8 num_pports;
|
||||
/* Lowest context number which can be used by user processes */
|
||||
u8 first_user_ctxt;
|
||||
u8 n_krcv_queues;
|
||||
u8 qos_shift;
|
||||
u8 qpn_mask;
|
||||
|
||||
u16 rhf_offset; /* offset of RHF within receive header entry */
|
||||
u16 irev; /* implementation revision */
|
||||
u16 dc8051_ver; /* 8051 firmware version */
|
||||
|
||||
spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
|
||||
struct platform_config platform_config;
|
||||
struct platform_config_cache pcfg_cache;
|
||||
|
||||
struct diag_client *diag_client;
|
||||
spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
|
||||
|
||||
u8 psxmitwait_supported;
|
||||
/* cycle length of PS* counters in HW (in picoseconds) */
|
||||
u16 psxmitwait_check_rate;
|
||||
|
||||
/* MSI-X information */
|
||||
struct hfi1_msix_entry *msix_entries;
|
||||
@ -1081,6 +1056,9 @@ struct hfi1_devdata {
|
||||
|
||||
struct rcv_array_data rcv_entries;
|
||||
|
||||
/* cycle length of PS* counters in HW (in picoseconds) */
|
||||
u16 psxmitwait_check_rate;
|
||||
|
||||
/*
|
||||
* 64 bit synthetic counters
|
||||
*/
|
||||
@ -1113,11 +1091,11 @@ struct hfi1_devdata {
|
||||
struct err_info_rcvport err_info_rcvport;
|
||||
struct err_info_constraint err_info_rcv_constraint;
|
||||
struct err_info_constraint err_info_xmit_constraint;
|
||||
u8 err_info_uncorrectable;
|
||||
u8 err_info_fmconfig;
|
||||
|
||||
atomic_t drop_packet;
|
||||
u8 do_drop;
|
||||
u8 err_info_uncorrectable;
|
||||
u8 err_info_fmconfig;
|
||||
|
||||
/*
|
||||
* Software counters for the status bits defined by the
|
||||
@ -1140,47 +1118,71 @@ struct hfi1_devdata {
|
||||
u64 sw_cce_err_status_aggregate;
|
||||
/* Software counter that aggregates all bypass packet rcv errors */
|
||||
u64 sw_rcv_bypass_packet_errors;
|
||||
/* receive interrupt functions */
|
||||
rhf_rcv_function_ptr *rhf_rcv_function_map;
|
||||
/* receive interrupt function */
|
||||
rhf_rcv_function_ptr normal_rhf_rcv_functions[8];
|
||||
|
||||
/* Save the enabled LCB error bits */
|
||||
u64 lcb_err_en;
|
||||
|
||||
/*
|
||||
* Handlers for outgoing data so that snoop/capture does not
|
||||
* have to have its hooks in the send path
|
||||
*/
|
||||
send_routine process_pio_send;
|
||||
send_routine process_pio_send ____cacheline_aligned_in_smp;
|
||||
send_routine process_dma_send;
|
||||
void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
|
||||
u64 pbc, const void *from, size_t count);
|
||||
/* hfi1_pportdata, points to array of (physical) port-specific
|
||||
* data structs, indexed by pidx (0..n-1)
|
||||
*/
|
||||
struct hfi1_pportdata *pport;
|
||||
/* receive context data */
|
||||
struct hfi1_ctxtdata **rcd;
|
||||
u64 __percpu *int_counter;
|
||||
/* device (not port) flags, basically device capabilities */
|
||||
u16 flags;
|
||||
/* Number of physical ports available */
|
||||
u8 num_pports;
|
||||
/* Lowest context number which can be used by user processes */
|
||||
u8 first_user_ctxt;
|
||||
/* adding a new field here would make it part of this cacheline */
|
||||
|
||||
/* seqlock for sc2vl */
|
||||
seqlock_t sc2vl_lock ____cacheline_aligned_in_smp;
|
||||
u64 sc2vl[4];
|
||||
/* receive interrupt functions */
|
||||
rhf_rcv_function_ptr *rhf_rcv_function_map;
|
||||
u64 __percpu *rcv_limit;
|
||||
u16 rhf_offset; /* offset of RHF within receive header entry */
|
||||
/* adding a new field here would make it part of this cacheline */
|
||||
|
||||
/* OUI comes from the HW. Used everywhere as 3 separate bytes. */
|
||||
u8 oui1;
|
||||
u8 oui2;
|
||||
u8 oui3;
|
||||
u8 dc_shutdown;
|
||||
|
||||
/* Timer and counter used to detect RcvBufOvflCnt changes */
|
||||
struct timer_list rcverr_timer;
|
||||
u32 rcv_ovfl_cnt;
|
||||
|
||||
wait_queue_head_t event_queue;
|
||||
|
||||
/* Save the enabled LCB error bits */
|
||||
u64 lcb_err_en;
|
||||
u8 dc_shutdown;
|
||||
|
||||
/* receive context tail dummy address */
|
||||
__le64 *rcvhdrtail_dummy_kvaddr;
|
||||
dma_addr_t rcvhdrtail_dummy_dma;
|
||||
|
||||
bool eprom_available; /* true if EPROM is available for this device */
|
||||
bool aspm_supported; /* Does HW support ASPM */
|
||||
bool aspm_enabled; /* ASPM state: enabled/disabled */
|
||||
u32 rcv_ovfl_cnt;
|
||||
/* Serialize ASPM enable/disable between multiple verbs contexts */
|
||||
spinlock_t aspm_lock;
|
||||
/* Number of verbs contexts which have disabled ASPM */
|
||||
atomic_t aspm_disabled_cnt;
|
||||
|
||||
struct hfi1_affinity *affinity;
|
||||
bool eprom_available; /* true if EPROM is available for this device */
|
||||
bool aspm_supported; /* Does HW support ASPM */
|
||||
bool aspm_enabled; /* ASPM state: enabled/disabled */
|
||||
struct rhashtable sdma_rht;
|
||||
|
||||
struct kobject kobj;
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user