IB/hfi1: Optimize devdata cachelines
Profiling shows hot path struct members that need to be in a minimum set of cachelines. Group these struct members in the same cacheline: sc2vl_lock, sc2vl, rhf_rcv_function_map, rcv_limit, rhf_offset. Group these struct members in the same cacheline: process_pio_send, process_dma_send, pport, rcd, int_counter, flags, num_pports, first_user_ctxt. Fill holes in struct hfi1_devdata revealed by pahole. Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com>
This commit is contained in:
		
							parent
							
								
									a6cd5f08e0
								
							
						
					
					
						commit
						6e768f0682
					
				| @ -852,32 +852,29 @@ struct hfi1_devdata { | ||||
| 	u8 __iomem *kregend; | ||||
| 	/* physical address of chip for io_remap, etc. */ | ||||
| 	resource_size_t physaddr; | ||||
| 	/* receive context data */ | ||||
| 	struct hfi1_ctxtdata **rcd; | ||||
| 	/* Per VL data. Enough for all VLs but not all elements are set/used. */ | ||||
| 	struct per_vl_data vld[PER_VL_SEND_CONTEXTS]; | ||||
| 	/* send context data */ | ||||
| 	struct send_context_info *send_contexts; | ||||
| 	/* map hardware send contexts to software index */ | ||||
| 	u8 *hw_to_sw; | ||||
| 	/* spinlock for allocating and releasing send context resources */ | ||||
| 	spinlock_t sc_lock; | ||||
| 	/* Per VL data. Enough for all VLs but not all elements are set/used. */ | ||||
| 	struct per_vl_data vld[PER_VL_SEND_CONTEXTS]; | ||||
| 	/* lock for pio_map */ | ||||
| 	spinlock_t pio_map_lock; | ||||
| 	/* Send Context initialization lock. */ | ||||
| 	spinlock_t sc_init_lock; | ||||
| 	/* lock for sdma_map */ | ||||
| 	spinlock_t                          sde_map_lock; | ||||
| 	/* array of kernel send contexts */ | ||||
| 	struct send_context **kernel_send_context; | ||||
| 	/* array of vl maps */ | ||||
| 	struct pio_vl_map __rcu *pio_map; | ||||
| 	/* seqlock for sc2vl */ | ||||
| 	seqlock_t sc2vl_lock; | ||||
| 	u64 sc2vl[4]; | ||||
| 	/* Send Context initialization lock. */ | ||||
| 	spinlock_t sc_init_lock; | ||||
| 	/* default flags to last descriptor */ | ||||
| 	u64 default_desc1; | ||||
| 
 | ||||
| 	/* fields common to all SDMA engines */ | ||||
| 
 | ||||
| 	/* default flags to last descriptor */ | ||||
| 	u64 default_desc1; | ||||
| 	volatile __le64                    *sdma_heads_dma; /* DMA'ed by chip */ | ||||
| 	dma_addr_t                          sdma_heads_phys; | ||||
| 	void                               *sdma_pad_dma; /* DMA'ed by chip */ | ||||
| @ -888,8 +885,6 @@ struct hfi1_devdata { | ||||
| 	u32                                 chip_sdma_engines; | ||||
| 	/* num used */ | ||||
| 	u32                                 num_sdma; | ||||
| 	/* lock for sdma_map */ | ||||
| 	spinlock_t                          sde_map_lock; | ||||
| 	/* array of engines sized by num_sdma */ | ||||
| 	struct sdma_engine                 *per_sdma; | ||||
| 	/* array of vl maps */ | ||||
| @ -898,14 +893,11 @@ struct hfi1_devdata { | ||||
| 	wait_queue_head_t		  sdma_unfreeze_wq; | ||||
| 	atomic_t			  sdma_unfreeze_count; | ||||
| 
 | ||||
| 	u32 lcb_access_count;		/* count of LCB users */ | ||||
| 
 | ||||
| 	/* common data between shared ASIC HFIs in this OS */ | ||||
| 	struct hfi1_asic_data *asic_data; | ||||
| 
 | ||||
| 	/* hfi1_pportdata, points to array of (physical) port-specific
 | ||||
| 	 * data structs, indexed by pidx (0..n-1) | ||||
| 	 */ | ||||
| 	struct hfi1_pportdata *pport; | ||||
| 
 | ||||
| 	/* mem-mapped pointer to base of PIO buffers */ | ||||
| 	void __iomem *piobase; | ||||
| 	/*
 | ||||
| @ -922,20 +914,13 @@ struct hfi1_devdata { | ||||
| 	/* send context numbers and sizes for each type */ | ||||
| 	struct sc_config_sizes sc_sizes[SC_MAX]; | ||||
| 
 | ||||
| 	u32 lcb_access_count;		/* count of LCB users */ | ||||
| 
 | ||||
| 	char *boardname; /* human readable board info */ | ||||
| 
 | ||||
| 	/* device (not port) flags, basically device capabilities */ | ||||
| 	u32 flags; | ||||
| 
 | ||||
| 	/* reset value */ | ||||
| 	u64 z_int_counter; | ||||
| 	u64 z_rcv_limit; | ||||
| 	u64 z_send_schedule; | ||||
| 	/* percpu int_counter */ | ||||
| 	u64 __percpu *int_counter; | ||||
| 	u64 __percpu *rcv_limit; | ||||
| 
 | ||||
| 	u64 __percpu *send_schedule; | ||||
| 	/* number of receive contexts in use by the driver */ | ||||
| 	u32 num_rcv_contexts; | ||||
| @ -950,6 +935,7 @@ struct hfi1_devdata { | ||||
| 	/* base receive interrupt timeout, in CSR units */ | ||||
| 	u32 rcv_intr_timeout_csr; | ||||
| 
 | ||||
| 	u32 freezelen; /* max length of freezemsg */ | ||||
| 	u64 __iomem *egrtidbase; | ||||
| 	spinlock_t sendctrl_lock; /* protect changes to SendCtrl */ | ||||
| 	spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */ | ||||
| @ -971,7 +957,6 @@ struct hfi1_devdata { | ||||
| 	 * IB link status cheaply | ||||
| 	 */ | ||||
| 	struct hfi1_status *status; | ||||
| 	u32 freezelen; /* max length of freezemsg */ | ||||
| 
 | ||||
| 	/* revision register shadow */ | ||||
| 	u64 revision; | ||||
| @ -999,6 +984,8 @@ struct hfi1_devdata { | ||||
| 	u16 rcvegrbufsize_shift; | ||||
| 	/* both sides of the PCIe link are gen3 capable */ | ||||
| 	u8 link_gen3_capable; | ||||
| 	/* default link down value (poll/sleep) */ | ||||
| 	u8 link_default; | ||||
| 	/* localbus width (1, 2,4,8,16,32) from config space  */ | ||||
| 	u32 lbus_width; | ||||
| 	/* localbus speed in MHz */ | ||||
| @ -1034,8 +1021,6 @@ struct hfi1_devdata { | ||||
| 	u8 hfi1_id; | ||||
| 	/* implementation code */ | ||||
| 	u8 icode; | ||||
| 	/* default link down value (poll/sleep) */ | ||||
| 	u8 link_default; | ||||
| 	/* vAU of this device */ | ||||
| 	u8 vau; | ||||
| 	/* vCU of this device */ | ||||
| @ -1046,27 +1031,17 @@ struct hfi1_devdata { | ||||
| 	u16 vl15_init; | ||||
| 
 | ||||
| 	/* Misc small ints */ | ||||
| 	/* Number of physical ports available */ | ||||
| 	u8 num_pports; | ||||
| 	/* Lowest context number which can be used by user processes */ | ||||
| 	u8 first_user_ctxt; | ||||
| 	u8 n_krcv_queues; | ||||
| 	u8 qos_shift; | ||||
| 	u8 qpn_mask; | ||||
| 
 | ||||
| 	u16 rhf_offset; /* offset of RHF within receive header entry */ | ||||
| 	u16 irev;	/* implementation revision */ | ||||
| 	u16 dc8051_ver; /* 8051 firmware version */ | ||||
| 
 | ||||
| 	spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */ | ||||
| 	struct platform_config platform_config; | ||||
| 	struct platform_config_cache pcfg_cache; | ||||
| 
 | ||||
| 	struct diag_client *diag_client; | ||||
| 	spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */ | ||||
| 
 | ||||
| 	u8 psxmitwait_supported; | ||||
| 	/* cycle length of PS* counters in HW (in picoseconds) */ | ||||
| 	u16 psxmitwait_check_rate; | ||||
| 
 | ||||
| 	/* MSI-X information */ | ||||
| 	struct hfi1_msix_entry *msix_entries; | ||||
| @ -1081,6 +1056,9 @@ struct hfi1_devdata { | ||||
| 
 | ||||
| 	struct rcv_array_data rcv_entries; | ||||
| 
 | ||||
| 	/* cycle length of PS* counters in HW (in picoseconds) */ | ||||
| 	u16 psxmitwait_check_rate; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * 64 bit synthetic counters | ||||
| 	 */ | ||||
| @ -1113,11 +1091,11 @@ struct hfi1_devdata { | ||||
| 	struct err_info_rcvport err_info_rcvport; | ||||
| 	struct err_info_constraint err_info_rcv_constraint; | ||||
| 	struct err_info_constraint err_info_xmit_constraint; | ||||
| 	u8 err_info_uncorrectable; | ||||
| 	u8 err_info_fmconfig; | ||||
| 
 | ||||
| 	atomic_t drop_packet; | ||||
| 	u8 do_drop; | ||||
| 	u8 err_info_uncorrectable; | ||||
| 	u8 err_info_fmconfig; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Software counters for the status bits defined by the | ||||
| @ -1140,47 +1118,71 @@ struct hfi1_devdata { | ||||
| 	u64 sw_cce_err_status_aggregate; | ||||
| 	/* Software counter that aggregates all bypass packet rcv errors */ | ||||
| 	u64 sw_rcv_bypass_packet_errors; | ||||
| 	/* receive interrupt functions */ | ||||
| 	rhf_rcv_function_ptr *rhf_rcv_function_map; | ||||
| 	/* receive interrupt function */ | ||||
| 	rhf_rcv_function_ptr normal_rhf_rcv_functions[8]; | ||||
| 
 | ||||
| 	/* Save the enabled LCB error bits */ | ||||
| 	u64 lcb_err_en; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Handlers for outgoing data so that snoop/capture does not | ||||
| 	 * have to have its hooks in the send path | ||||
| 	 */ | ||||
| 	send_routine process_pio_send; | ||||
| 	send_routine process_pio_send ____cacheline_aligned_in_smp; | ||||
| 	send_routine process_dma_send; | ||||
| 	void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf, | ||||
| 				u64 pbc, const void *from, size_t count); | ||||
| 	/* hfi1_pportdata, points to array of (physical) port-specific
 | ||||
| 	 * data structs, indexed by pidx (0..n-1) | ||||
| 	 */ | ||||
| 	struct hfi1_pportdata *pport; | ||||
| 	/* receive context data */ | ||||
| 	struct hfi1_ctxtdata **rcd; | ||||
| 	u64 __percpu *int_counter; | ||||
| 	/* device (not port) flags, basically device capabilities */ | ||||
| 	u16 flags; | ||||
| 	/* Number of physical ports available */ | ||||
| 	u8 num_pports; | ||||
| 	/* Lowest context number which can be used by user processes */ | ||||
| 	u8 first_user_ctxt; | ||||
| 	/* adding a new field here would make it part of this cacheline */ | ||||
| 
 | ||||
| 	/* seqlock for sc2vl */ | ||||
| 	seqlock_t sc2vl_lock ____cacheline_aligned_in_smp; | ||||
| 	u64 sc2vl[4]; | ||||
| 	/* receive interrupt functions */ | ||||
| 	rhf_rcv_function_ptr *rhf_rcv_function_map; | ||||
| 	u64 __percpu *rcv_limit; | ||||
| 	u16 rhf_offset; /* offset of RHF within receive header entry */ | ||||
| 	/* adding a new field here would make it part of this cacheline */ | ||||
| 
 | ||||
| 	/* OUI comes from the HW. Used everywhere as 3 separate bytes. */ | ||||
| 	u8 oui1; | ||||
| 	u8 oui2; | ||||
| 	u8 oui3; | ||||
| 	u8 dc_shutdown; | ||||
| 
 | ||||
| 	/* Timer and counter used to detect RcvBufOvflCnt changes */ | ||||
| 	struct timer_list rcverr_timer; | ||||
| 	u32 rcv_ovfl_cnt; | ||||
| 
 | ||||
| 	wait_queue_head_t event_queue; | ||||
| 
 | ||||
| 	/* Save the enabled LCB error bits */ | ||||
| 	u64 lcb_err_en; | ||||
| 	u8 dc_shutdown; | ||||
| 
 | ||||
| 	/* receive context tail dummy address */ | ||||
| 	__le64 *rcvhdrtail_dummy_kvaddr; | ||||
| 	dma_addr_t rcvhdrtail_dummy_dma; | ||||
| 
 | ||||
| 	bool eprom_available;	/* true if EPROM is available for this device */ | ||||
| 	bool aspm_supported;	/* Does HW support ASPM */ | ||||
| 	bool aspm_enabled;	/* ASPM state: enabled/disabled */ | ||||
| 	u32 rcv_ovfl_cnt; | ||||
| 	/* Serialize ASPM enable/disable between multiple verbs contexts */ | ||||
| 	spinlock_t aspm_lock; | ||||
| 	/* Number of verbs contexts which have disabled ASPM */ | ||||
| 	atomic_t aspm_disabled_cnt; | ||||
| 
 | ||||
| 	struct hfi1_affinity *affinity; | ||||
| 	bool eprom_available;	/* true if EPROM is available for this device */ | ||||
| 	bool aspm_supported;	/* Does HW support ASPM */ | ||||
| 	bool aspm_enabled;	/* ASPM state: enabled/disabled */ | ||||
| 	struct rhashtable sdma_rht; | ||||
| 
 | ||||
| 	struct kobject kobj; | ||||
| }; | ||||
| 
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user