From d84c4d194ebad0f5d327da72404c37c7de2c1714 Mon Sep 17 00:00:00 2001 From: Jimmy Kizito <Jimmy.Kizito@amd.com> Date: Thu, 14 Apr 2022 09:49:37 -0400 Subject: [PATCH 01/46] drm/amd/display: Update link training fallback behaviour. [Why] Some displays may need several link training attempts before link training succeeds. [How] If training succeeds after falling back to lower link bandwidth, retry at original link bandwidth instead of abandoning link training whenever link bandwidth is less than stream bandwidth. Reviewed-by: Jun Lei <Jun.Lei@amd.com> Acked-by: Qingqing Zhuo <qingqing.zhuo@amd.com> Signed-off-by: Jimmy Kizito <Jimmy.Kizito@amd.com> Tested-by: Daniel Wheeler <daniel.wheeler@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- .../gpu/drm/amd/display/dc/core/dc_link_dp.c | 77 +++++++++++++------ 1 file changed, 53 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 975d631534b5..d8de8dbf3676 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -2783,31 +2783,37 @@ bool perform_link_training_with_retries( struct dc_link *link = stream->link; enum dp_panel_mode panel_mode = dp_get_panel_mode(link); enum link_training_result status = LINK_TRAINING_CR_FAIL_LANE0; - struct dc_link_settings current_setting = *link_setting; + struct dc_link_settings cur_link_settings = *link_setting; const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res); int fail_count = 0; + bool is_link_bw_low = false; /* link bandwidth < stream bandwidth */ + bool is_link_bw_min = /* RBR x 1 */ + (cur_link_settings.link_rate <= LINK_RATE_LOW) && + (cur_link_settings.lane_count <= LANE_COUNT_ONE); dp_trace_commit_lt_init(link); - if (dp_get_link_encoding_format(&current_setting) == DP_8b_10b_ENCODING) + if (dp_get_link_encoding_format(&cur_link_settings) == DP_8b_10b_ENCODING) /* We need to do 
this before the link training to ensure the idle * pattern in SST mode will be sent right after the link training */ link_hwss->setup_stream_encoder(pipe_ctx); dp_trace_set_lt_start_timestamp(link, false); - for (j = 0; j < attempts; ++j) { + j = 0; + while (j < attempts && fail_count < (attempts * 10)) { - DC_LOG_HW_LINK_TRAINING("%s: Beginning link training attempt %u of %d\n", - __func__, (unsigned int)j + 1, attempts); + DC_LOG_HW_LINK_TRAINING("%s: Beginning link training attempt %u of %d @ rate(%d) x lane(%d)\n", + __func__, (unsigned int)j + 1, attempts, cur_link_settings.link_rate, + cur_link_settings.lane_count); dp_enable_link_phy( link, &pipe_ctx->link_res, signal, pipe_ctx->clock_source->id, - &current_setting); + &cur_link_settings); if (stream->sink_patches.dppowerup_delay > 0) { int delay_dp_power_up_in_ms = stream->sink_patches.dppowerup_delay; @@ -2832,30 +2838,30 @@ bool perform_link_training_with_retries( dp_set_panel_mode(link, panel_mode); if (link->aux_access_disabled) { - dc_link_dp_perform_link_training_skip_aux(link, &pipe_ctx->link_res, &current_setting); + dc_link_dp_perform_link_training_skip_aux(link, &pipe_ctx->link_res, &cur_link_settings); return true; } else { /** @todo Consolidate USB4 DP and DPx.x training. */ if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { status = dc_link_dpia_perform_link_training(link, &pipe_ctx->link_res, - &current_setting, + &cur_link_settings, skip_video_pattern); /* Transmit idle pattern once training successful. 
*/ - if (status == LINK_TRAINING_SUCCESS) + if (status == LINK_TRAINING_SUCCESS && !is_link_bw_low) dp_set_hw_test_pattern(link, &pipe_ctx->link_res, DP_TEST_PATTERN_VIDEO_MODE, NULL, 0); } else { status = dc_link_dp_perform_link_training(link, &pipe_ctx->link_res, - &current_setting, + &cur_link_settings, skip_video_pattern); } dp_trace_lt_total_count_increment(link, false); dp_trace_lt_result_update(link, status, false); dp_trace_set_lt_end_timestamp(link, false); - if (status == LINK_TRAINING_SUCCESS) + if (status == LINK_TRAINING_SUCCESS && !is_link_bw_low) return true; } @@ -2866,8 +2872,9 @@ bool perform_link_training_with_retries( if (j == (attempts - 1) && link->ep_type == DISPLAY_ENDPOINT_PHY) break; - DC_LOG_WARNING("%s: Link training attempt %u of %d failed\n", - __func__, (unsigned int)j + 1, attempts); + DC_LOG_WARNING("%s: Link training attempt %u of %d failed @ rate(%d) x lane(%d)\n", + __func__, (unsigned int)j + 1, attempts, cur_link_settings.link_rate, + cur_link_settings.lane_count); dp_disable_link_phy(link, &pipe_ctx->link_res, signal); @@ -2876,27 +2883,49 @@ bool perform_link_training_with_retries( enum dc_connection_type type = dc_connection_none; dc_link_detect_sink(link, &type); - if (type == dc_connection_none) + if (type == dc_connection_none) { + DC_LOG_HW_LINK_TRAINING("%s: Aborting training because sink unplugged\n", __func__); break; - } else if (do_fallback) { + } + } + + /* Try to train again at original settings if: + * - not falling back between training attempts; + * - aborted previous attempt due to reasons other than sink unplug; + * - successfully trained but at a link rate lower than that required by stream; + * - reached minimum link bandwidth. 
+ */ + if (!do_fallback || (status == LINK_TRAINING_ABORT) || + (status == LINK_TRAINING_SUCCESS && is_link_bw_low) || + is_link_bw_min) { + j++; + cur_link_settings = *link_setting; + delay_between_attempts += LINK_TRAINING_RETRY_DELAY; + is_link_bw_low = false; + is_link_bw_min = (cur_link_settings.link_rate <= LINK_RATE_LOW) && + (cur_link_settings.lane_count <= LANE_COUNT_ONE); + + } else if (do_fallback) { /* Try training at lower link bandwidth if doing fallback. */ uint32_t req_bw; uint32_t link_bw; - decide_fallback_link_setting(link, *link_setting, &current_setting, status); - /* Fail link training if reduced link bandwidth no longer meets - * stream requirements. + decide_fallback_link_setting(link, *link_setting, &cur_link_settings, status); + /* Flag if reduced link bandwidth no longer meets stream requirements or fallen back to + * minimum link bandwidth. */ req_bw = dc_bandwidth_in_kbps_from_timing(&stream->timing); - link_bw = dc_link_bandwidth_kbps(link, &current_setting); - if (req_bw > link_bw) - break; + link_bw = dc_link_bandwidth_kbps(link, &cur_link_settings); + is_link_bw_low = (req_bw > link_bw); + is_link_bw_min = ((cur_link_settings.link_rate <= LINK_RATE_LOW) && + (cur_link_settings.lane_count <= LANE_COUNT_ONE)); + + if (is_link_bw_low) + DC_LOG_WARNING("%s: Link bandwidth too low after fallback req_bw(%d) > link_bw(%d)\n", + __func__, req_bw, link_bw); } msleep(delay_between_attempts); - - delay_between_attempts += LINK_TRAINING_RETRY_DELAY; } - return false; } From fc0b067df7ed973addbba8e136d9a729df86ccdc Mon Sep 17 00:00:00 2001 From: Jimmy Kizito <Jimmy.Kizito@amd.com> Date: Fri, 1 Apr 2022 15:24:47 -0400 Subject: [PATCH 02/46] drm/amd/display: Query DPIA HPD status. [Why] Driver needs up to date DPIA HPD status. [How] Use HPD query command to get DPIA HPD status. 
Reviewed-by: Meenakshikumar Somasundaram <Meenakshikumar.Somasundaram@amd.com> Reviewed-by: Jun Lei <Jun.Lei@amd.com> Acked-by: Qingqing Zhuo <qingqing.zhuo@amd.com> Signed-off-by: Jimmy Kizito <Jimmy.Kizito@amd.com> Tested-by: Daniel Wheeler <daniel.wheeler@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 3 ++- .../drm/amd/display/dc/core/dc_link_dpia.c | 19 +++++++++++++++++++ .../gpu/drm/amd/display/dc/inc/dc_link_dpia.h | 5 +++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 67ef357e5798..b40abd2bf7f6 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -33,6 +33,7 @@ #include "gpio_service_interface.h" #include "core_status.h" #include "dc_link_dp.h" +#include "dc_link_dpia.h" #include "dc_link_ddc.h" #include "link_hwss.h" #include "opp.h" @@ -240,7 +241,7 @@ bool dc_link_detect_sink(struct dc_link *link, enum dc_connection_type *type) /* Link may not have physical HPD pin. 
*/ if (link->ep_type != DISPLAY_ENDPOINT_PHY) { - if (link->is_hpd_pending || !link->hpd_status) + if (link->is_hpd_pending || !dc_link_dpia_query_hpd_status(link)) *type = dc_connection_none; else *type = dc_connection_single; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c index a5765f36d86f..1b7a8774b0c9 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c @@ -34,6 +34,7 @@ #include "dm_helpers.h" #include "dmub/inc/dmub_cmd.h" #include "inc/link_dpcd.h" +#include "dc_dmub_srv.h" #define DC_LOGGER \ link->ctx->logger @@ -69,6 +70,24 @@ enum dc_status dpcd_get_tunneling_device_data(struct dc_link *link) return status; } +bool dc_link_dpia_query_hpd_status(struct dc_link *link) +{ + union dmub_rb_cmd cmd = {0}; + struct dc_dmub_srv *dmub_srv = link->ctx->dmub_srv; + bool is_hpd_high = false; + + /* prepare QUERY_HPD command */ + cmd.query_hpd.header.type = DMUB_CMD__QUERY_HPD_STATE; + cmd.query_hpd.data.instance = link->link_id.enum_id - ENUM_ID_1; + cmd.query_hpd.data.ch_type = AUX_CHANNEL_DPIA; + + /* Return HPD status reported by DMUB if query successfully executed. */ + if (dc_dmub_srv_cmd_with_reply_data(dmub_srv, &cmd) && cmd.query_hpd.data.status == AUX_RET_SUCCESS) + is_hpd_high = cmd.query_hpd.data.result; + + return is_hpd_high; +} + /* Configure link as prescribed in link_setting; set LTTPR mode; and * Initialize link training settings. * Abort link training if sink unplug detected. diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dpia.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dpia.h index 74dafd0f9d3d..39c1d1d07357 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dpia.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dpia.h @@ -87,6 +87,11 @@ union dpia_set_config_data { */ enum dc_status dpcd_get_tunneling_device_data(struct dc_link *link); +/* Query hot plug status of USB4 DP tunnel. 
+ * Returns true if HPD high. + */ +bool dc_link_dpia_query_hpd_status(struct dc_link *link); + /* Train DP tunneling link for USB4 DPIA display endpoint. * DPIA equivalent of dc_link_dp_perfrorm_link_training. * Aborts link training upon detection of sink unplug. From 903940b0b7c7f48e9743c65ae7cd65267083539f Mon Sep 17 00:00:00 2001 From: Alvin Lee <Alvin.Lee2@amd.com> Date: Mon, 2 May 2022 15:04:31 -0400 Subject: [PATCH 03/46] drm/amd/display: Clean up code in dc [Why & How] Code clean up in dc. Reviewed-by: Jun Lei <Jun.Lei@amd.com> Acked-by: Qingqing Zhuo <qingqing.zhuo@amd.com> Signed-off-by: Alvin Lee <Alvin.Lee2@amd.com> Tested-by: Daniel Wheeler <daniel.wheeler@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/display/dc/core/dc.c | 15 +++++++++------ .../gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 1 - 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index e41a48f596a3..f14449401188 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2901,14 +2901,15 @@ static void commit_planes_for_stream(struct dc *dc, top_pipe_to_program->stream_res.tg); } - if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) + if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) { dc->hwss.interdependent_update_lock(dc, context, true); - else + } else { /* Lock the top pipe while updating plane addrs, since freesync requires * plane addr update event triggers to be synchronized. 
* top_pipe_to_program is expected to never be NULL */ dc->hwss.pipe_control_lock(dc, top_pipe_to_program, true); + } // Stream updates if (stream_update) @@ -2924,10 +2925,11 @@ static void commit_planes_for_stream(struct dc *dc, if (dc->hwss.program_front_end_for_ctx) dc->hwss.program_front_end_for_ctx(dc, context); - if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) + if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) { dc->hwss.interdependent_update_lock(dc, context, false); - else + } else { dc->hwss.pipe_control_lock(dc, top_pipe_to_program, false); + } dc->hwss.post_unlock_program_front_end(dc, context); return; } @@ -3052,10 +3054,11 @@ static void commit_planes_for_stream(struct dc *dc, } - if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) + if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) { dc->hwss.interdependent_update_lock(dc, context, false); - else + } else { dc->hwss.pipe_control_lock(dc, top_pipe_to_program, false); + } if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) if (top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index e1f87bd72e4a..0da024912dbe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1773,7 +1773,6 @@ void dcn20_post_unlock_program_front_end( */ for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - if (pipe->plane_state && !pipe->top_pipe && pipe->update_flags.bits.enable) { struct hubp *hubp = pipe->plane_res.hubp; int j = 0; From 3f69ee66f507a9e1180fd3a67b43807fae9b0e37 Mon Sep 17 00:00:00 2001 From: Paul Hsieh <paul.hsieh@amd.com> Date: Tue, 3 May 2022 14:26:41 +0800 Subject: [PATCH 04/46] drm/amd/display: clear request when release aux engine [Why] when driver and dmub 
request aux engine at the same time, dmub grant the aux engine but driver fail. Then driver release aux engine but doesn't clear the request bit. Then aux engine will be occupied by driver forever. [How] When driver release aux engine, clear request bit as well. Reviewed-by: Anthony Koo <Anthony.Koo@amd.com> Acked-by: Qingqing Zhuo <qingqing.zhuo@amd.com> Signed-off-by: Paul Hsieh <paul.hsieh@amd.com> Tested-by: Daniel Wheeler <daniel.wheeler@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/display/dc/dce/dce_aux.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c index 29e20d92b0bb..9e39cd7b203e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c @@ -87,7 +87,8 @@ static void release_engine( engine->ddc = NULL; - REG_UPDATE(AUX_ARB_CONTROL, AUX_SW_DONE_USING_AUX_REG, 1); + REG_UPDATE_2(AUX_ARB_CONTROL, AUX_SW_DONE_USING_AUX_REG, 1, + AUX_SW_USE_AUX_REG_REQ, 0); } #define SW_CAN_ACCESS_AUX 1 From 49947b906a6bd9668eaf4f9cf691973c25c26955 Mon Sep 17 00:00:00 2001 From: David Galiffi <David.Galiffi@amd.com> Date: Tue, 3 May 2022 18:30:25 -0400 Subject: [PATCH 05/46] drm/amd/display: Check if modulo is 0 before dividing. [How & Why] If a value of 0 is read, then this will cause a divide-by-0 panic. 
Reviewed-by: Martin Leung <Martin.Leung@amd.com> Acked-by: Qingqing Zhuo <qingqing.zhuo@amd.com> Signed-off-by: David Galiffi <David.Galiffi@amd.com> Tested-by: Daniel Wheeler <daniel.wheeler@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index 5e6fea85a7b5..845aa8a1027d 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -1101,9 +1101,12 @@ static bool get_pixel_clk_frequency_100hz( * not be programmed equal to DPREFCLK */ modulo_hz = REG_READ(MODULO[inst]); - *pixel_clk_khz = div_u64((uint64_t)clock_hz* - clock_source->ctx->dc->clk_mgr->dprefclk_khz*10, - modulo_hz); + if (modulo_hz) + *pixel_clk_khz = div_u64((uint64_t)clock_hz* + clock_source->ctx->dc->clk_mgr->dprefclk_khz*10, + modulo_hz); + else + *pixel_clk_khz = 0; } else { /* NOTE: There is agreement with VBIOS here that MODULO is * programmed equal to DPREFCLK, in which case PHASE will be From ab144f0b4ad615b86934ce9cbdd27b23f65ba3a4 Mon Sep 17 00:00:00 2001 From: Derek Lai <Derek.Lai@amd.com> Date: Thu, 5 May 2022 17:59:49 +0800 Subject: [PATCH 06/46] drm/amd/display: Allow individual control of eDP hotplug support [Why] Second eDP can send display off notification through HPD but DC isn't hooked up to handle. Some primary eDP panels will toggle on/off incorrectly if it's enabled generically. [How] Extend the debug option to allow individually enabling hotplug either the first eDP or the second eDP in a dual eDP system. 
Reviewed-by: Nicholas Kazlauskas <Nicholas.Kazlauskas@amd.com> Acked-by: Qingqing Zhuo <qingqing.zhuo@amd.com> Signed-off-by: Derek Lai <Derek.Lai@amd.com> Tested-by: Daniel Wheeler <daniel.wheeler@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 21 +++++++++++++++++-- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index b40abd2bf7f6..a789ea8af27f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -1605,8 +1605,25 @@ static bool dc_link_construct_legacy(struct dc_link *link, if (link->hpd_gpio) { if (!link->dc->config.allow_edp_hotplug_detection) link->irq_source_hpd = DC_IRQ_SOURCE_INVALID; - link->irq_source_hpd_rx = - dal_irq_get_rx_source(link->hpd_gpio); + + switch (link->dc->config.allow_edp_hotplug_detection) { + case 1: // only the 1st eDP handles hotplug + if (link->link_index == 0) + link->irq_source_hpd_rx = + dal_irq_get_rx_source(link->hpd_gpio); + else + link->irq_source_hpd = DC_IRQ_SOURCE_INVALID; + break; + case 2: // only the 2nd eDP handles hotplug + if (link->link_index == 1) + link->irq_source_hpd_rx = + dal_irq_get_rx_source(link->hpd_gpio); + else + link->irq_source_hpd = DC_IRQ_SOURCE_INVALID; + break; + default: + break; + } } break; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 26c24db8f1da..7cfc04a8ef15 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -329,7 +329,7 @@ struct dc_config { bool disable_dmcu; bool enable_4to1MPC; bool enable_windowed_mpo_odm; - bool allow_edp_hotplug_detection; + uint32_t allow_edp_hotplug_detection; bool clamp_min_dcfclk; uint64_t vblank_alignment_dto_params; uint8_t vblank_alignment_max_frame_time_diff; From 
66a197203794339b028eedfa880bff9367fce783 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com> Date: Thu, 5 May 2022 16:50:42 -0400 Subject: [PATCH 07/46] drm/amd/display: Check zero planes for OTG disable W/A on clock change [Why] A display clock change hang can occur when switching between DIO and HPO enabled modes during the optimize_bandwidth in dc_commit_state_no_check call. This happens when going from 4k120 8bpc 420 to 4k144 10bpc 444. Display clock in the DIO case is 1200MHz, but pixel rate is 600MHz because the pixel format is 420. Display clock in the HPO case is less (800MHz?) because of ODM combine which results in a smaller divider. The DIO is still active in prepare but not active in the optimize which results in the hang occurring. During this change there are no planes on the stream so it's safe to apply the workaround, but dpms_off = false and signal type is not virtual. [How] Check for plane_count == 0, no planes on the stream. It's easiest to check pipe->plane_state == NULL as an equivalent check rather than trying to search for the stream status in the context associated with the stream, so let's do that. The primary, non MPO pipe should not have a NULL plane state. 
Reviewed-by: Dmytro Laktyushkin <Dmytro.Laktyushkin@amd.com> Acked-by: Qingqing Zhuo <qingqing.zhuo@amd.com> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com> Tested-by: Daniel Wheeler <daniel.wheeler@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c | 3 ++- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c index 27501b735a9c..a2ade6e93f5e 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c @@ -91,7 +91,8 @@ static void dcn315_disable_otg_wa(struct clk_mgr *clk_mgr_base, bool disable) if (pipe->top_pipe || pipe->prev_odm_pipe) continue; - if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) { + if (pipe->stream && (pipe->stream->dpms_off || pipe->plane_state == NULL || + dc_is_virtual_signal(pipe->stream->signal))) { if (disable) pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); else diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c index 3121dd2d2a91..fc3af81ed6c6 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c @@ -122,7 +122,8 @@ static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, bool disable) if (pipe->top_pipe || pipe->prev_odm_pipe) continue; - if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) { + if (pipe->stream && (pipe->stream->dpms_off || pipe->plane_state == NULL || + dc_is_virtual_signal(pipe->stream->signal))) { if (disable) 
pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); else From e4b0eac3e6242abf5f5ebcffdeb852e7ffa1c3d0 Mon Sep 17 00:00:00 2001 From: Jasdeep Dhillon <jdhillon@amd.com> Date: Fri, 6 May 2022 13:03:45 -0400 Subject: [PATCH 08/46] drm/amd/display: Move FPU associated DCN30 code to DML folder [why & how] As part of the FPU isolation work documented in https://patchwork.freedesktop.org/series/93042/, isolate code that uses FPU in DCN30 to DML, where all FPU code should locate. Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com> Acked-by: Qingqing Zhuo <qingqing.zhuo@amd.com> Signed-off-by: Jasdeep Dhillon <jdhillon@amd.com> Tested-by: Daniel Wheeler <daniel.wheeler@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- .../gpu/drm/amd/display/dc/dcn30/dcn30_optc.c | 10 + .../drm/amd/display/dc/dcn30/dcn30_resource.c | 518 ++------------- .../drm/amd/display/dc/dcn30/dcn30_resource.h | 5 + .../amd/display/dc/dcn301/dcn301_resource.c | 2 + .../amd/display/dc/dcn302/dcn302_resource.c | 2 + .../amd/display/dc/dcn303/dcn303_resource.c | 2 + .../drm/amd/display/dc/dcn31/dcn31_resource.c | 2 + drivers/gpu/drm/amd/display/dc/dml/Makefile | 3 +- .../drm/amd/display/dc/dml/dcn30/dcn30_fpu.c | 617 ++++++++++++++++++ .../drm/amd/display/dc/dml/dcn30/dcn30_fpu.h | 67 ++ .../gpu/drm/amd/display/dc/inc/core_types.h | 7 + 11 files changed, 757 insertions(+), 478 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c index f5e8916601d3..b604fb26f288 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c @@ -28,6 +28,8 @@ #include "dc.h" #include "dcn_calc_math.h" +#include "dml/dcn30/dcn30_fpu.h" + #define REG(reg)\ optc1->tg_regs->reg @@ -184,6 +186,14 @@ void 
optc3_set_dsc_config(struct timing_generator *optc, } +void optc3_set_vrr_m_const(struct timing_generator *optc, + double vtotal_avg) +{ + DC_FP_START(); + optc3_fpu_set_vrr_m_const(optc, vtotal_avg); + DC_FP_END(); +} + void optc3_set_odm_bypass(struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 336b2ce6a636..1c1a67c4cec1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -84,6 +84,7 @@ #include "dce/dce_aux.h" #include "dce/dce_i2c.h" +#include "dml/dcn30/dcn30_fpu.h" #include "dml/dcn30/display_mode_vba_30.h" #include "vm_helper.h" #include "dcn20/dcn20_vmid.h" @@ -91,137 +92,6 @@ #define DC_LOGGER_INIT(logger) -struct _vcs_dpi_ip_params_st dcn3_0_ip = { - .use_min_dcfclk = 0, - .clamp_min_dcfclk = 0, - .odm_capable = 1, - .gpuvm_enable = 0, - .hostvm_enable = 0, - .gpuvm_max_page_table_levels = 4, - .hostvm_max_page_table_levels = 4, - .hostvm_cached_page_table_levels = 0, - .pte_group_size_bytes = 2048, - .num_dsc = 6, - .rob_buffer_size_kbytes = 184, - .det_buffer_size_kbytes = 184, - .dpte_buffer_size_in_pte_reqs_luma = 84, - .pde_proc_buffer_size_64k_reqs = 48, - .dpp_output_buffer_pixels = 2560, - .opp_output_buffer_lines = 1, - .pixel_chunk_size_kbytes = 8, - .pte_enable = 1, - .max_page_table_levels = 2, - .pte_chunk_size_kbytes = 2, // ? - .meta_chunk_size_kbytes = 2, - .writeback_chunk_size_kbytes = 8, - .line_buffer_size_bits = 789504, - .is_line_buffer_bpp_fixed = 0, // ? - .line_buffer_fixed_bpp = 0, // ? 
- .dcc_supported = true, - .writeback_interface_buffer_size_kbytes = 90, - .writeback_line_buffer_buffer_size = 0, - .max_line_buffer_lines = 12, - .writeback_luma_buffer_size_kbytes = 12, // writeback_line_buffer_buffer_size = 656640 - .writeback_chroma_buffer_size_kbytes = 8, - .writeback_chroma_line_buffer_width_pixels = 4, - .writeback_max_hscl_ratio = 1, - .writeback_max_vscl_ratio = 1, - .writeback_min_hscl_ratio = 1, - .writeback_min_vscl_ratio = 1, - .writeback_max_hscl_taps = 1, - .writeback_max_vscl_taps = 1, - .writeback_line_buffer_luma_buffer_size = 0, - .writeback_line_buffer_chroma_buffer_size = 14643, - .cursor_buffer_size = 8, - .cursor_chunk_size = 2, - .max_num_otg = 6, - .max_num_dpp = 6, - .max_num_wb = 1, - .max_dchub_pscl_bw_pix_per_clk = 4, - .max_pscl_lb_bw_pix_per_clk = 2, - .max_lb_vscl_bw_pix_per_clk = 4, - .max_vscl_hscl_bw_pix_per_clk = 4, - .max_hscl_ratio = 6, - .max_vscl_ratio = 6, - .hscl_mults = 4, - .vscl_mults = 4, - .max_hscl_taps = 8, - .max_vscl_taps = 8, - .dispclk_ramp_margin_percent = 1, - .underscan_factor = 1.11, - .min_vblank_lines = 32, - .dppclk_delay_subtotal = 46, - .dynamic_metadata_vm_enabled = true, - .dppclk_delay_scl_lb_only = 16, - .dppclk_delay_scl = 50, - .dppclk_delay_cnvc_formatter = 27, - .dppclk_delay_cnvc_cursor = 6, - .dispclk_delay_subtotal = 119, - .dcfclk_cstate_latency = 5.2, // SRExitTime - .max_inter_dcn_tile_repeaters = 8, - .odm_combine_4to1_supported = true, - - .xfc_supported = false, - .xfc_fill_bw_overhead_percent = 10.0, - .xfc_fill_constant_bytes = 0, - .gfx7_compat_tiling_supported = 0, - .number_of_cursors = 1, -}; - -struct _vcs_dpi_soc_bounding_box_st dcn3_0_soc = { - .clock_limits = { - { - .state = 0, - .dispclk_mhz = 562.0, - .dppclk_mhz = 300.0, - .phyclk_mhz = 300.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 405.6, - }, - }, - .min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */ - .num_states = 1, - .sr_exit_time_us = 15.5, - .sr_enter_plus_exit_time_us = 20, - 
.urgent_latency_us = 4.0, - .urgent_latency_pixel_data_only_us = 4.0, - .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, - .urgent_latency_vm_data_only_us = 4.0, - .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, - .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, - .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0, - .max_avg_sdp_bw_use_normal_percent = 60.0, - .max_avg_dram_bw_use_normal_percent = 40.0, - .writeback_latency_us = 12.0, - .max_request_size_bytes = 256, - .fabric_datapath_to_dcn_data_return_bytes = 64, - .dcn_downspread_percent = 0.5, - .downspread_percent = 0.38, - .dram_page_open_time_ns = 50.0, - .dram_rw_turnaround_time_ns = 17.5, - .dram_return_buffer_per_channel_bytes = 8192, - .round_trip_ping_latency_dcfclk_cycles = 191, - .urgent_out_of_order_return_per_channel_bytes = 4096, - .channel_interleave_bytes = 256, - .num_banks = 8, - .gpuvm_min_page_size_bytes = 4096, - .hostvm_min_page_size_bytes = 4096, - .dram_clock_change_latency_us = 404, - .dummy_pstate_latency_us = 5, - .writeback_dram_clock_change_latency_us = 23.0, - .return_bus_width_bytes = 64, - .dispclk_dppclk_vco_speed_mhz = 3650, - .xfc_bus_transport_time_us = 20, // ? - .xfc_xbuf_latency_tolerance_us = 4, // ? - .use_urgent_burst_bw = 1, // ? 
- .do_urgent_latency_adjustment = true, - .urgent_latency_adjustment_fabric_clock_component_us = 1.0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, -}; - enum dcn30_clk_src_array_id { DCN30_CLK_SRC_PLL0, DCN30_CLK_SRC_PLL1, @@ -1480,90 +1350,9 @@ int dcn30_populate_dml_pipes_from_context( void dcn30_populate_dml_writeback_from_context( struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes) { - int pipe_cnt, i, j; - double max_calc_writeback_dispclk; - double writeback_dispclk; - struct writeback_st dout_wb; - - for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { - struct dc_stream_state *stream = res_ctx->pipe_ctx[i].stream; - - if (!stream) - continue; - max_calc_writeback_dispclk = 0; - - /* Set writeback information */ - pipes[pipe_cnt].dout.wb_enable = 0; - pipes[pipe_cnt].dout.num_active_wb = 0; - for (j = 0; j < stream->num_wb_info; j++) { - struct dc_writeback_info *wb_info = &stream->writeback_info[j]; - - if (wb_info->wb_enabled && wb_info->writeback_source_plane && - (wb_info->writeback_source_plane == res_ctx->pipe_ctx[i].plane_state)) { - pipes[pipe_cnt].dout.wb_enable = 1; - pipes[pipe_cnt].dout.num_active_wb++; - dout_wb.wb_src_height = wb_info->dwb_params.cnv_params.crop_en ? - wb_info->dwb_params.cnv_params.crop_height : - wb_info->dwb_params.cnv_params.src_height; - dout_wb.wb_src_width = wb_info->dwb_params.cnv_params.crop_en ? 
- wb_info->dwb_params.cnv_params.crop_width : - wb_info->dwb_params.cnv_params.src_width; - dout_wb.wb_dst_width = wb_info->dwb_params.dest_width; - dout_wb.wb_dst_height = wb_info->dwb_params.dest_height; - - /* For IP that doesn't support WB scaling, set h/v taps to 1 to avoid DML validation failure */ - if (dc->dml.ip.writeback_max_hscl_taps > 1) { - dout_wb.wb_htaps_luma = wb_info->dwb_params.scaler_taps.h_taps; - dout_wb.wb_vtaps_luma = wb_info->dwb_params.scaler_taps.v_taps; - } else { - dout_wb.wb_htaps_luma = 1; - dout_wb.wb_vtaps_luma = 1; - } - dout_wb.wb_htaps_chroma = 0; - dout_wb.wb_vtaps_chroma = 0; - dout_wb.wb_hratio = wb_info->dwb_params.cnv_params.crop_en ? - (double)wb_info->dwb_params.cnv_params.crop_width / - (double)wb_info->dwb_params.dest_width : - (double)wb_info->dwb_params.cnv_params.src_width / - (double)wb_info->dwb_params.dest_width; - dout_wb.wb_vratio = wb_info->dwb_params.cnv_params.crop_en ? - (double)wb_info->dwb_params.cnv_params.crop_height / - (double)wb_info->dwb_params.dest_height : - (double)wb_info->dwb_params.cnv_params.src_height / - (double)wb_info->dwb_params.dest_height; - if (wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_ARGB || - wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_RGBA) - dout_wb.wb_pixel_format = dm_444_64; - else - dout_wb.wb_pixel_format = dm_444_32; - - /* Workaround for cases where multiple writebacks are connected to same plane - * In which case, need to compute worst case and set the associated writeback parameters - * This workaround is necessary due to DML computation assuming only 1 set of writeback - * parameters per pipe - */ - writeback_dispclk = dml30_CalculateWriteBackDISPCLK( - dout_wb.wb_pixel_format, - pipes[pipe_cnt].pipe.dest.pixel_rate_mhz, - dout_wb.wb_hratio, - dout_wb.wb_vratio, - dout_wb.wb_htaps_luma, - dout_wb.wb_vtaps_luma, - dout_wb.wb_src_width, - dout_wb.wb_dst_width, - pipes[pipe_cnt].pipe.dest.htotal, - 
dc->current_state->bw_ctx.dml.ip.writeback_line_buffer_buffer_size); - - if (writeback_dispclk > max_calc_writeback_dispclk) { - max_calc_writeback_dispclk = writeback_dispclk; - pipes[pipe_cnt].dout.wb = dout_wb; - } - } - } - - pipe_cnt++; - } - + DC_FP_START(); + dcn30_fpu_populate_dml_writeback_from_context(dc, res_ctx, pipes); + DC_FP_END(); } unsigned int dcn30_calc_max_scaled_time( @@ -1598,7 +1387,7 @@ void dcn30_set_mcif_arb_params( enum mmhubbub_wbif_mode wbif_mode; struct display_mode_lib *dml = &context->bw_ctx.dml; struct mcif_arb_params *wb_arb_params; - int i, j, k, dwb_pipe; + int i, j, dwb_pipe; /* Writeback MCIF_WB arbitration parameters */ dwb_pipe = 0; @@ -1622,17 +1411,15 @@ void dcn30_set_mcif_arb_params( else wbif_mode = PACKED_444; - for (k = 0; k < sizeof(wb_arb_params->cli_watermark)/sizeof(wb_arb_params->cli_watermark[0]); k++) { - wb_arb_params->cli_watermark[k] = get_wm_writeback_urgent(dml, pipes, pipe_cnt) * 1000; - wb_arb_params->pstate_watermark[k] = get_wm_writeback_dram_clock_change(dml, pipes, pipe_cnt) * 1000; - } + DC_FP_START(); + dcn30_fpu_set_mcif_arb_params(wb_arb_params, dml, pipes, pipe_cnt, j); + DC_FP_END(); wb_arb_params->time_per_pixel = (1000000 << 6) / context->res_ctx.pipe_ctx[i].stream->phy_pix_clk; /* time_per_pixel should be in u6.6 format */ wb_arb_params->slice_lines = 32; wb_arb_params->arbitration_slice = 2; /* irrelevant since there is no YUV output */ wb_arb_params->max_scaled_time = dcn30_calc_max_scaled_time(wb_arb_params->time_per_pixel, wbif_mode, wb_arb_params->cli_watermark[0]); /* assume 4 watermark sets have the same value */ - wb_arb_params->dram_speed_change_duration = dml->vba.WritebackAllowDRAMClockChangeEndPosition[j] * pipes[0].clks_cfg.refclk_mhz; /* num_clock_cycles = us * MHz */ dwb_pipe++; @@ -2111,178 +1898,11 @@ validate_out: return out; } -/* - * This must be noinline to ensure anything that deals with FP registers - * is contained within this call; previously our compiling with 
hard-float - * would result in fp instructions being emitted outside of the boundaries - * of the DC_FP_START/END macros, which makes sense as the compiler has no - * idea about what is wrapped and what is not - * - * This is largely just a workaround to avoid breakage introduced with 5.6, - * ideally all fp-using code should be moved into its own file, only that - * should be compiled with hard-float, and all code exported from there - * should be strictly wrapped with DC_FP_START/END - */ -static noinline void dcn30_calculate_wm_and_dlg_fp( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel) -{ - int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; - int i, pipe_idx; - double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][maxMpcComb]; - bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported; - - if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk) - dcfclk = context->bw_ctx.dml.soc.min_dcfclk; - - pipes[0].clks_cfg.voltage = vlevel; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; - pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; - - /* Set B: - * DCFCLK: 1GHz or min required above 1GHz - * FCLK/UCLK: Max - */ - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) { - if (vlevel == 0) { - pipes[0].clks_cfg.voltage = 1; - pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz; - } - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, 
pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - pipes[0].clks_cfg.voltage = vlevel; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; - - /* Set D: - * DCFCLK: Min Required - * FCLK(proportional to UCLK): 1GHz or Max - * MALL stutter, sr_enter_exit = 4, sr_exit = 2us - */ - /* - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - 
context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - */ - - /* Set C: - * DCFCLK: Min Required - * FCLK(proportional to UCLK): 1GHz or Max - * pstate latency overridden to 5us - */ - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { - unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; - unsigned int min_dram_speed_mts_margin = 160; - - if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_unsupported) - min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16; - - /* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */ - for (i = 3; i > 0; i--) - if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts) - break; - - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = 
dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; - } - - context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - if (!pstate_en) { - /* The only difference between A and C is p-state latency, if p-state is not supported we want to - * calculate DLG based on dummy p-state latency, and max out the set A p-state watermark - */ - context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0; - } else { - /* Set A: - * DCFCLK: Min Required - * FCLK(proportional to UCLK): 1GHz or Max - * - * Set A calculated last so that following calculations are based on Set A - */ - dc->res_pool->funcs->update_soc_for_wm_a(dc, context); - context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, 
pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - } - - context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod; - - /* Make set D = set A until set D is enabled */ - context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a; - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); - pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - - if (dc->config.forced_clocks) { - pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; - pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; - } - if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) - pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; - if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) - pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; - - pipe_idx++; - } - - DC_FP_START(); - 
dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); - DC_FP_END(); - - if (!pstate_en) - /* Restore full p-state latency */ - context->bw_ctx.dml.soc.dram_clock_change_latency_us = - dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; -} - void dcn30_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) { - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us; - } + DC_FP_START(); + dcn30_fpu_update_soc_for_wm_a(dc, context); + DC_FP_END(); } void dcn30_calculate_wm_and_dlg( @@ -2292,7 +1912,7 @@ void dcn30_calculate_wm_and_dlg( int vlevel) { DC_FP_START(); - dcn30_calculate_wm_and_dlg_fp(dc, context, pipes, pipe_cnt, vlevel); + dcn30_fpu_calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel); DC_FP_END(); } @@ -2351,40 +1971,6 @@ validate_out: return out; } -/* - * This must be noinline to ensure anything that deals with FP registers - * is contained within this call; previously our compiling with hard-float - * would result in fp instructions being emitted outside of the boundaries - * of the DC_FP_START/END macros, which makes sense as the compiler has no - * idea about what is wrapped and what is not - * - * This is largely just a workaround to avoid breakage introduced with 5.6, - * ideally all fp-using code should be moved into its own file, only that - * should be compiled with hard-float, and all code exported from there - * should be strictly wrapped with DC_FP_START/END - */ -static noinline void dcn30_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, - unsigned int *optimal_dcfclk, - unsigned int 
*optimal_fclk) -{ - double bw_from_dram, bw_from_dram1, bw_from_dram2; - - bw_from_dram1 = uclk_mts * dcn3_0_soc.num_chans * - dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_dram_bw_use_normal_percent / 100); - bw_from_dram2 = uclk_mts * dcn3_0_soc.num_chans * - dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100); - - bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2; - - if (optimal_fclk) - *optimal_fclk = bw_from_dram / - (dcn3_0_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100)); - - if (optimal_dcfclk) - *optimal_dcfclk = bw_from_dram / - (dcn3_0_soc.return_bus_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100)); -} - void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { unsigned int i, j; @@ -2399,47 +1985,43 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params unsigned int num_dcfclk_sta_targets = 4; unsigned int num_uclk_states; + struct dc_bounding_box_max_clk dcn30_bb_max_clk; + + memset(&dcn30_bb_max_clk, 0, sizeof(dcn30_bb_max_clk)); + if (dc->ctx->dc_bios->vram_info.num_chans) dcn3_0_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans; - if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) - dcn3_0_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; - - dcn3_0_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; - dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + DC_FP_START(); + dcn30_fpu_update_dram_channel_width_bytes(dc); + DC_FP_END(); if (bw_params->clk_table.entries[0].memclk_mhz) { - int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0; for (i = 0; i < MAX_NUM_DPM_LVL; i++) { - if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) - max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; - if 
(bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) - max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; - if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) - max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; - if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) - max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; + if (bw_params->clk_table.entries[i].dcfclk_mhz > dcn30_bb_max_clk.max_dcfclk_mhz) + dcn30_bb_max_clk.max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; + if (bw_params->clk_table.entries[i].dispclk_mhz > dcn30_bb_max_clk.max_dispclk_mhz) + dcn30_bb_max_clk.max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; + if (bw_params->clk_table.entries[i].dppclk_mhz > dcn30_bb_max_clk.max_dppclk_mhz) + dcn30_bb_max_clk.max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; + if (bw_params->clk_table.entries[i].phyclk_mhz > dcn30_bb_max_clk.max_phyclk_mhz) + dcn30_bb_max_clk.max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; } - if (!max_dcfclk_mhz) - max_dcfclk_mhz = dcn3_0_soc.clock_limits[0].dcfclk_mhz; - if (!max_dispclk_mhz) - max_dispclk_mhz = dcn3_0_soc.clock_limits[0].dispclk_mhz; - if (!max_dppclk_mhz) - max_dppclk_mhz = dcn3_0_soc.clock_limits[0].dppclk_mhz; - if (!max_phyclk_mhz) - max_phyclk_mhz = dcn3_0_soc.clock_limits[0].phyclk_mhz; + DC_FP_START(); + dcn30_fpu_update_max_clk(&dcn30_bb_max_clk); + DC_FP_END(); - if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { + if (dcn30_bb_max_clk.max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array - dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz; + dcfclk_sta_targets[num_dcfclk_sta_targets] = dcn30_bb_max_clk.max_dcfclk_mhz; num_dcfclk_sta_targets++; - } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { + } else if 
(dcn30_bb_max_clk.max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates for (i = 0; i < num_dcfclk_sta_targets; i++) { - if (dcfclk_sta_targets[i] > max_dcfclk_mhz) { - dcfclk_sta_targets[i] = max_dcfclk_mhz; + if (dcfclk_sta_targets[i] > dcn30_bb_max_clk.max_dcfclk_mhz) { + dcfclk_sta_targets[i] = dcn30_bb_max_clk.max_dcfclk_mhz; break; } } @@ -2452,7 +2034,7 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params // Calculate optimal dcfclk for each uclk for (i = 0; i < num_uclk_states; i++) { DC_FP_START(); - dcn30_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16, + dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16, &optimal_dcfclk_for_uclk[i], NULL); DC_FP_END(); if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) { @@ -2479,7 +2061,7 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; } else { - if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { + if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= dcn30_bb_max_clk.max_dcfclk_mhz) { dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; } else { @@ -2494,33 +2076,15 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params } while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES && - optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { + optimal_dcfclk_for_uclk[j] <= dcn30_bb_max_clk.max_dcfclk_mhz) { dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; } dcn3_0_soc.num_states = num_states; - for (i = 0; i < 
dcn3_0_soc.num_states; i++) { - dcn3_0_soc.clock_limits[i].state = i; - dcn3_0_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i]; - dcn3_0_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i]; - dcn3_0_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i]; - - /* Fill all states with max values of all other clocks */ - dcn3_0_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; - dcn3_0_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; - dcn3_0_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; - dcn3_0_soc.clock_limits[i].dtbclk_mhz = dcn3_0_soc.clock_limits[0].dtbclk_mhz; - /* These clocks cannot come from bw_params, always fill from dcn3_0_soc[1] */ - /* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */ - dcn3_0_soc.clock_limits[i].phyclk_d18_mhz = dcn3_0_soc.clock_limits[0].phyclk_d18_mhz; - dcn3_0_soc.clock_limits[i].socclk_mhz = dcn3_0_soc.clock_limits[0].socclk_mhz; - dcn3_0_soc.clock_limits[i].dscclk_mhz = dcn3_0_soc.clock_limits[0].dscclk_mhz; - } - /* re-init DML with updated bb */ - dml_init_instance(&dc->dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30); - if (dc->current_state) - dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30); + DC_FP_START(); + dcn30_fpu_update_bw_bounding_box(dc, bw_params, &dcn30_bb_max_clk, dcfclk_mhz, dram_speed_mts); + DC_FP_END(); } } diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h index b92e4cc0232f..3330a1026fa5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h @@ -35,6 +35,9 @@ struct dc; struct resource_pool; struct _vcs_dpi_display_pipe_params_st; +extern struct _vcs_dpi_ip_params_st dcn3_0_ip; +extern struct _vcs_dpi_soc_bounding_box_st dcn3_0_soc; + struct dcn30_resource_pool { struct resource_pool base; }; @@ -96,4 +99,6 @@ enum dc_status dcn30_add_stream_to_ctx( void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); +void 
dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context); + #endif /* _DCN30_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c index 4daf8931aa7c..a5df74110284 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c @@ -81,6 +81,8 @@ #include "dce/dce_aux.h" #include "dce/dce_i2c.h" +#include "dml/dcn30/dcn30_fpu.h" + #include "dml/dcn30/display_mode_vba_30.h" #include "dml/dcn301/dcn301_fpu.h" #include "vm_helper.h" diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c index f0938653bb88..f537888f4fa6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c @@ -43,6 +43,8 @@ #include "dcn20/dcn20_dsc.h" #include "dcn20/dcn20_resource.h" +#include "dml/dcn30/dcn30_fpu.h" + #include "dcn10/dcn10_resource.h" #include "dce/dce_abm.h" diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c index 4fcbc0502808..76f863eb86ef 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -25,6 +25,8 @@ #include "dcn20/dcn20_dsc.h" #include "dcn20/dcn20_resource.h" +#include "dml/dcn30/dcn30_fpu.h" + #include "dcn10/dcn10_resource.h" #include "dc_link_ddc.h" diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index ccf1b71a8269..3d9f07d4770b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -36,6 +36,8 @@ #include "dcn20/dcn20_resource.h" #include "dcn30/dcn30_resource.h" +#include "dml/dcn30/dcn30_fpu.h" + #include "dcn10/dcn10_ipp.h" 
#include "dcn30/dcn30_hubbub.h" #include "dcn31/dcn31_hubbub.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index ee911452c048..a64b88ca01a9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -71,6 +71,7 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(fram CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_ccflags) @@ -113,7 +114,7 @@ DML += dcn20/dcn20_fpu.o DML += display_mode_vba.o dcn20/display_rq_dlg_calc_20.o dcn20/display_mode_vba_20.o DML += dcn20/display_rq_dlg_calc_20v2.o dcn20/display_mode_vba_20v2.o DML += dcn21/display_rq_dlg_calc_21.o dcn21/display_mode_vba_21.o -DML += dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_30.o +DML += dcn30/dcn30_fpu.o dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_30.o DML += dcn31/display_mode_vba_31.o dcn31/display_rq_dlg_calc_31.o DML += dcn31/dcn31_fpu.o DML += dcn301/dcn301_fpu.o diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c new file mode 100644 index 000000000000..574676a0711a --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c @@ -0,0 +1,617 @@ +/* + * Copyright 2020-2021 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ +#include "resource.h" +#include "clk_mgr.h" +#include "reg_helper.h" +#include "dcn_calc_math.h" +#include "dcn20/dcn20_resource.h" +#include "dcn30/dcn30_resource.h" + + +#include "display_mode_vba_30.h" +#include "dcn30_fpu.h" + +#define REG(reg)\ + optc1->tg_regs->reg + +#define CTX \ + optc1->base.ctx + +#undef FN +#define FN(reg_name, field_name) \ + optc1->tg_shift->field_name, optc1->tg_mask->field_name + + +struct _vcs_dpi_ip_params_st dcn3_0_ip = { + .use_min_dcfclk = 0, + .clamp_min_dcfclk = 0, + .odm_capable = 1, + .gpuvm_enable = 0, + .hostvm_enable = 0, + .gpuvm_max_page_table_levels = 4, + .hostvm_max_page_table_levels = 4, + .hostvm_cached_page_table_levels = 0, + .pte_group_size_bytes = 2048, + .num_dsc = 6, + .rob_buffer_size_kbytes = 184, + .det_buffer_size_kbytes = 184, + .dpte_buffer_size_in_pte_reqs_luma = 84, + .pde_proc_buffer_size_64k_reqs = 48, + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .pte_enable = 1, + .max_page_table_levels = 2, + .pte_chunk_size_kbytes = 2, // ? + .meta_chunk_size_kbytes = 2, + .writeback_chunk_size_kbytes = 8, + .line_buffer_size_bits = 789504, + .is_line_buffer_bpp_fixed = 0, // ? + .line_buffer_fixed_bpp = 0, // ? 
+ .dcc_supported = true, + .writeback_interface_buffer_size_kbytes = 90, + .writeback_line_buffer_buffer_size = 0, + .max_line_buffer_lines = 12, + .writeback_luma_buffer_size_kbytes = 12, // writeback_line_buffer_buffer_size = 656640 + .writeback_chroma_buffer_size_kbytes = 8, + .writeback_chroma_line_buffer_width_pixels = 4, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_taps = 1, + .writeback_max_vscl_taps = 1, + .writeback_line_buffer_luma_buffer_size = 0, + .writeback_line_buffer_chroma_buffer_size = 14643, + .cursor_buffer_size = 8, + .cursor_chunk_size = 2, + .max_num_otg = 6, + .max_num_dpp = 6, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 6, + .max_vscl_ratio = 6, + .hscl_mults = 4, + .vscl_mults = 4, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dispclk_ramp_margin_percent = 1, + .underscan_factor = 1.11, + .min_vblank_lines = 32, + .dppclk_delay_subtotal = 46, + .dynamic_metadata_vm_enabled = true, + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_scl = 50, + .dppclk_delay_cnvc_formatter = 27, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 119, + .dcfclk_cstate_latency = 5.2, // SRExitTime + .max_inter_dcn_tile_repeaters = 8, + .max_num_hdmi_frl_outputs = 1, + .odm_combine_4to1_supported = true, + + .xfc_supported = false, + .xfc_fill_bw_overhead_percent = 10.0, + .xfc_fill_constant_bytes = 0, + .gfx7_compat_tiling_supported = 0, + .number_of_cursors = 1, +}; + +struct _vcs_dpi_soc_bounding_box_st dcn3_0_soc = { + .clock_limits = { + { + .state = 0, + .dispclk_mhz = 562.0, + .dppclk_mhz = 300.0, + .phyclk_mhz = 300.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 405.6, + }, + }, + + .min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */ + .num_states = 1, + .sr_exit_time_us = 15.5, + 
.sr_enter_plus_exit_time_us = 20, + .urgent_latency_us = 4.0, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0, + .max_avg_sdp_bw_use_normal_percent = 60.0, + .max_avg_dram_bw_use_normal_percent = 40.0, + .writeback_latency_us = 12.0, + .max_request_size_bytes = 256, + .fabric_datapath_to_dcn_data_return_bytes = 64, + .dcn_downspread_percent = 0.5, + .downspread_percent = 0.38, + .dram_page_open_time_ns = 50.0, + .dram_rw_turnaround_time_ns = 17.5, + .dram_return_buffer_per_channel_bytes = 8192, + .round_trip_ping_latency_dcfclk_cycles = 191, + .urgent_out_of_order_return_per_channel_bytes = 4096, + .channel_interleave_bytes = 256, + .num_banks = 8, + .gpuvm_min_page_size_bytes = 4096, + .hostvm_min_page_size_bytes = 4096, + .dram_clock_change_latency_us = 404, + .dummy_pstate_latency_us = 5, + .writeback_dram_clock_change_latency_us = 23.0, + .return_bus_width_bytes = 64, + .dispclk_dppclk_vco_speed_mhz = 3650, + .xfc_bus_transport_time_us = 20, // ? + .xfc_xbuf_latency_tolerance_us = 4, // ? + .use_urgent_burst_bw = 1, // ? 
+ .do_urgent_latency_adjustment = true, + .urgent_latency_adjustment_fabric_clock_component_us = 1.0, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, +}; + + +void optc3_fpu_set_vrr_m_const(struct timing_generator *optc, + double vtotal_avg) +{ +struct optc *optc1 = DCN10TG_FROM_TG(optc); + double vtotal_min, vtotal_max; + double ratio, modulo, phase; + uint32_t vblank_start; + uint32_t v_total_mask_value = 0; + + dc_assert_fp_enabled(); + + /* Compute VTOTAL_MIN and VTOTAL_MAX, so that + * VTOTAL_MAX - VTOTAL_MIN = 1 + */ + v_total_mask_value = 16; + vtotal_min = dcn_bw_floor(vtotal_avg); + vtotal_max = dcn_bw_ceil(vtotal_avg); + + /* Check that bottom VBLANK is at least 2 lines tall when running with + * VTOTAL_MIN. Note that VTOTAL registers are defined as 'total number + * of lines in a frame - 1'. + */ + REG_GET(OTG_V_BLANK_START_END, OTG_V_BLANK_START, + &vblank_start); + ASSERT(vtotal_min >= vblank_start + 1); + + /* Special case where the average frame rate can be achieved + * without using the DTO + */ + if (vtotal_min == vtotal_max) { + REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL, (uint32_t)vtotal_min); + + optc->funcs->set_vtotal_min_max(optc, 0, 0); + REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, 0); + REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, 0); + REG_UPDATE_3(OTG_V_TOTAL_CONTROL, + OTG_V_TOTAL_MIN_SEL, 0, + OTG_V_TOTAL_MAX_SEL, 0, + OTG_SET_V_TOTAL_MIN_MASK_EN, 0); + return; + } + + ratio = vtotal_max - vtotal_avg; + modulo = 65536.0 * 65536.0 - 1.0; /* 2^32 - 1 */ + phase = ratio * modulo; + + /* Special cases where the DTO phase gets rounded to 0 or + * to DTO modulo + */ + if (phase <= 0 || phase >= modulo) { + REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL, + phase <= 0 ?
+ (uint32_t)vtotal_max : (uint32_t)vtotal_min); + REG_SET(OTG_V_TOTAL_MIN, 0, OTG_V_TOTAL_MIN, 0); + REG_SET(OTG_V_TOTAL_MAX, 0, OTG_V_TOTAL_MAX, 0); + REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, 0); + REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, 0); + REG_UPDATE_3(OTG_V_TOTAL_CONTROL, + OTG_V_TOTAL_MIN_SEL, 0, + OTG_V_TOTAL_MAX_SEL, 0, + OTG_SET_V_TOTAL_MIN_MASK_EN, 0); + return; + } + REG_UPDATE_6(OTG_V_TOTAL_CONTROL, + OTG_V_TOTAL_MIN_SEL, 1, + OTG_V_TOTAL_MAX_SEL, 1, + OTG_SET_V_TOTAL_MIN_MASK_EN, 1, + OTG_SET_V_TOTAL_MIN_MASK, v_total_mask_value, + OTG_VTOTAL_MID_REPLACING_MIN_EN, 0, + OTG_VTOTAL_MID_REPLACING_MAX_EN, 0); + REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL, (uint32_t)vtotal_min); + optc->funcs->set_vtotal_min_max(optc, vtotal_min, vtotal_max); + REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, (uint32_t)phase); + REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, (uint32_t)modulo); +} + +void dcn30_fpu_populate_dml_writeback_from_context( + struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes) +{ + int pipe_cnt, i, j; + double max_calc_writeback_dispclk; + double writeback_dispclk; + struct writeback_st dout_wb; + + dc_assert_fp_enabled(); + + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + struct dc_stream_state *stream = res_ctx->pipe_ctx[i].stream; + + if (!stream) + continue; + max_calc_writeback_dispclk = 0; + + /* Set writeback information */ + pipes[pipe_cnt].dout.wb_enable = 0; + pipes[pipe_cnt].dout.num_active_wb = 0; + for (j = 0; j < stream->num_wb_info; j++) { + struct dc_writeback_info *wb_info = &stream->writeback_info[j]; + + if (wb_info->wb_enabled && wb_info->writeback_source_plane && + (wb_info->writeback_source_plane == res_ctx->pipe_ctx[i].plane_state)) { + pipes[pipe_cnt].dout.wb_enable = 1; + pipes[pipe_cnt].dout.num_active_wb++; + dout_wb.wb_src_height = wb_info->dwb_params.cnv_params.crop_en ? 
+ wb_info->dwb_params.cnv_params.crop_height : + wb_info->dwb_params.cnv_params.src_height; + dout_wb.wb_src_width = wb_info->dwb_params.cnv_params.crop_en ? + wb_info->dwb_params.cnv_params.crop_width : + wb_info->dwb_params.cnv_params.src_width; + dout_wb.wb_dst_width = wb_info->dwb_params.dest_width; + dout_wb.wb_dst_height = wb_info->dwb_params.dest_height; + + /* For IP that doesn't support WB scaling, set h/v taps to 1 to avoid DML validation failure */ + if (dc->dml.ip.writeback_max_hscl_taps > 1) { + dout_wb.wb_htaps_luma = wb_info->dwb_params.scaler_taps.h_taps; + dout_wb.wb_vtaps_luma = wb_info->dwb_params.scaler_taps.v_taps; + } else { + dout_wb.wb_htaps_luma = 1; + dout_wb.wb_vtaps_luma = 1; + } + dout_wb.wb_htaps_chroma = 0; + dout_wb.wb_vtaps_chroma = 0; + dout_wb.wb_hratio = wb_info->dwb_params.cnv_params.crop_en ? + (double)wb_info->dwb_params.cnv_params.crop_width / + (double)wb_info->dwb_params.dest_width : + (double)wb_info->dwb_params.cnv_params.src_width / + (double)wb_info->dwb_params.dest_width; + dout_wb.wb_vratio = wb_info->dwb_params.cnv_params.crop_en ? 
+ (double)wb_info->dwb_params.cnv_params.crop_height / + (double)wb_info->dwb_params.dest_height : + (double)wb_info->dwb_params.cnv_params.src_height / + (double)wb_info->dwb_params.dest_height; + if (wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_ARGB || + wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_RGBA) + dout_wb.wb_pixel_format = dm_444_64; + else + dout_wb.wb_pixel_format = dm_444_32; + + /* Workaround for cases where multiple writebacks are connected to same plane + * In which case, need to compute worst case and set the associated writeback parameters + * This workaround is necessary due to DML computation assuming only 1 set of writeback + * parameters per pipe + */ + writeback_dispclk = dml30_CalculateWriteBackDISPCLK( + dout_wb.wb_pixel_format, + pipes[pipe_cnt].pipe.dest.pixel_rate_mhz, + dout_wb.wb_hratio, + dout_wb.wb_vratio, + dout_wb.wb_htaps_luma, + dout_wb.wb_vtaps_luma, + dout_wb.wb_src_width, + dout_wb.wb_dst_width, + pipes[pipe_cnt].pipe.dest.htotal, + dc->current_state->bw_ctx.dml.ip.writeback_line_buffer_buffer_size); + + if (writeback_dispclk > max_calc_writeback_dispclk) { + max_calc_writeback_dispclk = writeback_dispclk; + pipes[pipe_cnt].dout.wb = dout_wb; + } + } + } + + pipe_cnt++; + } +} + +void dcn30_fpu_set_mcif_arb_params(struct mcif_arb_params *wb_arb_params, + struct display_mode_lib *dml, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int cur_pipe) +{ + int i; + + dc_assert_fp_enabled(); + + for (i = 0; i < sizeof(wb_arb_params->cli_watermark)/sizeof(wb_arb_params->cli_watermark[0]); i++) { + wb_arb_params->cli_watermark[i] = get_wm_writeback_urgent(dml, pipes, pipe_cnt) * 1000; + wb_arb_params->pstate_watermark[i] = get_wm_writeback_dram_clock_change(dml, pipes, pipe_cnt) * 1000; + } + + wb_arb_params->dram_speed_change_duration = dml->vba.WritebackAllowDRAMClockChangeEndPosition[cur_pipe] * pipes[0].clks_cfg.refclk_mhz; /* num_clock_cycles = us * MHz */ +} + +void 
dcn30_fpu_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) +{ + +dc_assert_fp_enabled(); + +if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us; + } +} + +void dcn30_fpu_calculate_wm_and_dlg( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) +{ +int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; + int i, pipe_idx; + double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][maxMpcComb]; + bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported; + +dc_assert_fp_enabled(); + + if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk) + dcfclk = context->bw_ctx.dml.soc.min_dcfclk; + + pipes[0].clks_cfg.voltage = vlevel; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; + + /* Set B: + * DCFCLK: 1GHz or min required above 1GHz + * FCLK/UCLK: Max + */ + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) { + if (vlevel == 0) { + pipes[0].clks_cfg.voltage = 1; + pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz; + } + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us; + } + 
context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + pipes[0].clks_cfg.voltage = vlevel; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; + + /* Set D: + * DCFCLK: Min Required + * FCLK(proportional to UCLK): 1GHz or Max + * MALL stutter, sr_enter_exit = 4, sr_exit = 2us + */ + /* + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us; + } + context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = 
get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + */ + + /* Set C: + * DCFCLK: Min Required + * FCLK(proportional to UCLK): 1GHz or Max + * pstate latency overridden to 5us + */ + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { + unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; + unsigned int min_dram_speed_mts_margin = 160; + + if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_unsupported) + min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16; + + /* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */ + for (i = 3; i > 0; i--) + if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts) + break; + + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; + + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; + 
context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; + } + + context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + if (!pstate_en) { + /* The only difference between A and C is p-state latency, if p-state is not supported we want to + * calculate DLG based on dummy p-state latency, and max out the set A p-state watermark + */ + context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0; + } else { + /* Set A: + * DCFCLK: Min Required + * FCLK(proportional to UCLK): 1GHz or Max + * + * Set A calculated last so that following calculations are based on Set A + */ + dc->res_pool->funcs->update_soc_for_wm_a(dc, context); + context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = 
get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + } + + context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod; + + /* Make set D = set A until set D is enabled */ + context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); + pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + + if (dc->config.forced_clocks) { + pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; + pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; + } + if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) + pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; + if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) + pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; + + pipe_idx++; + } 
+ + DC_FP_START(); + dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); + DC_FP_END(); + + if (!pstate_en) + /* Restore full p-state latency */ + context->bw_ctx.dml.soc.dram_clock_change_latency_us = + dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; + +} + +void dcn30_fpu_update_dram_channel_width_bytes(struct dc *dc) +{ + dc_assert_fp_enabled(); + + if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) + dcn3_0_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; +} + +void dcn30_fpu_update_max_clk(struct dc_bounding_box_max_clk *dcn30_bb_max_clk) +{ + dc_assert_fp_enabled(); + + if (!dcn30_bb_max_clk->max_dcfclk_mhz) + dcn30_bb_max_clk->max_dcfclk_mhz = dcn3_0_soc.clock_limits[0].dcfclk_mhz; + if (!dcn30_bb_max_clk->max_dispclk_mhz) + dcn30_bb_max_clk->max_dispclk_mhz = dcn3_0_soc.clock_limits[0].dispclk_mhz; + if (!dcn30_bb_max_clk->max_dppclk_mhz) + dcn30_bb_max_clk->max_dppclk_mhz = dcn3_0_soc.clock_limits[0].dppclk_mhz; + if (!dcn30_bb_max_clk->max_phyclk_mhz) + dcn30_bb_max_clk->max_phyclk_mhz = dcn3_0_soc.clock_limits[0].phyclk_mhz; +} + +void dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, + unsigned int *optimal_dcfclk, + unsigned int *optimal_fclk) +{ + double bw_from_dram, bw_from_dram1, bw_from_dram2; + + dc_assert_fp_enabled(); + + bw_from_dram1 = uclk_mts * dcn3_0_soc.num_chans * + dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_dram_bw_use_normal_percent / 100); + bw_from_dram2 = uclk_mts * dcn3_0_soc.num_chans * + dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100); + + bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? 
bw_from_dram1 : bw_from_dram2; + + if (optimal_fclk) + *optimal_fclk = bw_from_dram / + (dcn3_0_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100)); + + if (optimal_dcfclk) + *optimal_dcfclk = bw_from_dram / + (dcn3_0_soc.return_bus_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100)); +} + +void dcn30_fpu_update_bw_bounding_box(struct dc *dc, + struct clk_bw_params *bw_params, + struct dc_bounding_box_max_clk *dcn30_bb_max_clk, + unsigned int *dcfclk_mhz, + unsigned int *dram_speed_mts) +{ + unsigned int i; + + dc_assert_fp_enabled(); + + dcn3_0_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + + for (i = 0; i < dcn3_0_soc.num_states; i++) { + dcn3_0_soc.clock_limits[i].state = i; + dcn3_0_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i]; + dcn3_0_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i]; + dcn3_0_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i]; + + /* Fill all states with max values of all other clocks */ + dcn3_0_soc.clock_limits[i].dispclk_mhz = dcn30_bb_max_clk->max_dispclk_mhz; + dcn3_0_soc.clock_limits[i].dppclk_mhz = dcn30_bb_max_clk->max_dppclk_mhz; + dcn3_0_soc.clock_limits[i].phyclk_mhz = dcn30_bb_max_clk->max_phyclk_mhz; + dcn3_0_soc.clock_limits[i].dtbclk_mhz = dcn3_0_soc.clock_limits[0].dtbclk_mhz; + /* These clocks cannot come from bw_params, always fill from dcn3_0_soc.clock_limits[0] */ + /* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */ + dcn3_0_soc.clock_limits[i].phyclk_d18_mhz = dcn3_0_soc.clock_limits[0].phyclk_d18_mhz; + dcn3_0_soc.clock_limits[i].socclk_mhz = dcn3_0_soc.clock_limits[0].socclk_mhz; + dcn3_0_soc.clock_limits[i].dscclk_mhz = dcn3_0_soc.clock_limits[0].dscclk_mhz; + } + /* re-init DML with updated bb */ + dml_init_instance(&dc->dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30); + if (dc->current_state) + dml_init_instance(&dc->current_state->bw_ctx.dml,
&dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30); + +} + + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h new file mode 100644 index 000000000000..dedfe7b5f173 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h @@ -0,0 +1,67 @@ +/* + * Copyright 2020-2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef __DCN30_FPU_H__ +#define __DCN30_FPU_H__ + +#include "core_types.h" +#include "dcn20/dcn20_optc.h" + +void optc3_fpu_set_vrr_m_const(struct timing_generator *optc, + double vtotal_avg); + +void dcn30_fpu_populate_dml_writeback_from_context( + struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes); + +void dcn30_fpu_set_mcif_arb_params(struct mcif_arb_params *wb_arb_params, + struct display_mode_lib *dml, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int cur_pipe); + +void dcn30_fpu_update_soc_for_wm_a(struct dc *dc, struct dc_state *context); + +void dcn30_fpu_calculate_wm_and_dlg( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel); + +void dcn30_fpu_update_dram_channel_width_bytes(struct dc *dc); + +void dcn30_fpu_update_max_clk(struct dc_bounding_box_max_clk *dcn30_bb_max_clk); + +void dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, + unsigned int *optimal_dcfclk, + unsigned int *optimal_fclk); + +void dcn30_fpu_update_bw_bounding_box(struct dc *dc, + struct clk_bw_params *bw_params, + struct dc_bounding_box_max_clk *dcn30_bb_max_clk, + unsigned int *dcfclk_mhz, + unsigned int *dram_speed_mts); + + +#endif /* __DCN30_FPU_H__*/ diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 26f3a55c35d7..555d4d9e1454 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -486,4 +486,11 @@ struct dc_state { } perf_params; }; +struct dc_bounding_box_max_clk { + int max_dcfclk_mhz; + int max_dispclk_mhz; + int max_dppclk_mhz; + int max_phyclk_mhz; +}; + #endif /* _CORE_TYPES_H_ */ From a32cc8177eabcd3497721836241f3d456342be62 Mon Sep 17 00:00:00 2001 From: Bhawanpreet Lakha <Bhawanpreet.Lakha@amd.com> Date: Thu, 5 May 2022 16:16:47 -0400 Subject: [PATCH 09/46] drm/amd/display: Fix incorrect pipe
being used for clk update [Why] we save the prev_dppclk value using "dpp_inst" but when reading this value we use the index "i". In a case where a pipe is fused off we can end up reading the incorrect instance because i != dpp_inst in this case. [How] read the prev_dppclk using dpp_inst instead of i Reviewed-by: Dmytro Laktyushkin <Dmytro.Laktyushkin@amd.com> Acked-by: Qingqing Zhuo <qingqing.zhuo@amd.com> Signed-off-by: Bhawanpreet Lakha <Bhawanpreet.Lakha@amd.com> Tested-by: Daniel Wheeler <daniel.wheeler@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index 02943ca65807..cf1b5f354ae9 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -122,7 +122,7 @@ static void rn_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr, dpp_inst = clk_mgr->base.ctx->dc->res_pool->dpps[i]->inst; dppclk_khz = context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz; - prev_dppclk_khz = clk_mgr->dccg->pipe_dppclk_khz[i]; + prev_dppclk_khz = clk_mgr->dccg->pipe_dppclk_khz[dpp_inst]; if (safe_to_lower || prev_dppclk_khz < dppclk_khz) clk_mgr->dccg->funcs->update_dpp_dto( From 4d1d699f479dc8e01fbb6b7b5a8f2116de3a3883 Mon Sep 17 00:00:00 2001 From: Michael Strauss <michael.strauss@amd.com> Date: Fri, 6 May 2022 17:17:55 -0400 Subject: [PATCH 10/46] Revert "drm/amd/display: Refactor LTTPR cap retrieval" This reverts commit 3b90318d44f87a3582f876802253a7748d270385. [WHY] Regressions unintentionally caused by change, reverting until this can be resolved. 
Reviewed-by: Aric Cyr <Aric.Cyr@amd.com> Acked-by: Qingqing Zhuo <qingqing.zhuo@amd.com> Signed-off-by: Michael Strauss <michael.strauss@amd.com> Tested-by: Daniel Wheeler <daniel.wheeler@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- .../gpu/drm/amd/display/dc/core/dc_link_dp.c | 171 +++++++----------- drivers/gpu/drm/amd/display/dc/dc_link.h | 2 - .../gpu/drm/amd/display/dc/inc/dc_link_dp.h | 3 +- .../amd/display/include/link_service_types.h | 6 - 4 files changed, 69 insertions(+), 113 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index d8de8dbf3676..3c9523218c19 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -5126,13 +5126,16 @@ static bool dpcd_read_sink_ext_caps(struct dc_link *link) return true; } -void dp_retrieve_lttpr_cap(struct dc_link *link) +bool dp_retrieve_lttpr_cap(struct dc_link *link) { + uint8_t lttpr_dpcd_data[8]; bool allow_lttpr_non_transparent_mode = 0; + bool vbios_lttpr_enable = link->dc->caps.vbios_lttpr_enable; bool vbios_lttpr_interop = link->dc->caps.vbios_lttpr_aware; enum dc_status status = DC_ERROR_UNEXPECTED; + bool is_lttpr_present = false; - memset(link->lttpr_dpcd_data, '\0', sizeof(link->lttpr_dpcd_data)); + memset(lttpr_dpcd_data, '\0', sizeof(lttpr_dpcd_data)); if ((link->dc->config.allow_lttpr_non_transparent_mode.bits.DP2_0 && link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED)) { @@ -5142,116 +5145,82 @@ void dp_retrieve_lttpr_cap(struct dc_link *link) allow_lttpr_non_transparent_mode = 1; } - link->lttpr_mode = LTTPR_MODE_NON_LTTPR; - link->lttpr_support = LTTPR_UNSUPPORTED; - /* - * Logic to determine LTTPR support + * Logic to determine LTTPR mode */ - if (vbios_lttpr_interop) - link->lttpr_support = LTTPR_SUPPORTED; - else if (link->dc->config.allow_lttpr_non_transparent_mode.raw == 0 - || !link->dc->caps.extended_aux_timeout_support) - 
link->lttpr_support = LTTPR_UNSUPPORTED; - else - link->lttpr_support = LTTPR_CHECK_EXT_SUPPORT; + link->lttpr_mode = LTTPR_MODE_NON_LTTPR; + if (vbios_lttpr_enable && vbios_lttpr_interop) + link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT; + else if (!vbios_lttpr_enable && vbios_lttpr_interop) { + if (allow_lttpr_non_transparent_mode) + link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT; + else + link->lttpr_mode = LTTPR_MODE_TRANSPARENT; + } else if (!vbios_lttpr_enable && !vbios_lttpr_interop) { + if (!allow_lttpr_non_transparent_mode || !link->dc->caps.extended_aux_timeout_support) + link->lttpr_mode = LTTPR_MODE_NON_LTTPR; + else + link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT; + } +#if defined(CONFIG_DRM_AMD_DC_DCN) /* Check DP tunnel LTTPR mode debug option. */ if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && link->dc->debug.dpia_debug.bits.force_non_lttpr) - link->lttpr_support = LTTPR_UNSUPPORTED; + link->lttpr_mode = LTTPR_MODE_NON_LTTPR; +#endif - if (link->lttpr_support > LTTPR_UNSUPPORTED) { + if (link->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT || link->lttpr_mode == LTTPR_MODE_TRANSPARENT) { /* By reading LTTPR capability, RX assumes that we will enable * LTTPR extended aux timeout if LTTPR is present. 
*/ status = core_link_read_dpcd( link, DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV, - link->lttpr_dpcd_data, - sizeof(link->lttpr_dpcd_data)); + lttpr_dpcd_data, + sizeof(lttpr_dpcd_data)); + + link->dpcd_caps.lttpr_caps.revision.raw = + lttpr_dpcd_data[DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV - + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; + + link->dpcd_caps.lttpr_caps.max_link_rate = + lttpr_dpcd_data[DP_MAX_LINK_RATE_PHY_REPEATER - + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; + + link->dpcd_caps.lttpr_caps.phy_repeater_cnt = + lttpr_dpcd_data[DP_PHY_REPEATER_CNT - + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; + + link->dpcd_caps.lttpr_caps.max_lane_count = + lttpr_dpcd_data[DP_MAX_LANE_COUNT_PHY_REPEATER - + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; + + link->dpcd_caps.lttpr_caps.mode = + lttpr_dpcd_data[DP_PHY_REPEATER_MODE - + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; + + link->dpcd_caps.lttpr_caps.max_ext_timeout = + lttpr_dpcd_data[DP_PHY_REPEATER_EXTENDED_WAIT_TIMEOUT - + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; + link->dpcd_caps.lttpr_caps.main_link_channel_coding.raw = + lttpr_dpcd_data[DP_MAIN_LINK_CHANNEL_CODING_PHY_REPEATER - + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; + + link->dpcd_caps.lttpr_caps.supported_128b_132b_rates.raw = + lttpr_dpcd_data[DP_PHY_REPEATER_128B132B_RATES - + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; + + /* Attempt to train in LTTPR transparent mode if repeater count exceeds 8. 
*/ + is_lttpr_present = (link->dpcd_caps.lttpr_caps.max_lane_count > 0 && + link->dpcd_caps.lttpr_caps.max_lane_count <= 4 && + link->dpcd_caps.lttpr_caps.revision.raw >= 0x14); + if (is_lttpr_present) { + CONN_DATA_DETECT(link, lttpr_dpcd_data, sizeof(lttpr_dpcd_data), "LTTPR Caps: "); + configure_lttpr_mode_transparent(link); + } else + link->lttpr_mode = LTTPR_MODE_NON_LTTPR; } -} - -bool dp_parse_lttpr_mode(struct dc_link *link) -{ - bool dpcd_allow_lttpr_non_transparent_mode = false; - bool is_lttpr_present = false; - - bool vbios_lttpr_enable = link->dc->caps.vbios_lttpr_enable; - - if ((link->dc->config.allow_lttpr_non_transparent_mode.bits.DP2_0 && - link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED)) { - dpcd_allow_lttpr_non_transparent_mode = true; - } else if (link->dc->config.allow_lttpr_non_transparent_mode.bits.DP1_4A && - !link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED) { - dpcd_allow_lttpr_non_transparent_mode = true; - } - - /* - * Logic to determine LTTPR mode - */ - if (link->lttpr_support == LTTPR_SUPPORTED) - if (vbios_lttpr_enable) - link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT; - else if (dpcd_allow_lttpr_non_transparent_mode) - link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT; - else - link->lttpr_mode = LTTPR_MODE_TRANSPARENT; - else // lttpr_support == LTTPR_CHECK_EXT_SUPPORT - if (dpcd_allow_lttpr_non_transparent_mode) { - link->lttpr_support = LTTPR_SUPPORTED; - link->lttpr_mode = LTTPR_MODE_NON_TRANSPARENT; - } else { - link->lttpr_support = LTTPR_UNSUPPORTED; - } - - if (link->lttpr_support == LTTPR_UNSUPPORTED) - return false; - - link->dpcd_caps.lttpr_caps.revision.raw = - link->lttpr_dpcd_data[DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV - - DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; - - link->dpcd_caps.lttpr_caps.max_link_rate = - link->lttpr_dpcd_data[DP_MAX_LINK_RATE_PHY_REPEATER - - DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; - - link->dpcd_caps.lttpr_caps.phy_repeater_cnt = 
- link->lttpr_dpcd_data[DP_PHY_REPEATER_CNT - - DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; - - link->dpcd_caps.lttpr_caps.max_lane_count = - link->lttpr_dpcd_data[DP_MAX_LANE_COUNT_PHY_REPEATER - - DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; - - link->dpcd_caps.lttpr_caps.mode = - link->lttpr_dpcd_data[DP_PHY_REPEATER_MODE - - DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; - - link->dpcd_caps.lttpr_caps.max_ext_timeout = - link->lttpr_dpcd_data[DP_PHY_REPEATER_EXTENDED_WAIT_TIMEOUT - - DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; - - link->dpcd_caps.lttpr_caps.main_link_channel_coding.raw = - link->lttpr_dpcd_data[DP_MAIN_LINK_CHANNEL_CODING_PHY_REPEATER - - DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; - - link->dpcd_caps.lttpr_caps.supported_128b_132b_rates.raw = - link->lttpr_dpcd_data[DP_PHY_REPEATER_128B132B_RATES - - DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; - - - /* Attempt to train in LTTPR transparent mode if repeater count exceeds 8. */ - is_lttpr_present = (link->dpcd_caps.lttpr_caps.max_lane_count > 0 && - link->dpcd_caps.lttpr_caps.max_lane_count <= 4 && - link->dpcd_caps.lttpr_caps.revision.raw >= 0x14); - if (is_lttpr_present) { - CONN_DATA_DETECT(link, link->lttpr_dpcd_data, sizeof(link->lttpr_dpcd_data), "LTTPR Caps: "); - configure_lttpr_mode_transparent(link); - } else - link->lttpr_mode = LTTPR_MODE_NON_LTTPR; - return is_lttpr_present; } @@ -5403,8 +5372,7 @@ static bool retrieve_link_cap(struct dc_link *link) status = wa_try_to_wake_dprx(link, timeout_ms); } - dp_retrieve_lttpr_cap(link); - + is_lttpr_present = dp_retrieve_lttpr_cap(link); /* Read DP tunneling information. 
*/ status = dpcd_get_tunneling_device_data(link); @@ -5440,9 +5408,6 @@ static bool retrieve_link_cap(struct dc_link *link) return false; } - if (link->lttpr_support > LTTPR_UNSUPPORTED) - is_lttpr_present = dp_parse_lttpr_mode(link); - if (!is_lttpr_present) dc_link_aux_try_to_configure_timeout(link->ddc, LINK_AUX_DEFAULT_TIMEOUT_PERIOD); diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index 251f2bbc96b9..a3c37ee3f849 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -129,8 +129,6 @@ struct dc_link { bool link_state_valid; bool aux_access_disabled; bool sync_lt_in_progress; - uint8_t lttpr_dpcd_data[8]; - enum lttpr_support lttpr_support; enum lttpr_mode lttpr_mode; bool is_internal_display; diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h index 78f09893c118..44f167d2584f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h @@ -217,8 +217,7 @@ void disable_dp_hpo_output(struct dc_link *link, void setup_dp_hpo_stream(struct pipe_ctx *pipe_ctx, bool enable); bool is_dp_128b_132b_signal(struct pipe_ctx *pipe_ctx); -void dp_retrieve_lttpr_cap(struct dc_link *link); -bool dp_apply_lttpr_mode(struct dc_link *link); +bool dp_retrieve_lttpr_cap(struct dc_link *link); void edp_panel_backlight_power_on(struct dc_link *link); void dp_receiver_power_ctrl(struct dc_link *link, bool on); void dp_source_sequence_trace(struct dc_link *link, uint8_t dp_test_mode); diff --git a/drivers/gpu/drm/amd/display/include/link_service_types.h b/drivers/gpu/drm/amd/display/include/link_service_types.h index 9f465b4d626e..447a56286dd0 100644 --- a/drivers/gpu/drm/amd/display/include/link_service_types.h +++ b/drivers/gpu/drm/amd/display/include/link_service_types.h @@ -80,12 +80,6 @@ enum link_training_result { DP_128b_132b_CDS_DONE_TIMEOUT, }; -enum lttpr_support { - 
LTTPR_UNSUPPORTED, - LTTPR_CHECK_EXT_SUPPORT, - LTTPR_SUPPORTED, -}; - enum lttpr_mode { LTTPR_MODE_NON_LTTPR, LTTPR_MODE_TRANSPARENT, From c51bdd1a9c34936c1a6b2f6eb79703c730cc0e6e Mon Sep 17 00:00:00 2001 From: Aric Cyr <aric.cyr@amd.com> Date: Sun, 8 May 2022 23:31:34 -0400 Subject: [PATCH 11/46] drm/amd/display: 3.2.186 This version brings along the following: - Improvements in link training fallback - Adding individual edp hotplug support - Fixes in DPIA HPD status, display clock change hang, etc. - FPU isolation work for DCN30 Acked-by: Qingqing Zhuo <qingqing.zhuo@amd.com> Signed-off-by: Aric Cyr <aric.cyr@amd.com> Tested-by: Daniel Wheeler <daniel.wheeler@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 7cfc04a8ef15..a31ea3644ec2 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -47,7 +47,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.185" +#define DC_VER "3.2.186" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 2c2dd0555fd6bcea6d43ab3224c6af718b910e22 Mon Sep 17 00:00:00 2001 From: Haohui Mai <ricetons@gmail.com> Date: Mon, 16 May 2022 05:00:53 -0700 Subject: [PATCH 12/46] drm/amdgpu: Clean up of initializing doorbells for gfx_v9 and gfx_v10 Clean up redundant, copy-paste code blocks during the initialization of the doorbells in mqd_init(). 
Signed-off-by: Haohui Mai <ricetons@gmail.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 17 ----------------- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 17 ----------------- 2 files changed, 34 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 65a4126135b0..63fbe03283bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -6919,23 +6919,6 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; - tmp = 0; - /* enable the doorbell if requested */ - if (prop->use_doorbell) { - tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_OFFSET, prop->doorbell_index); - - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_EN, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_SOURCE, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_HIT, 0); - } - - mqd->cp_hqd_pq_doorbell_control = tmp; - /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 83639b5ea6a9..f49a2dd89ee7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3535,23 +3535,6 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; - tmp = 0; - /* enable the doorbell if requested */ - if (ring->use_doorbell) { - tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - 
DOORBELL_OFFSET, ring->doorbell_index); - - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_EN, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_SOURCE, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_HIT, 0); - } - - mqd->cp_hqd_pq_doorbell_control = tmp; - /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ ring->wptr = 0; mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); From a0af5dbdc914eae667fba8322cb02afc4ce3967b Mon Sep 17 00:00:00 2001 From: Jonathan Kim <jonathan.kim@amd.com> Date: Thu, 12 May 2022 20:38:18 -0400 Subject: [PATCH 13/46] drm/amdkfd: simplify cpu hive assignment CPU hive assignment currently assumes when a GPU hive is connected_to_cpu, there is only one hive in the system. Only assign CPUs to the hive if they are explicitly directly connected to the GPU hive to get rid of the need for this assumption. It's more efficient to do this when querying IO links since other non-CRAT info has to be filled in anyways. Also, stop re-assigning the same CPU to the same GPU hive if it has already been done before. Signed-off-by: Jonathan Kim <jonathan.kim@amd.com> Reviewed-by: Felix Kuehling <felix.kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 2e20f54bb147..8d50d207cf66 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1271,6 +1271,12 @@ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) if (!peer_dev) continue; + /* Include the CPU peer in GPU hive if connected over xGMI. 
*/ + if (!peer_dev->gpu && !peer_dev->node_props.hive_id && + dev->node_props.hive_id && + dev->gpu->adev->gmc.xgmi.connected_to_cpu) + peer_dev->node_props.hive_id = dev->node_props.hive_id; + list_for_each_entry(inbound_link, &peer_dev->io_link_props, list) { if (inbound_link->node_to != link->node_from) @@ -1302,22 +1308,6 @@ int kfd_topology_add_device(struct kfd_dev *gpu) pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id); - /* Include the CPU in xGMI hive if xGMI connected by assigning it the hive ID. */ - if (gpu->hive_id && gpu->adev->gmc.xgmi.connected_to_cpu) { - struct kfd_topology_device *top_dev; - - down_read(&topology_lock); - - list_for_each_entry(top_dev, &topology_device_list, list) { - if (top_dev->gpu) - break; - - top_dev->node_props.hive_id = gpu->hive_id; - } - - up_read(&topology_lock); - } - /* Check to see if this gpu device exists in the topology_device_list. * If so, assign the gpu to that device, * else create a Virtual CRAT for this gpu device and then parse that From 69493c034d2455204dfcd370de8c4dc204374a94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> Date: Fri, 13 May 2022 13:54:02 +0200 Subject: [PATCH 14/46] drm/amdgpu: cleanup ctx implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let each context have a pointer to the ctx manager and properly initialize the adev pointer inside the context manager. Reduce the BUG_ON() in amdgpu_ctx_add_fence() into a WARN_ON() and directly return the sequence number instead of writing into a parameter.
Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Shashank Sharma <shashank.sharma@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 46 ++++++++++++------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 11 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 +- 4 files changed, 30 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index e552a2004868..84caab5e4d22 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1252,7 +1252,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, p->fence = dma_fence_get(&job->base.s_fence->finished); - amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq); + seq = amdgpu_ctx_add_fence(p->ctx, entity, p->fence); amdgpu_cs_post_dependencies(p); if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index c317078d1afd..a61e4c83a545 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -135,9 +135,9 @@ static enum amdgpu_ring_priority_level amdgpu_ctx_sched_prio_to_ring_prio(int32_ static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip) { - struct amdgpu_device *adev = ctx->adev; - int32_t ctx_prio; + struct amdgpu_device *adev = ctx->mgr->adev; unsigned int hw_prio; + int32_t ctx_prio; ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ? 
ctx->init_priority : ctx->override_priority; @@ -166,7 +166,7 @@ static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip) static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip, const u32 ring) { - struct amdgpu_device *adev = ctx->adev; + struct amdgpu_device *adev = ctx->mgr->adev; struct amdgpu_ctx_entity *entity; struct drm_gpu_scheduler **scheds = NULL, *sched = NULL; unsigned num_scheds = 0; @@ -220,10 +220,8 @@ error_free_entity: return r; } -static int amdgpu_ctx_init(struct amdgpu_device *adev, - int32_t priority, - struct drm_file *filp, - struct amdgpu_ctx *ctx) +static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, + struct drm_file *filp, struct amdgpu_ctx *ctx) { int r; @@ -233,15 +231,14 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, memset(ctx, 0, sizeof(*ctx)); - ctx->adev = adev; - kref_init(&ctx->refcount); + ctx->mgr = mgr; spin_lock_init(&ctx->ring_lock); mutex_init(&ctx->lock); - ctx->reset_counter = atomic_read(&adev->gpu_reset_counter); + ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter); ctx->reset_counter_query = ctx->reset_counter; - ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter); + ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter); ctx->init_priority = priority; ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET; ctx->stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE; @@ -266,7 +263,7 @@ static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity) static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx, u32 *stable_pstate) { - struct amdgpu_device *adev = ctx->adev; + struct amdgpu_device *adev = ctx->mgr->adev; enum amd_dpm_forced_level current_level; current_level = amdgpu_dpm_get_performance_level(adev); @@ -294,7 +291,7 @@ static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx, static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx, u32 stable_pstate) { - struct amdgpu_device *adev = ctx->adev; + 
struct amdgpu_device *adev = ctx->mgr->adev; enum amd_dpm_forced_level level; u32 current_stable_pstate; int r; @@ -345,7 +342,8 @@ done: static void amdgpu_ctx_fini(struct kref *ref) { struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount); - struct amdgpu_device *adev = ctx->adev; + struct amdgpu_ctx_mgr *mgr = ctx->mgr; + struct amdgpu_device *adev = mgr->adev; unsigned i, j, idx; if (!adev) @@ -421,7 +419,7 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, } *id = (uint32_t)r; - r = amdgpu_ctx_init(adev, priority, filp, ctx); + r = amdgpu_ctx_init(mgr, priority, filp, ctx); if (r) { idr_remove(&mgr->ctx_handles, *id); *id = 0; @@ -671,9 +669,9 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx) return 0; } -void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, - struct drm_sched_entity *entity, - struct dma_fence *fence, uint64_t *handle) +uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, + struct drm_sched_entity *entity, + struct dma_fence *fence) { struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity); uint64_t seq = centity->sequence; @@ -682,8 +680,7 @@ void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, idx = seq & (amdgpu_sched_jobs - 1); other = centity->fences[idx]; - if (other) - BUG_ON(!dma_fence_is_signaled(other)); + WARN_ON(other && !dma_fence_is_signaled(other)); dma_fence_get(fence); @@ -693,8 +690,7 @@ void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, spin_unlock(&ctx->ring_lock); dma_fence_put(other); - if (handle) - *handle = seq; + return seq; } struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, @@ -731,7 +727,7 @@ static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx, int hw_ip, int32_t priority) { - struct amdgpu_device *adev = ctx->adev; + struct amdgpu_device *adev = ctx->mgr->adev; unsigned int hw_prio; struct drm_gpu_scheduler **scheds = NULL; unsigned num_scheds; @@ -796,8 +792,10 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, return r; } -void amdgpu_ctx_mgr_init(struct 
amdgpu_ctx_mgr *mgr) +void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr, + struct amdgpu_device *adev) { + mgr->adev = adev; mutex_init(&mgr->lock); idr_init(&mgr->ctx_handles); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index 142f2f87d44c..681050bc828c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -40,7 +40,7 @@ struct amdgpu_ctx_entity { struct amdgpu_ctx { struct kref refcount; - struct amdgpu_device *adev; + struct amdgpu_ctx_mgr *mgr; unsigned reset_counter; unsigned reset_counter_query; uint32_t vram_lost_counter; @@ -70,9 +70,9 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx); int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance, u32 ring, struct drm_sched_entity **entity); -void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, - struct drm_sched_entity *entity, - struct dma_fence *fence, uint64_t *seq); +uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, + struct drm_sched_entity *entity, + struct dma_fence *fence); struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, struct drm_sched_entity *entity, uint64_t seq); @@ -85,7 +85,8 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, struct drm_sched_entity *entity); -void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr); +void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr, + struct amdgpu_device *adev); void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr); long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout); void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 497478f8a5d3..801f6fa692e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -1152,7 +1152,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file 
*file_priv) mutex_init(&fpriv->bo_list_lock); idr_init(&fpriv->bo_list_handles); - amdgpu_ctx_mgr_init(&fpriv->ctx_mgr); + amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev); file_priv->driver_priv = fpriv; goto out_suspend; From 842035543c0bfa35b1471e74094a107673815b01 Mon Sep 17 00:00:00 2001 From: Haohui Mai <ricetons@gmail.com> Date: Mon, 16 May 2022 23:06:35 -0700 Subject: [PATCH 15/46] drm/amdgpu: Set CP_HQD_PQ_CONTROL.RPTR_BLOCK_SIZE correctly Remove the accidental shifts on the values of RPTR_BLOCK_SIZE in gfx_v8-v11. The bug essentially always programs the corresponding fields to zero instead of the correct value. The hardware clamps the min value to 5 so this resulted in a value of 5 being programmed. Signed-off-by: Haohui Mai <ricetons@gmail.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 63fbe03283bf..dada7ab5d43f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -6898,7 +6898,7 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m, tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, (order_base_2(prop->queue_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, - ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); + (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); #ifdef __BIG_ENDIAN tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 8773cbd1f03b..8c0a3fc7aaa6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4082,7 +4082,7 @@ static int 
gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, (order_base_2(prop->queue_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, - ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); + (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 90f64219d291..fb9302910742 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4490,7 +4490,7 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, (order_base_2(ring->ring_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, - ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); + (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); #ifdef __BIG_ENDIAN tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index f49a2dd89ee7..f12ae6e2359a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3514,7 +3514,7 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, (order_base_2(ring->ring_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, - ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); + (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); #ifdef __BIG_ENDIAN tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); #endif From 10784fec9cbddad2ca6031b28c1ca1d041444dc5 Mon Sep 17 00:00:00 2001 From: Haohui Mai <ricetons@gmail.com> Date: Tue, 17 May 2022 04:24:38 -0700 Subject: [PATCH 16/46] 
drm/amdgpu/gfx10: rework KIQ programming Make sure the queue is no longer active before programming the kiq EOP registers. Signed-off-by: Haohui Mai <ricetons@gmail.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 27 +++++++++++++------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index dada7ab5d43f..02754ee86c81 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -6956,20 +6956,6 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring) /* disable wptr polling */ WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); - /* write the EOP addr */ - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, - mqd->cp_hqd_eop_base_addr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, - mqd->cp_hqd_eop_base_addr_hi); - - /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, - mqd->cp_hqd_eop_control); - - /* enable doorbell?
*/ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, - mqd->cp_hqd_pq_doorbell_control); - /* disable the queue if it's active */ if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); @@ -6988,6 +6974,19 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring) mqd->cp_hqd_pq_wptr_hi); } + /* disable doorbells */ + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); + + /* write the EOP addr */ + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, + mqd->cp_hqd_eop_base_addr_lo); + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, + mqd->cp_hqd_eop_base_addr_hi); + + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, + mqd->cp_hqd_eop_control); + /* set the pointer to the MQD */ WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); From a2b28708b645c5632dc93669ab06e97874c8244f Mon Sep 17 00:00:00 2001 From: Gong Yuanjun <ruc_gongyuanjun@163.com> Date: Tue, 17 May 2022 17:57:00 +0800 Subject: [PATCH 17/46] drm/radeon: fix a possible null pointer dereference In radeon_fp_native_mode(), the return value of drm_mode_duplicate() is assigned to mode, which will lead to a NULL pointer dereference on failure of drm_mode_duplicate(). Add a check to avoid npd. The failure status of drm_cvt_mode() on the other path is checked too. 
Signed-off-by: Gong Yuanjun <ruc_gongyuanjun@163.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/radeon/radeon_connectors.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index a16892c16f60..58db79921cd3 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -473,6 +473,8 @@ static struct drm_display_mode *radeon_fp_native_mode(struct drm_encoder *encode native_mode->vdisplay != 0 && native_mode->clock != 0) { mode = drm_mode_duplicate(dev, native_mode); + if (!mode) + return NULL; mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER; drm_mode_set_name(mode); @@ -487,6 +489,8 @@ static struct drm_display_mode *radeon_fp_native_mode(struct drm_encoder *encode * simpler. */ mode = drm_cvt_mode(dev, native_mode->hdisplay, native_mode->vdisplay, 60, true, false, false); + if (!mode) + return NULL; mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER; DRM_DEBUG_KMS("Adding cvt approximation of native panel mode %s\n", mode->name); } From d2f4460a3d9502513419f06cc376c7ade49d5753 Mon Sep 17 00:00:00 2001 From: Gong Yuanjun <ruc_gongyuanjun@163.com> Date: Tue, 17 May 2022 17:57:46 +0800 Subject: [PATCH 18/46] drm/amd/pm: fix a potential gpu_metrics_table memory leak gpu_metrics_table is allocated in yellow_carp_init_smc_tables() but not freed in yellow_carp_fini_smc_tables(). 
Signed-off-by: Gong Yuanjun <ruc_gongyuanjun@163.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c index 87257b1b028f..feff4f8c927c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c @@ -190,6 +190,9 @@ static int yellow_carp_fini_smc_tables(struct smu_context *smu) kfree(smu_table->watermarks_table); smu_table->watermarks_table = NULL; + kfree(smu_table->gpu_metrics_table); + smu_table->gpu_metrics_table = NULL; + return 0; } From 72063c71c39162d897c7c6f47fdc26425cfba03b Mon Sep 17 00:00:00 2001 From: Evan Quan <evan.quan@amd.com> Date: Thu, 5 May 2022 11:49:14 +0800 Subject: [PATCH 19/46] drm/amd/pm: enable more dpm features for SMU 13.0.0 Enable MP0CLK DPM and FW Dstate since they are already supported by latest 78.36.0 PMFW. 
Signed-off-by: Evan Quan <evan.quan@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 197a0e2ff063..7bfceca246ae 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -296,6 +296,9 @@ smu_v13_0_0_get_allowed_feature_mask(struct smu_context *smu, *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_BACO_BIT); + *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_MP0CLK_BIT); + *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_FW_DSTATE_BIT); + return 0; } From 704d6bf605faf65555438c2fa0282c02dca1a7b6 Mon Sep 17 00:00:00 2001 From: Evan Quan <evan.quan@amd.com> Date: Fri, 6 May 2022 17:36:06 +0800 Subject: [PATCH 20/46] drm/amd/pm: skip dpm disablement on suspend for SMU 13.0.0 Since PMFW will handle this properly. Driver involvement is unnecessary. 
Signed-off-by: Evan Quan <evan.quan@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 6016b325b6b5..a601024ba4de 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1436,6 +1436,7 @@ static int smu_disable_dpms(struct smu_context *smu) case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 5): case IP_VERSION(11, 0, 9): + case IP_VERSION(13, 0, 0): return 0; default: break; From 1c65e54881f3a56f16783b0b772501a8ddeb8c10 Mon Sep 17 00:00:00 2001 From: Evan Quan <evan.quan@amd.com> Date: Mon, 9 May 2022 11:42:23 +0800 Subject: [PATCH 21/46] drm/amd/pm: update SMU 13.0.0 driver_if header To align with 78.37.0 and later PMFWs. Signed-off-by: Evan Quan <evan.quan@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- .../inc/pmfw_if/smu13_driver_if_v13_0_0.h | 22 +++++++++---------- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 2 +- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h index ecc6411dfc8d..c1f76236da26 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h @@ -671,8 +671,8 @@ typedef struct { uint16_t reserved[2]; //Frequency changes - uint16_t GfxclkFmin; // MHz - uint16_t GfxclkFmax; // MHz + int16_t GfxclkFmin; // MHz + int16_t GfxclkFmax; // MHz uint16_t UclkFmin; // MHz uint16_t UclkFmax; // MHz @@ -683,15 +683,14 @@ typedef struct { //Fan control uint8_t FanLinearPwmPoints[NUM_OD_FAN_MAX_POINTS]; uint8_t FanLinearTempPoints[NUM_OD_FAN_MAX_POINTS]; 
- uint16_t FanMaximumRpm; uint16_t FanMinimumPwm; - uint16_t FanAcousticLimitRpm; + uint16_t AcousticTargetRpmThreshold; + uint16_t AcousticLimitRpmThreshold; uint16_t FanTargetTemperature; // Degree Celcius uint8_t FanZeroRpmEnable; uint8_t FanZeroRpmStopTemp; uint8_t FanMode; - uint8_t Padding[1]; - + uint8_t MaxOpTemp; uint32_t Spare[13]; uint32_t MmHubPadding[8]; // SMU internal use. Adding here instead of external as a workaround @@ -719,15 +718,14 @@ typedef struct { uint8_t FanLinearPwmPoints; uint8_t FanLinearTempPoints; - uint16_t FanMaximumRpm; uint16_t FanMinimumPwm; - uint16_t FanAcousticLimitRpm; + uint16_t AcousticTargetRpmThreshold; + uint16_t AcousticLimitRpmThreshold; uint16_t FanTargetTemperature; // Degree Celcius uint8_t FanZeroRpmEnable; uint8_t FanZeroRpmStopTemp; uint8_t FanMode; - uint8_t Padding[1]; - + uint8_t MaxOpTemp; uint32_t Spare[13]; @@ -997,7 +995,8 @@ typedef struct { uint16_t SocketPowerLimitAcTau[PPT_THROTTLER_COUNT]; // Time constant of LPF in ms uint16_t SocketPowerLimitDcTau[PPT_THROTTLER_COUNT]; // Time constant of LPF in ms - uint32_t SpareVmin[12]; + QuadraticInt_t Vmin_droop; + uint32_t SpareVmin[9]; //SECTION: DPM Configuration 1 @@ -1286,7 +1285,6 @@ typedef struct { uint32_t PostVoltageSetBacoDelay; // in microseconds. Amount of time FW will wait after power good is established or PSI0 command is issued uint32_t BacoEntryDelay; // in milliseconds. 
Amount of time FW will wait to trigger BACO entry after receiving entry notification from OS - // SECTION: Board Reserved uint32_t BoardSpare[64]; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index 2b44d41a5157..afa1991e26f9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -30,7 +30,7 @@ #define SMU13_DRIVER_IF_VERSION_ALDE 0x08 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x04 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04 -#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x27 +#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x28 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x28 #define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms From 6fd693817dcf07aed021b4196993822fad225664 Mon Sep 17 00:00:00 2001 From: Evan Quan <evan.quan@amd.com> Date: Tue, 10 May 2022 11:04:06 +0800 Subject: [PATCH 22/46] drm/amd/pm: correct the softpptable ids used for SMU 13.0.0 To better match with the pptable_id settings from VBIOS. Signed-off-by: Evan Quan <evan.quan@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 47 ++++++++++++++----- 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index ae6321af9d88..7be4f6875a7b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -218,13 +218,25 @@ int smu_v13_0_init_pptable_microcode(struct smu_context *smu) pptable_id == 3688) pptable_id = 36881; /* - * Temporary solution for SMU V13.0.0: - * - use 99991 signed pptable when SCPM enabled - * TODO: drop this when the pptable carried in vbios - * is ready. 
+ * Temporary solution for SMU V13.0.0 with SCPM enabled: + * - use 36831 signed pptable when pp_table_id is 3683 + * - use 36641 signed pptable when pp_table_id is 3664 or 0 + * TODO: drop these when the pptable carried in vbios is ready. */ - if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 0)) - pptable_id = 99991; + if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 0)) { + switch (pptable_id) { + case 0: + case 3664: + pptable_id = 36641; + break; + case 3683: + pptable_id = 36831; + break; + default: + dev_err(adev->dev, "Unsupported pptable id %d\n", pptable_id); + return -EINVAL; + } + } } /* "pptable_id == 0" means vbios carries the pptable. */ @@ -448,13 +460,24 @@ int smu_v13_0_setup_pptable(struct smu_context *smu) pptable_id = smu->smu_table.boot_values.pp_table_id; /* - * Temporary solution for SMU V13.0.0: - * - use 9999 unsigned pptable when SCPM disabled - * TODO: drop this when the pptable carried in vbios - * is ready. + * Temporary solution for SMU V13.0.0 with SCPM disabled: + * - use 3664 or 3683 on request + * - use 3664 when pptable_id is 0 + * TODO: drop these when the pptable carried in vbios is ready. */ - if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 0)) - pptable_id = 9999; + if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 0)) { + switch (pptable_id) { + case 0: + pptable_id = 3664; + break; + case 3664: + case 3683: + break; + default: + dev_err(adev->dev, "Unsupported pptable id %d\n", pptable_id); + return -EINVAL; + } + } } /* force using vbios pptable in sriov mode */ From 0aceb728f4e4790d80ce64e83bb8fad40693db13 Mon Sep 17 00:00:00 2001 From: Evan Quan <evan.quan@amd.com> Date: Tue, 10 May 2022 15:00:43 +0800 Subject: [PATCH 23/46] drm/amd/pm: enable more dpm features for SMU 13.0.0 Enable OOB Monitor and SOC CG which are ready since 78.38.0. 
Signed-off-by: Evan Quan <evan.quan@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 7bfceca246ae..7bb2923eb819 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -299,6 +299,9 @@ smu_v13_0_0_get_allowed_feature_mask(struct smu_context *smu, *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_MP0CLK_BIT); *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_FW_DSTATE_BIT); + *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_OUT_OF_BAND_MONITOR_BIT); + *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_SOC_CG_BIT); + return 0; } From 3670c46f07d13ba42bb66948fde68495078457ec Mon Sep 17 00:00:00 2001 From: Evan Quan <evan.quan@amd.com> Date: Fri, 13 May 2022 11:03:07 +0800 Subject: [PATCH 24/46] drm/amd/pm: enable memory temp reading for SMU 13.0.0 With the latest vbios, the memory temp reading is working. 
Signed-off-by: Evan Quan <evan.quan@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 7bb2923eb819..7432b3e76d3d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -275,9 +275,7 @@ smu_v13_0_0_get_allowed_feature_mask(struct smu_context *smu, *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_VDDIO_MEM_SCALING_BIT); } -#if 0 *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_MEM_TEMP_READ_BIT); -#endif if (adev->pm.pp_feature & PP_SCLK_DEEP_SLEEP_MASK) *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_GFXCLK_BIT); From 2c270d3e71ed0b68b2f75c0b15645fb023b4032c Mon Sep 17 00:00:00 2001 From: Dan Carpenter <dan.carpenter@oracle.com> Date: Wed, 18 May 2022 20:38:36 +0300 Subject: [PATCH 25/46] drm/amdgpu/pm: smu_v13_0_4: delete duplicate condition There is no need to check if "clock_ranges' is non-NULL. It is checked already on the line before. 
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c | 62 +++++++++---------- 1 file changed, 30 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c index 7d6ff141b43f..5a17b51aa0f9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c @@ -644,42 +644,40 @@ static int smu_v13_0_4_set_watermarks_table(struct smu_context *smu, if (!table || !clock_ranges) return -EINVAL; - if (clock_ranges) { - if (clock_ranges->num_reader_wm_sets > NUM_WM_RANGES || - clock_ranges->num_writer_wm_sets > NUM_WM_RANGES) - return -EINVAL; + if (clock_ranges->num_reader_wm_sets > NUM_WM_RANGES || + clock_ranges->num_writer_wm_sets > NUM_WM_RANGES) + return -EINVAL; - for (i = 0; i < clock_ranges->num_reader_wm_sets; i++) { - table->WatermarkRow[WM_DCFCLK][i].MinClock = - clock_ranges->reader_wm_sets[i].min_drain_clk_mhz; - table->WatermarkRow[WM_DCFCLK][i].MaxClock = - clock_ranges->reader_wm_sets[i].max_drain_clk_mhz; - table->WatermarkRow[WM_DCFCLK][i].MinMclk = - clock_ranges->reader_wm_sets[i].min_fill_clk_mhz; - table->WatermarkRow[WM_DCFCLK][i].MaxMclk = - clock_ranges->reader_wm_sets[i].max_fill_clk_mhz; + for (i = 0; i < clock_ranges->num_reader_wm_sets; i++) { + table->WatermarkRow[WM_DCFCLK][i].MinClock = + clock_ranges->reader_wm_sets[i].min_drain_clk_mhz; + table->WatermarkRow[WM_DCFCLK][i].MaxClock = + clock_ranges->reader_wm_sets[i].max_drain_clk_mhz; + table->WatermarkRow[WM_DCFCLK][i].MinMclk = + clock_ranges->reader_wm_sets[i].min_fill_clk_mhz; + table->WatermarkRow[WM_DCFCLK][i].MaxMclk = + clock_ranges->reader_wm_sets[i].max_fill_clk_mhz; - table->WatermarkRow[WM_DCFCLK][i].WmSetting = - clock_ranges->reader_wm_sets[i].wm_inst; - } - - for (i = 0; i < clock_ranges->num_writer_wm_sets; i++) { - 
table->WatermarkRow[WM_SOCCLK][i].MinClock = - clock_ranges->writer_wm_sets[i].min_fill_clk_mhz; - table->WatermarkRow[WM_SOCCLK][i].MaxClock = - clock_ranges->writer_wm_sets[i].max_fill_clk_mhz; - table->WatermarkRow[WM_SOCCLK][i].MinMclk = - clock_ranges->writer_wm_sets[i].min_drain_clk_mhz; - table->WatermarkRow[WM_SOCCLK][i].MaxMclk = - clock_ranges->writer_wm_sets[i].max_drain_clk_mhz; - - table->WatermarkRow[WM_SOCCLK][i].WmSetting = - clock_ranges->writer_wm_sets[i].wm_inst; - } - - smu->watermarks_bitmap |= WATERMARKS_EXIST; + table->WatermarkRow[WM_DCFCLK][i].WmSetting = + clock_ranges->reader_wm_sets[i].wm_inst; } + for (i = 0; i < clock_ranges->num_writer_wm_sets; i++) { + table->WatermarkRow[WM_SOCCLK][i].MinClock = + clock_ranges->writer_wm_sets[i].min_fill_clk_mhz; + table->WatermarkRow[WM_SOCCLK][i].MaxClock = + clock_ranges->writer_wm_sets[i].max_fill_clk_mhz; + table->WatermarkRow[WM_SOCCLK][i].MinMclk = + clock_ranges->writer_wm_sets[i].min_drain_clk_mhz; + table->WatermarkRow[WM_SOCCLK][i].MaxMclk = + clock_ranges->writer_wm_sets[i].max_drain_clk_mhz; + + table->WatermarkRow[WM_SOCCLK][i].WmSetting = + clock_ranges->writer_wm_sets[i].wm_inst; + } + + smu->watermarks_bitmap |= WATERMARKS_EXIST; + /* pass data to smu controller */ if ((smu->watermarks_bitmap & WATERMARKS_EXIST) && !(smu->watermarks_bitmap & WATERMARKS_LOADED)) { From 950d64250fba5fbb8c290f692de8ffdee380726c Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" <Stanley.Yang@amd.com> Date: Wed, 27 Apr 2022 12:16:51 +0800 Subject: [PATCH 26/46] drm/amdgpu: support ras on SRIOV support umc/gfx/sdma ras on guest side Changed from V1: move sriov judgment in amdgpu_ras_interrupt_fatal_error_handler Signed-off-by: Stanley.Yang <Stanley.Yang@amd.com> Reviewed-by: Tao Zhou <tao.zhou1@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 40 ++++++++++++++++------ 
drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 4 +++ drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 9 +++-- 4 files changed, 43 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9af8d7a1d011..c45736a902fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5219,6 +5219,10 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ r = amdgpu_device_reset_sriov(adev, job ? false : true); if (r) adev->asic_reset_res = r; + + /* Aldebaran supports ras in SRIOV, so need resume ras during reset */ + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) + amdgpu_ras_resume(adev); } else { r = amdgpu_do_asic_reset(device_list_handle, &reset_context); if (r && r == -EAGAIN) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 035891ec59d5..2de9309a4193 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -726,7 +726,9 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, /* Do not enable if it is not allowed. 
*/ WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head)); - if (!amdgpu_ras_intr_triggered()) { + /* Only enable ras feature operation handle on host side */ + if (!amdgpu_sriov_vf(adev) && + !amdgpu_ras_intr_triggered()) { ret = psp_ras_enable_features(&adev->psp, info, enable); if (ret) { dev_err(adev->dev, "ras %s %s failed poison:%d ret:%d\n", @@ -1523,7 +1525,9 @@ static int amdgpu_ras_fs_fini(struct amdgpu_device *adev) */ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev) { - if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF)) + /* Fatal error events are handled on host side */ + if (amdgpu_sriov_vf(adev) || + !amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF)) return; if (adev->nbio.ras && @@ -2270,10 +2274,14 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) { adev->ras_hw_enabled = adev->ras_enabled = 0; - if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw || + if (!adev->is_atom_fw || !amdgpu_ras_asic_supported(adev)) return; + if (!(amdgpu_sriov_vf(adev) && + (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2)))) + return; + if (!adev->gmc.xgmi.connected_to_cpu) { if (amdgpu_atomfirmware_mem_ecc_supported(adev)) { dev_info(adev->dev, "MEM ECC is active.\n"); @@ -2285,15 +2293,21 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) if (amdgpu_atomfirmware_sram_ecc_supported(adev)) { dev_info(adev->dev, "SRAM ECC is active.\n"); - adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC | - 1 << AMDGPU_RAS_BLOCK__DF); + if (!amdgpu_sriov_vf(adev)) { + adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC | + 1 << AMDGPU_RAS_BLOCK__DF); - if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0)) - adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN | - 1 << AMDGPU_RAS_BLOCK__JPEG); - else - adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN | - 1 << AMDGPU_RAS_BLOCK__JPEG); + if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0)) + adev->ras_hw_enabled |= (1 << 
AMDGPU_RAS_BLOCK__VCN | + 1 << AMDGPU_RAS_BLOCK__JPEG); + else + adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN | + 1 << AMDGPU_RAS_BLOCK__JPEG); + } else { + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__PCIE_BIF | + 1 << AMDGPU_RAS_BLOCK__SDMA | + 1 << AMDGPU_RAS_BLOCK__GFX); + } } else { dev_info(adev->dev, "SRAM ECC is not presented.\n"); } @@ -2637,6 +2651,10 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev) struct amdgpu_ras_block_object *obj; int r; + /* Guest side doesn't need init ras feature */ + if (amdgpu_sriov_vf(adev)) + return 0; + list_for_each_entry_safe(node, tmp, &adev->ras_list, node) { if (!node->ras_obj) { dev_warn(adev->dev, "Warning: abnormal ras list node.\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 8e221a1ba937..42c1f050542f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -124,6 +124,10 @@ int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + + if (amdgpu_sriov_vf(adev)) + return AMDGPU_RAS_SUCCESS; + amdgpu_ras_reset_gpu(adev); return AMDGPU_RAS_SUCCESS; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index d6d79e97def9..18014ed0e853 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -85,9 +85,12 @@ static int psp_v13_0_init_microcode(struct psp_context *psp) err = psp_init_sos_microcode(psp, chip_name); if (err) return err; - err = psp_init_ta_microcode(&adev->psp, chip_name); - if (err) - return err; + /* It's not necessary to load ras ta on Guest side */ + if (!amdgpu_sriov_vf(adev)) { + err = psp_init_ta_microcode(&adev->psp, chip_name); + if (err) + return err; + } break; case IP_VERSION(13, 0, 1): case IP_VERSION(13, 0, 3): From 4d33e7040d70b50f1fb564f7020644ec5b45d6b7 Mon Sep 17 00:00:00 2001 From: Sunil Khatri 
<sunil.khatri@amd.com> Date: Tue, 17 May 2022 11:27:11 +0530 Subject: [PATCH 27/46] drm/amdgpu: move amdgpu_gmc_tmz_set after ip_version populated To enable TMZ feature based on IP version needs adev->ip_version populated but its empty. Move amdgpu_gmc_tmz_set to a place where ip_version is populated. Signed-off-by: Sunil Khatri <sunil.khatri@amd.com> Reviewed-by: Alexander Deucher <Alexander.Deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index c45736a902fb..625424f3082b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1556,9 +1556,6 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); - amdgpu_gmc_tmz_set(adev); - - return 0; } @@ -3701,6 +3698,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) return r; + /* Enable TMZ based on IP_VERSION */ + amdgpu_gmc_tmz_set(adev); + amdgpu_gmc_noretry_set(adev); /* Need to get xgmi info early to decide the reset behavior*/ if (adev->gmc.xgmi.supported) { From 0ef3dc7e97884a861db4cb3dfd721db71edb0236 Mon Sep 17 00:00:00 2001 From: Sunil Khatri <sunil.khatri@amd.com> Date: Tue, 17 May 2022 11:28:46 +0530 Subject: [PATCH 28/46] drm/amdgpu: change code name to ip version for tmz set Use IP version rather then code name of IPs for tmz set. 
Signed-off-by: Sunil Khatri <sunil.khatri@amd.com> Reviewed-by: Alexander Deucher <Alexander.Deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 27 ++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 88b852b3a2cb..7e55ee61f84c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -512,9 +512,12 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) */ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) { - switch (adev->asic_type) { - case CHIP_RAVEN: - case CHIP_RENOIR: + switch (adev->ip_versions[GC_HWIP][0]) { + /* RAVEN */ + case IP_VERSION(9, 2, 2): + case IP_VERSION(9, 1, 0): + /* RENOIR looks like RAVEN */ + case IP_VERSION(9, 3, 0): if (amdgpu_tmz == 0) { adev->gmc.tmz_enabled = false; dev_info(adev->dev, @@ -525,12 +528,18 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) "Trusted Memory Zone (TMZ) feature enabled\n"); } break; - case CHIP_NAVI10: - case CHIP_NAVI14: - case CHIP_NAVI12: - case CHIP_VANGOGH: - case CHIP_YELLOW_CARP: - case CHIP_IP_DISCOVERY: + case IP_VERSION(10, 1, 10): + case IP_VERSION(10, 1, 1): + case IP_VERSION(10, 1, 2): + case IP_VERSION(10, 1, 3): + case IP_VERSION(10, 3, 0): + case IP_VERSION(10, 3, 2): + case IP_VERSION(10, 3, 4): + case IP_VERSION(10, 3, 5): + /* VANGOGH */ + case IP_VERSION(10, 3, 1): + /* YELLOW_CARP*/ + case IP_VERSION(10, 3, 3): /* Don't enable it by default yet. */ if (amdgpu_tmz < 1) { From 49b74d12d1e02fc67b2854a593e589372d894e62 Mon Sep 17 00:00:00 2001 From: Sunil Khatri <sunil.khatri@amd.com> Date: Tue, 17 May 2022 11:33:45 +0530 Subject: [PATCH 29/46] drm/amdgpu: add support of tmz for GC 10.3.7 Add support of IP GC 10.3.7 in amdgpu_gmc_tmz_set. 
Signed-off-by: Sunil Khatri <sunil.khatri@amd.com> Reviewed-by: Alexander Deucher <Alexander.Deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 7e55ee61f84c..798c56214a23 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -540,6 +540,8 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): /* YELLOW_CARP*/ case IP_VERSION(10, 3, 3): + /* GC 10.3.7 */ + case IP_VERSION(10, 3, 7): /* Don't enable it by default yet. */ if (amdgpu_tmz < 1) { From b0f4d663fce6a4232d3c20ce820f919111b1c60b Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Thu, 19 May 2022 10:50:25 +0530 Subject: [PATCH 30/46] drm/amd/pm: Fix missing thermal throttler status On aldebaran, when thermal throttling happens due to excessive GPU temperature, the reason for throttling event is missed in warning message. This patch fixes it. 
Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Yang Wang <kevinyang.wang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 38af648cb857..fb130409309c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1666,6 +1666,7 @@ static const struct throttling_logging_label { uint32_t feature_mask; const char *label; } logging_label[] = { + {(1U << THROTTLER_TEMP_GPU_BIT), "GPU"}, {(1U << THROTTLER_TEMP_MEM_BIT), "HBM"}, {(1U << THROTTLER_TEMP_VR_GFX_BIT), "VR of GFX rail"}, {(1U << THROTTLER_TEMP_VR_MEM_BIT), "VR of HBM rail"}, From 6880ed280edf292c542aa87567547ffb9c222597 Mon Sep 17 00:00:00 2001 From: Alan Liu <HaoPing.Liu@amd.com> Date: Tue, 17 May 2022 22:30:00 +0800 Subject: [PATCH 31/46] drm/amd/display: Add HDMI_ACP_SEND register Define HDMI_ACP_SEND register shift/mask. 
Signed-off-by: Alan Liu <HaoPing.Liu@amd.com> Reviewed-by: Harry Wentland <harry.wentland@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/include/asic_reg/dce/dce_10_0_sh_mask.h | 2 ++ drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_0_sh_mask.h | 2 ++ drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_2_sh_mask.h | 2 ++ drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h | 2 ++ drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h | 2 ++ drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h | 2 ++ drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h | 2 ++ drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h | 4 ++-- drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h | 2 ++ 9 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_10_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_10_0_sh_mask.h index c755f43aaaf8..7a2c6b12c249 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_10_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_10_0_sh_mask.h @@ -6070,6 +6070,8 @@ #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x200 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9 +#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x1000 +#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x3f0000 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10 #define HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND_MASK 0x1 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_0_sh_mask.h index 14a3bacfcfd1..fa1f4374fafe 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_0_sh_mask.h @@ -6058,6 +6058,8 @@ #define 
HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x200 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9 +#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x1000 +#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x3f0000 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10 #define HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND_MASK 0x1 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_2_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_2_sh_mask.h index 106094ed0661..39f6fde6db1d 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_2_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_2_sh_mask.h @@ -7142,6 +7142,8 @@ #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x200 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9 +#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x1000 +#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x3f0000 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10 #define HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND_MASK 0x1 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h index bcd190a3fcdd..c5f4afac3b39 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h @@ -37285,12 +37285,14 @@ #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT 0x5 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9 +#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK 
0x00000001L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_SEND_MASK 0x00000010L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK 0x00000020L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK 0x00000100L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x00000200L +#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x00001000L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x003F0000L //DIG0_HDMI_INFOFRAME_CONTROL0 #define DIG0_HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND__SHIFT 0x0 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h index 9b6825b74cc1..23580907663b 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h @@ -5584,6 +5584,8 @@ #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x200 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9 +#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x1000 +#define HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x3f0000 #define HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10 #define HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND_MASK 0x1 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h index e7c0cad41081..a788ff3b68c0 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h @@ -30357,12 +30357,14 @@ #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT 0x5 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9 +#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10 #define 
DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK 0x00000001L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_SEND_MASK 0x00000010L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK 0x00000020L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK 0x00000100L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x00000200L +#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x00001000L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x003F0000L //DIG0_HDMI_INFOFRAME_CONTROL0 #define DIG0_HDMI_INFOFRAME_CONTROL0__HDMI_AUDIO_INFO_SEND__SHIFT 0x4 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h index dc8ce7aaa0cf..c70f7ba94d8f 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h @@ -39439,12 +39439,14 @@ #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT 0x5 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9 +#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK 0x00000001L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_SEND_MASK 0x00000010L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK 0x00000020L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK 0x00000100L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x00000200L +#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x00001000L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x003F0000L //DIG0_HDMI_INFOFRAME_CONTROL0 #define DIG0_HDMI_INFOFRAME_CONTROL0__HDMI_AUDIO_INFO_SEND__SHIFT 0x4 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h index 91969554e36a..ca1e1eb39256 100755 
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h @@ -16956,7 +16956,7 @@ #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT 0x5 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9 - +#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK 0x00000001L @@ -16964,7 +16964,7 @@ #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK 0x00000020L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK 0x00000100L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x00000200L - +#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x00001000L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x003F0000L //DIG0_HDMI_INFOFRAME_CONTROL0 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h index 2f780aefc722..6104ae304099 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h @@ -35487,12 +35487,14 @@ #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT__SHIFT 0x5 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND__SHIFT 0x8 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT__SHIFT 0x9 +#define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND__SHIFT 0xc #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE__SHIFT 0x10 #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK 0x00000001L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_SEND_MASK 0x00000010L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK 0x00000020L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_SEND_MASK 0x00000100L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_CONT_MASK 0x00000200L +#define 
DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ACP_SEND_MASK 0x00001000L #define DIG0_HDMI_VBI_PACKET_CONTROL__HDMI_ISRC_LINE_MASK 0x003F0000L //DIG0_HDMI_INFOFRAME_CONTROL0 #define DIG0_HDMI_INFOFRAME_CONTROL0__HDMI_AUDIO_INFO_SEND__SHIFT 0x4 From 5e613723f804658feb689be1b3cb88ceeed234d3 Mon Sep 17 00:00:00 2001 From: Eric Huang <JinhuiEric.Huang@amd.com> Date: Mon, 16 May 2022 14:22:38 -0400 Subject: [PATCH 32/46] drm/amdkfd: port cwsr trap handler from dkms branch Most of changes are for debugger feature, and it is to simplify trap handler support for new asics in the future. Signed-off-by: Eric Huang <jinhuieric.huang@amd.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 2649 +++++++++-------- .../amd/amdkfd/cwsr_trap_handler_gfx10.asm | 333 ++- .../drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 248 +- 3 files changed, 1663 insertions(+), 1567 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index 475f89700c74..8cbdc7f519c6 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -166,7 +166,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = { 0x807c847c, 0x806eff6e, 0x00000400, 0xbf0a757c, 0xbf85ffef, 0xbf9c0000, - 0xbf8200cd, 0xbef8007e, + 0xbf8200ce, 0xbef8007e, 0x8679ff7f, 0x0000ffff, 0x8779ff79, 0x00040000, 0xbefa0080, 0xbefb00ff, @@ -212,304 +212,310 @@ static const uint32_t cwsr_trap_gfx8_hex[] = { 0x761e0000, 0xe0524100, 0x761e0100, 0xe0524200, 0x761e0200, 0xe0524300, - 0x761e0300, 0xb8f22a05, - 0x80728172, 0x8e728a72, - 0xb8f61605, 0x80768176, - 0x8e768676, 0x80727672, - 0x80f2c072, 0xb8f31605, - 0x80738173, 0x8e738473, - 0x8e7a8273, 0xbefa00ff, - 0x01000000, 0xbefc0073, - 0xc031003c, 0x00000072, - 0x80f2c072, 0xbf8c007f, - 0x80fc907c, 0xbe802d00, - 0xbe822d02, 0xbe842d04, - 0xbe862d06, 0xbe882d08, - 0xbe8a2d0a, 0xbe8c2d0c, - 
0xbe8e2d0e, 0xbf06807c, - 0xbf84fff1, 0xb8f22a05, - 0x80728172, 0x8e728a72, - 0xb8f61605, 0x80768176, - 0x8e768676, 0x80727672, - 0xbefa0084, 0xbefa00ff, - 0x01000000, 0xc0211cfc, + 0x761e0300, 0xbf8c0f70, + 0xb8f22a05, 0x80728172, + 0x8e728a72, 0xb8f61605, + 0x80768176, 0x8e768676, + 0x80727672, 0x80f2c072, + 0xb8f31605, 0x80738173, + 0x8e738473, 0x8e7a8273, + 0xbefa00ff, 0x01000000, + 0xbefc0073, 0xc031003c, + 0x00000072, 0x80f2c072, + 0xbf8c007f, 0x80fc907c, + 0xbe802d00, 0xbe822d02, + 0xbe842d04, 0xbe862d06, + 0xbe882d08, 0xbe8a2d0a, + 0xbe8c2d0c, 0xbe8e2d0e, + 0xbf06807c, 0xbf84fff1, + 0xb8f22a05, 0x80728172, + 0x8e728a72, 0xb8f61605, + 0x80768176, 0x8e768676, + 0x80727672, 0xbefa0084, + 0xbefa00ff, 0x01000000, + 0xc0211cfc, 0x00000072, + 0x80728472, 0xc0211c3c, 0x00000072, 0x80728472, - 0xc0211c3c, 0x00000072, - 0x80728472, 0xc0211c7c, + 0xc0211c7c, 0x00000072, + 0x80728472, 0xc0211bbc, 0x00000072, 0x80728472, - 0xc0211bbc, 0x00000072, - 0x80728472, 0xc0211bfc, + 0xc0211bfc, 0x00000072, + 0x80728472, 0xc0211d3c, 0x00000072, 0x80728472, - 0xc0211d3c, 0x00000072, - 0x80728472, 0xc0211d7c, + 0xc0211d7c, 0x00000072, + 0x80728472, 0xc0211a3c, 0x00000072, 0x80728472, - 0xc0211a3c, 0x00000072, - 0x80728472, 0xc0211a7c, + 0xc0211a7c, 0x00000072, + 0x80728472, 0xc0211dfc, 0x00000072, 0x80728472, - 0xc0211dfc, 0x00000072, - 0x80728472, 0xc0211b3c, + 0xc0211b3c, 0x00000072, + 0x80728472, 0xc0211b7c, 0x00000072, 0x80728472, - 0xc0211b7c, 0x00000072, - 0x80728472, 0xbf8c007f, - 0xbefc0073, 0xbefe006e, - 0xbeff006f, 0x867375ff, - 0x000003ff, 0xb9734803, - 0x867375ff, 0xfffff800, - 0x8f738b73, 0xb973a2c3, - 0xb977f801, 0x8673ff71, - 0xf0000000, 0x8f739c73, - 0x8e739073, 0xbef60080, - 0x87767376, 0x8673ff71, - 0x08000000, 0x8f739b73, - 0x8e738f73, 0x87767376, - 0x8673ff74, 0x00800000, - 0x8f739773, 0xb976f807, - 0x8671ff71, 0x0000ffff, - 0x86fe7e7e, 0x86ea6a6a, - 0x8f768374, 0xb976e0c2, - 0xbf800002, 0xb9740002, - 0xbf8a0000, 0x95807370, - 0xbf810000, 0x00000000, + 
0xbf8c007f, 0xbefc0073, + 0xbefe006e, 0xbeff006f, + 0x867375ff, 0x000003ff, + 0xb9734803, 0x867375ff, + 0xfffff800, 0x8f738b73, + 0xb973a2c3, 0xb977f801, + 0x8673ff71, 0xf0000000, + 0x8f739c73, 0x8e739073, + 0xbef60080, 0x87767376, + 0x8673ff71, 0x08000000, + 0x8f739b73, 0x8e738f73, + 0x87767376, 0x8673ff74, + 0x00800000, 0x8f739773, + 0xb976f807, 0x8671ff71, + 0x0000ffff, 0x86fe7e7e, + 0x86ea6a6a, 0x8f768374, + 0xb976e0c2, 0xbf800002, + 0xb9740002, 0xbf8a0000, + 0x95807370, 0xbf810000, }; static const uint32_t cwsr_trap_gfx9_hex[] = { - 0xbf820001, 0xbf820248, - 0xb8f8f802, 0x89788678, - 0xb8eef801, 0x866eff6e, - 0x00000800, 0xbf840003, + 0xbf820001, 0xbf820254, + 0xb8f8f802, 0x8978ff78, + 0x00020006, 0xb8fbf803, 0x866eff78, 0x00002000, - 0xbf840016, 0xb8fbf803, + 0xbf840009, 0x866eff6d, + 0x00ff0000, 0xbf85001e, 0x866eff7b, 0x00000400, - 0xbf85003b, 0x866eff7b, - 0x00000800, 0xbf850003, - 0x866eff7b, 0x00000100, - 0xbf84000c, 0x866eff78, - 0x00002000, 0xbf840005, - 0xbf8e0010, 0xb8eef803, - 0x866eff6e, 0x00000400, - 0xbf84fffb, 0x8778ff78, - 0x00002000, 0x80ec886c, - 0x82ed806d, 0xb8eef807, - 0x866fff6e, 0x001f8000, - 0x8e6f8b6f, 0x8977ff77, - 0xfc000000, 0x87776f77, - 0x896eff6e, 0x001f8000, - 0xb96ef807, 0xb8faf812, + 0xbf850051, 0xbf8e0010, + 0xb8fbf803, 0xbf82fffa, + 0x866eff7b, 0x00000900, + 0xbf850015, 0x866eff7b, + 0x000071ff, 0xbf840008, + 0x866fff7b, 0x00007080, + 0xbf840001, 0xbeee1a87, + 0xb8eff801, 0x8e6e8c6e, + 0x866e6f6e, 0xbf85000a, + 0x866eff6d, 0x00ff0000, + 0xbf850007, 0xb8eef801, + 0x866eff6e, 0x00000800, + 0xbf850003, 0x866eff7b, + 0x00000400, 0xbf850036, + 0xb8faf807, 0x867aff7a, + 0x001f8000, 0x8e7a8b7a, + 0x8977ff77, 0xfc000000, + 0x87777a77, 0xba7ff807, + 0x00000000, 0xb8faf812, 0xb8fbf813, 0x8efa887a, - 0xc0071bbd, 0x00000000, - 0xbf8cc07f, 0xc0071ebd, - 0x00000008, 0xbf8cc07f, - 0x86ee6e6e, 0xbf840001, - 0xbe801d6e, 0xb8fbf803, - 0x867bff7b, 0x000001ff, + 0xc0031bbd, 0x00000010, + 0xbf8cc07f, 0x8e6e976e, + 0x8977ff77, 0x00800000, + 
0x87776e77, 0xc0071bbd, + 0x00000000, 0xbf8cc07f, + 0xc0071ebd, 0x00000008, + 0xbf8cc07f, 0x86ee6e6e, + 0xbf840001, 0xbe801d6e, + 0x866eff6d, 0x01ff0000, + 0xbf850005, 0x8778ff78, + 0x00002000, 0x80ec886c, + 0x82ed806d, 0xbf820005, + 0x866eff6d, 0x01000000, 0xbf850002, 0x806c846c, 0x826d806d, 0x866dff6d, - 0x0000ffff, 0x8f6e8b77, - 0x866eff6e, 0x001f8000, - 0xb96ef807, 0x86fe7e7e, + 0x0000ffff, 0x8f7a8b77, + 0x867aff7a, 0x001f8000, + 0xb97af807, 0x86fe7e7e, 0x86ea6a6a, 0x8f6e8378, 0xb96ee0c2, 0xbf800002, 0xb9780002, 0xbe801f6c, 0x866dff6d, 0x0000ffff, 0xbefa0080, 0xb97a0283, - 0xb8fa2407, 0x8e7a9b7a, - 0x876d7a6d, 0xb8fa03c7, - 0x8e7a9a7a, 0x876d7a6d, 0xb8faf807, 0x867aff7a, - 0x00007fff, 0xb97af807, - 0xbeee007e, 0xbeef007f, - 0xbefe0180, 0xbf900004, - 0x877a8478, 0xb97af802, - 0xbf8e0002, 0xbf88fffe, - 0xb8fa2a05, 0x807a817a, - 0x8e7a8a7a, 0xb8fb1605, - 0x807b817b, 0x8e7b867b, - 0x807a7b7a, 0x807a7e7a, - 0x827b807f, 0x867bff7b, - 0x0000ffff, 0xc04b1c3d, - 0x00000050, 0xbf8cc07f, - 0xc04b1d3d, 0x00000060, - 0xbf8cc07f, 0xc0431e7d, - 0x00000074, 0xbf8cc07f, - 0xbef4007e, 0x8675ff7f, - 0x0000ffff, 0x8775ff75, - 0x00040000, 0xbef60080, - 0xbef700ff, 0x00807fac, - 0x867aff7f, 0x08000000, - 0x8f7a837a, 0x87777a77, - 0x867aff7f, 0x70000000, - 0x8f7a817a, 0x87777a77, - 0xbef1007c, 0xbef00080, - 0xb8f02a05, 0x80708170, - 0x8e708a70, 0xb8fa1605, - 0x807a817a, 0x8e7a867a, - 0x80707a70, 0xbef60084, - 0xbef600ff, 0x01000000, - 0xbefe007c, 0xbefc0070, - 0xc0611c7a, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611b3a, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, - 0xbefe007c, 0xbefc0070, - 0xc0611b7a, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611bba, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, - 0xbefe007c, 0xbefc0070, - 0xc0611bfa, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611e3a, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, - 0xb8fbf803, 
0xbefe007c, - 0xbefc0070, 0xc0611efa, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, - 0xbefe007c, 0xbefc0070, - 0xc0611a3a, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611a7a, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, - 0xb8f1f801, 0xbefe007c, - 0xbefc0070, 0xc0611c7a, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, - 0x867aff7f, 0x04000000, - 0xbeef0080, 0x876f6f7a, - 0xb8f02a05, 0x80708170, - 0x8e708a70, 0xb8fb1605, - 0x807b817b, 0x8e7b847b, - 0x8e76827b, 0xbef600ff, - 0x01000000, 0xbef20174, - 0x80747074, 0x82758075, - 0xbefc0080, 0xbf800000, - 0xbe802b00, 0xbe822b02, - 0xbe842b04, 0xbe862b06, - 0xbe882b08, 0xbe8a2b0a, - 0xbe8c2b0c, 0xbe8e2b0e, - 0xc06b003a, 0x00000000, - 0xbf8cc07f, 0xc06b013a, - 0x00000010, 0xbf8cc07f, - 0xc06b023a, 0x00000020, - 0xbf8cc07f, 0xc06b033a, - 0x00000030, 0xbf8cc07f, - 0x8074c074, 0x82758075, - 0x807c907c, 0xbf0a7b7c, - 0xbf85ffe7, 0xbef40172, - 0xbef00080, 0xbefe00c1, - 0xbeff00c1, 0xbee80080, - 0xbee90080, 0xbef600ff, - 0x01000000, 0x867aff78, - 0x00400000, 0xbf850003, - 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf85004d, - 0xbe840080, 0xd2890000, - 0x00000900, 0x80048104, - 0xd2890001, 0x00000900, - 0x80048104, 0xd2890002, - 0x00000900, 0x80048104, - 0xd2890003, 0x00000900, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000901, - 0x80048104, 0xd2890001, - 0x00000901, 0x80048104, - 0xd2890002, 0x00000901, - 0x80048104, 0xd2890003, - 0x00000901, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbe840080, 0xd2890000, - 0x00000902, 0x80048104, - 0xd2890001, 0x00000902, - 0x80048104, 0xd2890002, - 0x00000902, 0x80048104, - 0xd2890003, 0x00000902, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000903, - 0x80048104, 0xd2890001, - 0x00000903, 0x80048104, - 0xd2890002, 0x00000903, - 
0x80048104, 0xd2890003, - 0x00000903, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbf820008, 0xe0724000, - 0x701d0000, 0xe0724100, - 0x701d0100, 0xe0724200, - 0x701d0200, 0xe0724300, - 0x701d0300, 0xbefe00c1, - 0xbeff00c1, 0xb8fb4306, - 0x867bc17b, 0xbf840063, - 0xbf8a0000, 0x867aff6f, - 0x04000000, 0xbf84005f, - 0x8e7b867b, 0x8e7b827b, - 0xbef6007b, 0xb8f02a05, + 0x001f8000, 0x8e7a8b7a, + 0x8977ff77, 0xfc000000, + 0x87777a77, 0xba7ff807, + 0x00000000, 0xbeee007e, + 0xbeef007f, 0xbefe0180, + 0xbf900004, 0x877a8478, + 0xb97af802, 0xbf8e0002, + 0xbf88fffe, 0xb8fa2a05, + 0x807a817a, 0x8e7a8a7a, + 0xb8fb1605, 0x807b817b, + 0x8e7b867b, 0x807a7b7a, + 0x807a7e7a, 0x827b807f, + 0x867bff7b, 0x0000ffff, + 0xc04b1c3d, 0x00000050, + 0xbf8cc07f, 0xc04b1d3d, + 0x00000060, 0xbf8cc07f, + 0xc0431e7d, 0x00000074, + 0xbf8cc07f, 0xbef4007e, + 0x8675ff7f, 0x0000ffff, + 0x8775ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x00807fac, 0xbef1007c, + 0xbef00080, 0xb8f02a05, 0x80708170, 0x8e708a70, 0xb8fa1605, 0x807a817a, 0x8e7a867a, 0x80707a70, - 0x8070ff70, 0x00000080, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xbefe007c, + 0xbefc0070, 0xc0611c7a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611b3a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611b7a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611bba, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611bfa, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611e3a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xb8fbf803, + 0xbefe007c, 0xbefc0070, + 0xc0611efa, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611a3a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611a7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xb8f1f801, + 
0xbefe007c, 0xbefc0070, + 0xc0611c7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0x867aff7f, + 0x04000000, 0xbeef0080, + 0x876f6f7a, 0xb8f02a05, + 0x80708170, 0x8e708a70, + 0xb8fb1605, 0x807b817b, + 0x8e7b847b, 0x8e76827b, 0xbef600ff, 0x01000000, - 0xbefc0080, 0xd28c0002, - 0x000100c1, 0xd28d0003, - 0x000204c1, 0x867aff78, - 0x00400000, 0xbf850003, - 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf850030, - 0x24040682, 0xd86e4000, - 0x00000002, 0xbf8cc07f, + 0xbef20174, 0x80747074, + 0x82758075, 0xbefc0080, + 0xbf800000, 0xbe802b00, + 0xbe822b02, 0xbe842b04, + 0xbe862b06, 0xbe882b08, + 0xbe8a2b0a, 0xbe8c2b0c, + 0xbe8e2b0e, 0xc06b003a, + 0x00000000, 0xbf8cc07f, + 0xc06b013a, 0x00000010, + 0xbf8cc07f, 0xc06b023a, + 0x00000020, 0xbf8cc07f, + 0xc06b033a, 0x00000030, + 0xbf8cc07f, 0x8074c074, + 0x82758075, 0x807c907c, + 0xbf0a7b7c, 0xbf85ffe7, + 0xbef40172, 0xbef00080, + 0xbefe00c1, 0xbeff00c1, + 0xbee80080, 0xbee90080, + 0xbef600ff, 0x01000000, + 0x867aff78, 0x00400000, + 0xbf850003, 0xb8faf803, + 0x897a7aff, 0x10000000, + 0xbf85004d, 0xbe840080, + 0xd2890000, 0x00000900, + 0x80048104, 0xd2890001, + 0x00000900, 0x80048104, + 0xd2890002, 0x00000900, + 0x80048104, 0xd2890003, + 0x00000900, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, 0xbe840080, 0xd2890000, - 0x00000900, 0x80048104, - 0xd2890001, 0x00000900, + 0x00000901, 0x80048104, + 0xd2890001, 0x00000901, 0x80048104, 0xd2890002, - 0x00000900, 0x80048104, - 0xd2890003, 0x00000900, + 0x00000901, 0x80048104, + 0xd2890003, 0x00000901, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000901, + 0xd2890000, 0x00000902, 0x80048104, 0xd2890001, - 0x00000901, 0x80048104, - 0xd2890002, 0x00000901, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, 0x80048104, 0xd2890003, - 0x00000901, 0x80048104, + 0x00000902, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, - 0x680404ff, 
0x00000200, + 0xbe840080, 0xd2890000, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, + 0x80048104, 0xd2890002, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbf820008, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, + 0xbefe00c1, 0xbeff00c1, + 0xb8fb4306, 0x867bc17b, + 0xbf840063, 0xbf8a0000, + 0x867aff6f, 0x04000000, + 0xbf84005f, 0x8e7b867b, + 0x8e7b827b, 0xbef6007b, + 0xb8f02a05, 0x80708170, + 0x8e708a70, 0xb8fa1605, + 0x807a817a, 0x8e7a867a, + 0x80707a70, 0x8070ff70, + 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xd28c0002, 0x000100c1, + 0xd28d0003, 0x000204c1, + 0x867aff78, 0x00400000, + 0xbf850003, 0xb8faf803, + 0x897a7aff, 0x10000000, + 0xbf850030, 0x24040682, + 0xd86e4000, 0x00000002, + 0xbf8cc07f, 0xbe840080, + 0xd2890000, 0x00000900, + 0x80048104, 0xd2890001, + 0x00000900, 0x80048104, + 0xd2890002, 0x00000900, + 0x80048104, 0xd2890003, + 0x00000900, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000901, 0x80048104, + 0xd2890001, 0x00000901, + 0x80048104, 0xd2890002, + 0x00000901, 0x80048104, + 0xd2890003, 0x00000901, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0x680404ff, + 0x00000200, 0xd0c9006a, + 0x0000f702, 0xbf87ffd2, + 0xbf820015, 0xd1060002, + 0x00011103, 0x7e0602ff, + 0x00000200, 0xbefc00ff, + 0x00010000, 0xbe800077, + 0x8677ff77, 0xff7fffff, + 0x8777ff77, 0x00058000, + 0xd8ec0000, 0x00000002, + 0xbf8cc07f, 0xe0765000, + 0x701d0002, 0x68040702, 0xd0c9006a, 0x0000f702, - 0xbf87ffd2, 0xbf820015, - 0xd1060002, 0x00011103, - 0x7e0602ff, 0x00000200, - 0xbefc00ff, 0x00010000, - 0xbe800077, 0x8677ff77, - 0xff7fffff, 0x8777ff77, - 0x00058000, 0xd8ec0000, - 0x00000002, 0xbf8cc07f, - 0xe0765000, 0x701d0002, - 0x68040702, 0xd0c9006a, - 0x0000f702, 0xbf87fff7, - 0xbef70000, 
0xbef000ff, - 0x00000400, 0xbefe00c1, - 0xbeff00c1, 0xb8fb2a05, - 0x807b817b, 0x8e7b827b, - 0x8e76887b, 0xbef600ff, + 0xbf87fff7, 0xbef70000, + 0xbef000ff, 0x00000400, + 0xbefe00c1, 0xbeff00c1, + 0xb8fb2a05, 0x807b817b, + 0x8e7b827b, 0xbef600ff, 0x01000000, 0xbefc0084, 0xbf0a7b7c, 0xbf84006d, 0xbf11017c, 0x807bff7b, @@ -566,15 +572,11 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0x701d0300, 0x807c847c, 0x8070ff70, 0x00000400, 0xbf0a7b7c, 0xbf85ffef, - 0xbf9c0000, 0xbf8200da, + 0xbf9c0000, 0xbf8200c7, 0xbef4007e, 0x8675ff7f, 0x0000ffff, 0x8775ff75, 0x00040000, 0xbef60080, 0xbef700ff, 0x00807fac, - 0x866eff7f, 0x08000000, - 0x8f6e836e, 0x87776e77, - 0x866eff7f, 0x70000000, - 0x8f6e816e, 0x87776e77, 0x866eff7f, 0x04000000, 0xbf84001e, 0xbefe00c1, 0xbeff00c1, 0xb8ef4306, @@ -591,28 +593,28 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0x781d0000, 0x807cff7c, 0x00000200, 0x8078ff78, 0x00000200, 0xbf0a6f7c, - 0xbf85fff6, 0xbef80080, - 0xbefe00c1, 0xbeff00c1, - 0xb8ef2a05, 0x806f816f, - 0x8e6f826f, 0x8e76886f, - 0xbef600ff, 0x01000000, - 0xbeee0078, 0x8078ff78, - 0x00000400, 0xbefc0084, - 0xbf11087c, 0x806fff6f, - 0x00008000, 0xe0524000, - 0x781d0000, 0xe0524100, - 0x781d0100, 0xe0524200, - 0x781d0200, 0xe0524300, - 0x781d0300, 0xbf8c0f70, - 0x7e000300, 0x7e020301, - 0x7e040302, 0x7e060303, - 0x807c847c, 0x8078ff78, - 0x00000400, 0xbf0a6f7c, - 0xbf85ffee, 0xbf9c0000, - 0xe0524000, 0x6e1d0000, - 0xe0524100, 0x6e1d0100, - 0xe0524200, 0x6e1d0200, - 0xe0524300, 0x6e1d0300, + 0xbf85fff6, 0xbefe00c1, + 0xbeff00c1, 0xbef600ff, + 0x01000000, 0xb8ef2a05, + 0x806f816f, 0x8e6f826f, + 0x806fff6f, 0x00008000, + 0xbef80080, 0xbeee0078, + 0x8078ff78, 0x00000400, + 0xbefc0084, 0xbf11087c, + 0xe0524000, 0x781d0000, + 0xe0524100, 0x781d0100, + 0xe0524200, 0x781d0200, + 0xe0524300, 0x781d0300, + 0xbf8c0f70, 0x7e000300, + 0x7e020301, 0x7e040302, + 0x7e060303, 0x807c847c, + 0x8078ff78, 0x00000400, + 0xbf0a6f7c, 0xbf85ffee, + 0xbf9c0000, 0xe0524000, + 0x6e1d0000, 0xe0524100, + 
0x6e1d0100, 0xe0524200, + 0x6e1d0200, 0xe0524300, + 0x6e1d0300, 0xbf8c0f70, 0xb8f82a05, 0x80788178, 0x8e788a78, 0xb8ee1605, 0x806e816e, 0x8e6e866e, @@ -663,90 +665,101 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0xc00b1c37, 0x00000050, 0xc00b1d37, 0x00000060, 0xc0031e77, 0x00000074, - 0xbf8cc07f, 0x866fff6d, - 0xf8000000, 0x8f6f9b6f, - 0x8e6f906f, 0xbeee0080, - 0x876e6f6e, 0x866fff6d, - 0x04000000, 0x8f6f9a6f, - 0x8e6f8f6f, 0x876e6f6e, - 0x866fff7a, 0x00800000, - 0x8f6f976f, 0xb96ef807, - 0x866dff6d, 0x0000ffff, - 0x86fe7e7e, 0x86ea6a6a, - 0x8f6e837a, 0xb96ee0c2, - 0xbf800002, 0xb97a0002, - 0xbf8a0000, 0x95806f6c, - 0xbf810000, 0x00000000, + 0xbf8cc07f, 0x8f6e8b77, + 0x866eff6e, 0x001f8000, + 0xb96ef807, 0x866dff6d, + 0x0000ffff, 0x86fe7e7e, + 0x86ea6a6a, 0x8f6e837a, + 0xb96ee0c2, 0xbf800002, + 0xb97a0002, 0xbf8a0000, + 0xbe801f6c, 0xbf810000, }; static const uint32_t cwsr_trap_nv1x_hex[] = { - 0xbf820001, 0xbf8201cd, + 0xbf820001, 0xbf8201f1, 0xb0804004, 0xb978f802, - 0x8a788678, 0xb96ef801, - 0x876eff6e, 0x00000800, - 0xbf840003, 0x876eff78, + 0x8a78ff78, 0x00020006, + 0xb97bf803, 0x876eff78, 0x00002000, 0xbf840009, - 0xb97bf803, 0x876eff7b, - 0x00000400, 0xbf850033, - 0x876eff7b, 0x00000100, - 0xbf840002, 0x8878ff78, - 0x00002000, 0x8a77ff77, - 0xff000000, 0xb96ef807, - 0x876fff6e, 0x02000000, - 0x8f6f866f, 0x88776f77, - 0x876fff6e, 0x003f8000, - 0x8f6f896f, 0x88776f77, - 0x8a6eff6e, 0x023f8000, - 0xb9eef807, 0xb97af812, + 0x876eff6d, 0x00ff0000, + 0xbf85001e, 0x876eff7b, + 0x00000400, 0xbf850057, + 0xbf8e0010, 0xb97bf803, + 0xbf82fffa, 0x876eff7b, + 0x00000900, 0xbf850015, + 0x876eff7b, 0x000071ff, + 0xbf840008, 0x876fff7b, + 0x00007080, 0xbf840001, + 0xbeee1d87, 0xb96ff801, + 0x8f6e8c6e, 0x876e6f6e, + 0xbf85000a, 0x876eff6d, + 0x00ff0000, 0xbf850007, + 0xb96ef801, 0x876eff6e, + 0x00000800, 0xbf850003, + 0x876eff7b, 0x00000400, + 0xbf85003c, 0x8a77ff77, + 0xff000000, 0xb97af807, + 0x877bff7a, 0x02000000, + 0x8f7b867b, 0x88777b77, + 0x877bff7a, 0x003f8000, 
+ 0x8f7b897b, 0x88777b77, + 0x8a7aff7a, 0x023f8000, + 0xb9faf807, 0xb97af812, 0xb97bf813, 0x8ffa887a, - 0xf4051bbd, 0xfa000000, - 0xbf8cc07f, 0xf4051ebd, - 0xfa000008, 0xbf8cc07f, - 0x87ee6e6e, 0xbf840001, - 0xbe80206e, 0xb97bf803, - 0x877bff7b, 0x000001ff, + 0xf4011bbd, 0xfa000010, + 0xbf8cc07f, 0x8f6e976e, + 0x8a77ff77, 0x00800000, + 0x88776e77, 0xf4051bbd, + 0xfa000000, 0xbf8cc07f, + 0xf4051ebd, 0xfa000008, + 0xbf8cc07f, 0x87ee6e6e, + 0xbf840001, 0xbe80206e, + 0x876eff6d, 0x01ff0000, + 0xbf850005, 0x8878ff78, + 0x00002000, 0x80ec886c, + 0x82ed806d, 0xbf820005, + 0x876eff6d, 0x01000000, 0xbf850002, 0x806c846c, 0x826d806d, 0x876dff6d, - 0x0000ffff, 0x906e8977, - 0x876fff6e, 0x003f8000, - 0x906e8677, 0x876eff6e, - 0x02000000, 0x886e6f6e, - 0xb9eef807, 0x87fe7e7e, + 0x0000ffff, 0x907a8977, + 0x877bff7a, 0x003f8000, + 0x907a8677, 0x877aff7a, + 0x02000000, 0x887a7b7a, + 0xb9faf807, 0x87fe7e7e, 0x87ea6a6a, 0xb9f8f802, 0xbe80226c, 0x876dff6d, 0x0000ffff, 0xbefa0380, - 0xb9fa0283, 0xb97a2c07, - 0x8f7a9a7a, 0x886d7a6d, - 0xb97a03c7, 0x8f7a997a, - 0x886d7a6d, 0xb97a0647, - 0x8f7a987a, 0x886d7a6d, - 0xb97af807, 0x877aff7a, - 0x00007fff, 0xb9faf807, - 0xbeee037e, 0xbeef037f, - 0xbefe0480, 0xbf900004, - 0xbf8e0002, 0xbf88fffe, - 0xb97b02dc, 0x8f7b997b, - 0x887b7b7f, 0xb97a2a05, + 0xb9fa0283, 0x8a77ff77, + 0xff000000, 0xb97af807, + 0x877bff7a, 0x02000000, + 0x8f7b867b, 0x88777b77, + 0x877bff7a, 0x003f8000, + 0x8f7b897b, 0x88777b77, + 0x8a7aff7a, 0x023f8000, + 0xb9faf807, 0xbeee037e, + 0xbeef037f, 0xbefe0480, + 0xbf900004, 0xbf8e0002, + 0xbf88fffe, 0x877aff7f, + 0x04000000, 0x8f7a857a, + 0x886d7a6d, 0xb97b02dc, + 0x8f7b997b, 0xb97a2a05, 0x807a817a, 0xbf0d997b, 0xbf850002, 0x8f7a897a, 0xbf820001, 0x8f7a8a7a, - 0x877bff7f, 0x0000ffff, - 0x807aff7a, 0x00000200, - 0x807a7e7a, 0x827b807b, - 0xf4491c3d, 0xfa000050, - 0xf4491d3d, 0xfa000060, - 0xf4411e7d, 0xfa000074, - 0xbef4037e, 0x8775ff7f, - 0x0000ffff, 0x8875ff75, - 0x00040000, 0xbef60380, - 0xbef703ff, 0x10807fac, - 0x877aff7f, 
0x08000000, - 0x907a837a, 0x88777a77, - 0x877aff7f, 0x70000000, - 0x907a817a, 0x88777a77, - 0xbef1037c, 0xbef00380, - 0xb97302dc, 0x8f739973, - 0x8873737f, 0xb97bf816, + 0xb97b1e06, 0x8f7b8a7b, + 0x807a7b7a, 0x877bff7f, + 0x0000ffff, 0x807aff7a, + 0x00000200, 0x807a7e7a, + 0x827b807b, 0xf4491c3d, + 0xfa000050, 0xf4491d3d, + 0xfa000060, 0xf4411e7d, + 0xfa000074, 0xbef4037e, + 0x8775ff7f, 0x0000ffff, + 0x8875ff75, 0x00040000, + 0xbef60380, 0xbef703ff, + 0x10807fac, 0xbef1037c, + 0xbef00380, 0xb97302dc, + 0x8f739973, 0xb97bf816, 0xba80f816, 0x00000000, 0xbefe03c1, 0x907c9973, 0x877c817c, 0xbf06817c, @@ -776,8 +789,9 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xbefe037c, 0xbefc0370, 0xf4611b3a, 0xf8000000, 0x80708470, 0xbefc037e, + 0x8a7aff6d, 0x80000000, 0xbefe037c, 0xbefc0370, - 0xf4611b7a, 0xf8000000, + 0xf4611eba, 0xf8000000, 0x80708470, 0xbefc037e, 0xbefe037c, 0xbefc0370, 0xf4611bba, 0xf8000000, @@ -838,7 +852,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xbf820001, 0xbeff03c1, 0xb97b4306, 0x877bc17b, 0xbf840044, 0xbf8a0000, - 0x877aff73, 0x04000000, + 0x877aff6d, 0x80000000, 0xbf840040, 0x8f7b867b, 0x8f7b827b, 0xbef6037b, 0xb9702a05, 0x80708170, @@ -894,7 +908,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xbf0a7b7c, 0xbf85ffef, 0xbf820025, 0xbef603ff, 0x01000000, 0xbefc0384, - 0xbf0a7b7c, 0xbf840020, + 0xbf0a7b7c, 0xbf840011, 0x7e008700, 0x7e028701, 0x7e048702, 0x7e068703, 0xe0704000, 0x705d0000, @@ -911,71 +925,69 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0x705d0000, 0x807c817c, 0x8070ff70, 0x00000080, 0xbf0a7b7c, 0xbf85fff8, - 0xbf820151, 0xbef4037e, + 0xbf820144, 0xbef4037e, 0x8775ff7f, 0x0000ffff, 0x8875ff75, 0x00040000, 0xbef60380, 0xbef703ff, - 0x10807fac, 0x876eff7f, - 0x08000000, 0x906e836e, - 0x88776e77, 0x876eff7f, - 0x70000000, 0x906e816e, - 0x88776e77, 0xb97202dc, - 0x8f729972, 0x8872727f, - 0x876eff7f, 0x04000000, - 0xbf840034, 0xbefe03c1, - 0x907c9972, 0x877c817c, - 0xbf06817c, 0xbf850002, - 0xbeff0380, 0xbf820001, - 
0xbeff03c1, 0xb96f4306, - 0x876fc16f, 0xbf840029, - 0x8f6f866f, 0x8f6f826f, - 0xbef6036f, 0xb9782a05, - 0x80788178, 0xbf0d9972, - 0xbf850002, 0x8f788978, - 0xbf820001, 0x8f788a78, - 0xb96e1e06, 0x8f6e8a6e, - 0x80786e78, 0x8078ff78, - 0x00000200, 0x8078ff78, - 0x00000080, 0xbef603ff, - 0x01000000, 0x907c9972, - 0x877c817c, 0xbf06817c, - 0xbefc0380, 0xbf850009, - 0xe0310000, 0x781d0000, - 0x807cff7c, 0x00000080, - 0x8078ff78, 0x00000080, - 0xbf0a6f7c, 0xbf85fff8, - 0xbf820008, 0xe0310000, - 0x781d0000, 0x807cff7c, - 0x00000100, 0x8078ff78, - 0x00000100, 0xbf0a6f7c, - 0xbf85fff8, 0xbef80380, + 0x10807fac, 0xb97202dc, + 0x8f729972, 0x876eff7f, + 0x04000000, 0xbf840034, 0xbefe03c1, 0x907c9972, 0x877c817c, 0xbf06817c, 0xbf850002, 0xbeff0380, 0xbf820001, 0xbeff03c1, - 0xb96f2a05, 0x806f816f, - 0x8f6f826f, 0x907c9972, - 0x877c817c, 0xbf06817c, - 0xbf850021, 0xbef603ff, - 0x01000000, 0xbeee0378, + 0xb96f4306, 0x876fc16f, + 0xbf840029, 0x8f6f866f, + 0x8f6f826f, 0xbef6036f, + 0xb9782a05, 0x80788178, + 0xbf0d9972, 0xbf850002, + 0x8f788978, 0xbf820001, + 0x8f788a78, 0xb96e1e06, + 0x8f6e8a6e, 0x80786e78, 0x8078ff78, 0x00000200, - 0xbefc0384, 0xe0304000, - 0x785d0000, 0xe0304080, - 0x785d0100, 0xe0304100, - 0x785d0200, 0xe0304180, - 0x785d0300, 0xbf8c3f70, - 0x7e008500, 0x7e028501, - 0x7e048502, 0x7e068503, - 0x807c847c, 0x8078ff78, - 0x00000200, 0xbf0a6f7c, - 0xbf85ffee, 0xe0304000, - 0x6e5d0000, 0xe0304080, - 0x6e5d0100, 0xe0304100, - 0x6e5d0200, 0xe0304180, - 0x6e5d0300, 0xbf820032, + 0x8078ff78, 0x00000080, + 0xbef603ff, 0x01000000, + 0x907c9972, 0x877c817c, + 0xbf06817c, 0xbefc0380, + 0xbf850009, 0xe0310000, + 0x781d0000, 0x807cff7c, + 0x00000080, 0x8078ff78, + 0x00000080, 0xbf0a6f7c, + 0xbf85fff8, 0xbf820008, + 0xe0310000, 0x781d0000, + 0x807cff7c, 0x00000100, + 0x8078ff78, 0x00000100, + 0xbf0a6f7c, 0xbf85fff8, + 0xbef80380, 0xbefe03c1, + 0x907c9972, 0x877c817c, + 0xbf06817c, 0xbf850002, + 0xbeff0380, 0xbf820001, + 0xbeff03c1, 0xb96f2a05, + 0x806f816f, 0x8f6f826f, + 
0x907c9972, 0x877c817c, + 0xbf06817c, 0xbf850024, + 0xbef603ff, 0x01000000, + 0xbeee0378, 0x8078ff78, + 0x00000200, 0xbefc0384, + 0xbf0a6f7c, 0xbf840050, + 0xe0304000, 0x785d0000, + 0xe0304080, 0x785d0100, + 0xe0304100, 0x785d0200, + 0xe0304180, 0x785d0300, + 0xbf8c3f70, 0x7e008500, + 0x7e028501, 0x7e048502, + 0x7e068503, 0x807c847c, + 0x8078ff78, 0x00000200, + 0xbf0a6f7c, 0xbf85ffee, + 0xe0304000, 0x6e5d0000, + 0xe0304080, 0x6e5d0100, + 0xe0304100, 0x6e5d0200, + 0xe0304180, 0x6e5d0300, + 0xbf8c3f70, 0xbf820034, 0xbef603ff, 0x01000000, 0xbeee0378, 0x8078ff78, 0x00000400, 0xbefc0384, + 0xbf0a6f7c, 0xbf840012, 0xe0304000, 0x785d0000, 0xe0304100, 0x785d0100, 0xe0304200, 0x785d0200, @@ -1060,233 +1072,173 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xb96e2a05, 0x806e816e, 0xbf0d9972, 0xbf850002, 0x8f6e896e, 0xbf820001, - 0x8f6e8a6e, 0x806eff6e, - 0x00000200, 0x806e746e, - 0x826f8075, 0x876fff6f, - 0x0000ffff, 0xf4091c37, - 0xfa000050, 0xf4091d37, - 0xfa000060, 0xf4011e77, - 0xfa000074, 0xbf8cc07f, - 0x876fff6d, 0xfc000000, - 0x906f9a6f, 0x8f6f906f, - 0xbeee0380, 0x886e6f6e, - 0x876fff6d, 0x02000000, - 0x906f996f, 0x8f6f8f6f, - 0x886e6f6e, 0x876fff6d, - 0x01000000, 0x906f986f, - 0x8f6f996f, 0x886e6f6e, - 0x876fff7a, 0x00800000, - 0x906f976f, 0xb9eef807, - 0x876dff6d, 0x0000ffff, - 0x87fe7e7e, 0x87ea6a6a, - 0xb9faf802, 0xbe80226c, - 0xbf810000, 0xbf9f0000, + 0x8f6e8a6e, 0xb96f1e06, + 0x8f6f8a6f, 0x806e6f6e, + 0x806eff6e, 0x00000200, + 0x806e746e, 0x826f8075, + 0x876fff6f, 0x0000ffff, + 0xf4091c37, 0xfa000050, + 0xf4091d37, 0xfa000060, + 0xf4011e77, 0xfa000074, + 0xbf8cc07f, 0x906e8977, + 0x876fff6e, 0x003f8000, + 0x906e8677, 0x876eff6e, + 0x02000000, 0x886e6f6e, + 0xb9eef807, 0x876dff6d, + 0x0000ffff, 0x87fe7e7e, + 0x87ea6a6a, 0xb9faf802, + 0xbe80226c, 0xbf810000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, + 0xbf9f0000, 0x00000000, }; static const uint32_t cwsr_trap_arcturus_hex[] = { - 0xbf820001, 0xbf8202c4, - 0xb8f8f802, 0x89788678, - 0xb8eef801, 
0x866eff6e, - 0x00000800, 0xbf840003, + 0xbf820001, 0xbf8202d0, + 0xb8f8f802, 0x8978ff78, + 0x00020006, 0xb8fbf803, 0x866eff78, 0x00002000, - 0xbf840016, 0xb8fbf803, + 0xbf840009, 0x866eff6d, + 0x00ff0000, 0xbf85001e, 0x866eff7b, 0x00000400, - 0xbf85003b, 0x866eff7b, - 0x00000800, 0xbf850003, - 0x866eff7b, 0x00000100, - 0xbf84000c, 0x866eff78, - 0x00002000, 0xbf840005, - 0xbf8e0010, 0xb8eef803, - 0x866eff6e, 0x00000400, - 0xbf84fffb, 0x8778ff78, - 0x00002000, 0x80ec886c, - 0x82ed806d, 0xb8eef807, - 0x866fff6e, 0x001f8000, - 0x8e6f8b6f, 0x8977ff77, - 0xfc000000, 0x87776f77, - 0x896eff6e, 0x001f8000, - 0xb96ef807, 0xb8faf812, + 0xbf850051, 0xbf8e0010, + 0xb8fbf803, 0xbf82fffa, + 0x866eff7b, 0x00000900, + 0xbf850015, 0x866eff7b, + 0x000071ff, 0xbf840008, + 0x866fff7b, 0x00007080, + 0xbf840001, 0xbeee1a87, + 0xb8eff801, 0x8e6e8c6e, + 0x866e6f6e, 0xbf85000a, + 0x866eff6d, 0x00ff0000, + 0xbf850007, 0xb8eef801, + 0x866eff6e, 0x00000800, + 0xbf850003, 0x866eff7b, + 0x00000400, 0xbf850036, + 0xb8faf807, 0x867aff7a, + 0x001f8000, 0x8e7a8b7a, + 0x8977ff77, 0xfc000000, + 0x87777a77, 0xba7ff807, + 0x00000000, 0xb8faf812, 0xb8fbf813, 0x8efa887a, - 0xc0071bbd, 0x00000000, - 0xbf8cc07f, 0xc0071ebd, - 0x00000008, 0xbf8cc07f, - 0x86ee6e6e, 0xbf840001, - 0xbe801d6e, 0xb8fbf803, - 0x867bff7b, 0x000001ff, + 0xc0031bbd, 0x00000010, + 0xbf8cc07f, 0x8e6e976e, + 0x8977ff77, 0x00800000, + 0x87776e77, 0xc0071bbd, + 0x00000000, 0xbf8cc07f, + 0xc0071ebd, 0x00000008, + 0xbf8cc07f, 0x86ee6e6e, + 0xbf840001, 0xbe801d6e, + 0x866eff6d, 0x01ff0000, + 0xbf850005, 0x8778ff78, + 0x00002000, 0x80ec886c, + 0x82ed806d, 0xbf820005, + 0x866eff6d, 0x01000000, 0xbf850002, 0x806c846c, 0x826d806d, 0x866dff6d, - 0x0000ffff, 0x8f6e8b77, - 0x866eff6e, 0x001f8000, - 0xb96ef807, 0x86fe7e7e, + 0x0000ffff, 0x8f7a8b77, + 0x867aff7a, 0x001f8000, + 0xb97af807, 0x86fe7e7e, 0x86ea6a6a, 0x8f6e8378, 0xb96ee0c2, 0xbf800002, 0xb9780002, 0xbe801f6c, 0x866dff6d, 0x0000ffff, 0xbefa0080, 0xb97a0283, - 0xb8fa2407, 0x8e7a9b7a, - 
0x876d7a6d, 0xb8fa03c7, - 0x8e7a9a7a, 0x876d7a6d, 0xb8faf807, 0x867aff7a, - 0x00007fff, 0xb97af807, - 0xbeee007e, 0xbeef007f, - 0xbefe0180, 0xbf900004, - 0x877a8478, 0xb97af802, - 0xbf8e0002, 0xbf88fffe, - 0xb8fa2a05, 0x807a817a, - 0x8e7a8a7a, 0x8e7a817a, - 0xb8fb1605, 0x807b817b, - 0x8e7b867b, 0x807a7b7a, - 0x807a7e7a, 0x827b807f, - 0x867bff7b, 0x0000ffff, - 0xc04b1c3d, 0x00000050, - 0xbf8cc07f, 0xc04b1d3d, - 0x00000060, 0xbf8cc07f, - 0xc0431e7d, 0x00000074, - 0xbf8cc07f, 0xbef4007e, - 0x8675ff7f, 0x0000ffff, - 0x8775ff75, 0x00040000, - 0xbef60080, 0xbef700ff, - 0x00807fac, 0x867aff7f, - 0x08000000, 0x8f7a837a, - 0x87777a77, 0x867aff7f, - 0x70000000, 0x8f7a817a, - 0x87777a77, 0xbef1007c, - 0xbef00080, 0xb8f02a05, - 0x80708170, 0x8e708a70, - 0x8e708170, 0xb8fa1605, - 0x807a817a, 0x8e7a867a, - 0x80707a70, 0xbef60084, - 0xbef600ff, 0x01000000, - 0xbefe007c, 0xbefc0070, - 0xc0611c7a, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611b3a, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, - 0xbefe007c, 0xbefc0070, - 0xc0611b7a, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611bba, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, - 0xbefe007c, 0xbefc0070, - 0xc0611bfa, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611e3a, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, - 0xb8fbf803, 0xbefe007c, - 0xbefc0070, 0xc0611efa, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, - 0xbefe007c, 0xbefc0070, - 0xc0611a3a, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611a7a, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, - 0xb8f1f801, 0xbefe007c, - 0xbefc0070, 0xc0611c7a, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, - 0x867aff7f, 0x04000000, - 0xbeef0080, 0x876f6f7a, - 0xb8f02a05, 0x80708170, - 0x8e708a70, 0x8e708170, - 0xb8fb1605, 0x807b817b, - 0x8e7b847b, 0x8e76827b, - 0xbef600ff, 0x01000000, - 0xbef20174, 0x80747074, - 
0x82758075, 0xbefc0080, - 0xbf800000, 0xbe802b00, - 0xbe822b02, 0xbe842b04, - 0xbe862b06, 0xbe882b08, - 0xbe8a2b0a, 0xbe8c2b0c, - 0xbe8e2b0e, 0xc06b003a, - 0x00000000, 0xbf8cc07f, - 0xc06b013a, 0x00000010, - 0xbf8cc07f, 0xc06b023a, - 0x00000020, 0xbf8cc07f, - 0xc06b033a, 0x00000030, - 0xbf8cc07f, 0x8074c074, - 0x82758075, 0x807c907c, - 0xbf0a7b7c, 0xbf85ffe7, - 0xbef40172, 0xbef00080, - 0xbefe00c1, 0xbeff00c1, - 0xbee80080, 0xbee90080, - 0xbef600ff, 0x01000000, - 0x867aff78, 0x00400000, - 0xbf850003, 0xb8faf803, - 0x897a7aff, 0x10000000, - 0xbf85004d, 0xbe840080, - 0xd2890000, 0x00000900, - 0x80048104, 0xd2890001, - 0x00000900, 0x80048104, - 0xd2890002, 0x00000900, - 0x80048104, 0xd2890003, - 0x00000900, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbe840080, 0xd2890000, - 0x00000901, 0x80048104, - 0xd2890001, 0x00000901, - 0x80048104, 0xd2890002, - 0x00000901, 0x80048104, - 0xd2890003, 0x00000901, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000902, - 0x80048104, 0xd2890001, - 0x00000902, 0x80048104, - 0xd2890002, 0x00000902, - 0x80048104, 0xd2890003, - 0x00000902, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbe840080, 0xd2890000, - 0x00000903, 0x80048104, - 0xd2890001, 0x00000903, - 0x80048104, 0xd2890002, - 0x00000903, 0x80048104, - 0xd2890003, 0x00000903, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbf820008, - 0xe0724000, 0x701d0000, - 0xe0724100, 0x701d0100, - 0xe0724200, 0x701d0200, - 0xe0724300, 0x701d0300, - 0xbefe00c1, 0xbeff00c1, - 0xb8fb4306, 0x867bc17b, - 0xbf840064, 0xbf8a0000, - 0x867aff6f, 0x04000000, - 0xbf840060, 0x8e7b867b, - 0x8e7b827b, 0xbef6007b, + 0x001f8000, 0x8e7a8b7a, + 0x8977ff77, 0xfc000000, + 0x87777a77, 0xba7ff807, + 0x00000000, 0xbeee007e, + 0xbeef007f, 0xbefe0180, + 0xbf900004, 0x877a8478, + 0xb97af802, 0xbf8e0002, 
+ 0xbf88fffe, 0xb8fa2a05, + 0x807a817a, 0x8e7a8a7a, + 0x8e7a817a, 0xb8fb1605, + 0x807b817b, 0x8e7b867b, + 0x807a7b7a, 0x807a7e7a, + 0x827b807f, 0x867bff7b, + 0x0000ffff, 0xc04b1c3d, + 0x00000050, 0xbf8cc07f, + 0xc04b1d3d, 0x00000060, + 0xbf8cc07f, 0xc0431e7d, + 0x00000074, 0xbf8cc07f, + 0xbef4007e, 0x8675ff7f, + 0x0000ffff, 0x8775ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x00807fac, + 0xbef1007c, 0xbef00080, 0xb8f02a05, 0x80708170, 0x8e708a70, 0x8e708170, 0xb8fa1605, 0x807a817a, 0x8e7a867a, 0x80707a70, - 0x8070ff70, 0x00000080, - 0xbef600ff, 0x01000000, - 0xbefc0080, 0xd28c0002, - 0x000100c1, 0xd28d0003, - 0x000204c1, 0x867aff78, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xbefe007c, + 0xbefc0070, 0xc0611c7a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611b3a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611b7a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611bba, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611bfa, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611e3a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xb8fbf803, + 0xbefe007c, 0xbefc0070, + 0xc0611efa, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611a3a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611a7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xb8f1f801, + 0xbefe007c, 0xbefc0070, + 0xc0611c7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0x867aff7f, + 0x04000000, 0xbeef0080, + 0x876f6f7a, 0xb8f02a05, + 0x80708170, 0x8e708a70, + 0x8e708170, 0xb8fb1605, + 0x807b817b, 0x8e7b847b, + 0x8e76827b, 0xbef600ff, + 0x01000000, 0xbef20174, + 0x80747074, 0x82758075, + 0xbefc0080, 0xbf800000, + 0xbe802b00, 0xbe822b02, + 0xbe842b04, 0xbe862b06, + 0xbe882b08, 0xbe8a2b0a, + 0xbe8c2b0c, 0xbe8e2b0e, + 0xc06b003a, 0x00000000, + 
0xbf8cc07f, 0xc06b013a, + 0x00000010, 0xbf8cc07f, + 0xc06b023a, 0x00000020, + 0xbf8cc07f, 0xc06b033a, + 0x00000030, 0xbf8cc07f, + 0x8074c074, 0x82758075, + 0x807c907c, 0xbf0a7b7c, + 0xbf85ffe7, 0xbef40172, + 0xbef00080, 0xbefe00c1, + 0xbeff00c1, 0xbee80080, + 0xbee90080, 0xbef600ff, + 0x01000000, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf850030, - 0x24040682, 0xd86e4000, - 0x00000002, 0xbf8cc07f, + 0x10000000, 0xbf85004d, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, @@ -1306,24 +1258,86 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, - 0x680404ff, 0x00000200, + 0xbe840080, 0xd2890000, + 0x00000902, 0x80048104, + 0xd2890001, 0x00000902, + 0x80048104, 0xd2890002, + 0x00000902, 0x80048104, + 0xd2890003, 0x00000902, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000903, + 0x80048104, 0xd2890001, + 0x00000903, 0x80048104, + 0xd2890002, 0x00000903, + 0x80048104, 0xd2890003, + 0x00000903, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbf820008, 0xe0724000, + 0x701d0000, 0xe0724100, + 0x701d0100, 0xe0724200, + 0x701d0200, 0xe0724300, + 0x701d0300, 0xbefe00c1, + 0xbeff00c1, 0xb8fb4306, + 0x867bc17b, 0xbf840064, + 0xbf8a0000, 0x867aff6f, + 0x04000000, 0xbf840060, + 0x8e7b867b, 0x8e7b827b, + 0xbef6007b, 0xb8f02a05, + 0x80708170, 0x8e708a70, + 0x8e708170, 0xb8fa1605, + 0x807a817a, 0x8e7a867a, + 0x80707a70, 0x8070ff70, + 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xd28c0002, 0x000100c1, + 0xd28d0003, 0x000204c1, + 0x867aff78, 0x00400000, + 0xbf850003, 0xb8faf803, + 0x897a7aff, 0x10000000, + 0xbf850030, 0x24040682, + 0xd86e4000, 0x00000002, + 0xbf8cc07f, 0xbe840080, + 0xd2890000, 0x00000900, + 0x80048104, 0xd2890001, + 0x00000900, 0x80048104, + 0xd2890002, 0x00000900, + 0x80048104, 0xd2890003, + 0x00000900, 
0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000901, 0x80048104, + 0xd2890001, 0x00000901, + 0x80048104, 0xd2890002, + 0x00000901, 0x80048104, + 0xd2890003, 0x00000901, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0x680404ff, + 0x00000200, 0xd0c9006a, + 0x0000f702, 0xbf87ffd2, + 0xbf820015, 0xd1060002, + 0x00011103, 0x7e0602ff, + 0x00000200, 0xbefc00ff, + 0x00010000, 0xbe800077, + 0x8677ff77, 0xff7fffff, + 0x8777ff77, 0x00058000, + 0xd8ec0000, 0x00000002, + 0xbf8cc07f, 0xe0765000, + 0x701d0002, 0x68040702, 0xd0c9006a, 0x0000f702, - 0xbf87ffd2, 0xbf820015, - 0xd1060002, 0x00011103, - 0x7e0602ff, 0x00000200, - 0xbefc00ff, 0x00010000, - 0xbe800077, 0x8677ff77, - 0xff7fffff, 0x8777ff77, - 0x00058000, 0xd8ec0000, - 0x00000002, 0xbf8cc07f, - 0xe0765000, 0x701d0002, - 0x68040702, 0xd0c9006a, - 0x0000f702, 0xbf87fff7, - 0xbef70000, 0xbef000ff, - 0x00000400, 0xbefe00c1, - 0xbeff00c1, 0xb8fb2a05, - 0x807b817b, 0x8e7b827b, - 0x8e76887b, 0xbef600ff, + 0xbf87fff7, 0xbef70000, + 0xbef000ff, 0x00000400, + 0xbefe00c1, 0xbeff00c1, + 0xb8fb2a05, 0x807b817b, + 0x8e7b827b, 0xbef600ff, 0x01000000, 0xbefc0084, 0xbf0a7b7c, 0xbf84006d, 0xbf11017c, 0x807bff7b, @@ -1440,15 +1454,11 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { 0x701d0300, 0x807c847c, 0x8070ff70, 0x00000400, 0xbf0a7b7c, 0xbf85ffeb, - 0xbf9c0000, 0xbf820106, + 0xbf9c0000, 0xbf8200e3, 0xbef4007e, 0x8675ff7f, 0x0000ffff, 0x8775ff75, 0x00040000, 0xbef60080, 0xbef700ff, 0x00807fac, - 0x866eff7f, 0x08000000, - 0x8f6e836e, 0x87776e77, - 0x866eff7f, 0x70000000, - 0x8f6e816e, 0x87776e77, 0x866eff7f, 0x04000000, 0xbf84001f, 0xbefe00c1, 0xbeff00c1, 0xb8ef4306, @@ -1466,26 +1476,14 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { 0x807cff7c, 0x00000200, 0x8078ff78, 0x00000200, 0xbf0a6f7c, 0xbf85fff6, - 0xbef80080, 0xbefe00c1, - 0xbeff00c1, 0xb8ef2a05, - 0x806f816f, 0x8e6f826f, - 0x8e76886f, 0xbef90076, 
+ 0xbefe00c1, 0xbeff00c1, 0xbef600ff, 0x01000000, + 0xb8ef2a05, 0x806f816f, + 0x8e6f826f, 0x806fff6f, + 0x00008000, 0xbef80080, 0xbeee0078, 0x8078ff78, - 0x00000400, 0xbef30079, - 0x8079ff79, 0x00000400, - 0xbefc0084, 0xbf11087c, - 0x806fff6f, 0x00008000, - 0xe0524000, 0x791d0000, - 0xe0524100, 0x791d0100, - 0xe0524200, 0x791d0200, - 0xe0524300, 0x791d0300, - 0x8079ff79, 0x00000400, - 0xbf8c0f70, 0xd3d94000, - 0x18000100, 0xd3d94001, - 0x18000101, 0xd3d94002, - 0x18000102, 0xd3d94003, - 0x18000103, 0xe0524000, + 0x00000400, 0xbefc0084, + 0xbf11087c, 0xe0524000, 0x781d0000, 0xe0524100, 0x781d0100, 0xe0524200, 0x781d0200, 0xe0524300, @@ -1494,20 +1492,24 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { 0x7e040302, 0x7e060303, 0x807c847c, 0x8078ff78, 0x00000400, 0xbf0a6f7c, - 0xbf85ffdb, 0xbf9c0000, - 0xe0524000, 0x731d0000, - 0xe0524100, 0x731d0100, - 0xe0524200, 0x731d0200, - 0xe0524300, 0x731d0300, - 0xbf8c0f70, 0xd3d94000, - 0x18000100, 0xd3d94001, - 0x18000101, 0xd3d94002, - 0x18000102, 0xd3d94003, - 0x18000103, 0xe0524000, - 0x6e1d0000, 0xe0524100, - 0x6e1d0100, 0xe0524200, - 0x6e1d0200, 0xe0524300, - 0x6e1d0300, 0xb8f82a05, + 0xbf85ffee, 0xbefc0080, + 0xbf11087c, 0xe0524000, + 0x781d0000, 0xe0524100, + 0x781d0100, 0xe0524200, + 0x781d0200, 0xe0524300, + 0x781d0300, 0xbf8c0f70, + 0xd3d94000, 0x18000100, + 0xd3d94001, 0x18000101, + 0xd3d94002, 0x18000102, + 0xd3d94003, 0x18000103, + 0x807c847c, 0x8078ff78, + 0x00000400, 0xbf0a6f7c, + 0xbf85ffea, 0xbf9c0000, + 0xe0524000, 0x6e1d0000, + 0xe0524100, 0x6e1d0100, + 0xe0524200, 0x6e1d0200, + 0xe0524300, 0x6e1d0300, + 0xbf8c0f70, 0xb8f82a05, 0x80788178, 0x8e788a78, 0x8e788178, 0xb8ee1605, 0x806e816e, 0x8e6e866e, @@ -1559,224 +1561,162 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { 0xc00b1c37, 0x00000050, 0xc00b1d37, 0x00000060, 0xc0031e77, 0x00000074, - 0xbf8cc07f, 0x866fff6d, - 0xf8000000, 0x8f6f9b6f, - 0x8e6f906f, 0xbeee0080, - 0x876e6f6e, 0x866fff6d, - 0x04000000, 0x8f6f9a6f, - 0x8e6f8f6f, 0x876e6f6e, - 
0x866fff7a, 0x00800000, - 0x8f6f976f, 0xb96ef807, - 0x866dff6d, 0x0000ffff, - 0x86fe7e7e, 0x86ea6a6a, - 0x8f6e837a, 0xb96ee0c2, - 0xbf800002, 0xb97a0002, - 0xbf8a0000, 0x95806f6c, - 0xbf810000, 0x00000000, + 0xbf8cc07f, 0x8f6e8b77, + 0x866eff6e, 0x001f8000, + 0xb96ef807, 0x866dff6d, + 0x0000ffff, 0x86fe7e7e, + 0x86ea6a6a, 0x8f6e837a, + 0xb96ee0c2, 0xbf800002, + 0xb97a0002, 0xbf8a0000, + 0xbe801f6c, 0xbf810000, }; static const uint32_t cwsr_trap_aldebaran_hex[] = { - 0xbf820001, 0xbf8202ce, - 0xb8f8f802, 0x89788678, - 0xb8eef801, 0x866eff6e, - 0x00000800, 0xbf840003, + 0xbf820001, 0xbf8202db, + 0xb8f8f802, 0x8978ff78, + 0x00020006, 0xb8fbf803, 0x866eff78, 0x00002000, - 0xbf840016, 0xb8fbf803, + 0xbf840009, 0x866eff6d, + 0x00ff0000, 0xbf85001e, 0x866eff7b, 0x00000400, - 0xbf85003b, 0x866eff7b, - 0x00000800, 0xbf850003, - 0x866eff7b, 0x00000100, - 0xbf84000c, 0x866eff78, - 0x00002000, 0xbf840005, - 0xbf8e0010, 0xb8eef803, - 0x866eff6e, 0x00000400, - 0xbf84fffb, 0x8778ff78, - 0x00002000, 0x80ec886c, - 0x82ed806d, 0xb8eef807, - 0x866fff6e, 0x001f8000, - 0x8e6f8b6f, 0x8977ff77, - 0xfc000000, 0x87776f77, - 0x896eff6e, 0x001f8000, - 0xb96ef807, 0xb8faf812, + 0xbf850051, 0xbf8e0010, + 0xb8fbf803, 0xbf82fffa, + 0x866eff7b, 0x00000900, + 0xbf850015, 0x866eff7b, + 0x000071ff, 0xbf840008, + 0x866fff7b, 0x00007080, + 0xbf840001, 0xbeee1a87, + 0xb8eff801, 0x8e6e8c6e, + 0x866e6f6e, 0xbf85000a, + 0x866eff6d, 0x00ff0000, + 0xbf850007, 0xb8eef801, + 0x866eff6e, 0x00000800, + 0xbf850003, 0x866eff7b, + 0x00000400, 0xbf850036, + 0xb8faf807, 0x867aff7a, + 0x001f8000, 0x8e7a8b7a, + 0x8977ff77, 0xfc000000, + 0x87777a77, 0xba7ff807, + 0x00000000, 0xb8faf812, 0xb8fbf813, 0x8efa887a, - 0xc0071bbd, 0x00000000, - 0xbf8cc07f, 0xc0071ebd, - 0x00000008, 0xbf8cc07f, - 0x86ee6e6e, 0xbf840001, - 0xbe801d6e, 0xb8fbf803, - 0x867bff7b, 0x000001ff, + 0xc0031bbd, 0x00000010, + 0xbf8cc07f, 0x8e6e976e, + 0x8977ff77, 0x00800000, + 0x87776e77, 0xc0071bbd, + 0x00000000, 0xbf8cc07f, + 0xc0071ebd, 0x00000008, + 
0xbf8cc07f, 0x86ee6e6e, + 0xbf840001, 0xbe801d6e, + 0x866eff6d, 0x01ff0000, + 0xbf850005, 0x8778ff78, + 0x00002000, 0x80ec886c, + 0x82ed806d, 0xbf820005, + 0x866eff6d, 0x01000000, 0xbf850002, 0x806c846c, 0x826d806d, 0x866dff6d, - 0x0000ffff, 0x8f6e8b77, - 0x866eff6e, 0x001f8000, - 0xb96ef807, 0x86fe7e7e, + 0x0000ffff, 0x8f7a8b77, + 0x867aff7a, 0x001f8000, + 0xb97af807, 0x86fe7e7e, 0x86ea6a6a, 0x8f6e8378, 0xb96ee0c2, 0xbf800002, 0xb9780002, 0xbe801f6c, 0x866dff6d, 0x0000ffff, 0xbefa0080, 0xb97a0283, - 0xb8fa2407, 0x8e7a9b7a, - 0x876d7a6d, 0xb8fa03c7, - 0x8e7a9a7a, 0x876d7a6d, 0xb8faf807, 0x867aff7a, - 0x00007fff, 0xb97af807, - 0xbeee007e, 0xbeef007f, - 0xbefe0180, 0xbf900004, - 0x877a8478, 0xb97af802, - 0xbf8e0002, 0xbf88fffe, - 0xb8fa2985, 0x807a817a, - 0x8e7a8a7a, 0x8e7a817a, - 0xb8fb1605, 0x807b817b, - 0x8e7b867b, 0x807a7b7a, - 0x807a7e7a, 0x827b807f, - 0x867bff7b, 0x0000ffff, - 0xc04b1c3d, 0x00000050, - 0xbf8cc07f, 0xc04b1d3d, - 0x00000060, 0xbf8cc07f, - 0xc0431e7d, 0x00000074, - 0xbf8cc07f, 0xbef4007e, - 0x8675ff7f, 0x0000ffff, - 0x8775ff75, 0x00040000, - 0xbef60080, 0xbef700ff, - 0x00807fac, 0x867aff7f, - 0x08000000, 0x8f7a837a, - 0x87777a77, 0x867aff7f, - 0x70000000, 0x8f7a817a, - 0x87777a77, 0xbef1007c, - 0xbef00080, 0xb8f02985, - 0x80708170, 0x8e708a70, - 0x8e708170, 0xb8fa1605, - 0x807a817a, 0x8e7a867a, - 0x80707a70, 0xbef60084, - 0xbef600ff, 0x01000000, - 0xbefe007c, 0xbefc0070, - 0xc0611c7a, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611b3a, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, - 0xbefe007c, 0xbefc0070, - 0xc0611b7a, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611bba, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, - 0xbefe007c, 0xbefc0070, - 0xc0611bfa, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611e3a, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, - 0xb8fbf803, 0xbefe007c, - 0xbefc0070, 0xc0611efa, - 0x0000007c, 
0xbf8cc07f, - 0x80708470, 0xbefc007e, - 0xbefe007c, 0xbefc0070, - 0xc0611a3a, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611a7a, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, - 0xb8f1f801, 0xbefe007c, - 0xbefc0070, 0xc0611c7a, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, - 0x867aff7f, 0x04000000, - 0xbeef0080, 0x876f6f7a, - 0xb8f02985, 0x80708170, - 0x8e708a70, 0x8e708170, - 0xb8fb1605, 0x807b817b, - 0x8e7b847b, 0x8e76827b, - 0xbef600ff, 0x01000000, - 0xbef20174, 0x80747074, - 0x82758075, 0xbefc0080, - 0xbf800000, 0xbe802b00, - 0xbe822b02, 0xbe842b04, - 0xbe862b06, 0xbe882b08, - 0xbe8a2b0a, 0xbe8c2b0c, - 0xbe8e2b0e, 0xc06b003a, - 0x00000000, 0xbf8cc07f, - 0xc06b013a, 0x00000010, - 0xbf8cc07f, 0xc06b023a, - 0x00000020, 0xbf8cc07f, - 0xc06b033a, 0x00000030, - 0xbf8cc07f, 0x8074c074, - 0x82758075, 0x807c907c, - 0xbf0a7b7c, 0xbf85ffe7, - 0xbef40172, 0xbef00080, - 0xbefe00c1, 0xbeff00c1, - 0xbee80080, 0xbee90080, - 0xbef600ff, 0x01000000, - 0x867aff78, 0x00400000, - 0xbf850003, 0xb8faf803, - 0x897a7aff, 0x10000000, - 0xbf85004d, 0xbe840080, - 0xd2890000, 0x00000900, - 0x80048104, 0xd2890001, - 0x00000900, 0x80048104, - 0xd2890002, 0x00000900, - 0x80048104, 0xd2890003, - 0x00000900, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbe840080, 0xd2890000, - 0x00000901, 0x80048104, - 0xd2890001, 0x00000901, - 0x80048104, 0xd2890002, - 0x00000901, 0x80048104, - 0xd2890003, 0x00000901, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000902, - 0x80048104, 0xd2890001, - 0x00000902, 0x80048104, - 0xd2890002, 0x00000902, - 0x80048104, 0xd2890003, - 0x00000902, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbe840080, 0xd2890000, - 0x00000903, 0x80048104, - 0xd2890001, 0x00000903, - 0x80048104, 0xd2890002, - 0x00000903, 0x80048104, - 0xd2890003, 0x00000903, - 
0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbf820008, - 0xe0724000, 0x701d0000, - 0xe0724100, 0x701d0100, - 0xe0724200, 0x701d0200, - 0xe0724300, 0x701d0300, - 0xbefe00c1, 0xbeff00c1, - 0xb8fb4306, 0x867bc17b, - 0xbf840064, 0xbf8a0000, - 0x867aff6f, 0x04000000, - 0xbf840060, 0x8e7b867b, - 0x8e7b827b, 0xbef6007b, + 0x001f8000, 0x8e7a8b7a, + 0x8977ff77, 0xfc000000, + 0x87777a77, 0xba7ff807, + 0x00000000, 0xbeee007e, + 0xbeef007f, 0xbefe0180, + 0xbf900004, 0x877a8478, + 0xb97af802, 0xbf8e0002, + 0xbf88fffe, 0xb8fa2985, + 0x807a817a, 0x8e7a8a7a, + 0x8e7a817a, 0xb8fb1605, + 0x807b817b, 0x8e7b867b, + 0x807a7b7a, 0x807a7e7a, + 0x827b807f, 0x867bff7b, + 0x0000ffff, 0xc04b1c3d, + 0x00000050, 0xbf8cc07f, + 0xc04b1d3d, 0x00000060, + 0xbf8cc07f, 0xc0431e7d, + 0x00000074, 0xbf8cc07f, + 0xbef4007e, 0x8675ff7f, + 0x0000ffff, 0x8775ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x00807fac, + 0xbef1007c, 0xbef00080, 0xb8f02985, 0x80708170, 0x8e708a70, 0x8e708170, 0xb8fa1605, 0x807a817a, 0x8e7a867a, 0x80707a70, - 0x8070ff70, 0x00000080, - 0xbef600ff, 0x01000000, - 0xbefc0080, 0xd28c0002, - 0x000100c1, 0xd28d0003, - 0x000204c1, 0x867aff78, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xbefe007c, + 0xbefc0070, 0xc0611c7a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611b3a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611b7a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611bba, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611bfa, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611e3a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xb8fbf803, + 0xbefe007c, 0xbefc0070, + 0xc0611efa, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611a3a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 
0xc0611a7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xb8f1f801, + 0xbefe007c, 0xbefc0070, + 0xc0611c7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0x867aff7f, + 0x04000000, 0xbeef0080, + 0x876f6f7a, 0xb8f02985, + 0x80708170, 0x8e708a70, + 0x8e708170, 0xb8fb1605, + 0x807b817b, 0x8e7b847b, + 0x8e76827b, 0xbef600ff, + 0x01000000, 0xbef20174, + 0x80747074, 0x82758075, + 0xbefc0080, 0xbf800000, + 0xbe802b00, 0xbe822b02, + 0xbe842b04, 0xbe862b06, + 0xbe882b08, 0xbe8a2b0a, + 0xbe8c2b0c, 0xbe8e2b0e, + 0xc06b003a, 0x00000000, + 0xbf8cc07f, 0xc06b013a, + 0x00000010, 0xbf8cc07f, + 0xc06b023a, 0x00000020, + 0xbf8cc07f, 0xc06b033a, + 0x00000030, 0xbf8cc07f, + 0x8074c074, 0x82758075, + 0x807c907c, 0xbf0a7b7c, + 0xbf85ffe7, 0xbef40172, + 0xbef00080, 0xbefe00c1, + 0xbeff00c1, 0xbee80080, + 0xbee90080, 0xbef600ff, + 0x01000000, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf850030, - 0x24040682, 0xd86e4000, - 0x00000002, 0xbf8cc07f, + 0x10000000, 0xbf85004d, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, @@ -1796,31 +1736,50 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = { 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, - 0x680404ff, 0x00000200, - 0xd0c9006a, 0x0000f702, - 0xbf87ffd2, 0xbf820015, - 0xd1060002, 0x00011103, - 0x7e0602ff, 0x00000200, - 0xbefc00ff, 0x00010000, - 0xbe800077, 0x8677ff77, - 0xff7fffff, 0x8777ff77, - 0x00058000, 0xd8ec0000, - 0x00000002, 0xbf8cc07f, - 0xe0765000, 0x701d0002, - 0x68040702, 0xd0c9006a, - 0x0000f702, 0xbf87fff7, - 0xbef70000, 0xbef000ff, - 0x00000400, 0xbefe00c1, - 0xbeff00c1, 0xb8fb2b05, - 0x807b817b, 0x8e7b827b, - 0xbef600ff, 0x01000000, - 0xbefc0084, 0xbf0a7b7c, - 0xbf84006d, 0xbf11017c, - 0x807bff7b, 0x00001000, + 0xbe840080, 0xd2890000, + 0x00000902, 0x80048104, + 0xd2890001, 0x00000902, + 0x80048104, 0xd2890002, + 0x00000902, 0x80048104, + 0xd2890003, 0x00000902, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 
0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000903, + 0x80048104, 0xd2890001, + 0x00000903, 0x80048104, + 0xd2890002, 0x00000903, + 0x80048104, 0xd2890003, + 0x00000903, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbf820008, 0xe0724000, + 0x701d0000, 0xe0724100, + 0x701d0100, 0xe0724200, + 0x701d0200, 0xe0724300, + 0x701d0300, 0xbefe00c1, + 0xbeff00c1, 0xb8fb4306, + 0x867bc17b, 0xbf840064, + 0xbf8a0000, 0x867aff6f, + 0x04000000, 0xbf840060, + 0x8e7b867b, 0x8e7b827b, + 0xbef6007b, 0xb8f02985, + 0x80708170, 0x8e708a70, + 0x8e708170, 0xb8fa1605, + 0x807a817a, 0x8e7a867a, + 0x80707a70, 0x8070ff70, + 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xd28c0002, 0x000100c1, + 0xd28d0003, 0x000204c1, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, 0x10000000, - 0xbf850051, 0xbe840080, + 0xbf850030, 0x24040682, + 0xd86e4000, 0x00000002, + 0xbf8cc07f, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, 0x80048104, @@ -1839,51 +1798,31 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = { 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000902, - 0x80048104, 0xd2890001, - 0x00000902, 0x80048104, - 0xd2890002, 0x00000902, - 0x80048104, 0xd2890003, - 0x00000902, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbe840080, 0xd2890000, - 0x00000903, 0x80048104, - 0xd2890001, 0x00000903, - 0x80048104, 0xd2890002, - 0x00000903, 0x80048104, - 0xd2890003, 0x00000903, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0x807c847c, - 0xbf0a7b7c, 0xbf85ffb1, - 0xbf9c0000, 0xbf820012, - 0x7e000300, 0x7e020301, - 0x7e040302, 0x7e060303, - 0xe0724000, 0x701d0000, - 0xe0724100, 0x701d0100, - 0xe0724200, 0x701d0200, - 0xe0724300, 0x701d0300, - 0x807c847c, 0x8070ff70, - 0x00000400, 0xbf0a7b7c, - 0xbf85ffef, 0xbf9c0000, - 0xb8fb2985, 0x807b817b, - 
0x8e7b837b, 0xb8fa2b05, - 0x807a817a, 0x8e7a827a, - 0x80fb7a7b, 0x867b7b7b, - 0xbf84007a, 0x807bff7b, - 0x00001000, 0xbefc0080, - 0xbf11017c, 0x867aff78, + 0xbf84ffee, 0x680404ff, + 0x00000200, 0xd0c9006a, + 0x0000f702, 0xbf87ffd2, + 0xbf820015, 0xd1060002, + 0x00011103, 0x7e0602ff, + 0x00000200, 0xbefc00ff, + 0x00010000, 0xbe800077, + 0x8677ff77, 0xff7fffff, + 0x8777ff77, 0x00058000, + 0xd8ec0000, 0x00000002, + 0xbf8cc07f, 0xe0765000, + 0x701d0002, 0x68040702, + 0xd0c9006a, 0x0000f702, + 0xbf87fff7, 0xbef70000, + 0xbef000ff, 0x00000400, + 0xbefe00c1, 0xbeff00c1, + 0xb8fb2b05, 0x807b817b, + 0x8e7b827b, 0xbef600ff, + 0x01000000, 0xbefc0084, + 0xbf0a7b7c, 0xbf84006d, + 0xbf11017c, 0x807bff7b, + 0x00001000, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf850059, - 0xd3d84000, 0x18000100, - 0xd3d84001, 0x18000101, - 0xd3d84002, 0x18000102, - 0xd3d84003, 0x18000103, + 0x10000000, 0xbf850051, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, @@ -1923,169 +1862,241 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = { 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0x807c847c, 0xbf0a7b7c, - 0xbf85ffa9, 0xbf9c0000, - 0xbf820016, 0xd3d84000, - 0x18000100, 0xd3d84001, - 0x18000101, 0xd3d84002, - 0x18000102, 0xd3d84003, - 0x18000103, 0xe0724000, + 0xbf85ffb1, 0xbf9c0000, + 0xbf820012, 0x7e000300, + 0x7e020301, 0x7e040302, + 0x7e060303, 0xe0724000, 0x701d0000, 0xe0724100, 0x701d0100, 0xe0724200, 0x701d0200, 0xe0724300, 0x701d0300, 0x807c847c, 0x8070ff70, 0x00000400, - 0xbf0a7b7c, 0xbf85ffeb, - 0xbf9c0000, 0xbf820101, - 0xbef4007e, 0x8675ff7f, - 0x0000ffff, 0x8775ff75, - 0x00040000, 0xbef60080, - 0xbef700ff, 0x00807fac, - 0x866eff7f, 0x08000000, - 0x8f6e836e, 0x87776e77, - 0x866eff7f, 0x70000000, - 0x8f6e816e, 0x87776e77, - 0x866eff7f, 0x04000000, - 0xbf84001f, 0xbefe00c1, - 0xbeff00c1, 0xb8ef4306, - 0x866fc16f, 0xbf84001a, - 0x8e6f866f, 0x8e6f826f, - 0xbef6006f, 0xb8f82985, - 0x80788178, 0x8e788a78, - 0x8e788178, 0xb8ee1605, 
- 0x806e816e, 0x8e6e866e, - 0x80786e78, 0x8078ff78, - 0x00000080, 0xbef600ff, - 0x01000000, 0xbefc0080, - 0xe0510000, 0x781d0000, - 0xe0510100, 0x781d0000, - 0x807cff7c, 0x00000200, - 0x8078ff78, 0x00000200, - 0xbf0a6f7c, 0xbf85fff6, + 0xbf0a7b7c, 0xbf85ffef, + 0xbf9c0000, 0xb8fb2985, + 0x807b817b, 0x8e7b837b, + 0xb8fa2b05, 0x807a817a, + 0x8e7a827a, 0x80fb7a7b, + 0x867b7b7b, 0xbf84007a, + 0x807bff7b, 0x00001000, + 0xbefc0080, 0xbf11017c, + 0x867aff78, 0x00400000, + 0xbf850003, 0xb8faf803, + 0x897a7aff, 0x10000000, + 0xbf850059, 0xd3d84000, + 0x18000100, 0xd3d84001, + 0x18000101, 0xd3d84002, + 0x18000102, 0xd3d84003, + 0x18000103, 0xbe840080, + 0xd2890000, 0x00000900, + 0x80048104, 0xd2890001, + 0x00000900, 0x80048104, + 0xd2890002, 0x00000900, + 0x80048104, 0xd2890003, + 0x00000900, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000901, 0x80048104, + 0xd2890001, 0x00000901, + 0x80048104, 0xd2890002, + 0x00000901, 0x80048104, + 0xd2890003, 0x00000901, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000902, + 0x80048104, 0xd2890001, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, + 0x80048104, 0xd2890003, + 0x00000902, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, + 0x80048104, 0xd2890002, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0x807c847c, + 0xbf0a7b7c, 0xbf85ffa9, + 0xbf9c0000, 0xbf820016, + 0xd3d84000, 0x18000100, + 0xd3d84001, 0x18000101, + 0xd3d84002, 0x18000102, + 0xd3d84003, 0x18000103, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, + 0x807c847c, 0x8070ff70, + 0x00000400, 0xbf0a7b7c, + 0xbf85ffeb, 0xbf9c0000, + 0xbf8200ee, 
0xbef4007e, + 0x8675ff7f, 0x0000ffff, + 0x8775ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x00807fac, 0x866eff7f, + 0x04000000, 0xbf84001f, 0xbefe00c1, 0xbeff00c1, + 0xb8ef4306, 0x866fc16f, + 0xbf84001a, 0x8e6f866f, + 0x8e6f826f, 0xbef6006f, + 0xb8f82985, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0x8078ff78, 0x00000080, 0xbef600ff, 0x01000000, - 0xb8ef2b05, 0x806f816f, - 0x8e6f826f, 0x806fff6f, - 0x00008000, 0xbef80080, - 0xbeee0078, 0x8078ff78, - 0x00000400, 0xbefc0084, + 0xbefc0080, 0xe0510000, + 0x781d0000, 0xe0510100, + 0x781d0000, 0x807cff7c, + 0x00000200, 0x8078ff78, + 0x00000200, 0xbf0a6f7c, + 0xbf85fff6, 0xbefe00c1, + 0xbeff00c1, 0xbef600ff, + 0x01000000, 0xb8ef2b05, + 0x806f816f, 0x8e6f826f, + 0x806fff6f, 0x00008000, + 0xbef80080, 0xbeee0078, + 0x8078ff78, 0x00000400, + 0xbefc0084, 0xbf11087c, + 0xe0524000, 0x781d0000, + 0xe0524100, 0x781d0100, + 0xe0524200, 0x781d0200, + 0xe0524300, 0x781d0300, + 0xbf8c0f70, 0x7e000300, + 0x7e020301, 0x7e040302, + 0x7e060303, 0x807c847c, + 0x8078ff78, 0x00000400, + 0xbf0a6f7c, 0xbf85ffee, + 0xb8ef2985, 0x806f816f, + 0x8e6f836f, 0xb8f92b05, + 0x80798179, 0x8e798279, + 0x80ef796f, 0x866f6f6f, + 0xbf84001a, 0x806fff6f, + 0x00008000, 0xbefc0080, 0xbf11087c, 0xe0524000, 0x781d0000, 0xe0524100, 0x781d0100, 0xe0524200, 0x781d0200, 0xe0524300, 0x781d0300, 0xbf8c0f70, - 0x7e000300, 0x7e020301, - 0x7e040302, 0x7e060303, + 0xd3d94000, 0x18000100, + 0xd3d94001, 0x18000101, + 0xd3d94002, 0x18000102, + 0xd3d94003, 0x18000103, 0x807c847c, 0x8078ff78, 0x00000400, 0xbf0a6f7c, - 0xbf85ffee, 0xb8ef2985, - 0x806f816f, 0x8e6f836f, - 0xb8f92b05, 0x80798179, - 0x8e798279, 0x80ef796f, - 0x866f6f6f, 0xbf84001a, - 0x806fff6f, 0x00008000, - 0xbefc0080, 0xbf11087c, - 0xe0524000, 0x781d0000, - 0xe0524100, 0x781d0100, - 0xe0524200, 0x781d0200, - 0xe0524300, 0x781d0300, - 0xbf8c0f70, 0xd3d94000, - 0x18000100, 0xd3d94001, - 0x18000101, 0xd3d94002, - 0x18000102, 0xd3d94003, - 0x18000103, 0x807c847c, - 
0x8078ff78, 0x00000400, - 0xbf0a6f7c, 0xbf85ffea, - 0xbf9c0000, 0xe0524000, - 0x6e1d0000, 0xe0524100, - 0x6e1d0100, 0xe0524200, - 0x6e1d0200, 0xe0524300, - 0x6e1d0300, 0xbf8c0f70, - 0xb8f82985, 0x80788178, - 0x8e788a78, 0x8e788178, - 0xb8ee1605, 0x806e816e, - 0x8e6e866e, 0x80786e78, - 0x80f8c078, 0xb8ef1605, - 0x806f816f, 0x8e6f846f, - 0x8e76826f, 0xbef600ff, - 0x01000000, 0xbefc006f, - 0xc031003a, 0x00000078, - 0x80f8c078, 0xbf8cc07f, - 0x80fc907c, 0xbf800000, - 0xbe802d00, 0xbe822d02, - 0xbe842d04, 0xbe862d06, - 0xbe882d08, 0xbe8a2d0a, - 0xbe8c2d0c, 0xbe8e2d0e, - 0xbf06807c, 0xbf84fff0, - 0xb8f82985, 0x80788178, - 0x8e788a78, 0x8e788178, - 0xb8ee1605, 0x806e816e, - 0x8e6e866e, 0x80786e78, - 0xbef60084, 0xbef600ff, - 0x01000000, 0xc0211bfa, + 0xbf85ffea, 0xbf9c0000, + 0xe0524000, 0x6e1d0000, + 0xe0524100, 0x6e1d0100, + 0xe0524200, 0x6e1d0200, + 0xe0524300, 0x6e1d0300, + 0xbf8c0f70, 0xb8f82985, + 0x80788178, 0x8e788a78, + 0x8e788178, 0xb8ee1605, + 0x806e816e, 0x8e6e866e, + 0x80786e78, 0x80f8c078, + 0xb8ef1605, 0x806f816f, + 0x8e6f846f, 0x8e76826f, + 0xbef600ff, 0x01000000, + 0xbefc006f, 0xc031003a, + 0x00000078, 0x80f8c078, + 0xbf8cc07f, 0x80fc907c, + 0xbf800000, 0xbe802d00, + 0xbe822d02, 0xbe842d04, + 0xbe862d06, 0xbe882d08, + 0xbe8a2d0a, 0xbe8c2d0c, + 0xbe8e2d0e, 0xbf06807c, + 0xbf84fff0, 0xb8f82985, + 0x80788178, 0x8e788a78, + 0x8e788178, 0xb8ee1605, + 0x806e816e, 0x8e6e866e, + 0x80786e78, 0xbef60084, + 0xbef600ff, 0x01000000, + 0xc0211bfa, 0x00000078, + 0x80788478, 0xc0211b3a, 0x00000078, 0x80788478, - 0xc0211b3a, 0x00000078, - 0x80788478, 0xc0211b7a, + 0xc0211b7a, 0x00000078, + 0x80788478, 0xc0211c3a, 0x00000078, 0x80788478, - 0xc0211c3a, 0x00000078, - 0x80788478, 0xc0211c7a, + 0xc0211c7a, 0x00000078, + 0x80788478, 0xc0211eba, 0x00000078, 0x80788478, - 0xc0211eba, 0x00000078, - 0x80788478, 0xc0211efa, + 0xc0211efa, 0x00000078, + 0x80788478, 0xc0211a3a, 0x00000078, 0x80788478, - 0xc0211a3a, 0x00000078, - 0x80788478, 0xc0211a7a, + 0xc0211a7a, 0x00000078, + 
0x80788478, 0xc0211cfa, 0x00000078, 0x80788478, - 0xc0211cfa, 0x00000078, - 0x80788478, 0xbf8cc07f, - 0xbefc006f, 0xbefe0070, - 0xbeff0071, 0x866f7bff, - 0x000003ff, 0xb96f4803, - 0x866f7bff, 0xfffff800, - 0x8f6f8b6f, 0xb96fa2c3, - 0xb973f801, 0xb8ee2985, - 0x806e816e, 0x8e6e8a6e, - 0x8e6e816e, 0xb8ef1605, - 0x806f816f, 0x8e6f866f, - 0x806e6f6e, 0x806e746e, - 0x826f8075, 0x866fff6f, - 0x0000ffff, 0xc00b1c37, - 0x00000050, 0xc00b1d37, - 0x00000060, 0xc0031e77, - 0x00000074, 0xbf8cc07f, - 0x866fff6d, 0xf8000000, - 0x8f6f9b6f, 0x8e6f906f, - 0xbeee0080, 0x876e6f6e, - 0x866fff6d, 0x04000000, - 0x8f6f9a6f, 0x8e6f8f6f, - 0x876e6f6e, 0x866fff7a, - 0x00800000, 0x8f6f976f, + 0xbf8cc07f, 0xbefc006f, + 0xbefe0070, 0xbeff0071, + 0x866f7bff, 0x000003ff, + 0xb96f4803, 0x866f7bff, + 0xfffff800, 0x8f6f8b6f, + 0xb96fa2c3, 0xb973f801, + 0xb8ee2985, 0x806e816e, + 0x8e6e8a6e, 0x8e6e816e, + 0xb8ef1605, 0x806f816f, + 0x8e6f866f, 0x806e6f6e, + 0x806e746e, 0x826f8075, + 0x866fff6f, 0x0000ffff, + 0xc00b1c37, 0x00000050, + 0xc00b1d37, 0x00000060, + 0xc0031e77, 0x00000074, + 0xbf8cc07f, 0x8f6e8b77, + 0x866eff6e, 0x001f8000, 0xb96ef807, 0x866dff6d, 0x0000ffff, 0x86fe7e7e, 0x86ea6a6a, 0x8f6e837a, 0xb96ee0c2, 0xbf800002, 0xb97a0002, 0xbf8a0000, - 0x95806f6c, 0xbf810000, + 0xbe801f6c, 0xbf810000, }; static const uint32_t cwsr_trap_gfx10_hex[] = { - 0xbf820001, 0xbf8201cf, + 0xbf820001, 0xbf82021c, 0xb0804004, 0xb978f802, - 0x8a788678, 0xb96ef801, - 0x876eff6e, 0x00000800, - 0xbf840003, 0x876eff78, + 0x8a78ff78, 0x00020006, + 0xb97bf803, 0x876eff78, 0x00002000, 0xbf840009, - 0xb97bf803, 0x876eff7b, - 0x00000400, 0xbf85001d, - 0x876eff7b, 0x00000100, - 0xbf840002, 0x8878ff78, - 0x00002000, 0xb97af812, + 0x876eff6d, 0x00ff0000, + 0xbf85001e, 0x876eff7b, + 0x00000400, 0xbf850041, + 0xbf8e0010, 0xb97bf803, + 0xbf82fffa, 0x876eff7b, + 0x00000900, 0xbf850015, + 0x876eff7b, 0x000071ff, + 0xbf840008, 0x876fff7b, + 0x00007080, 0xbf840001, + 0xbeee1d87, 0xb96ff801, + 0x8f6e8c6e, 0x876e6f6e, + 0xbf85000a, 
0x876eff6d, + 0x00ff0000, 0xbf850007, + 0xb96ef801, 0x876eff6e, + 0x00000800, 0xbf850003, + 0x876eff7b, 0x00000400, + 0xbf850026, 0xb97af812, 0xb97bf813, 0x8ffa887a, - 0xf4051bbd, 0xfa000000, - 0xbf8cc07f, 0xf4051ebd, - 0xfa000008, 0xbf8cc07f, - 0x87ee6e6e, 0xbf840001, - 0xbe80206e, 0xb97bf803, - 0x877bff7b, 0x000001ff, + 0xf4011bbd, 0xfa000010, + 0xbf8cc07f, 0x8f6e976e, + 0x8a77ff77, 0x00800000, + 0x88776e77, 0xf4051bbd, + 0xfa000000, 0xbf8cc07f, + 0xf4051ebd, 0xfa000008, + 0xbf8cc07f, 0x87ee6e6e, + 0xbf840001, 0xbe80206e, + 0x876eff6d, 0x01ff0000, + 0xbf850005, 0x8878ff78, + 0x00002000, 0x80ec886c, + 0x82ed806d, 0xbf820005, + 0x876eff6d, 0x01000000, 0xbf850002, 0x806c846c, 0x826d806d, 0x876dff6d, 0x0000ffff, 0x87fe7e7e, @@ -2095,37 +2106,55 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xb9fa0283, 0xbeee037e, 0xbeef037f, 0xbefe0480, 0xbf900004, 0xbf8cc07f, + 0x877aff7f, 0x04000000, + 0x8f7a857a, 0x886d7a6d, + 0xbefa037e, 0x877bff7f, + 0x0000ffff, 0xbefe03c1, + 0xbeff03c1, 0xdc5f8000, + 0x007a0000, 0x7e000280, + 0xbefe037a, 0xbeff037b, 0xb97b02dc, 0x8f7b997b, - 0x887b7b7f, 0xb97a2a05, - 0x807a817a, 0xbf0d997b, - 0xbf850002, 0x8f7a897a, - 0xbf820001, 0x8f7a8a7a, + 0xb97a2a05, 0x807a817a, + 0xbf0d997b, 0xbf850002, + 0x8f7a897a, 0xbf820001, + 0x8f7a8a7a, 0xb97b1e06, + 0x8f7b8a7b, 0x807a7b7a, 0x877bff7f, 0x0000ffff, 0x807aff7a, 0x00000200, 0x807a7e7a, 0x827b807b, - 0xbef4037e, 0x8775ff7f, - 0x0000ffff, 0x8875ff75, - 0x00040000, 0xbef60380, - 0xbef703ff, 0x10807fac, - 0x877aff7f, 0x08000000, - 0x907a837a, 0x88777a77, - 0x877aff7f, 0x70000000, - 0x907a817a, 0x88777a77, - 0xbef1037c, 0xbef00380, - 0xb97302dc, 0x8f739973, - 0x8873737f, 0xbefe03c1, + 0xd7610000, 0x00010870, + 0xd7610000, 0x00010a71, + 0xd7610000, 0x00010c72, + 0xd7610000, 0x00010e73, + 0xd7610000, 0x00011074, + 0xd7610000, 0x00011275, + 0xd7610000, 0x00011476, + 0xd7610000, 0x00011677, + 0xd7610000, 0x00011a79, + 0xd7610000, 0x00011c7e, + 0xd7610000, 0x00011e7f, + 0xbefe03ff, 0x00003fff, + 0xbeff0380, 
0xdc5f8040, + 0x007a0000, 0xd760007a, + 0x00011d00, 0xd760007b, + 0x00011f00, 0xbefe037a, + 0xbeff037b, 0xbef4037e, + 0x8775ff7f, 0x0000ffff, + 0x8875ff75, 0x00040000, + 0xbef60380, 0xbef703ff, + 0x10807fac, 0xbef1037c, + 0xbef00380, 0xb97302dc, + 0x8f739973, 0xbefe03c1, 0x907c9973, 0x877c817c, 0xbf06817c, 0xbf850002, 0xbeff0380, 0xbf820002, - 0xbeff03c1, 0xbf82000b, + 0xbeff03c1, 0xbf820009, 0xbef603ff, 0x01000000, - 0xe0704000, 0x705d0000, 0xe0704080, 0x705d0100, 0xe0704100, 0x705d0200, 0xe0704180, 0x705d0300, - 0xbf82000a, 0xbef603ff, - 0x01000000, 0xe0704000, - 0x705d0000, 0xe0704100, + 0xbf820008, 0xbef603ff, + 0x01000000, 0xe0704100, 0x705d0100, 0xe0704200, 0x705d0200, 0xe0704300, 0x705d0300, 0xb9702a05, @@ -2140,8 +2169,9 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xbefc0380, 0xd7610002, 0x0000f871, 0x807c817c, 0xd7610002, 0x0000f86c, - 0x807c817c, 0xd7610002, - 0x0000f86d, 0x807c817c, + 0x807c817c, 0x8a7aff6d, + 0x80000000, 0xd7610002, + 0x0000f87a, 0x807c817c, 0xd7610002, 0x0000f86e, 0x807c817c, 0xd7610002, 0x0000f86f, 0x807c817c, @@ -2156,160 +2186,157 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0x0000f871, 0x807c817c, 0xb971f815, 0xd7610002, 0x0000f871, 0x807c817c, + 0xbefe03ff, 0x0000ffff, 0xbeff0380, 0xe0704000, - 0x705d0200, 0xb9702a05, - 0x80708170, 0xbf0d9973, - 0xbf850002, 0x8f708970, - 0xbf820001, 0x8f708a70, - 0xb97a1e06, 0x8f7a8a7a, - 0x80707a70, 0xbef603ff, - 0x01000000, 0xbef90380, - 0xbefc0380, 0xbf800000, - 0xbe802f00, 0xbe822f02, - 0xbe842f04, 0xbe862f06, - 0xbe882f08, 0xbe8a2f0a, - 0xbe8c2f0c, 0xbe8e2f0e, - 0xd7610002, 0x0000f200, - 0x80798179, 0xd7610002, - 0x0000f201, 0x80798179, - 0xd7610002, 0x0000f202, - 0x80798179, 0xd7610002, - 0x0000f203, 0x80798179, - 0xd7610002, 0x0000f204, - 0x80798179, 0xd7610002, - 0x0000f205, 0x80798179, - 0xd7610002, 0x0000f206, - 0x80798179, 0xd7610002, - 0x0000f207, 0x80798179, - 0xd7610002, 0x0000f208, - 0x80798179, 0xd7610002, - 0x0000f209, 0x80798179, - 0xd7610002, 0x0000f20a, - 
0x80798179, 0xd7610002, - 0x0000f20b, 0x80798179, - 0xd7610002, 0x0000f20c, - 0x80798179, 0xd7610002, - 0x0000f20d, 0x80798179, - 0xd7610002, 0x0000f20e, - 0x80798179, 0xd7610002, - 0x0000f20f, 0x80798179, - 0xbf06a079, 0xbf840006, - 0xe0704000, 0x705d0200, - 0x8070ff70, 0x00000080, - 0xbef90380, 0x7e040280, - 0x807c907c, 0xbf0aff7c, - 0x00000060, 0xbf85ffbc, - 0xbe802f00, 0xbe822f02, - 0xbe842f04, 0xbe862f06, - 0xbe882f08, 0xbe8a2f0a, - 0xd7610002, 0x0000f200, - 0x80798179, 0xd7610002, - 0x0000f201, 0x80798179, - 0xd7610002, 0x0000f202, - 0x80798179, 0xd7610002, - 0x0000f203, 0x80798179, - 0xd7610002, 0x0000f204, - 0x80798179, 0xd7610002, - 0x0000f205, 0x80798179, - 0xd7610002, 0x0000f206, - 0x80798179, 0xd7610002, - 0x0000f207, 0x80798179, - 0xd7610002, 0x0000f208, - 0x80798179, 0xd7610002, - 0x0000f209, 0x80798179, - 0xd7610002, 0x0000f20a, - 0x80798179, 0xd7610002, - 0x0000f20b, 0x80798179, - 0xe0704000, 0x705d0200, - 0xbefe03c1, 0x907c9973, - 0x877c817c, 0xbf06817c, - 0xbf850002, 0xbeff0380, - 0xbf820001, 0xbeff03c1, - 0xb97b4306, 0x877bc17b, - 0xbf840044, 0xbf8a0000, - 0x877aff73, 0x04000000, - 0xbf840040, 0x8f7b867b, - 0x8f7b827b, 0xbef6037b, + 0x705d0200, 0xbefe03c1, 0xb9702a05, 0x80708170, 0xbf0d9973, 0xbf850002, 0x8f708970, 0xbf820001, 0x8f708a70, 0xb97a1e06, 0x8f7a8a7a, 0x80707a70, - 0x8070ff70, 0x00000200, - 0x8070ff70, 0x00000080, 0xbef603ff, 0x01000000, - 0xd7650000, 0x000100c1, - 0xd7660000, 0x000200c1, - 0x16000084, 0x907c9973, + 0xbef90380, 0xbefc0380, + 0xbf800000, 0xbe802f00, + 0xbe822f02, 0xbe842f04, + 0xbe862f06, 0xbe882f08, + 0xbe8a2f0a, 0xbe8c2f0c, + 0xbe8e2f0e, 0xd7610002, + 0x0000f200, 0x80798179, + 0xd7610002, 0x0000f201, + 0x80798179, 0xd7610002, + 0x0000f202, 0x80798179, + 0xd7610002, 0x0000f203, + 0x80798179, 0xd7610002, + 0x0000f204, 0x80798179, + 0xd7610002, 0x0000f205, + 0x80798179, 0xd7610002, + 0x0000f206, 0x80798179, + 0xd7610002, 0x0000f207, + 0x80798179, 0xd7610002, + 0x0000f208, 0x80798179, + 0xd7610002, 0x0000f209, + 
0x80798179, 0xd7610002, + 0x0000f20a, 0x80798179, + 0xd7610002, 0x0000f20b, + 0x80798179, 0xd7610002, + 0x0000f20c, 0x80798179, + 0xd7610002, 0x0000f20d, + 0x80798179, 0xd7610002, + 0x0000f20e, 0x80798179, + 0xd7610002, 0x0000f20f, + 0x80798179, 0xbf06a079, + 0xbf840006, 0xe0704000, + 0x705d0200, 0x8070ff70, + 0x00000080, 0xbef90380, + 0x7e040280, 0x807c907c, + 0xbf0aff7c, 0x00000060, + 0xbf85ffbc, 0xbe802f00, + 0xbe822f02, 0xbe842f04, + 0xbe862f06, 0xbe882f08, + 0xbe8a2f0a, 0xd7610002, + 0x0000f200, 0x80798179, + 0xd7610002, 0x0000f201, + 0x80798179, 0xd7610002, + 0x0000f202, 0x80798179, + 0xd7610002, 0x0000f203, + 0x80798179, 0xd7610002, + 0x0000f204, 0x80798179, + 0xd7610002, 0x0000f205, + 0x80798179, 0xd7610002, + 0x0000f206, 0x80798179, + 0xd7610002, 0x0000f207, + 0x80798179, 0xd7610002, + 0x0000f208, 0x80798179, + 0xd7610002, 0x0000f209, + 0x80798179, 0xd7610002, + 0x0000f20a, 0x80798179, + 0xd7610002, 0x0000f20b, + 0x80798179, 0xe0704000, + 0x705d0200, 0xbefe03c1, + 0x907c9973, 0x877c817c, + 0xbf06817c, 0xbf850002, + 0xbeff0380, 0xbf820001, + 0xbeff03c1, 0xb97b4306, + 0x877bc17b, 0xbf840044, + 0xbf8a0000, 0x877aff6d, + 0x80000000, 0xbf840040, + 0x8f7b867b, 0x8f7b827b, + 0xbef6037b, 0xb9702a05, + 0x80708170, 0xbf0d9973, + 0xbf850002, 0x8f708970, + 0xbf820001, 0x8f708a70, + 0xb97a1e06, 0x8f7a8a7a, + 0x80707a70, 0x8070ff70, + 0x00000200, 0x8070ff70, + 0x00000080, 0xbef603ff, + 0x01000000, 0xd7650000, + 0x000100c1, 0xd7660000, + 0x000200c1, 0x16000084, + 0x907c9973, 0x877c817c, + 0xbf06817c, 0xbefc0380, + 0xbf850012, 0xbe8303ff, + 0x00000080, 0xbf800000, + 0xbf800000, 0xbf800000, + 0xd8d80000, 0x01000000, + 0xbf8c0000, 0xe0704000, + 0x705d0100, 0x807c037c, + 0x80700370, 0xd5250000, + 0x0001ff00, 0x00000080, + 0xbf0a7b7c, 0xbf85fff4, + 0xbf820011, 0xbe8303ff, + 0x00000100, 0xbf800000, + 0xbf800000, 0xbf800000, + 0xd8d80000, 0x01000000, + 0xbf8c0000, 0xe0704000, + 0x705d0100, 0x807c037c, + 0x80700370, 0xd5250000, + 0x0001ff00, 0x00000100, + 0xbf0a7b7c, 0xbf85fff4, 
+ 0xbefe03c1, 0x907c9973, 0x877c817c, 0xbf06817c, - 0xbefc0380, 0xbf850012, - 0xbe8303ff, 0x00000080, - 0xbf800000, 0xbf800000, - 0xbf800000, 0xd8d80000, - 0x01000000, 0xbf8c0000, - 0xe0704000, 0x705d0100, - 0x807c037c, 0x80700370, - 0xd5250000, 0x0001ff00, - 0x00000080, 0xbf0a7b7c, - 0xbf85fff4, 0xbf820011, - 0xbe8303ff, 0x00000100, - 0xbf800000, 0xbf800000, - 0xbf800000, 0xd8d80000, - 0x01000000, 0xbf8c0000, - 0xe0704000, 0x705d0100, - 0x807c037c, 0x80700370, - 0xd5250000, 0x0001ff00, - 0x00000100, 0xbf0a7b7c, - 0xbf85fff4, 0xbefe03c1, - 0x907c9973, 0x877c817c, - 0xbf06817c, 0xbf850004, - 0xbef003ff, 0x00000200, - 0xbeff0380, 0xbf820003, - 0xbef003ff, 0x00000400, - 0xbeff03c1, 0xb97b2a05, - 0x807b817b, 0x8f7b827b, - 0x907c9973, 0x877c817c, - 0xbf06817c, 0xbf850017, - 0xbef603ff, 0x01000000, - 0xbefc0384, 0xbf0a7b7c, - 0xbf840037, 0x7e008700, - 0x7e028701, 0x7e048702, - 0x7e068703, 0xe0704000, - 0x705d0000, 0xe0704080, - 0x705d0100, 0xe0704100, - 0x705d0200, 0xe0704180, - 0x705d0300, 0x807c847c, - 0x8070ff70, 0x00000200, - 0xbf0a7b7c, 0xbf85ffef, - 0xbf820025, 0xbef603ff, + 0xbf850004, 0xbef003ff, + 0x00000200, 0xbeff0380, + 0xbf820003, 0xbef003ff, + 0x00000400, 0xbeff03c1, + 0xb97b2a05, 0x807b817b, + 0x8f7b827b, 0x907c9973, + 0x877c817c, 0xbf06817c, + 0xbf850017, 0xbef603ff, 0x01000000, 0xbefc0384, - 0xbf0a7b7c, 0xbf840020, + 0xbf0a7b7c, 0xbf840037, 0x7e008700, 0x7e028701, 0x7e048702, 0x7e068703, 0xe0704000, 0x705d0000, - 0xe0704100, 0x705d0100, - 0xe0704200, 0x705d0200, - 0xe0704300, 0x705d0300, + 0xe0704080, 0x705d0100, + 0xe0704100, 0x705d0200, + 0xe0704180, 0x705d0300, 0x807c847c, 0x8070ff70, - 0x00000400, 0xbf0a7b7c, - 0xbf85ffef, 0xb97b1e06, - 0x877bc17b, 0xbf84000c, - 0x8f7b837b, 0x807b7c7b, - 0xbefe03c1, 0xbeff0380, - 0x7e008700, 0xe0704000, - 0x705d0000, 0x807c817c, - 0x8070ff70, 0x00000080, - 0xbf0a7b7c, 0xbf85fff8, - 0xbf82013c, 0xbef4037e, - 0x8775ff7f, 0x0000ffff, - 0x8875ff75, 0x00040000, - 0xbef60380, 0xbef703ff, - 0x10807fac, 0x876eff7f, - 
0x08000000, 0x906e836e, - 0x88776e77, 0x876eff7f, - 0x70000000, 0x906e816e, - 0x88776e77, 0xb97202dc, - 0x8f729972, 0x8872727f, + 0x00000200, 0xbf0a7b7c, + 0xbf85ffef, 0xbf820025, + 0xbef603ff, 0x01000000, + 0xbefc0384, 0xbf0a7b7c, + 0xbf840011, 0x7e008700, + 0x7e028701, 0x7e048702, + 0x7e068703, 0xe0704000, + 0x705d0000, 0xe0704100, + 0x705d0100, 0xe0704200, + 0x705d0200, 0xe0704300, + 0x705d0300, 0x807c847c, + 0x8070ff70, 0x00000400, + 0xbf0a7b7c, 0xbf85ffef, + 0xb97b1e06, 0x877bc17b, + 0xbf84000c, 0x8f7b837b, + 0x807b7c7b, 0xbefe03c1, + 0xbeff0380, 0x7e008700, + 0xe0704000, 0x705d0000, + 0x807c817c, 0x8070ff70, + 0x00000080, 0xbf0a7b7c, + 0xbf85fff8, 0xbf82013b, + 0xbef4037e, 0x8775ff7f, + 0x0000ffff, 0x8875ff75, + 0x00040000, 0xbef60380, + 0xbef703ff, 0x10807fac, + 0xb97202dc, 0x8f729972, 0x876eff7f, 0x04000000, 0xbf840034, 0xbefe03c1, 0x907c9972, 0x877c817c, @@ -2345,10 +2372,11 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xb96f2a05, 0x806f816f, 0x8f6f826f, 0x907c9972, 0x877c817c, 0xbf06817c, - 0xbf850021, 0xbef603ff, + 0xbf850024, 0xbef603ff, 0x01000000, 0xbeee0378, 0x8078ff78, 0x00000200, - 0xbefc0384, 0xe0304000, + 0xbefc0384, 0xbf0a6f7c, + 0xbf840050, 0xe0304000, 0x785d0000, 0xe0304080, 0x785d0100, 0xe0304100, 0x785d0200, 0xe0304180, @@ -2361,94 +2389,97 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0x6e5d0000, 0xe0304080, 0x6e5d0100, 0xe0304100, 0x6e5d0200, 0xe0304180, - 0x6e5d0300, 0xbf820032, - 0xbef603ff, 0x01000000, - 0xbeee0378, 0x8078ff78, - 0x00000400, 0xbefc0384, - 0xe0304000, 0x785d0000, - 0xe0304100, 0x785d0100, - 0xe0304200, 0x785d0200, - 0xe0304300, 0x785d0300, - 0xbf8c3f70, 0x7e008500, - 0x7e028501, 0x7e048502, - 0x7e068503, 0x807c847c, - 0x8078ff78, 0x00000400, - 0xbf0a6f7c, 0xbf85ffee, - 0xb96f1e06, 0x876fc16f, - 0xbf84000e, 0x8f6f836f, - 0x806f7c6f, 0xbefe03c1, - 0xbeff0380, 0xe0304000, - 0x785d0000, 0xbf8c3f70, - 0x7e008500, 0x807c817c, - 0x8078ff78, 0x00000080, - 0xbf0a6f7c, 0xbf85fff7, - 0xbeff03c1, 0xe0304000, - 0x6e5d0000, 
0xe0304100, - 0x6e5d0100, 0xe0304200, - 0x6e5d0200, 0xe0304300, 0x6e5d0300, 0xbf8c3f70, + 0xbf820034, 0xbef603ff, + 0x01000000, 0xbeee0378, + 0x8078ff78, 0x00000400, + 0xbefc0384, 0xbf0a6f7c, + 0xbf840012, 0xe0304000, + 0x785d0000, 0xe0304100, + 0x785d0100, 0xe0304200, + 0x785d0200, 0xe0304300, + 0x785d0300, 0xbf8c3f70, + 0x7e008500, 0x7e028501, + 0x7e048502, 0x7e068503, + 0x807c847c, 0x8078ff78, + 0x00000400, 0xbf0a6f7c, + 0xbf85ffee, 0xb96f1e06, + 0x876fc16f, 0xbf84000e, + 0x8f6f836f, 0x806f7c6f, + 0xbefe03c1, 0xbeff0380, + 0xe0304000, 0x785d0000, + 0xbf8c3f70, 0x7e008500, + 0x807c817c, 0x8078ff78, + 0x00000080, 0xbf0a6f7c, + 0xbf85fff7, 0xbeff03c1, + 0xe0304000, 0x6e5d0000, + 0xe0304100, 0x6e5d0100, + 0xe0304200, 0x6e5d0200, + 0xe0304300, 0x6e5d0300, + 0xbf8c3f70, 0xb9782a05, + 0x80788178, 0xbf0d9972, + 0xbf850002, 0x8f788978, + 0xbf820001, 0x8f788a78, + 0xb96e1e06, 0x8f6e8a6e, + 0x80786e78, 0x8078ff78, + 0x00000200, 0x80f8ff78, + 0x00000050, 0xbef603ff, + 0x01000000, 0xbefc03ff, + 0x0000006c, 0x80f89078, + 0xf429003a, 0xf0000000, + 0xbf8cc07f, 0x80fc847c, + 0xbf800000, 0xbe803100, + 0xbe823102, 0x80f8a078, + 0xf42d003a, 0xf0000000, + 0xbf8cc07f, 0x80fc887c, + 0xbf800000, 0xbe803100, + 0xbe823102, 0xbe843104, + 0xbe863106, 0x80f8c078, + 0xf431003a, 0xf0000000, + 0xbf8cc07f, 0x80fc907c, + 0xbf800000, 0xbe803100, + 0xbe823102, 0xbe843104, + 0xbe863106, 0xbe883108, + 0xbe8a310a, 0xbe8c310c, + 0xbe8e310e, 0xbf06807c, + 0xbf84fff0, 0xba80f801, + 0x00000000, 0xbf8a0000, 0xb9782a05, 0x80788178, 0xbf0d9972, 0xbf850002, 0x8f788978, 0xbf820001, 0x8f788a78, 0xb96e1e06, 0x8f6e8a6e, 0x80786e78, 0x8078ff78, 0x00000200, - 0x80f8ff78, 0x00000050, 0xbef603ff, 0x01000000, - 0xbefc03ff, 0x0000006c, - 0x80f89078, 0xf429003a, - 0xf0000000, 0xbf8cc07f, - 0x80fc847c, 0xbf800000, - 0xbe803100, 0xbe823102, - 0x80f8a078, 0xf42d003a, - 0xf0000000, 0xbf8cc07f, - 0x80fc887c, 0xbf800000, - 0xbe803100, 0xbe823102, - 0xbe843104, 0xbe863106, - 0x80f8c078, 0xf431003a, - 0xf0000000, 0xbf8cc07f, - 
0x80fc907c, 0xbf800000, - 0xbe803100, 0xbe823102, - 0xbe843104, 0xbe863106, - 0xbe883108, 0xbe8a310a, - 0xbe8c310c, 0xbe8e310e, - 0xbf06807c, 0xbf84fff0, - 0xba80f801, 0x00000000, - 0xbf8a0000, 0xb9782a05, - 0x80788178, 0xbf0d9972, - 0xbf850002, 0x8f788978, - 0xbf820001, 0x8f788a78, - 0xb96e1e06, 0x8f6e8a6e, - 0x80786e78, 0x8078ff78, - 0x00000200, 0xbef603ff, - 0x01000000, 0xf4211bfa, + 0xf4211bfa, 0xf0000000, + 0x80788478, 0xf4211b3a, 0xf0000000, 0x80788478, - 0xf4211b3a, 0xf0000000, - 0x80788478, 0xf4211b7a, + 0xf4211b7a, 0xf0000000, + 0x80788478, 0xf4211c3a, 0xf0000000, 0x80788478, - 0xf4211c3a, 0xf0000000, - 0x80788478, 0xf4211c7a, + 0xf4211c7a, 0xf0000000, + 0x80788478, 0xf4211eba, 0xf0000000, 0x80788478, - 0xf4211eba, 0xf0000000, - 0x80788478, 0xf4211efa, + 0xf4211efa, 0xf0000000, + 0x80788478, 0xf4211e7a, 0xf0000000, 0x80788478, - 0xf4211e7a, 0xf0000000, - 0x80788478, 0xf4211cfa, + 0xf4211cfa, 0xf0000000, + 0x80788478, 0xf4211bba, 0xf0000000, 0x80788478, + 0xbf8cc07f, 0xb9eef814, 0xf4211bba, 0xf0000000, 0x80788478, 0xbf8cc07f, - 0xb9eef814, 0xf4211bba, - 0xf0000000, 0x80788478, - 0xbf8cc07f, 0xb9eef815, - 0xbefc036f, 0xbefe0370, - 0xbeff0371, 0x876f7bff, - 0x000003ff, 0xb9ef4803, - 0x876f7bff, 0xfffff800, - 0x906f8b6f, 0xb9efa2c3, - 0xb9f3f801, 0xb96e2a05, - 0x806e816e, 0xbf0d9972, - 0xbf850002, 0x8f6e896e, - 0xbf820001, 0x8f6e8a6e, + 0xb9eef815, 0xbefc036f, + 0xbefe0370, 0xbeff0371, + 0x876f7bff, 0x000003ff, + 0xb9ef4803, 0x876f7bff, + 0xfffff800, 0x906f8b6f, + 0xb9efa2c3, 0xb9f3f801, + 0xb96e2a05, 0x806e816e, + 0xbf0d9972, 0xbf850002, + 0x8f6e896e, 0xbf820001, + 0x8f6e8a6e, 0xb96f1e06, + 0x8f6f8a6f, 0x806e6f6e, 0x806eff6e, 0x00000200, 0x806e746e, 0x826f8075, 0x876fff6f, 0x0000ffff, diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm index 5081f91190b8..0348191e8592 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm +++ 
b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm @@ -35,10 +35,9 @@ var SINGLE_STEP_MISSED_WORKAROUND = 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised -var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 -var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 var SQ_WAVE_STATUS_HALT_MASK = 0x2000 +var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 @@ -52,8 +51,10 @@ var SQ_WAVE_IB_STS2_WAVE64_SHIFT = 11 var SQ_WAVE_IB_STS2_WAVE64_SIZE = 1 var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400 -var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF +var SQ_WAVE_TRAPSTS_EXCP_MASK = 0x1FF var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10 +var SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK = 0x80 +var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT = 7 var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100 var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8 var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF @@ -63,46 +64,37 @@ var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800 var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11 var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21 var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800 +var SQ_WAVE_TRAPSTS_EXCP_HI_MASK = 0x7000 + +var SQ_WAVE_MODE_EXCP_EN_SHIFT = 12 +var SQ_WAVE_MODE_EXCP_EN_ADDR_WATCH_SHIFT = 19 -var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 var SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT = 25 -var SQ_WAVE_IB_STS_REPLAY_W64H_SIZE = 1 var SQ_WAVE_IB_STS_REPLAY_W64H_MASK = 0x02000000 -var SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE = 1 -var SQ_WAVE_IB_STS_RCNT_SIZE = 6 var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x003F8000 -var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800 -var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24 -var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27 - // bits [31:24] unused by SPI debug data var TTMP11_SAVE_REPLAY_W64H_SHIFT = 31 var TTMP11_SAVE_REPLAY_W64H_MASK = 0x80000000 var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT = 24 var 
TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK = 0x7F000000 +var TTMP11_DEBUG_TRAP_ENABLED_SHIFT = 23 +var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000 // SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] // when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 var S_SAVE_BUF_RSRC_WORD3_MISC = 0x10807FAC - -var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000 -var S_SAVE_SPI_INIT_ATC_SHIFT = 27 -var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000 -var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28 +var S_SAVE_PC_HI_TRAP_ID_MASK = 0x00FF0000 +var S_SAVE_PC_HI_HT_MASK = 0x01000000 var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26 -var S_SAVE_PC_HI_RCNT_SHIFT = 26 -var S_SAVE_PC_HI_RCNT_MASK = 0xFC000000 -var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 25 -var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x02000000 -var S_SAVE_PC_HI_REPLAY_W64H_SHIFT = 24 -var S_SAVE_PC_HI_REPLAY_W64H_MASK = 0x01000000 +var S_SAVE_PC_HI_FIRST_WAVE_MASK = 0x80000000 +var S_SAVE_PC_HI_FIRST_WAVE_SHIFT = 31 var s_sgpr_save_num = 108 @@ -130,19 +122,10 @@ var s_save_ttmps_hi = s_save_trapsts var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC -var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000 -var S_RESTORE_SPI_INIT_ATC_SHIFT = 27 -var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000 -var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28 var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26 var S_WAVE_SIZE = 25 -var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT -var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK -var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT -var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK - var s_restore_spi_init_lo = exec_lo var s_restore_spi_init_hi = exec_hi var s_restore_mem_offset = ttmp12 @@ -179,51 +162,77 @@ L_JUMP_TO_RESTORE: L_SKIP_RESTORE: s_getreg_b32 
s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC - s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK - -if SINGLE_STEP_MISSED_WORKAROUND - // No single step exceptions if MODE.DEBUG_EN=0. - s_getreg_b32 ttmp2, hwreg(HW_REG_MODE) - s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK - s_cbranch_scc0 L_NO_SINGLE_STEP_WORKAROUND - - // Second-level trap already handled exception if STATUS.HALT=1. - s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK - - // Prioritize single step exception over context save. - // Second-level trap will halt wave and RFE, re-entering for SAVECTX. - s_cbranch_scc0 L_FETCH_2ND_TRAP - -L_NO_SINGLE_STEP_WORKAROUND: -end + // Clear SPI_PRIO: do not save with elevated priority. + // Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd. + s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_ECC_ERR_MASK s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) - s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save + + s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK + s_cbranch_scc0 L_NOT_HALTED + +L_HALTED: + // Host trap may occur while wave is halted. + s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK + s_cbranch_scc1 L_FETCH_2ND_TRAP + +L_CHECK_SAVE: + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK s_cbranch_scc1 L_SAVE - // If STATUS.MEM_VIOL is asserted then halt the wave to prevent - // the exception raising again and blocking context save. - s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK - s_cbranch_scc0 L_FETCH_2ND_TRAP - s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK + // Wave is halted but neither host trap nor SAVECTX is raised. + // Caused by instruction fetch memory violation. + // Spin wait until context saved to prevent interrupt storm. 
+ s_sleep 0x10 + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + s_branch L_CHECK_SAVE + +L_NOT_HALTED: + // Let second-level handle non-SAVECTX exception or trap. + // Any concurrent SAVECTX will be handled upon re-entry once halted. + + // Check non-maskable exceptions. memory_violation, illegal_instruction + // and xnack_error exceptions always cause the wave to enter the trap + // handler. + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK|SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK + s_cbranch_scc1 L_FETCH_2ND_TRAP + + // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi. + // Maskable exceptions only cause the wave to enter the trap handler if + // their respective bit in mode.excp_en is set. + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCP_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK + s_cbranch_scc0 L_CHECK_TRAP_ID + + s_and_b32 ttmp3, s_save_trapsts, SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK + s_cbranch_scc0 L_NOT_ADDR_WATCH + s_bitset1_b32 ttmp2, SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT // Check all addr_watch[123] exceptions against excp_en.addr_watch + +L_NOT_ADDR_WATCH: + s_getreg_b32 ttmp3, hwreg(HW_REG_MODE) + s_lshl_b32 ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT + s_and_b32 ttmp2, ttmp2, ttmp3 + s_cbranch_scc1 L_FETCH_2ND_TRAP + +L_CHECK_TRAP_ID: + // Check trap_id != 0 + s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK + s_cbranch_scc1 L_FETCH_2ND_TRAP + +if SINGLE_STEP_MISSED_WORKAROUND + // Prioritize single step exception over context save. + // Second-level trap will halt wave and RFE, re-entering for SAVECTX. + s_getreg_b32 ttmp2, hwreg(HW_REG_MODE) + s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK + s_cbranch_scc1 L_FETCH_2ND_TRAP +end + + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK + s_cbranch_scc1 L_SAVE L_FETCH_2ND_TRAP: - #if ASIC_TARGET_NAVI1X - // Preserve and clear scalar XNACK state before issuing scalar loads. 
- // Save IB_STS.REPLAY_W64H[25], RCNT[21:16], FIRST_REPLAY[15] into - // unused space ttmp11[31:24]. - s_andn2_b32 ttmp11, ttmp11, (TTMP11_SAVE_REPLAY_W64H_MASK | TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK) - s_getreg_b32 ttmp2, hwreg(HW_REG_IB_STS) - s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK - s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT) - s_or_b32 ttmp11, ttmp11, ttmp3 - s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK - s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) - s_or_b32 ttmp11, ttmp11, ttmp3 - s_andn2_b32 ttmp2, ttmp2, (SQ_WAVE_IB_STS_REPLAY_W64H_MASK | SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK) - s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2 + save_and_clear_ib_sts(ttmp14, ttmp15) #endif // Read second-level TBA/TMA from first-level TMA and jump if available. @@ -232,31 +241,49 @@ L_FETCH_2ND_TRAP: s_getreg_b32 ttmp14, hwreg(HW_REG_SHADER_TMA_LO) s_getreg_b32 ttmp15, hwreg(HW_REG_SHADER_TMA_HI) s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 + + s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag + s_waitcnt lgkmcnt(0) + s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT + s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK + s_or_b32 ttmp11, ttmp11, ttmp2 + s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA s_waitcnt lgkmcnt(0) s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA s_waitcnt lgkmcnt(0) + s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3] s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler L_NO_NEXT_TRAP: - s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) - s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK - s_cbranch_scc1 L_EXCP_CASE // Exception, jump back to the shader program directly. 
- s_add_u32 ttmp0, ttmp0, 4 // S_TRAP case, add 4 to ttmp0 - s_addc_u32 ttmp1, ttmp1, 0 -L_EXCP_CASE: + // If not caused by trap then halt wave to prevent re-entry. + s_and_b32 ttmp2, s_save_pc_hi, (S_SAVE_PC_HI_TRAP_ID_MASK|S_SAVE_PC_HI_HT_MASK) + s_cbranch_scc1 L_TRAP_CASE + s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK + + // If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set. + // Rewind the PC to prevent this from occurring. + s_sub_u32 ttmp0, ttmp0, 0x8 + s_subb_u32 ttmp1, ttmp1, 0x0 + + s_branch L_EXIT_TRAP + +L_TRAP_CASE: + // Host trap will not cause trap re-entry. + s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK + s_cbranch_scc1 L_EXIT_TRAP + + // Advance past trap instruction to prevent re-entry. + s_add_u32 ttmp0, ttmp0, 0x4 + s_addc_u32 ttmp1, ttmp1, 0x0 + +L_EXIT_TRAP: s_and_b32 ttmp1, ttmp1, 0xFFFF #if ASIC_TARGET_NAVI1X - // Restore SQ_WAVE_IB_STS. - s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) - s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK - s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT) - s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK - s_or_b32 ttmp2, ttmp2, ttmp3 - s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2 + restore_ib_sts(ttmp14, ttmp15) #endif // Restore SQ_WAVE_STATUS. 
@@ -272,19 +299,7 @@ L_SAVE: s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit #if ASIC_TARGET_NAVI1X - s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE) - s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT - s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp - s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE) - s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT - s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp - s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT, SQ_WAVE_IB_STS_REPLAY_W64H_SIZE) - s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_REPLAY_W64H_SHIFT - s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp - s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY and REPLAY_W64H in IB_STS - s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG - - s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp + save_and_clear_ib_sts(s_save_tmp, s_save_trapsts) #endif /* inform SPI the readiness and wait for SPI's go signal */ @@ -305,16 +320,57 @@ L_SLEEP: s_waitcnt lgkmcnt(0) #endif + // Save first_wave flag so we can clear high bits of save address. + s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK + s_lshl_b32 s_save_tmp, s_save_tmp, (S_SAVE_PC_HI_FIRST_WAVE_SHIFT - S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT) + s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp + +#if NO_SQC_STORE + // Trap temporaries must be saved via VGPR but all VGPRs are in use. + // There is no ttmp space to hold the resource constant for VGPR save. + // Save v0 by itself since it requires only two SGPRs. 
+ s_mov_b32 s_save_ttmps_lo, exec_lo + s_and_b32 s_save_ttmps_hi, exec_hi, 0xFFFF + s_mov_b32 exec_lo, 0xFFFFFFFF + s_mov_b32 exec_hi, 0xFFFFFFFF + global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] slc:1 glc:1 + v_mov_b32 v0, 0x0 + s_mov_b32 exec_lo, s_save_ttmps_lo + s_mov_b32 exec_hi, s_save_ttmps_hi +#endif + // Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic - // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40 + // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40 get_wave_size(s_save_ttmps_hi) get_vgpr_size_bytes(s_save_ttmps_lo, s_save_ttmps_hi) + get_svgpr_size_bytes(s_save_ttmps_hi) + s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi s_and_b32 s_save_ttmps_hi, s_save_spi_init_hi, 0xFFFF s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, get_sgpr_size_bytes() s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo s_addc_u32 s_save_ttmps_hi, s_save_ttmps_hi, 0x0 -#if ASIC_TARGET_NAVI1X +#if NO_SQC_STORE + v_writelane_b32 v0, ttmp4, 0x4 + v_writelane_b32 v0, ttmp5, 0x5 + v_writelane_b32 v0, ttmp6, 0x6 + v_writelane_b32 v0, ttmp7, 0x7 + v_writelane_b32 v0, ttmp8, 0x8 + v_writelane_b32 v0, ttmp9, 0x9 + v_writelane_b32 v0, ttmp10, 0xA + v_writelane_b32 v0, ttmp11, 0xB + v_writelane_b32 v0, ttmp13, 0xD + v_writelane_b32 v0, exec_lo, 0xE + v_writelane_b32 v0, exec_hi, 0xF + + s_mov_b32 exec_lo, 0x3FFF + s_mov_b32 exec_hi, 0x0 + global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] inst_offset:0x40 slc:1 glc:1 + v_readlane_b32 ttmp14, v0, 0xE + v_readlane_b32 ttmp15, v0, 0xF + s_mov_b32 exec_lo, ttmp14 + s_mov_b32 exec_hi, ttmp15 +#else s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 glc:1 s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 glc:1 s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 glc:1 @@ -326,12 +382,6 @@ L_SLEEP: s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE 
s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC - s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK - s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) - s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or ATC - s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK - s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) - s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE s_mov_b32 s_save_m0, m0 @@ -361,7 +411,9 @@ L_SAVE_4VGPR_WAVE32: // VGPR Allocated in 4-GPR granularity +#if !NO_SQC_STORE buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 +#endif buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128 buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*2 buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*3 @@ -372,7 +424,9 @@ L_SAVE_4VGPR_WAVE64: // VGPR Allocated in 4-GPR granularity +#if !NO_SQC_STORE buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 +#endif buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 @@ -397,7 +451,8 @@ L_SAVE_HWREG: write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset) - write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset) + s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK + write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset) write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset) 
write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset) write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset) @@ -418,9 +473,13 @@ L_SAVE_HWREG: write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) #if NO_SQC_STORE - // Write HWREG/SGPRs with 32 VGPR lanes, wave32 is common case. + // Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this. + s_mov_b32 exec_lo, 0xFFFF s_mov_b32 exec_hi, 0x0 buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + + // Write SGPRs with 32 VGPR lanes. This works in wave32 and wave64 mode. + s_mov_b32 exec_lo, 0xFFFFFFFF #endif /* save SGPRs */ @@ -506,7 +565,7 @@ L_SAVE_LDS_NORMAL: s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE s_barrier //LDS is used? wait for other waves in the same TG - s_and_b32 s_save_tmp, s_wave_size, S_SAVE_SPI_INIT_FIRST_WAVE_MASK + s_and_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK s_cbranch_scc0 L_SAVE_LDS_DONE // first wave do LDS save; @@ -628,7 +687,7 @@ L_SAVE_VGPR_WAVE64: // VGPR store using dw burst s_mov_b32 m0, 0x4 //VGPR initial index value =4 s_cmp_lt_u32 m0, s_save_alloc_size - s_cbranch_scc0 L_SAVE_VGPR_END + s_cbranch_scc0 L_SAVE_SHARED_VGPR L_SAVE_VGPR_W64_LOOP: v_movrels_b32 v0, v0 //v0 = v[0+m0] @@ -646,6 +705,7 @@ L_SAVE_VGPR_W64_LOOP: s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 s_cbranch_scc1 L_SAVE_VGPR_W64_LOOP //VGPR save is complete? +L_SAVE_SHARED_VGPR: //Below part will be the save shared vgpr part (new for gfx10) s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero? 
@@ -674,12 +734,7 @@ L_RESTORE: s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC - s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK - s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) - s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC - s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK - s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) - s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE + //determine it is wave32 or wave64 get_wave_size(s_restore_size) @@ -765,6 +820,8 @@ L_RESTORE_VGPR_NORMAL: s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4 s_mov_b32 m0, 4 //VGPR initial index value = 4 + s_cmp_lt_u32 m0, s_restore_alloc_size + s_cbranch_scc0 L_RESTORE_SGPR L_RESTORE_VGPR_WAVE32_LOOP: buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 @@ -786,6 +843,7 @@ L_RESTORE_VGPR_WAVE32_LOOP: buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128 buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128*2 buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128*3 + s_waitcnt vmcnt(0) s_branch L_RESTORE_SGPR @@ -796,6 +854,8 @@ L_RESTORE_VGPR_WAVE64: s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v4, v0 will be the last s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 s_mov_b32 m0, 4 //VGPR initial index value = 4 + s_cmp_lt_u32 m0, s_restore_alloc_size + s_cbranch_scc0 L_RESTORE_SHARED_VGPR 
L_RESTORE_VGPR_WAVE64_LOOP: buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 @@ -812,6 +872,7 @@ L_RESTORE_VGPR_WAVE64_LOOP: s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 s_cbranch_scc1 L_RESTORE_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete? +L_RESTORE_SHARED_VGPR: //Below part will be the restore shared vgpr part (new for gfx10) s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) //shared_vgpr_size s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero? @@ -945,8 +1006,10 @@ L_RESTORE_HWREG: s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode // Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic - // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40 + // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40 get_vgpr_size_bytes(s_restore_ttmps_lo, s_restore_size) + get_svgpr_size_bytes(s_restore_ttmps_hi) + s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, get_sgpr_size_bytes() s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0 s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0 @@ -957,23 +1020,7 @@ L_RESTORE_HWREG: s_waitcnt lgkmcnt(0) #if ASIC_TARGET_NAVI1X - s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK - s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT - s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT - s_mov_b32 s_restore_tmp, 0x0 - s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 - s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK - s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT - s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT - s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 - s_and_b32 s_restore_m0, 
s_restore_pc_hi, S_SAVE_PC_HI_REPLAY_W64H_MASK - s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_REPLAY_W64H_SHIFT - s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT - s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 - - s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK - s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT - s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp + restore_ib_sts(s_restore_tmp, s_restore_m0) #endif s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS @@ -1089,5 +1136,29 @@ end function get_wave_size(s_reg) s_getreg_b32 s_reg, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE) s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE - s_or_b32 s_reg, s_save_spi_init_hi, s_reg //share with exec_hi, it's at bit25 +end + +function save_and_clear_ib_sts(tmp1, tmp2) + // Preserve and clear scalar XNACK state before issuing scalar loads. + // Save IB_STS.REPLAY_W64H[25], RCNT[21:16], FIRST_REPLAY[15] into + // unused space ttmp11[31:24]. 
+ s_andn2_b32 ttmp11, ttmp11, (TTMP11_SAVE_REPLAY_W64H_MASK | TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK) + s_getreg_b32 tmp1, hwreg(HW_REG_IB_STS) + s_and_b32 tmp2, tmp1, SQ_WAVE_IB_STS_REPLAY_W64H_MASK + s_lshl_b32 tmp2, tmp2, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT) + s_or_b32 ttmp11, ttmp11, tmp2 + s_and_b32 tmp2, tmp1, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK + s_lshl_b32 tmp2, tmp2, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) + s_or_b32 ttmp11, ttmp11, tmp2 + s_andn2_b32 tmp1, tmp1, (SQ_WAVE_IB_STS_REPLAY_W64H_MASK | SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK) + s_setreg_b32 hwreg(HW_REG_IB_STS), tmp1 +end + +function restore_ib_sts(tmp1, tmp2) + s_lshr_b32 tmp1, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) + s_and_b32 tmp2, tmp1, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK + s_lshr_b32 tmp1, ttmp11, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT) + s_and_b32 tmp1, tmp1, SQ_WAVE_IB_STS_REPLAY_W64H_MASK + s_or_b32 tmp1, tmp1, tmp2 + s_setreg_b32 hwreg(HW_REG_IB_STS), tmp1 end diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm index eed78a04e7c7..6770cbe3250a 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm @@ -46,8 +46,6 @@ var SINGLE_STEP_MISSED_WORKAROUND = 1 //workaround for lost MODE.DEBUG_EN /**************************************************************************/ /* variables */ /**************************************************************************/ -var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 -var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1 var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 var SQ_WAVE_STATUS_HALT_MASK = 0x2000 @@ -56,6 +54,7 @@ var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE = 1 var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT = 3 var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE = 29 var 
SQ_WAVE_STATUS_ALLOW_REPLAY_MASK = 0x400000 +var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 @@ -72,8 +71,10 @@ var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8 #endif var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400 -var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF // Exception mask +var SQ_WAVE_TRAPSTS_EXCP_MASK = 0x1FF var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10 +var SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK = 0x80 +var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT = 7 var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100 var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8 var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF @@ -83,37 +84,30 @@ var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800 var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11 var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21 var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800 +var SQ_WAVE_TRAPSTS_EXCP_HI_MASK = 0x7000 var SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK = 0x10000000 -var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME +var SQ_WAVE_MODE_EXCP_EN_SHIFT = 12 +var SQ_WAVE_MODE_EXCP_EN_ADDR_WATCH_SHIFT = 19 + var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x1F8000 -var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF //FIXME var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800 -var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24 -var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27 - var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT = 26 // bits [31:26] unused by SPI debug data var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK = 0xFC000000 +var TTMP11_DEBUG_TRAP_ENABLED_SHIFT = 23 +var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000 /* Save */ var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 //stride is 4 bytes var S_SAVE_BUF_RSRC_WORD3_MISC = 0x00807FAC //SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE - -var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit -var S_SAVE_SPI_INIT_ATC_SHIFT = 27 -var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype -var 
S_SAVE_SPI_INIT_MTYPE_SHIFT = 28 +var S_SAVE_PC_HI_TRAP_ID_MASK = 0x00FF0000 +var S_SAVE_PC_HI_HT_MASK = 0x01000000 var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26 -var S_SAVE_PC_HI_RCNT_SHIFT = 27 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used -var S_SAVE_PC_HI_RCNT_MASK = 0xF8000000 //FIXME -var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 26 //FIXME -var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x04000000 //FIXME - var s_save_spi_init_lo = exec_lo var s_save_spi_init_hi = exec_hi @@ -140,18 +134,9 @@ var s_save_ttmps_hi = s_save_trapsts //no conflict var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC -var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit -var S_RESTORE_SPI_INIT_ATC_SHIFT = 27 -var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype -var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28 var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26 -var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT -var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK -var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT -var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK - var s_restore_spi_init_lo = exec_lo var s_restore_spi_init_hi = exec_hi @@ -199,71 +184,77 @@ L_JUMP_TO_RESTORE: L_SKIP_RESTORE: s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC - s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK //check whether this is for save -if SINGLE_STEP_MISSED_WORKAROUND - // No single step exceptions if MODE.DEBUG_EN=0. - s_getreg_b32 ttmp2, hwreg(HW_REG_MODE) - s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK - s_cbranch_scc0 L_NO_SINGLE_STEP_WORKAROUND - - // Second-level trap already handled exception if STATUS.HALT=1. 
- s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK - - // Prioritize single step exception over context save. - // Second-level trap will halt wave and RFE, re-entering for SAVECTX. - s_cbranch_scc0 L_FETCH_2ND_TRAP - -L_NO_SINGLE_STEP_WORKAROUND: -end + // Clear SPI_PRIO: do not save with elevated priority. + // Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd. + s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_ECC_ERR_MASK s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + + s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK + s_cbranch_scc0 L_NOT_HALTED + +L_HALTED: + // Host trap may occur while wave is halted. + s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK + s_cbranch_scc1 L_FETCH_2ND_TRAP + +L_CHECK_SAVE: s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save s_cbranch_scc1 L_SAVE //this is the operation for save - // ********* Handle non-CWSR traps ******************* - - // Illegal instruction is a non-maskable exception which blocks context save. - // Halt the wavefront and return from the trap. - s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK - s_cbranch_scc1 L_HALT_WAVE - - // If STATUS.MEM_VIOL is asserted then we cannot fetch from the TMA. - // Instead, halt the wavefront and return from the trap. - s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK - s_cbranch_scc0 L_FETCH_2ND_TRAP - -L_HALT_WAVE: - // If STATUS.HALT is set then this fault must come from SQC instruction fetch. - // We cannot prevent further faults. Spin wait until context saved. - s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK - s_cbranch_scc0 L_NOT_ALREADY_HALTED - -L_WAIT_CTX_SAVE: + // Wave is halted but neither host trap nor SAVECTX is raised. + // Caused by instruction fetch memory violation. + // Spin wait until context saved to prevent interrupt storm. 
s_sleep 0x10 - s_getreg_b32 ttmp2, hwreg(HW_REG_TRAPSTS) - s_and_b32 ttmp2, ttmp2, SQ_WAVE_TRAPSTS_SAVECTX_MASK - s_cbranch_scc0 L_WAIT_CTX_SAVE + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + s_branch L_CHECK_SAVE -L_NOT_ALREADY_HALTED: - s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK +L_NOT_HALTED: + // Let second-level handle non-SAVECTX exception or trap. + // Any concurrent SAVECTX will be handled upon re-entry once halted. - // If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set. - // Rewind the PC to prevent this from occurring. The debugger compensates for this. - s_sub_u32 ttmp0, ttmp0, 0x8 - s_subb_u32 ttmp1, ttmp1, 0x0 + // Check non-maskable exceptions. memory_violation, illegal_instruction + // and xnack_error exceptions always cause the wave to enter the trap + // handler. + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK|SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK + s_cbranch_scc1 L_FETCH_2ND_TRAP + + // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi. + // Maskable exceptions only cause the wave to enter the trap handler if + // their respective bit in mode.excp_en is set. + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCP_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK + s_cbranch_scc0 L_CHECK_TRAP_ID + + s_and_b32 ttmp3, s_save_trapsts, SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK + s_cbranch_scc0 L_NOT_ADDR_WATCH + s_bitset1_b32 ttmp2, SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT // Check all addr_watch[123] exceptions against excp_en.addr_watch + +L_NOT_ADDR_WATCH: + s_getreg_b32 ttmp3, hwreg(HW_REG_MODE) + s_lshl_b32 ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT + s_and_b32 ttmp2, ttmp2, ttmp3 + s_cbranch_scc1 L_FETCH_2ND_TRAP + +L_CHECK_TRAP_ID: + // Check trap_id != 0 + s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK + s_cbranch_scc1 L_FETCH_2ND_TRAP + +if SINGLE_STEP_MISSED_WORKAROUND + // Prioritize single step exception over context save. 
+ // Second-level trap will halt wave and RFE, re-entering for SAVECTX. + s_getreg_b32 ttmp2, hwreg(HW_REG_MODE) + s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK + s_cbranch_scc1 L_FETCH_2ND_TRAP +end + + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK + s_cbranch_scc1 L_SAVE L_FETCH_2ND_TRAP: // Preserve and clear scalar XNACK state before issuing scalar reads. - // Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space ttmp11[31:26]. - s_getreg_b32 ttmp2, hwreg(HW_REG_IB_STS) - s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK - s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) - s_andn2_b32 ttmp11, ttmp11, TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK - s_or_b32 ttmp11, ttmp11, ttmp3 - - s_andn2_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK - s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2 + save_and_clear_ib_sts(ttmp14) // Read second-level TBA/TMA from first-level TMA and jump if available. // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data) @@ -271,27 +262,48 @@ L_FETCH_2ND_TRAP: s_getreg_b32 ttmp14, hwreg(HW_REG_SQ_SHADER_TMA_LO) s_getreg_b32 ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI) s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 + + s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag + s_waitcnt lgkmcnt(0) + s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT + s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK + s_or_b32 ttmp11, ttmp11, ttmp2 + s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA s_waitcnt lgkmcnt(0) s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA s_waitcnt lgkmcnt(0) + s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3] s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler L_NO_NEXT_TRAP: - s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) - 
s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK // Check whether it is an exception - s_cbranch_scc1 L_EXCP_CASE // Exception, jump back to the shader program directly. - s_add_u32 ttmp0, ttmp0, 4 // S_TRAP case, add 4 to ttmp0 - s_addc_u32 ttmp1, ttmp1, 0 -L_EXCP_CASE: + // If not caused by trap then halt wave to prevent re-entry. + s_and_b32 ttmp2, s_save_pc_hi, (S_SAVE_PC_HI_TRAP_ID_MASK|S_SAVE_PC_HI_HT_MASK) + s_cbranch_scc1 L_TRAP_CASE + s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK + + // If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set. + // Rewind the PC to prevent this from occurring. + s_sub_u32 ttmp0, ttmp0, 0x8 + s_subb_u32 ttmp1, ttmp1, 0x0 + + s_branch L_EXIT_TRAP + +L_TRAP_CASE: + // Host trap will not cause trap re-entry. + s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK + s_cbranch_scc1 L_EXIT_TRAP + + // Advance past trap instruction to prevent re-entry. + s_add_u32 ttmp0, ttmp0, 0x4 + s_addc_u32 ttmp1, ttmp1, 0x0 + +L_EXIT_TRAP: s_and_b32 ttmp1, ttmp1, 0xFFFF - // Restore SQ_WAVE_IB_STS. - s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) - s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK - s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2 + restore_ib_sts(ttmp14) // Restore SQ_WAVE_STATUS. 
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 @@ -312,16 +324,7 @@ L_SAVE: s_mov_b32 s_save_tmp, 0 //clear saveCtx bit s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit - s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE) //save RCNT - s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT - s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp - s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE) //save FIRST_REPLAY - s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT - s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp - s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY in IB_STS - s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG - - s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp + save_and_clear_ib_sts(s_save_tmp) /* inform SPI the readiness and wait for SPI's go signal */ s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI @@ -360,12 +363,6 @@ L_SAVE: s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC - s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK - s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position - s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or ATC - s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK - s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position - s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE //FIXME right now s_save_m0/s_save_mem_offset use tma_lo/tma_hi (might need 
to save them before using them?) s_mov_b32 s_save_m0, m0 //save M0 @@ -690,12 +687,6 @@ L_RESTORE: s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC - s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK - s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position - s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC - s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK - s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position - s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE /* global mem offset */ // s_mov_b32 s_restore_mem_offset, 0x0 //mem offset initial value = 0 @@ -889,19 +880,7 @@ L_RESTORE: s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1 s_waitcnt lgkmcnt(0) - //reuse s_restore_m0 as a temp register - s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK - s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT - s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT - s_mov_b32 s_restore_tmp, 0x0 //IB_STS is zero - s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 - s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK - s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT - s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT - s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 - s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK - s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT - s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp + restore_ib_sts(s_restore_tmp) s_and_b32 s_restore_pc_hi, 
s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 @@ -910,8 +889,7 @@ L_RESTORE: s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time -// s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution - s_rfe_restore_b64 s_restore_pc_lo, s_restore_m0 // s_restore_m0[0] is used to set STATUS.inst_atc + s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution /**************************************************************************/ @@ -1078,3 +1056,19 @@ function set_status_without_spi_prio(status, tmp) s_nop 0x2 // avoid S_SETREG => S_SETREG hazard s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE), status end + +function save_and_clear_ib_sts(tmp) + // Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space ttmp11[31:26]. 
+ s_getreg_b32 tmp, hwreg(HW_REG_IB_STS) + s_and_b32 tmp, tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK + s_lshl_b32 tmp, tmp, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) + s_andn2_b32 ttmp11, ttmp11, TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK + s_or_b32 ttmp11, ttmp11, tmp + s_setreg_imm32_b32 hwreg(HW_REG_IB_STS), 0x0 +end + +function restore_ib_sts(tmp) + s_lshr_b32 tmp, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) + s_and_b32 tmp, tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK + s_setreg_b32 hwreg(HW_REG_IB_STS), tmp +end From 6a8170383c7acdf6fb8da1f3774fa2bc9191d628 Mon Sep 17 00:00:00 2001 From: Jay Cornwall <jay.cornwall@amd.com> Date: Thu, 30 Dec 2021 21:32:06 +0800 Subject: [PATCH 33/46] drm/amdkfd: Add gfx11 trap handler Based on gfx10 with following changes: - GPR_ALLOC.VGPR_SIZE field moved (and size corrected in gfx10) - s_sendmsg_rtn_b64 replaces some s_sendmsg/s_getreg - Buffer instructions no longer have direct-to-LDS modifier Signed-off-by: Jay Cornwall <jay.cornwall@amd.com> Reviewed-by: Laurent Morichetti <laurent.morichetti@amd.com> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 463 +++++++++++++++++- .../amd/amdkfd/cwsr_trap_handler_gfx10.asm | 69 ++- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 6 +- 3 files changed, 507 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index 8cbdc7f519c6..60a81649cf12 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -776,7 +776,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xe0704100, 0x705d0100, 0xe0704200, 0x705d0200, 0xe0704300, 0x705d0300, - 0xb9702a05, 0x80708170, + 0xb9703a05, 0x80708170, 0xbf0d9973, 0xbf850002, 0x8f708970, 0xbf820001, 
0x8f708a70, 0xb97a1e06, @@ -855,7 +855,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0x877aff6d, 0x80000000, 0xbf840040, 0x8f7b867b, 0x8f7b827b, 0xbef6037b, - 0xb9702a05, 0x80708170, + 0xb9703a05, 0x80708170, 0xbf0d9973, 0xbf850002, 0x8f708970, 0xbf820001, 0x8f708a70, 0xb97a1e06, @@ -891,7 +891,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xbef003ff, 0x00000200, 0xbeff0380, 0xbf820003, 0xbef003ff, 0x00000400, - 0xbeff03c1, 0xb97b2a05, + 0xbeff03c1, 0xb97b3a05, 0x807b817b, 0x8f7b827b, 0x907c9973, 0x877c817c, 0xbf06817c, 0xbf850017, @@ -939,7 +939,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xb96f4306, 0x876fc16f, 0xbf840029, 0x8f6f866f, 0x8f6f826f, 0xbef6036f, - 0xb9782a05, 0x80788178, + 0xb9783a05, 0x80788178, 0xbf0d9972, 0xbf850002, 0x8f788978, 0xbf820001, 0x8f788a78, 0xb96e1e06, @@ -962,7 +962,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0x907c9972, 0x877c817c, 0xbf06817c, 0xbf850002, 0xbeff0380, 0xbf820001, - 0xbeff03c1, 0xb96f2a05, + 0xbeff03c1, 0xb96f3a05, 0x806f816f, 0x8f6f826f, 0x907c9972, 0x877c817c, 0xbf06817c, 0xbf850024, @@ -1010,7 +1010,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0x6e5d0100, 0xe0304200, 0x6e5d0200, 0xe0304300, 0x6e5d0300, 0xbf8c3f70, - 0xb9782a05, 0x80788178, + 0xb9783a05, 0x80788178, 0xbf0d9972, 0xbf850002, 0x8f788978, 0xbf820001, 0x8f788a78, 0xb96e1e06, @@ -1037,7 +1037,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xbe8c310c, 0xbe8e310e, 0xbf06807c, 0xbf84fff0, 0xba80f801, 0x00000000, - 0xbf8a0000, 0xb9782a05, + 0xbf8a0000, 0xb9783a05, 0x80788178, 0xbf0d9972, 0xbf850002, 0x8f788978, 0xbf820001, 0x8f788a78, @@ -2261,7 +2261,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xbf8a0000, 0x877aff6d, 0x80000000, 0xbf840040, 0x8f7b867b, 0x8f7b827b, - 0xbef6037b, 0xb9702a05, + 0xbef6037b, 0xb9703a05, 0x80708170, 0xbf0d9973, 0xbf850002, 0x8f708970, 0xbf820001, 0x8f708a70, @@ -2298,7 +2298,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0x00000200, 0xbeff0380, 0xbf820003, 0xbef003ff, 
0x00000400, 0xbeff03c1, - 0xb97b2a05, 0x807b817b, + 0xb97b3a05, 0x807b817b, 0x8f7b827b, 0x907c9973, 0x877c817c, 0xbf06817c, 0xbf850017, 0xbef603ff, @@ -2345,7 +2345,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xbeff03c1, 0xb96f4306, 0x876fc16f, 0xbf840029, 0x8f6f866f, 0x8f6f826f, - 0xbef6036f, 0xb9782a05, + 0xbef6036f, 0xb9783a05, 0x80788178, 0xbf0d9972, 0xbf850002, 0x8f788978, 0xbf820001, 0x8f788a78, @@ -2369,7 +2369,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0x877c817c, 0xbf06817c, 0xbf850002, 0xbeff0380, 0xbf820001, 0xbeff03c1, - 0xb96f2a05, 0x806f816f, + 0xb96f3a05, 0x806f816f, 0x8f6f826f, 0x907c9972, 0x877c817c, 0xbf06817c, 0xbf850024, 0xbef603ff, @@ -2416,7 +2416,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xe0304100, 0x6e5d0100, 0xe0304200, 0x6e5d0200, 0xe0304300, 0x6e5d0300, - 0xbf8c3f70, 0xb9782a05, + 0xbf8c3f70, 0xb9783a05, 0x80788178, 0xbf0d9972, 0xbf850002, 0x8f788978, 0xbf820001, 0x8f788a78, @@ -2444,7 +2444,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xbe8e310e, 0xbf06807c, 0xbf84fff0, 0xba80f801, 0x00000000, 0xbf8a0000, - 0xb9782a05, 0x80788178, + 0xb9783a05, 0x80788178, 0xbf0d9972, 0xbf850002, 0x8f788978, 0xbf820001, 0x8f788a78, 0xb96e1e06, @@ -2494,3 +2494,440 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0x00000000, }; + +static const uint32_t cwsr_trap_gfx11_hex[] = { + 0xbfa00001, 0xbfa0021b, + 0xb0804006, 0xb8f8f802, + 0x91788678, 0xb8fbf803, + 0x8b6eff78, 0x00002000, + 0xbfa10009, 0x8b6eff6d, + 0x00ff0000, 0xbfa2001e, + 0x8b6eff7b, 0x00000400, + 0xbfa20041, 0xbf830010, + 0xb8fbf803, 0xbfa0fffa, + 0x8b6eff7b, 0x00000900, + 0xbfa20015, 0x8b6eff7b, + 0x000071ff, 0xbfa10008, + 0x8b6fff7b, 0x00007080, + 0xbfa10001, 0xbeee1287, + 0xb8eff801, 0x846e8c6e, + 0x8b6e6f6e, 0xbfa2000a, + 0x8b6eff6d, 0x00ff0000, + 0xbfa20007, 0xb8eef801, + 0x8b6eff6e, 0x00000800, + 0xbfa20003, 0x8b6eff7b, + 0x00000400, 0xbfa20026, + 0xbefa4d82, 0xbf89fc07, + 0x84fa887a, 0xf4005bbd, + 0xf8000010, 
0xbf89fc07, + 0x846e976e, 0x9177ff77, + 0x00800000, 0x8c776e77, + 0xf4045bbd, 0xf8000000, + 0xbf89fc07, 0xf4045ebd, + 0xf8000008, 0xbf89fc07, + 0x8bee6e6e, 0xbfa10001, + 0xbe80486e, 0x8b6eff6d, + 0x01ff0000, 0xbfa20005, + 0x8c78ff78, 0x00002000, + 0x80ec886c, 0x82ed806d, + 0xbfa00005, 0x8b6eff6d, + 0x01000000, 0xbfa20002, + 0x806c846c, 0x826d806d, + 0x8b6dff6d, 0x0000ffff, + 0x8bfe7e7e, 0x8bea6a6a, + 0xb978f802, 0xbe804a6c, + 0x8b6dff6d, 0x0000ffff, + 0xbefa0080, 0xb97a0283, + 0xbeee007e, 0xbeef007f, + 0xbefe0180, 0xbefe4d84, + 0xbf89fc07, 0x8b7aff7f, + 0x04000000, 0x847a857a, + 0x8c6d7a6d, 0xbefa007e, + 0x8b7bff7f, 0x0000ffff, + 0xbefe00c1, 0xbeff00c1, + 0xdca6c000, 0x007a0000, + 0x7e000280, 0xbefe007a, + 0xbeff007b, 0xb8fb02dc, + 0x847b997b, 0xb8fa3b05, + 0x807a817a, 0xbf0d997b, + 0xbfa20002, 0x847a897a, + 0xbfa00001, 0x847a8a7a, + 0xb8fb1e06, 0x847b8a7b, + 0x807a7b7a, 0x8b7bff7f, + 0x0000ffff, 0x807aff7a, + 0x00000200, 0x807a7e7a, + 0x827b807b, 0xd7610000, + 0x00010870, 0xd7610000, + 0x00010a71, 0xd7610000, + 0x00010c72, 0xd7610000, + 0x00010e73, 0xd7610000, + 0x00011074, 0xd7610000, + 0x00011275, 0xd7610000, + 0x00011476, 0xd7610000, + 0x00011677, 0xd7610000, + 0x00011a79, 0xd7610000, + 0x00011c7e, 0xd7610000, + 0x00011e7f, 0xbefe00ff, + 0x00003fff, 0xbeff0080, + 0xdca6c040, 0x007a0000, + 0xd760007a, 0x00011d00, + 0xd760007b, 0x00011f00, + 0xbefe007a, 0xbeff007b, + 0xbef4007e, 0x8b75ff7f, + 0x0000ffff, 0x8c75ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x10807fac, + 0xbef1007d, 0xbef00080, + 0xb8f302dc, 0x84739973, + 0xbefe00c1, 0x857d9973, + 0x8b7d817d, 0xbf06817d, + 0xbfa20002, 0xbeff0080, + 0xbfa00002, 0xbeff00c1, + 0xbfa00009, 0xbef600ff, + 0x01000000, 0xe0685080, + 0x701d0100, 0xe0685100, + 0x701d0200, 0xe0685180, + 0x701d0300, 0xbfa00008, + 0xbef600ff, 0x01000000, + 0xe0685100, 0x701d0100, + 0xe0685200, 0x701d0200, + 0xe0685300, 0x701d0300, + 0xb8f03b05, 0x80708170, + 0xbf0d9973, 0xbfa20002, + 0x84708970, 0xbfa00001, + 0x84708a70, 0xb8fa1e06, + 
0x847a8a7a, 0x80707a70, + 0x8070ff70, 0x00000200, + 0xbef600ff, 0x01000000, + 0x7e000280, 0x7e020280, + 0x7e040280, 0xbefd0080, + 0xd7610002, 0x0000fa71, + 0x807d817d, 0xd7610002, + 0x0000fa6c, 0x807d817d, + 0x917aff6d, 0x80000000, + 0xd7610002, 0x0000fa7a, + 0x807d817d, 0xd7610002, + 0x0000fa6e, 0x807d817d, + 0xd7610002, 0x0000fa6f, + 0x807d817d, 0xd7610002, + 0x0000fa78, 0x807d817d, + 0xb8faf803, 0xd7610002, + 0x0000fa7a, 0x807d817d, + 0xd7610002, 0x0000fa7b, + 0x807d817d, 0xb8f1f801, + 0xd7610002, 0x0000fa71, + 0x807d817d, 0xb8f1f814, + 0xd7610002, 0x0000fa71, + 0x807d817d, 0xb8f1f815, + 0xd7610002, 0x0000fa71, + 0x807d817d, 0xbefe00ff, + 0x0000ffff, 0xbeff0080, + 0xe0685000, 0x701d0200, + 0xbefe00c1, 0xb8f03b05, + 0x80708170, 0xbf0d9973, + 0xbfa20002, 0x84708970, + 0xbfa00001, 0x84708a70, + 0xb8fa1e06, 0x847a8a7a, + 0x80707a70, 0xbef600ff, + 0x01000000, 0xbef90080, + 0xbefd0080, 0xbf800000, + 0xbe804100, 0xbe824102, + 0xbe844104, 0xbe864106, + 0xbe884108, 0xbe8a410a, + 0xbe8c410c, 0xbe8e410e, + 0xd7610002, 0x0000f200, + 0x80798179, 0xd7610002, + 0x0000f201, 0x80798179, + 0xd7610002, 0x0000f202, + 0x80798179, 0xd7610002, + 0x0000f203, 0x80798179, + 0xd7610002, 0x0000f204, + 0x80798179, 0xd7610002, + 0x0000f205, 0x80798179, + 0xd7610002, 0x0000f206, + 0x80798179, 0xd7610002, + 0x0000f207, 0x80798179, + 0xd7610002, 0x0000f208, + 0x80798179, 0xd7610002, + 0x0000f209, 0x80798179, + 0xd7610002, 0x0000f20a, + 0x80798179, 0xd7610002, + 0x0000f20b, 0x80798179, + 0xd7610002, 0x0000f20c, + 0x80798179, 0xd7610002, + 0x0000f20d, 0x80798179, + 0xd7610002, 0x0000f20e, + 0x80798179, 0xd7610002, + 0x0000f20f, 0x80798179, + 0xbf06a079, 0xbfa10006, + 0xe0685000, 0x701d0200, + 0x8070ff70, 0x00000080, + 0xbef90080, 0x7e040280, + 0x807d907d, 0xbf0aff7d, + 0x00000060, 0xbfa2ffbc, + 0xbe804100, 0xbe824102, + 0xbe844104, 0xbe864106, + 0xbe884108, 0xbe8a410a, + 0xd7610002, 0x0000f200, + 0x80798179, 0xd7610002, + 0x0000f201, 0x80798179, + 0xd7610002, 0x0000f202, + 0x80798179, 0xd7610002, 
+ 0x0000f203, 0x80798179, + 0xd7610002, 0x0000f204, + 0x80798179, 0xd7610002, + 0x0000f205, 0x80798179, + 0xd7610002, 0x0000f206, + 0x80798179, 0xd7610002, + 0x0000f207, 0x80798179, + 0xd7610002, 0x0000f208, + 0x80798179, 0xd7610002, + 0x0000f209, 0x80798179, + 0xd7610002, 0x0000f20a, + 0x80798179, 0xd7610002, + 0x0000f20b, 0x80798179, + 0xe0685000, 0x701d0200, + 0xbefe00c1, 0x857d9973, + 0x8b7d817d, 0xbf06817d, + 0xbfa20002, 0xbeff0080, + 0xbfa00001, 0xbeff00c1, + 0xb8fb4306, 0x8b7bc17b, + 0xbfa10044, 0xbfbd0000, + 0x8b7aff6d, 0x80000000, + 0xbfa10040, 0x847b867b, + 0x847b827b, 0xbef6007b, + 0xb8f03b05, 0x80708170, + 0xbf0d9973, 0xbfa20002, + 0x84708970, 0xbfa00001, + 0x84708a70, 0xb8fa1e06, + 0x847a8a7a, 0x80707a70, + 0x8070ff70, 0x00000200, + 0x8070ff70, 0x00000080, + 0xbef600ff, 0x01000000, + 0xd71f0000, 0x000100c1, + 0xd7200000, 0x000200c1, + 0x16000084, 0x857d9973, + 0x8b7d817d, 0xbf06817d, + 0xbefd0080, 0xbfa20012, + 0xbe8300ff, 0x00000080, + 0xbf800000, 0xbf800000, + 0xbf800000, 0xd8d80000, + 0x01000000, 0xbf890000, + 0xe0685000, 0x701d0100, + 0x807d037d, 0x80700370, + 0xd5250000, 0x0001ff00, + 0x00000080, 0xbf0a7b7d, + 0xbfa2fff4, 0xbfa00011, + 0xbe8300ff, 0x00000100, + 0xbf800000, 0xbf800000, + 0xbf800000, 0xd8d80000, + 0x01000000, 0xbf890000, + 0xe0685000, 0x701d0100, + 0x807d037d, 0x80700370, + 0xd5250000, 0x0001ff00, + 0x00000100, 0xbf0a7b7d, + 0xbfa2fff4, 0xbefe00c1, + 0x857d9973, 0x8b7d817d, + 0xbf06817d, 0xbfa20004, + 0xbef000ff, 0x00000200, + 0xbeff0080, 0xbfa00003, + 0xbef000ff, 0x00000400, + 0xbeff00c1, 0xb8fb3b05, + 0x807b817b, 0x847b827b, + 0x857d9973, 0x8b7d817d, + 0xbf06817d, 0xbfa20017, + 0xbef600ff, 0x01000000, + 0xbefd0084, 0xbf0a7b7d, + 0xbfa10037, 0x7e008700, + 0x7e028701, 0x7e048702, + 0x7e068703, 0xe0685000, + 0x701d0000, 0xe0685080, + 0x701d0100, 0xe0685100, + 0x701d0200, 0xe0685180, + 0x701d0300, 0x807d847d, + 0x8070ff70, 0x00000200, + 0xbf0a7b7d, 0xbfa2ffef, + 0xbfa00025, 0xbef600ff, + 0x01000000, 0xbefd0084, + 0xbf0a7b7d, 
0xbfa10011, + 0x7e008700, 0x7e028701, + 0x7e048702, 0x7e068703, + 0xe0685000, 0x701d0000, + 0xe0685100, 0x701d0100, + 0xe0685200, 0x701d0200, + 0xe0685300, 0x701d0300, + 0x807d847d, 0x8070ff70, + 0x00000400, 0xbf0a7b7d, + 0xbfa2ffef, 0xb8fb1e06, + 0x8b7bc17b, 0xbfa1000c, + 0x847b837b, 0x807b7d7b, + 0xbefe00c1, 0xbeff0080, + 0x7e008700, 0xe0685000, + 0x701d0000, 0x807d817d, + 0x8070ff70, 0x00000080, + 0xbf0a7b7d, 0xbfa2fff8, + 0xbfa00141, 0xbef4007e, + 0x8b75ff7f, 0x0000ffff, + 0x8c75ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x10807fac, 0xb8f202dc, + 0x84729972, 0x8b6eff7f, + 0x04000000, 0xbfa1003a, + 0xbefe00c1, 0x857d9972, + 0x8b7d817d, 0xbf06817d, + 0xbfa20002, 0xbeff0080, + 0xbfa00001, 0xbeff00c1, + 0xb8ef4306, 0x8b6fc16f, + 0xbfa1002f, 0x846f866f, + 0x846f826f, 0xbef6006f, + 0xb8f83b05, 0x80788178, + 0xbf0d9972, 0xbfa20002, + 0x84788978, 0xbfa00001, + 0x84788a78, 0xb8ee1e06, + 0x846e8a6e, 0x80786e78, + 0x8078ff78, 0x00000200, + 0x8078ff78, 0x00000080, + 0xbef600ff, 0x01000000, + 0x857d9972, 0x8b7d817d, + 0xbf06817d, 0xbefd0080, + 0xbfa2000c, 0xe0500000, + 0x781d0000, 0xbf8903f7, + 0xdac00000, 0x00000000, + 0x807dff7d, 0x00000080, + 0x8078ff78, 0x00000080, + 0xbf0a6f7d, 0xbfa2fff5, + 0xbfa0000b, 0xe0500000, + 0x781d0000, 0xbf8903f7, + 0xdac00000, 0x00000000, + 0x807dff7d, 0x00000100, + 0x8078ff78, 0x00000100, + 0xbf0a6f7d, 0xbfa2fff5, + 0xbef80080, 0xbefe00c1, + 0x857d9972, 0x8b7d817d, + 0xbf06817d, 0xbfa20002, + 0xbeff0080, 0xbfa00001, + 0xbeff00c1, 0xb8ef3b05, + 0x806f816f, 0x846f826f, + 0x857d9972, 0x8b7d817d, + 0xbf06817d, 0xbfa20024, + 0xbef600ff, 0x01000000, + 0xbeee0078, 0x8078ff78, + 0x00000200, 0xbefd0084, + 0xbf0a6f7d, 0xbfa10050, + 0xe0505000, 0x781d0000, + 0xe0505080, 0x781d0100, + 0xe0505100, 0x781d0200, + 0xe0505180, 0x781d0300, + 0xbf8903f7, 0x7e008500, + 0x7e028501, 0x7e048502, + 0x7e068503, 0x807d847d, + 0x8078ff78, 0x00000200, + 0xbf0a6f7d, 0xbfa2ffee, + 0xe0505000, 0x6e1d0000, + 0xe0505080, 0x6e1d0100, + 0xe0505100, 0x6e1d0200, + 
0xe0505180, 0x6e1d0300, + 0xbf8903f7, 0xbfa00034, + 0xbef600ff, 0x01000000, + 0xbeee0078, 0x8078ff78, + 0x00000400, 0xbefd0084, + 0xbf0a6f7d, 0xbfa10012, + 0xe0505000, 0x781d0000, + 0xe0505100, 0x781d0100, + 0xe0505200, 0x781d0200, + 0xe0505300, 0x781d0300, + 0xbf8903f7, 0x7e008500, + 0x7e028501, 0x7e048502, + 0x7e068503, 0x807d847d, + 0x8078ff78, 0x00000400, + 0xbf0a6f7d, 0xbfa2ffee, + 0xb8ef1e06, 0x8b6fc16f, + 0xbfa1000e, 0x846f836f, + 0x806f7d6f, 0xbefe00c1, + 0xbeff0080, 0xe0505000, + 0x781d0000, 0xbf8903f7, + 0x7e008500, 0x807d817d, + 0x8078ff78, 0x00000080, + 0xbf0a6f7d, 0xbfa2fff7, + 0xbeff00c1, 0xe0505000, + 0x6e1d0000, 0xe0505100, + 0x6e1d0100, 0xe0505200, + 0x6e1d0200, 0xe0505300, + 0x6e1d0300, 0xbf8903f7, + 0xb8f83b05, 0x80788178, + 0xbf0d9972, 0xbfa20002, + 0x84788978, 0xbfa00001, + 0x84788a78, 0xb8ee1e06, + 0x846e8a6e, 0x80786e78, + 0x8078ff78, 0x00000200, + 0x80f8ff78, 0x00000050, + 0xbef600ff, 0x01000000, + 0xbefd00ff, 0x0000006c, + 0x80f89078, 0xf428403a, + 0xf0000000, 0xbf89fc07, + 0x80fd847d, 0xbf800000, + 0xbe804300, 0xbe824302, + 0x80f8a078, 0xf42c403a, + 0xf0000000, 0xbf89fc07, + 0x80fd887d, 0xbf800000, + 0xbe804300, 0xbe824302, + 0xbe844304, 0xbe864306, + 0x80f8c078, 0xf430403a, + 0xf0000000, 0xbf89fc07, + 0x80fd907d, 0xbf800000, + 0xbe804300, 0xbe824302, + 0xbe844304, 0xbe864306, + 0xbe884308, 0xbe8a430a, + 0xbe8c430c, 0xbe8e430e, + 0xbf06807d, 0xbfa1fff0, + 0xb980f801, 0x00000000, + 0xbfbd0000, 0xb8f83b05, + 0x80788178, 0xbf0d9972, + 0xbfa20002, 0x84788978, + 0xbfa00001, 0x84788a78, + 0xb8ee1e06, 0x846e8a6e, + 0x80786e78, 0x8078ff78, + 0x00000200, 0xbef600ff, + 0x01000000, 0xf4205bfa, + 0xf0000000, 0x80788478, + 0xf4205b3a, 0xf0000000, + 0x80788478, 0xf4205b7a, + 0xf0000000, 0x80788478, + 0xf4205c3a, 0xf0000000, + 0x80788478, 0xf4205c7a, + 0xf0000000, 0x80788478, + 0xf4205eba, 0xf0000000, + 0x80788478, 0xf4205efa, + 0xf0000000, 0x80788478, + 0xf4205e7a, 0xf0000000, + 0x80788478, 0xf4205cfa, + 0xf0000000, 0x80788478, + 0xf4205bba, 0xf0000000, 
+ 0x80788478, 0xbf89fc07, + 0xb96ef814, 0xf4205bba, + 0xf0000000, 0x80788478, + 0xbf89fc07, 0xb96ef815, + 0xbefd006f, 0xbefe0070, + 0xbeff0071, 0x8b6f7bff, + 0x000003ff, 0xb96f4803, + 0x8b6f7bff, 0xfffff800, + 0x856f8b6f, 0xb96fa2c3, + 0xb973f801, 0xb8ee3b05, + 0x806e816e, 0xbf0d9972, + 0xbfa20002, 0x846e896e, + 0xbfa00001, 0x846e8a6e, + 0xb8ef1e06, 0x846f8a6f, + 0x806e6f6e, 0x806eff6e, + 0x00000200, 0x806e746e, + 0x826f8075, 0x8b6fff6f, + 0x0000ffff, 0xf4085c37, + 0xf8000050, 0xf4085d37, + 0xf8000060, 0xf4005e77, + 0xf8000074, 0xbf89fc07, + 0x8b6dff6d, 0x0000ffff, + 0x8bfe7e7e, 0x8bea6a6a, + 0xb97af802, 0xbe804a6c, + 0xbfb00000, 0xbf9f0000, + 0xbf9f0000, 0xbf9f0000, + 0xbf9f0000, 0xbf9f0000, +}; diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm index 0348191e8592..250ab007399b 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm @@ -23,15 +23,26 @@ /* To compile this assembly code: * * Navi1x: - * cpp -DASIC_TARGET_NAVI1X=1 cwsr_trap_handler_gfx10.asm -P -o nv1x.sp3 - * sp3-nv1x nv1x.sp3 -hex nv1x.hex + * cpp -DASIC_FAMILY=CHIP_NAVI10 cwsr_trap_handler_gfx10.asm -P -o nv1x.sp3 + * sp3 nv1x.sp3 -hex nv1x.hex * - * Others: - * cpp -DASIC_TARGET_NAVI1X=0 cwsr_trap_handler_gfx10.asm -P -o gfx10.sp3 - * sp3-gfx10 gfx10.sp3 -hex gfx10.hex + * gfx10: + * cpp -DASIC_FAMILY=CHIP_SIENNA_CICHLID cwsr_trap_handler_gfx10.asm -P -o gfx10.sp3 + * sp3 gfx10.sp3 -hex gfx10.hex + * + * gfx11: + * cpp -DASIC_FAMILY=CHIP_PLUM_BONITO cwsr_trap_handler_gfx10.asm -P -o gfx11.sp3 + * sp3 gfx11.sp3 -hex gfx11.hex */ -#define NO_SQC_STORE !ASIC_TARGET_NAVI1X +#define CHIP_NAVI10 26 +#define CHIP_SIENNA_CICHLID 30 +#define CHIP_PLUM_BONITO 36 + +#define NO_SQC_STORE (ASIC_FAMILY >= CHIP_SIENNA_CICHLID) +#define HAVE_XNACK (ASIC_FAMILY < CHIP_SIENNA_CICHLID) +#define HAVE_SENDMSG_RTN (ASIC_FAMILY >= CHIP_PLUM_BONITO) +#define 
HAVE_BUFFER_LDS_LOAD (ASIC_FAMILY < CHIP_PLUM_BONITO) var SINGLE_STEP_MISSED_WORKAROUND = 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised @@ -41,15 +52,18 @@ var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 -var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8 -var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6 -var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24 -var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 4 +var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 8 var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT = 24 var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE = 4 var SQ_WAVE_IB_STS2_WAVE64_SHIFT = 11 var SQ_WAVE_IB_STS2_WAVE64_SIZE = 1 +#if ASIC_FAMILY < CHIP_PLUM_BONITO +var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8 +#else +var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12 +#endif + var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400 var SQ_WAVE_TRAPSTS_EXCP_MASK = 0x1FF var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10 @@ -231,15 +245,20 @@ end s_cbranch_scc1 L_SAVE L_FETCH_2ND_TRAP: -#if ASIC_TARGET_NAVI1X +#if HAVE_XNACK save_and_clear_ib_sts(ttmp14, ttmp15) #endif // Read second-level TBA/TMA from first-level TMA and jump if available. 
// ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data) // ttmp12 holds SQ_WAVE_STATUS +#if HAVE_SENDMSG_RTN + s_sendmsg_rtn_b64 [ttmp14, ttmp15], sendmsg(MSG_RTN_GET_TMA) + s_waitcnt lgkmcnt(0) +#else s_getreg_b32 ttmp14, hwreg(HW_REG_SHADER_TMA_LO) s_getreg_b32 ttmp15, hwreg(HW_REG_SHADER_TMA_HI) +#endif s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag @@ -282,7 +301,7 @@ L_TRAP_CASE: L_EXIT_TRAP: s_and_b32 ttmp1, ttmp1, 0xFFFF -#if ASIC_TARGET_NAVI1X +#if HAVE_XNACK restore_ib_sts(ttmp14, ttmp15) #endif @@ -298,7 +317,7 @@ L_SAVE: s_mov_b32 s_save_tmp, 0 s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit -#if ASIC_TARGET_NAVI1X +#if HAVE_XNACK save_and_clear_ib_sts(s_save_tmp, s_save_trapsts) #endif @@ -307,9 +326,13 @@ L_SAVE: s_mov_b32 s_save_exec_hi, exec_hi s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive +#if HAVE_SENDMSG_RTN + s_sendmsg_rtn_b64 [exec_lo, exec_hi], sendmsg(MSG_RTN_SAVE_WAVE) +#else s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC +#endif -#if ASIC_TARGET_NAVI1X +#if ASIC_FAMILY < CHIP_SIENNA_CICHLID L_SLEEP: // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause // SQ hang, since the 7,8th wave could not get arbit to exec inst, while @@ -389,7 +412,7 @@ L_SLEEP: s_mov_b32 s_save_mem_offset, 0x0 get_wave_size(s_wave_size) -#if ASIC_TARGET_NAVI1X +#if HAVE_XNACK // Save and clear vector XNACK state late to free up SGPRs. 
s_getreg_b32 s_save_xnack_mask, hwreg(HW_REG_SHADER_XNACK_MASK) s_setreg_imm32_b32 hwreg(HW_REG_SHADER_XNACK_MASK), 0x0 @@ -777,7 +800,13 @@ L_RESTORE_LDS_NORMAL: s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64 L_RESTORE_LDS_LOOP_W32: +#if HAVE_BUFFER_LDS_LOAD buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW +#else + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset + s_waitcnt vmcnt(0) + ds_store_addtid_b32 v0 +#endif s_add_u32 m0, m0, 128 // 128 DW s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 //mem offset increased by 128DW s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 @@ -785,7 +814,13 @@ L_RESTORE_LDS_LOOP_W32: s_branch L_RESTORE_VGPR L_RESTORE_LDS_LOOP_W64: +#if HAVE_BUFFER_LDS_LOAD buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW +#else + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset + s_waitcnt vmcnt(0) + ds_store_addtid_b32 v0 +#endif s_add_u32 m0, m0, 256 // 256 DW s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //mem offset increased by 256DW s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 
1 : 0 @@ -996,7 +1031,7 @@ L_RESTORE_HWREG: s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0 -#if ASIC_TARGET_NAVI1X +#if HAVE_XNACK s_setreg_b32 hwreg(HW_REG_SHADER_XNACK_MASK), s_restore_xnack_mask #endif @@ -1019,7 +1054,7 @@ L_RESTORE_HWREG: s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1 s_waitcnt lgkmcnt(0) -#if ASIC_TARGET_NAVI1X +#if HAVE_XNACK restore_ib_sts(s_restore_tmp, s_restore_m0) #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index f1a225a20719..8667e3df2d0b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -441,10 +441,14 @@ static void kfd_cwsr_init(struct kfd_dev *kfd) BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE); kfd->cwsr_isa = cwsr_trap_nv1x_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex); - } else { + } else if (KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0)) { BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE); kfd->cwsr_isa = cwsr_trap_gfx10_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex); + } else { + BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE); + kfd->cwsr_isa = cwsr_trap_gfx11_hex; + kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex); } kfd->cwsr_enabled = true; From 396beb91a9eb86cbfa404e4220cca8f3ada70777 Mon Sep 17 00:00:00 2001 From: Evan Quan <evan.quan@amd.com> Date: Wed, 6 Apr 2022 14:14:50 +0800 Subject: [PATCH 34/46] drm/amd/pm: correct the metrics version for SMU 11.0.11/12/13 Correct the metrics version used for SMU 11.0.11/12/13. Fixes misreported GPU metrics (e.g., fan speed, etc.) depending on which version of SMU firmware is loaded. 
Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1925 Signed-off-by: Evan Quan <evan.quan@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 57 ++++++++++++++----- 1 file changed, 44 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index d68be8f8850e..78f3d9e722bb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -697,12 +697,28 @@ static int sienna_cichlid_get_smu_metrics_data(struct smu_context *smu, uint32_t apu_percent = 0; uint32_t dgpu_percent = 0; - if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) && - (smu->smc_fw_version >= 0x3A4900)) - use_metrics_v3 = true; - else if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) && - (smu->smc_fw_version >= 0x3A4300)) - use_metrics_v2 = true; + switch (smu->adev->ip_versions[MP1_HWIP][0]) { + case IP_VERSION(11, 0, 7): + if (smu->smc_fw_version >= 0x3A4900) + use_metrics_v3 = true; + else if (smu->smc_fw_version >= 0x3A4300) + use_metrics_v2 = true; + break; + case IP_VERSION(11, 0, 11): + if (smu->smc_fw_version >= 0x412D00) + use_metrics_v2 = true; + break; + case IP_VERSION(11, 0, 12): + if (smu->smc_fw_version >= 0x3B2300) + use_metrics_v2 = true; + break; + case IP_VERSION(11, 0, 13): + if (smu->smc_fw_version >= 0x491100) + use_metrics_v2 = true; + break; + default: + break; + } ret = smu_cmn_get_metrics_table(smu, NULL, @@ -3833,13 +3849,28 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu, uint16_t average_gfx_activity; int ret = 0; - if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) && - (smu->smc_fw_version >= 0x3A4900)) - use_metrics_v3 = true; - else if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) && - (smu->smc_fw_version >= 0x3A4300)) - use_metrics_v2 = true; - + switch 
(smu->adev->ip_versions[MP1_HWIP][0]) { + case IP_VERSION(11, 0, 7): + if (smu->smc_fw_version >= 0x3A4900) + use_metrics_v3 = true; + else if (smu->smc_fw_version >= 0x3A4300) + use_metrics_v2 = true; + break; + case IP_VERSION(11, 0, 11): + if (smu->smc_fw_version >= 0x412D00) + use_metrics_v2 = true; + break; + case IP_VERSION(11, 0, 12): + if (smu->smc_fw_version >= 0x3B2300) + use_metrics_v2 = true; + break; + case IP_VERSION(11, 0, 13): + if (smu->smc_fw_version >= 0x491100) + use_metrics_v2 = true; + break; + default: + break; + } ret = smu_cmn_get_metrics_table(smu, &metrics_external, From caa5eadc140ca3748b2ae187da36383edc779300 Mon Sep 17 00:00:00 2001 From: Evan Quan <evan.quan@amd.com> Date: Thu, 19 May 2022 17:28:12 +0800 Subject: [PATCH 35/46] drm/amdgpu: suppress some compile warnings Suppress two compile warnings about "no previous prototype". Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: Evan Quan <evan.quan@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 2 +- drivers/gpu/drm/amd/amdgpu/soc21.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 881570dced41..16cdfb30b013 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1361,7 +1361,7 @@ union mall_info { struct mall_info_v1_0 v1; }; -int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev) +static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev) { struct binary_header *bhdr; union mall_info *mall_info; diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index c6a8520053bb..9e18a2b22607 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -42,6 +42,7 @@ #include "soc15.h" #include
"soc15_common.h" +#include "soc21.h" static const struct amd_ip_funcs soc21_common_ip_funcs; From a35faec3db0e13aac8ea720bc1a3503081dd5a3d Mon Sep 17 00:00:00 2001 From: Dan Carpenter <dan.carpenter@oracle.com> Date: Mon, 16 May 2022 10:05:48 +0300 Subject: [PATCH 36/46] drm/amdgpu: Off by one in dm_dmub_outbox1_low_irq() The > ARRAY_SIZE() should be >= ARRAY_SIZE() to prevent an out of bounds access. Fixes: e27c41d5b068 ("drm/amd/display: Support for DMUB HPD interrupt handling") Reviewed-by: Harry Wentland <harry.wentland@amd.com> Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a92cfb055c15..5ea5e14b83c8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -769,7 +769,7 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params) do { dc_stat_get_dmub_notification(adev->dm.dc, &notify); - if (notify.type > ARRAY_SIZE(dm->dmub_thread_offload)) { + if (notify.type >= ARRAY_SIZE(dm->dmub_thread_offload)) { DRM_ERROR("DM: notify type %d invalid!", notify.type); continue; } From 76818cdd11a25ac6cb1d98875719935d8d0e2e51 Mon Sep 17 00:00:00 2001 From: Sung Joon Kim <Sungjoon.Kim@amd.com> Date: Thu, 19 May 2022 17:46:36 -0400 Subject: [PATCH 37/46] drm/amd/display: add Coverage blend mode for overlay plane According to the KMS man page, there is a "Coverage" alpha blend mode that assumes the pixel color values have NOT been pre-multiplied and will be done when the actual blending to the background color values happens. Previously, this mode hasn't been enabled in our driver and it was assumed that all normal overlay planes are pre-multiplied by default.
When a 3rd party app is used to input an image in a specific format, e.g. PNG, as a source of an overlay plane to blend with the background primary plane, the pixel color values are not pre-multiplied. So by adding "Coverage" blend mode, our driver will support those cases. Issue fixed: Overlay plane alpha channel blending is incorrect Issue tracker: https://gitlab.freedesktop.org/drm/amd/-/issues/1769 Reference: https://dri.freedesktop.org/docs/drm/gpu/drm-kms.html#plane-composition-properties Adding Coverage support also enables IGT kms_plane_alpha_blend Coverage subtests: 1. coverage-7efc 2. coverage-vs-premult-vs-constant Changes 1. Add DRM_MODE_BLEND_COVERAGE blend mode capability 2. Add "pre_multiplied_alpha" flag for Coverage case 3. Read the correct flag and set the DCN MPCC pre_multiplied register bit (only on overlay plane) Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1769 Signed-off-by: Sung Joon Kim <Sungjoon.Kim@amd.com> Reviewed-by: Melissa Wen <mwen@igalia.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 17 ++++++++---- .../gpu/drm/amd/display/dc/core/dc_surface.c | 2 ++ drivers/gpu/drm/amd/display/dc/dc.h | 2 ++ .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 27 ++++++++++--------- .../drm/amd/display/dc/dcn20/dcn20_hwseq.c | 16 ++++++----- 5 files changed, 40 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5ea5e14b83c8..70be67a56673 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5381,17 +5381,19 @@ fill_plane_buffer_attributes(struct amdgpu_device *adev, static void fill_blending_from_plane_state(const struct drm_plane_state *plane_state, - bool *per_pixel_alpha, bool *global_alpha, - int *global_alpha_value) + bool *per_pixel_alpha, bool *pre_multiplied_alpha, + bool *global_alpha, int
*global_alpha_value) { *per_pixel_alpha = false; + *pre_multiplied_alpha = true; *global_alpha = false; *global_alpha_value = 0xff; if (plane_state->plane->type != DRM_PLANE_TYPE_OVERLAY) return; - if (plane_state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI) { + if (plane_state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI || + plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE) { static const uint32_t alpha_formats[] = { DRM_FORMAT_ARGB8888, DRM_FORMAT_RGBA8888, @@ -5406,6 +5408,9 @@ fill_blending_from_plane_state(const struct drm_plane_state *plane_state, break; } } + + if (per_pixel_alpha && plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE) + *pre_multiplied_alpha = false; } if (plane_state->alpha < 0xffff) { @@ -5568,7 +5573,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev, return ret; fill_blending_from_plane_state( - plane_state, &plane_info->per_pixel_alpha, + plane_state, &plane_info->per_pixel_alpha, &plane_info->pre_multiplied_alpha, &plane_info->global_alpha, &plane_info->global_alpha_value); return 0; @@ -5615,6 +5620,7 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev, dc_plane_state->tiling_info = plane_info.tiling_info; dc_plane_state->visible = plane_info.visible; dc_plane_state->per_pixel_alpha = plane_info.per_pixel_alpha; + dc_plane_state->pre_multiplied_alpha = plane_info.pre_multiplied_alpha; dc_plane_state->global_alpha = plane_info.global_alpha; dc_plane_state->global_alpha_value = plane_info.global_alpha_value; dc_plane_state->dcc = plane_info.dcc; @@ -7911,7 +7917,8 @@ static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, if (plane->type == DRM_PLANE_TYPE_OVERLAY && plane_cap && plane_cap->per_pixel_alpha) { unsigned int blend_caps = BIT(DRM_MODE_BLEND_PIXEL_NONE) | - BIT(DRM_MODE_BLEND_PREMULTI); + BIT(DRM_MODE_BLEND_PREMULTI) | + BIT(DRM_MODE_BLEND_COVERAGE); drm_plane_create_alpha_property(plane); drm_plane_create_blend_mode_property(plane, blend_caps); diff --git 
a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c index e6b9c6a71841..5bc6ff2fa73e 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c @@ -61,6 +61,8 @@ static void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *pl plane_state->blend_tf->type = TF_TYPE_BYPASS; } + plane_state->pre_multiplied_alpha = true; + } static void dc_plane_destruct(struct dc_plane_state *plane_state) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index a31ea3644ec2..3960c74482be 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1011,6 +1011,7 @@ struct dc_plane_state { bool is_tiling_rotated; bool per_pixel_alpha; + bool pre_multiplied_alpha; bool global_alpha; int global_alpha_value; bool visible; @@ -1045,6 +1046,7 @@ struct dc_plane_info { bool horizontal_mirror; bool visible; bool per_pixel_alpha; + bool pre_multiplied_alpha; bool global_alpha; int global_alpha_value; bool input_csc_enabled; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index e02ac75afbf7..e3a62873c0e7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2550,12 +2550,21 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) blnd_cfg.overlap_only = false; blnd_cfg.global_gain = 0xff; - if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) { - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN; - blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value; - } else if (per_pixel_alpha) { - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; + if (per_pixel_alpha) { + /* DCN1.0 has output CM before MPC which seems to screw with + * pre-multiplied alpha. 
+ */ + blnd_cfg.pre_multiplied_alpha = (is_rgb_cspace( + pipe_ctx->stream->output_color_space) + && pipe_ctx->plane_state->pre_multiplied_alpha); + if (pipe_ctx->plane_state->global_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN; + blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value; + } else { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; + } } else { + blnd_cfg.pre_multiplied_alpha = false; blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; } @@ -2564,14 +2573,6 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) else blnd_cfg.global_alpha = 0xff; - /* DCN1.0 has output CM before MPC which seems to screw with - * pre-multiplied alpha. - */ - blnd_cfg.pre_multiplied_alpha = is_rgb_cspace( - pipe_ctx->stream->output_color_space) - && per_pixel_alpha; - - /* * TODO: remove hack * Note: currently there is a bug in init_hw such that diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 0da024912dbe..ec6aa8d8b251 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -2345,12 +2345,16 @@ void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) blnd_cfg.overlap_only = false; blnd_cfg.global_gain = 0xff; - if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) { - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN; - blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value; - } else if (per_pixel_alpha) { - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; + if (per_pixel_alpha) { + blnd_cfg.pre_multiplied_alpha = pipe_ctx->plane_state->pre_multiplied_alpha; + if (pipe_ctx->plane_state->global_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN; + blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value; + } else { + 
blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; + } } else { + blnd_cfg.pre_multiplied_alpha = false; blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; } @@ -2364,7 +2368,7 @@ void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) blnd_cfg.top_gain = 0x1f000; blnd_cfg.bottom_inside_gain = 0x1f000; blnd_cfg.bottom_outside_gain = 0x1f000; - blnd_cfg.pre_multiplied_alpha = per_pixel_alpha; + if (pipe_ctx->plane_state->format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA) blnd_cfg.pre_multiplied_alpha = false; From a0ccc717c4ab3ef572f023fdceffb4b6df496a0d Mon Sep 17 00:00:00 2001 From: Alex Deucher <alexander.deucher@amd.com> Date: Mon, 16 May 2022 14:12:33 -0400 Subject: [PATCH 38/46] drm/amdgpu/discovery: validate VCN and SDMA instances MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Validate the VCN and SDMA instances against the driver structure sizes to make sure we don't get into a situation where the firmware reports more instances than the driver supports. 
Reviewed-by: Guchun Chen <guchun.chen@amd.com> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 16cdfb30b013..47f0344205ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1130,13 +1130,24 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) adev->vcn.vcn_config[adev->vcn.num_vcn_inst] = ip->revision & 0xc0; ip->revision &= ~0xc0; - adev->vcn.num_vcn_inst++; + if (adev->vcn.num_vcn_inst < AMDGPU_MAX_VCN_INSTANCES) + adev->vcn.num_vcn_inst++; + else + dev_err(adev->dev, "Too many VCN instances: %d vs %d\n", + adev->vcn.num_vcn_inst + 1, + AMDGPU_MAX_VCN_INSTANCES); } if (le16_to_cpu(ip->hw_id) == SDMA0_HWID || le16_to_cpu(ip->hw_id) == SDMA1_HWID || le16_to_cpu(ip->hw_id) == SDMA2_HWID || - le16_to_cpu(ip->hw_id) == SDMA3_HWID) - adev->sdma.num_instances++; + le16_to_cpu(ip->hw_id) == SDMA3_HWID) { + if (adev->sdma.num_instances < AMDGPU_MAX_SDMA_INSTANCES) + adev->sdma.num_instances++; + else + dev_err(adev->dev, "Too many SDMA instances: %d vs %d\n", + adev->sdma.num_instances + 1, + AMDGPU_MAX_SDMA_INSTANCES); + } if (le16_to_cpu(ip->hw_id) == UMC_HWID) adev->gmc.num_umc++; From 1c755241463bab5d90404a782abf3baf7b7a3217 Mon Sep 17 00:00:00 2001 From: Prike Liang <Prike.Liang@amd.com> Date: Fri, 20 May 2022 11:04:35 +0800 Subject: [PATCH 39/46] drm/amdgpu: clean up asd on the ta_firmware_header_v2_0 The psp13 series uses ta_firmware_header_v2_0 and the asd firmware is built into the ta, so there is no need to request the asd firmware separately.
Signed-off-by: Prike Liang <Prike.Liang@amd.com> Reviewed-by: Yifan Zhang <yifan1.zhang@amd.com> Acked-by: Huang Rui <ray.huang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 18014ed0e853..9e1ef81933ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -32,13 +32,10 @@ MODULE_FIRMWARE("amdgpu/aldebaran_sos.bin"); MODULE_FIRMWARE("amdgpu/aldebaran_ta.bin"); MODULE_FIRMWARE("amdgpu/aldebaran_cap.bin"); -MODULE_FIRMWARE("amdgpu/yellow_carp_asd.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_toc.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_ta.bin"); -MODULE_FIRMWARE("amdgpu/psp_13_0_5_asd.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_5_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin"); -MODULE_FIRMWARE("amdgpu/psp_13_0_8_asd.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin"); @@ -96,9 +93,6 @@ static int psp_v13_0_init_microcode(struct psp_context *psp) case IP_VERSION(13, 0, 3): case IP_VERSION(13, 0, 5): case IP_VERSION(13, 0, 8): - err = psp_init_asd_microcode(psp, chip_name); - if (err) - return err; err = psp_init_toc_microcode(psp, chip_name); if (err) return err; From a5457087eb10322864dedb7768b7a95332393efe Mon Sep 17 00:00:00 2001 From: Candice Li <candice.li@amd.com> Date: Fri, 20 May 2022 20:51:53 +0800 Subject: [PATCH 40/46] drm/amdgpu: Resolve pcie_bif RAS recovery bug Check shared buf instead of init flag for xgmi ta shared buf init during xgmi ta initialization. 
Signed-off-by: Candice Li <candice.li@amd.com> Reviewed-by: John Clements <john.clements@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 214e4e89a028..e9411c28d88b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1177,7 +1177,7 @@ int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool lo psp->xgmi_context.context.mem_context.shared_mem_size = PSP_XGMI_SHARED_MEM_SIZE; psp->xgmi_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA; - if (!psp->xgmi_context.context.initialized) { + if (!psp->xgmi_context.context.mem_context.shared_buf) { ret = psp_ta_init_shared_buf(psp, &psp->xgmi_context.context.mem_context); if (ret) return ret; From d534ca7128d7bf681ed6d462c09b9d6ffb3bed91 Mon Sep 17 00:00:00 2001 From: Alex Deucher <alexander.deucher@amd.com> Date: Mon, 23 May 2022 11:24:31 -0400 Subject: [PATCH 41/46] drm/amdgpu: differentiate between LP and non-LP DDR memory Some applications want to know whether the memory is LP or not. 
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 8 ++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 +++- include/uapi/drm/amdgpu_drm.h | 2 ++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 63e0293edc5f..fd8f3731758e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -188,13 +188,17 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev, vram_type = AMDGPU_VRAM_TYPE_DDR3; break; case Ddr4MemType: - case LpDdr4MemType: vram_type = AMDGPU_VRAM_TYPE_DDR4; break; + case LpDdr4MemType: + vram_type = AMDGPU_VRAM_TYPE_LPDDR4; + break; case Ddr5MemType: - case LpDdr5MemType: vram_type = AMDGPU_VRAM_TYPE_DDR5; break; + case LpDdr5MemType: + vram_type = AMDGPU_VRAM_TYPE_LPDDR5; + break; default: vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 5444515c1476..e1e6441c475f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1018,7 +1018,9 @@ static const char *amdgpu_vram_names[] = { "DDR3", "DDR4", "GDDR6", - "DDR5" + "DDR5", + "LPDDR4", + "LPDDR5" }; /** diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 9a1d210d135d..d9d475d65c76 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -988,6 +988,8 @@ struct drm_amdgpu_info_vbios { #define AMDGPU_VRAM_TYPE_DDR4 8 #define AMDGPU_VRAM_TYPE_GDDR6 9 #define AMDGPU_VRAM_TYPE_DDR5 10 +#define AMDGPU_VRAM_TYPE_LPDDR4 11 +#define AMDGPU_VRAM_TYPE_LPDDR5 12 struct drm_amdgpu_info_device { /** PCI Device ID */ From 31ab27b14daaa75541a415c6794d6f3567fea44a Mon Sep 17 00:00:00 2001 From: Dave Airlie 
<airlied@redhat.com> Date: Mon, 23 May 2022 10:24:18 +1000 Subject: [PATCH 42/46] drm/amdgpu/cs: make commands with 0 chunks illegal behaviour. Submitting a cs with 0 chunks, causes an oops later, found trying to execute the wrong userspace driver. MESA_LOADER_DRIVER_OVERRIDE=v3d glxinfo [172536.665184] BUG: kernel NULL pointer dereference, address: 00000000000001d8 [172536.665188] #PF: supervisor read access in kernel mode [172536.665189] #PF: error_code(0x0000) - not-present page [172536.665191] PGD 6712a0067 P4D 6712a0067 PUD 5af9ff067 PMD 0 [172536.665195] Oops: 0000 [#1] SMP NOPTI [172536.665197] CPU: 7 PID: 2769838 Comm: glxinfo Tainted: P O 5.10.81 #1-NixOS [172536.665199] Hardware name: To be filled by O.E.M. To be filled by O.E.M./CROSSHAIR V FORMULA-Z, BIOS 2201 03/23/2015 [172536.665272] RIP: 0010:amdgpu_cs_ioctl+0x96/0x1ce0 [amdgpu] [172536.665274] Code: 75 18 00 00 4c 8b b2 88 00 00 00 8b 46 08 48 89 54 24 68 49 89 f7 4c 89 5c 24 60 31 d2 4c 89 74 24 30 85 c0 0f 85 c0 01 00 00 <48> 83 ba d8 01 00 00 00 48 8b b4 24 90 00 00 00 74 16 48 8b 46 10 [172536.665276] RSP: 0018:ffffb47c0e81bbe0 EFLAGS: 00010246 [172536.665277] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [172536.665278] RDX: 0000000000000000 RSI: ffffb47c0e81be28 RDI: ffffb47c0e81bd68 [172536.665279] RBP: ffff936524080010 R08: 0000000000000000 R09: ffffb47c0e81be38 [172536.665281] R10: ffff936524080010 R11: ffff936524080000 R12: ffffb47c0e81bc40 [172536.665282] R13: ffffb47c0e81be28 R14: ffff9367bc410000 R15: ffffb47c0e81be28 [172536.665283] FS: 00007fe35e05d740(0000) GS:ffff936c1edc0000(0000) knlGS:0000000000000000 [172536.665284] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [172536.665286] CR2: 00000000000001d8 CR3: 0000000532e46000 CR4: 00000000000406e0 [172536.665287] Call Trace: [172536.665322] ? amdgpu_cs_find_mapping+0x110/0x110 [amdgpu] [172536.665332] drm_ioctl_kernel+0xaa/0xf0 [drm] [172536.665338] drm_ioctl+0x201/0x3b0 [drm] [172536.665369] ? 
amdgpu_cs_find_mapping+0x110/0x110 [amdgpu] [172536.665372] ? selinux_file_ioctl+0x135/0x230 [172536.665399] amdgpu_drm_ioctl+0x49/0x80 [amdgpu] [172536.665403] __x64_sys_ioctl+0x83/0xb0 [172536.665406] do_syscall_64+0x33/0x40 [172536.665409] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2018 Signed-off-by: Dave Airlie <airlied@redhat.com> Cc: stable@vger.kernel.org Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 84caab5e4d22..b28af04b0c3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -116,7 +116,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs int ret; if (cs->in.num_chunks == 0) - return 0; + return -EINVAL; chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL); if (!chunk_array) From ab5a7fb6d2296b9486d17d1e24f4bde90822e644 Mon Sep 17 00:00:00 2001 From: Julia Lawall <Julia.Lawall@inria.fr> Date: Sat, 21 May 2022 13:11:14 +0200 Subject: [PATCH 43/46] drm/amdgpu/gfx: fix typos in comments Spelling mistakes (triple letters) in comments. Detected with the help of Coccinelle. 
Signed-off-by: Julia Lawall <Julia.Lawall@inria.fr> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 02754ee86c81..c5f46d264b23 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -5111,7 +5111,7 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); /* Initialize all compute VMIDs to have no GDS, GWS, or OA - acccess. These should be enabled by FW for target VMIDs. */ + access. These should be enabled by FW for target VMIDs. */ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index fb9302910742..7f0b18b0d4c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -3714,7 +3714,7 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); /* Initialize all compute VMIDs to have no GDS, GWS, or OA - acccess. These should be enabled by FW for target VMIDs. */ + access. These should be enabled by FW for target VMIDs. 
*/ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { WREG32(amdgpu_gds_reg_offset[i].mem_base, 0); WREG32(amdgpu_gds_reg_offset[i].mem_size, 0); @@ -5815,7 +5815,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ gfx_v8_0_wait_for_rlc_serdes(adev); - /* write cmd to Set CGCG Overrride */ + /* write cmd to Set CGCG Override */ gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD); /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index f12ae6e2359a..5349ca4d19e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2535,7 +2535,7 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); /* Initialize all compute VMIDs to have no GDS, GWS, or OA - acccess. These should be enabled by FW for target VMIDs. */ + access. These should be enabled by FW for target VMIDs. */ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); From 6bd8d4b7d511f00a9e02f89b250fba3013200843 Mon Sep 17 00:00:00 2001 From: Julia Lawall <Julia.Lawall@inria.fr> Date: Sat, 21 May 2022 13:11:24 +0200 Subject: [PATCH 44/46] drm/amdkfd: fix typo in comment Spelling mistake (triple letters) in comment. Detected with the help of Coccinelle. 
Signed-off-by: Julia Lawall <Julia.Lawall@inria.fr> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 8b5452a8d330..67abf8dcd30a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1621,7 +1621,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( mutex_lock(&mem->lock); - /* Unpin MMIO/DOORBELL BO's that were pinnned during allocation */ + /* Unpin MMIO/DOORBELL BO's that were pinned during allocation */ if (mem->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { From 39dbde650f9377f97ad985bfa16af93381766232 Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Tue, 24 May 2022 17:35:55 +0530 Subject: [PATCH 45/46] drm/amd/pm: Return auto perf level, if unsupported When powerplay is not enabled, return AUTO as default level. 
Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 5472f9936feb..d1bf073adf54 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -770,6 +770,9 @@ enum amd_dpm_forced_level amdgpu_dpm_get_performance_level(struct amdgpu_device const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; enum amd_dpm_forced_level level; + if (!pp_funcs) + return AMD_DPM_FORCED_LEVEL_AUTO; + mutex_lock(&adev->pm.mutex); if (pp_funcs->get_performance_level) level = pp_funcs->get_performance_level(adev->powerplay.pp_handle); From 62e9bd20035b53ff6c679499c08546d96c6c60a7 Mon Sep 17 00:00:00 2001 From: Alex Deucher <alexander.deucher@amd.com> Date: Tue, 24 May 2022 23:23:59 -0400 Subject: [PATCH 46/46] drm/amdgpu: add beige goby PCI ID Add a beige goby PCI ID. Reviewed-by: Guchun Chen <guchun.chen@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 8592d43a79b0..afabdbbb22c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1940,6 +1940,7 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x7421, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, {0x1002, 0x7422, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, {0x1002, 0x7423, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, + {0x1002, 0x7424, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, {0x1002, 0x743F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, { PCI_DEVICE(0x1002, PCI_ANY_ID),