RDMA/bnxt_re: Implement doorbell pacing algorithm

User applications alert the driver when the Doorbell FIFO
reaches the alarm threshold. The driver then updates the pacing
parameters in the shared page so that applications apply the
maximum pacing until the DB FIFO congestion drops to the
pacing threshold. The driver keeps checking the DB FIFO depth
at the pacing interval and gradually adjusts the pacing level.
Once the pacing level returns to its default value (no congestion
in the FIFO), pacing is complete.

Link: https://lore.kernel.org/r/1689742977-9128-7-git-send-email-selvin.xavier@broadcom.com
Signed-off-by: Chandramohan Akula <chandramohan.akula@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
This commit is contained in:
Chandramohan Akula 2023-07-18 22:02:56 -07:00 committed by Jason Gunthorpe
parent ea22248578
commit 2ad4e6303a
2 changed files with 129 additions and 0 deletions

View File

@ -121,8 +121,10 @@ struct bnxt_re_pacing {
u32 dbq_pacing_time; /* ms */
u32 dbr_def_do_pacing;
bool dbr_pacing;
struct mutex dbq_lock; /* synchronize db pacing algo */
};
#define BNXT_RE_MAX_DBR_DO_PACING 0xFFFF
#define BNXT_RE_DBR_PACING_TIME 5 /* ms */
#define BNXT_RE_PACING_ALGO_THRESHOLD 250 /* Entries in DB FIFO */
#define BNXT_RE_PACING_ALARM_TH_MULTIPLE 2 /* Multiple of pacing algo threshold */
@ -193,6 +195,8 @@ struct bnxt_re_dev {
u32 is_virtfn;
u32 num_vfs;
struct bnxt_re_pacing pacing;
struct work_struct dbq_fifo_check_work;
struct delayed_work dbq_pacing_work;
};
#define to_bnxt_re_dev(ptr, member) \
@ -203,6 +207,7 @@ struct bnxt_re_dev {
#define BNXT_RE_ROCEV2_IPV6_PACKET 3
#define BNXT_RE_CHECK_RC(x) ((x) && ((x) != -ETIMEDOUT))
void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev);
static inline struct device *rdev_to_dev(struct bnxt_re_dev *rdev)
{

View File

@ -475,6 +475,125 @@ static void bnxt_re_set_default_pacing_data(struct bnxt_re_dev *rdev)
pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE;
}
/*
 * Poll the doorbell FIFO register until the FIFO occupancy drops below the
 * current pacing threshold (pacing_data->pacing_th).
 *
 * Occupancy is computed as BNXT_RE_MAX_FIFO_DEPTH minus the "room" field
 * extracted from the register at rdev->pacing.dbr_db_fifo_reg_off.
 *
 * This is a busy-wait and it is invoked from the FIFO-check work handler
 * while rdev->pacing.dbq_lock is held, so it must terminate. The occupancy
 * normally falls below the threshold as soon as pacing kicks in, but if it
 * does not (stuck or misbehaving hardware) an unbounded loop would spin
 * the CPU forever. The number of polls is therefore capped; on exhaustion
 * the function returns and the caller proceeds as if the wait had finished.
 */
static void __wait_for_fifo_occupancy_below_th(struct bnxt_re_dev *rdev)
{
	/* Upper bound on register polls before giving up. */
	u32 retry_fifo_check = 1000;
	u32 read_val, fifo_occup;

	while (1) {
		read_val = readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off);
		fifo_occup = BNXT_RE_MAX_FIFO_DEPTH -
			((read_val & BNXT_RE_DB_FIFO_ROOM_MASK) >>
			 BNXT_RE_DB_FIFO_ROOM_SHIFT);

		/*
		 * FIFO occupancy cannot be greater than the MAX FIFO depth;
		 * treat such a reading as invalid and stop waiting.
		 */
		if (fifo_occup > BNXT_RE_MAX_FIFO_DEPTH)
			break;

		if (fifo_occup < rdev->qplib_res.pacing_data->pacing_th)
			break;

		/* Do not spin indefinitely if the FIFO never drains. */
		if (!retry_fifo_check--)
			break;
	}
}
/*
 * Work handler for rdev->dbq_fifo_check_work: raise the doorbell pacing
 * level after a FIFO alarm.
 *
 * Starting from the last saved pacing level, the handler waits for the FIFO
 * to drain below the pacing threshold, stops any pending pacing timer work,
 * scales the level up, publishes it through the pacing data page and
 * re-arms the pacing timer work.
 */
static void bnxt_re_db_fifo_check(struct work_struct *work)
{
	struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
						dbq_fifo_check_work);
	struct bnxt_qplib_db_pacing_data *shared;
	u32 level;

	/* Someone else holds dbq_lock; give up instead of blocking. */
	if (!mutex_trylock(&rdev->pacing.dbq_lock))
		return;

	shared = rdev->qplib_res.pacing_data;
	level = rdev->pacing.do_pacing_save;

	__wait_for_fifo_occupancy_below_th(rdev);
	cancel_delayed_work_sync(&rdev->dbq_pacing_work);

	if (level <= rdev->pacing.dbr_def_do_pacing) {
		/*
		 * Saved level is still at (or below) the default, i.e. this
		 * is a fresh congestion event: scale do_pacing by 8 and
		 * widen pacing_th to 4x the algorithm threshold. The wider
		 * threshold leaves the queue room to oscillate downwards
		 * without running empty, and room to grow without tripping
		 * another alarm right away.
		 */
		level <<= 3;
		shared->pacing_th = rdev->pacing.pacing_algo_th * 4;
	} else {
		/* Congestion already being paced: double the level. */
		level <<= 1;
	}

	/* Never exceed the maximum supported pacing value. */
	if (level > BNXT_RE_MAX_DBR_DO_PACING)
		level = BNXT_RE_MAX_DBR_DO_PACING;

	shared->do_pacing = level;
	rdev->pacing.do_pacing_save = shared->do_pacing;
	shared->alarm_th =
		shared->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE;

	/* Let the timer work walk the level back down over time. */
	schedule_delayed_work(&rdev->dbq_pacing_work,
			      msecs_to_jiffies(rdev->pacing.dbq_pacing_time));
	mutex_unlock(&rdev->pacing.dbq_lock);
}
/*
 * Delayed-work handler for rdev->dbq_pacing_work: periodically relax the
 * doorbell pacing level once FIFO congestion has eased.
 *
 * While the FIFO occupancy is above pacing_th the level is left untouched
 * and the work is simply re-armed. Otherwise do_pacing is lowered by one
 * eighth per run (never below the default); once it reaches the default,
 * the default pacing data is restored and the work is not re-armed.
 * In every case the resulting do_pacing is recorded in do_pacing_save.
 */
static void bnxt_re_pacing_timer_exp(struct work_struct *work)
{
	struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
						dbq_pacing_work.work);
	struct bnxt_qplib_db_pacing_data *shared;
	u32 reg, room, occupancy;

	/* Someone else holds dbq_lock; skip this run. */
	if (!mutex_trylock(&rdev->pacing.dbq_lock))
		return;

	shared = rdev->qplib_res.pacing_data;

	reg = readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off);
	room = (reg & BNXT_RE_DB_FIFO_ROOM_MASK) >> BNXT_RE_DB_FIFO_ROOM_SHIFT;
	occupancy = BNXT_RE_MAX_FIFO_DEPTH - room;

	if (occupancy <= shared->pacing_th) {
		/*
		 * Step down gradually rather than jumping straight back to
		 * the default: drop one eighth of the current level, clamped
		 * so it never goes under the default.
		 */
		shared->do_pacing -= shared->do_pacing >> 3;
		shared->do_pacing = max_t(u32, rdev->pacing.dbr_def_do_pacing,
					  shared->do_pacing);

		if (shared->do_pacing <= rdev->pacing.dbr_def_do_pacing) {
			/* Back at the default level: pacing is finished. */
			bnxt_re_set_default_pacing_data(rdev);
			goto save_level;
		}
	}

	/* Still congested or still stepping down: check again later. */
	schedule_delayed_work(&rdev->dbq_pacing_work,
			      msecs_to_jiffies(rdev->pacing.dbq_pacing_time));
save_level:
	rdev->pacing.do_pacing_save = shared->do_pacing;
	mutex_unlock(&rdev->pacing.dbq_lock);
}
/*
 * bnxt_re_pacing_alert() - react to a doorbell FIFO alarm.
 * @rdev: device whose pacing state is updated.
 *
 * Does nothing when rdev->pacing.dbr_pacing is not set. Otherwise, under
 * rdev->pacing.dbq_lock, it sets the pacing data to the maximum alarm
 * threshold and maximum do_pacing value and (re)queues dbq_fifo_check_work,
 * whose handler computes the actual pacing level.
 */
void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev)
{
struct bnxt_qplib_db_pacing_data *pacing_data;
/* Pacing is not enabled for this device; nothing to do. */
if (!rdev->pacing.dbr_pacing)
return;
mutex_lock(&rdev->pacing.dbq_lock);
pacing_data = rdev->qplib_res.pacing_data;
/*
* Increase the alarm_th to max so that other user lib instances do not
* keep alerting the driver.
*/
pacing_data->alarm_th = BNXT_RE_MAX_FIFO_DEPTH;
/* Apply the maximum pacing until the FIFO check work picks a level. */
pacing_data->do_pacing = BNXT_RE_MAX_DBR_DO_PACING;
/*
* Drop any pending or running instance of the check work before queueing a
* fresh one. Waiting here while holding dbq_lock relies on the handler
* taking the lock with mutex_trylock() rather than blocking on it.
*
* NOTE(review): the work is queued before dbq_lock is released; if the
* handler starts running before the unlock below, its mutex_trylock()
* fails and it returns without adjusting the pacing level. Confirm that
* this window is acceptable.
*/
cancel_work_sync(&rdev->dbq_fifo_check_work);
schedule_work(&rdev->dbq_fifo_check_work);
mutex_unlock(&rdev->pacing.dbq_lock);
}
static int bnxt_re_initialize_dbr_pacing(struct bnxt_re_dev *rdev)
{
if (bnxt_re_hwrm_dbr_pacing_qcfg(rdev))
@ -506,11 +625,16 @@ static int bnxt_re_initialize_dbr_pacing(struct bnxt_re_dev *rdev)
rdev->qplib_res.pacing_data->fifo_room_shift = BNXT_RE_DB_FIFO_ROOM_SHIFT;
rdev->qplib_res.pacing_data->grc_reg_offset = rdev->pacing.dbr_db_fifo_reg_off;
bnxt_re_set_default_pacing_data(rdev);
/* Initialize worker for DBR Pacing */
INIT_WORK(&rdev->dbq_fifo_check_work, bnxt_re_db_fifo_check);
INIT_DELAYED_WORK(&rdev->dbq_pacing_work, bnxt_re_pacing_timer_exp);
return 0;
}
static void bnxt_re_deinitialize_dbr_pacing(struct bnxt_re_dev *rdev)
{
cancel_work_sync(&rdev->dbq_fifo_check_work);
cancel_delayed_work_sync(&rdev->dbq_pacing_work);
if (rdev->pacing.dbr_page)
free_page((u64)rdev->pacing.dbr_page);