diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index f66788a2ed77ecb7773a4a3a222ab2530b003021..8489b5087d9c607528109a1e6d5e92044332c110 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -107,7 +107,7 @@ static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct netli if (err) return err; - mlx5_unload_one_devl_locked(dev, true); + mlx5_sync_reset_unload_flow(dev, true); err = mlx5_health_wait_pci_up(dev); if (err) NL_SET_ERR_MSG_MOD(extack, "FW activate aborted, PCI reads fail after reset"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 6b17346aa4cef21d6874d69d67d17bc62c0dabf3..b9986a2656083d9d3718d6967c1e21c0ab571ecd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -12,7 +12,8 @@ enum { MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, MLX5_FW_RESET_FLAGS_PENDING_COMP, MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, - MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED + MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, + MLX5_FW_RESET_FLAGS_UNLOAD_EVENT, }; struct mlx5_fw_reset { @@ -26,6 +27,7 @@ struct mlx5_fw_reset { struct work_struct reset_now_work; struct work_struct reset_abort_work; unsigned long reset_flags; + u8 reset_method; struct timer_list timer; struct completion done; int ret; @@ -94,7 +96,7 @@ static int mlx5_reg_mfrl_set(struct mlx5_core_dev *dev, u8 reset_level, } static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level, - u8 *reset_type, u8 *reset_state) + u8 *reset_type, u8 *reset_state, u8 *reset_method) { u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {}; u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {}; @@ -110,13 +112,26 @@ static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level, *reset_type = MLX5_GET(mfrl_reg, out, reset_type); if (reset_state) *reset_state = MLX5_GET(mfrl_reg, out, reset_state); + if (reset_method) + *reset_method = MLX5_GET(mfrl_reg, out, pci_reset_req_method); return 0; } int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type) { - return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL); + return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL, NULL); +} + +static int mlx5_fw_reset_get_reset_method(struct mlx5_core_dev *dev, + u8 *reset_method) +{ + if (!MLX5_CAP_GEN(dev, pcie_reset_using_hotreset_method)) { + *reset_method = MLX5_MFRL_REG_PCI_RESET_METHOD_LINK_TOGGLE; + return 0; + } + + return mlx5_reg_mfrl_query(dev, NULL, NULL, NULL, reset_method); } static int mlx5_fw_reset_get_reset_state_err(struct mlx5_core_dev *dev, @@ -124,7 +139,7 @@ static int mlx5_fw_reset_get_reset_state_err(struct mlx5_core_dev *dev, { u8 reset_state; - if (mlx5_reg_mfrl_query(dev, NULL, NULL, &reset_state)) + if (mlx5_reg_mfrl_query(dev, NULL, NULL, &reset_state, NULL)) goto out; if (!reset_state) @@ -203,7 +218,7 @@ int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev) return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL0, 0, 0, false); } -static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev, bool unloaded) +static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; struct devlink *devlink = priv_to_devlink(dev); @@ -212,8 +227,7 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev, bool unload if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) { complete(&fw_reset->done); } else { - if (!unloaded) - mlx5_unload_one(dev, false); + mlx5_sync_reset_unload_flow(dev, false); if (mlx5_health_wait_pci_up(dev)) mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); else @@ -256,7 +270,7 @@ static void mlx5_sync_reset_reload_work(struct work_struct *work) mlx5_sync_reset_clear_reset_requested(dev, false); mlx5_enter_error_state(dev, true); - mlx5_fw_reset_complete_reload(dev, false); + mlx5_fw_reset_complete_reload(dev); } #define MLX5_RESET_POLL_INTERVAL (HZ / 10) @@ -402,7 +416,11 @@ static void mlx5_sync_reset_request_event(struct work_struct *work) struct mlx5_core_dev *dev = fw_reset->dev; int err; - if (test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags) || + err = mlx5_fw_reset_get_reset_method(dev, &fw_reset->reset_method); + if (err) + mlx5_core_warn(dev, "Failed reading MFRL, err %d\n", err); + + if (err || test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags) || !mlx5_is_reset_now_capable(dev)) { err = mlx5_fw_reset_set_reset_sync_nack(dev); mlx5_core_warn(dev, "PCI Sync FW Update Reset Nack %s", @@ -419,21 +437,15 @@ static void mlx5_sync_reset_request_event(struct work_struct *work) mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack. Device reset is expected.\n"); } -static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev) +static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev, u16 dev_id) { struct pci_bus *bridge_bus = dev->pdev->bus; struct pci_dev *bridge = bridge_bus->self; unsigned long timeout; struct pci_dev *sdev; - u16 reg16, dev_id; int cap, err; + u16 reg16; - err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id); - if (err) - return pcibios_err_to_errno(err); - err = mlx5_check_dev_ids(dev, dev_id); - if (err) - return err; cap = pci_find_capability(bridge, PCI_CAP_ID_EXP); if (!cap) return -EOPNOTSUPP; @@ -503,64 +515,60 @@ static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev) return err; } -static void mlx5_sync_reset_now_event(struct work_struct *work) +static int mlx5_pci_reset_bus(struct mlx5_core_dev *dev) { - struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, - reset_now_work); - struct mlx5_core_dev *dev = fw_reset->dev; - int err; + if (!MLX5_CAP_GEN(dev, pcie_reset_using_hotreset_method)) + return -EOPNOTSUPP; - if (mlx5_sync_reset_clear_reset_requested(dev, false)) - return; + return pci_reset_bus(dev->pdev); +} - mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n"); +static int mlx5_sync_pci_reset(struct mlx5_core_dev *dev, u8 reset_method) +{ + u16 dev_id; + int err; - err = mlx5_cmd_fast_teardown_hca(dev); - if (err) { - mlx5_core_warn(dev, "Fast teardown failed, no reset done, err %d\n", err); - goto done; - } + err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id); + if (err) + return pcibios_err_to_errno(err); + err = mlx5_check_dev_ids(dev, dev_id); + if (err) + return err; - err = mlx5_pci_link_toggle(dev); - if (err) { - mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, no reset done, err %d\n", err); - set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags); + switch (reset_method) { + case MLX5_MFRL_REG_PCI_RESET_METHOD_LINK_TOGGLE: + err = mlx5_pci_link_toggle(dev, dev_id); + if (err) + mlx5_core_warn(dev, "mlx5_pci_link_toggle failed\n"); + break; + case MLX5_MFRL_REG_PCI_RESET_METHOD_HOT_RESET: + err = mlx5_pci_reset_bus(dev); + if (err) + mlx5_core_warn(dev, "mlx5_pci_reset_bus failed\n"); + break; + default: + return -EOPNOTSUPP; } - mlx5_enter_error_state(dev, true); -done: - fw_reset->ret = err; - mlx5_fw_reset_complete_reload(dev, false); + return err; } -static void mlx5_sync_reset_unload_event(struct work_struct *work) +void mlx5_sync_reset_unload_flow(struct mlx5_core_dev *dev, bool locked) { - struct mlx5_fw_reset *fw_reset; - struct mlx5_core_dev *dev; + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; unsigned long timeout; bool reset_action; u8 rst_state; int err; - fw_reset = container_of(work, struct mlx5_fw_reset, reset_unload_work); - dev = fw_reset->dev; - - if (mlx5_sync_reset_clear_reset_requested(dev, false)) - return; - - mlx5_core_warn(dev, "Sync Reset Unload. Function is forced down.\n"); - - err = mlx5_cmd_fast_teardown_hca(dev); - if (err) - mlx5_core_warn(dev, "Fast teardown failed, unloading, err %d\n", err); - else - mlx5_enter_error_state(dev, true); - - if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) + if (locked) mlx5_unload_one_devl_locked(dev, false); else mlx5_unload_one(dev, false); + if (!test_bit(MLX5_FW_RESET_FLAGS_UNLOAD_EVENT, &fw_reset->reset_flags)) + return; + mlx5_set_fw_rst_ack(dev); mlx5_core_warn(dev, "Sync Reset Unload done, device reset expected\n"); @@ -583,17 +591,73 @@ static void mlx5_sync_reset_unload_event(struct work_struct *work) goto done; } - mlx5_core_warn(dev, "Sync Reset, got reset action. rst_state = %u\n", rst_state); + mlx5_core_warn(dev, "Sync Reset, got reset action. rst_state = %u\n", + rst_state); if (rst_state == MLX5_FW_RST_STATE_TOGGLE_REQ) { - err = mlx5_pci_link_toggle(dev); + err = mlx5_sync_pci_reset(dev, fw_reset->reset_method); if (err) { - mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, err %d\n", err); + mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, err %d\n", + err); fw_reset->ret = err; } } done: - mlx5_fw_reset_complete_reload(dev, true); + clear_bit(MLX5_FW_RESET_FLAGS_UNLOAD_EVENT, &fw_reset->reset_flags); +} + +static void mlx5_sync_reset_now_event(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, + reset_now_work); + struct mlx5_core_dev *dev = fw_reset->dev; + int err; + + if (mlx5_sync_reset_clear_reset_requested(dev, false)) + return; + + mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n"); + + err = mlx5_cmd_fast_teardown_hca(dev); + if (err) { + mlx5_core_warn(dev, "Fast teardown failed, no reset done, err %d\n", err); + goto done; + } + + err = mlx5_sync_pci_reset(dev, fw_reset->reset_method); + if (err) { + mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, no reset done, err %d\n", err); + set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags); + } + + mlx5_enter_error_state(dev, true); +done: + fw_reset->ret = err; + mlx5_fw_reset_complete_reload(dev); +} + +static void mlx5_sync_reset_unload_event(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset; + struct mlx5_core_dev *dev; + int err; + + fw_reset = container_of(work, struct mlx5_fw_reset, reset_unload_work); + dev = fw_reset->dev; + + if (mlx5_sync_reset_clear_reset_requested(dev, false)) + return; + + set_bit(MLX5_FW_RESET_FLAGS_UNLOAD_EVENT, &fw_reset->reset_flags); + mlx5_core_warn(dev, "Sync Reset Unload. Function is forced down.\n"); + + err = mlx5_cmd_fast_teardown_hca(dev); + if (err) + mlx5_core_warn(dev, "Fast teardown failed, unloading, err %d\n", err); + else + mlx5_enter_error_state(dev, true); + + mlx5_fw_reset_complete_reload(dev); } static void mlx5_sync_reset_abort_event(struct work_struct *work) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h index ea527d06a85f07c2406d69a90a9cc023363799df..d5b28525c960dcbd8c8ea24c9f6f80d7933d5650 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h @@ -12,6 +12,7 @@ int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel, int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev); int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev); +void mlx5_sync_reset_unload_flow(struct mlx5_core_dev *dev, bool locked); int mlx5_fw_reset_verify_fw_complete(struct mlx5_core_dev *dev, struct netlink_ext_ack *extack); void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index e6280ceb0f3b7fa420a0f2f33434ad7cbc297453..3cb961732c781ebff816f696024549cf9de5d72a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -627,6 +627,9 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) if (MLX5_CAP_GEN_MAX(dev, pci_sync_for_fw_update_with_driver_unload)) MLX5_SET(cmd_hca_cap, set_hca_cap, pci_sync_for_fw_update_with_driver_unload, 1); + if (MLX5_CAP_GEN_MAX(dev, pcie_reset_using_hotreset_method)) + MLX5_SET(cmd_hca_cap, set_hca_cap, + pcie_reset_using_hotreset_method, 1); if (MLX5_CAP_GEN_MAX(dev, num_vhca_ports)) MLX5_SET(cmd_hca_cap, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 9106771bb92f01c67ae828c6b71e719a7ed95276..4913d364e977477d9e714f1410d14a36c8e29a86 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1731,7 +1731,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_328[0x2]; u8 relaxed_ordering_read[0x1]; u8 log_max_pd[0x5]; - u8 reserved_at_330[0x6]; + u8 reserved_at_330[0x5]; + u8 pcie_reset_using_hotreset_method[0x1]; u8 pci_sync_for_fw_update_with_driver_unload[0x1]; u8 vnic_env_cnt_steering_fail[0x1]; u8 vport_counter_local_loopback[0x1]; @@ -10824,6 +10825,11 @@ struct mlx5_ifc_mcda_reg_bits { u8 data[][0x20]; }; +enum { + MLX5_MFRL_REG_PCI_RESET_METHOD_LINK_TOGGLE = 0, + MLX5_MFRL_REG_PCI_RESET_METHOD_HOT_RESET = 1, +}; + enum { MLX5_MFRL_REG_RESET_STATE_IDLE = 0, MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION = 1, @@ -10851,7 +10857,8 @@ struct mlx5_ifc_mfrl_reg_bits { u8 pci_sync_for_fw_update_start[0x1]; u8 pci_sync_for_fw_update_resp[0x2]; u8 rst_type_sel[0x3]; - u8 reserved_at_28[0x4]; + u8 pci_reset_req_method[0x3]; + u8 reserved_at_2b[0x1]; u8 reset_state[0x4]; u8 reset_type[0x8]; u8 reset_level[0x8];