All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/amdgpu: add socket id parameter for psp query address cmd
@ 2024-03-21  3:30 Tao Zhou
  2024-03-21  3:30 ` [PATCH 2/2] drm/amdgpu: simplify convert_error_address interface for UMC v12 Tao Zhou
  0 siblings, 1 reply; 3+ messages in thread
From: Tao Zhou @ 2024-03-21  3:30 UTC (permalink / raw)
  To: amd-gfx; +Cc: Tao Zhou

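Add a socket_id field to struct ta_ras_mca_addr, the MCA address input
of the PSP query address command, and set the socket id at both call
sites of umc_v12_0_convert_error_address().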

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/ta_ras_if.h |  1 +
 drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 14 +++++++++++---
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
index 056d4df8fa1f..3ac56a9645eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
@@ -146,6 +146,7 @@ struct ta_ras_mca_addr {
 	uint32_t ch_inst;
 	uint32_t umc_inst;
 	uint32_t node_inst;
+	uint32_t socket_id;
 };
 
 struct ta_ras_phy_addr {
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index 77af4e25ff46..0a9cc87e98d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -268,7 +268,7 @@ static void umc_v12_0_mca_addr_to_pa(struct amdgpu_device *adev,
 static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
 					    struct ras_err_data *err_data, uint64_t err_addr,
 					    uint32_t ch_inst, uint32_t umc_inst,
-					    uint32_t node_inst)
+					    uint32_t node_inst, uint32_t socket_id)
 {
 	uint32_t col, row, row_xor, bank, channel_index;
 	uint64_t soc_pa, retired_page, column;
@@ -280,6 +280,7 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
 	addr_in.ma.ch_inst = ch_inst;
 	addr_in.ma.umc_inst = umc_inst;
 	addr_in.ma.node_inst = node_inst;
+	addr_in.ma.socket_id = socket_id;
 
 	if (psp_ras_query_address(&adev->psp, &addr_in, &addr_out))
 		/* fallback to old path if fail to get pa from psp */
@@ -331,6 +332,7 @@ static int umc_v12_0_query_error_address(struct amdgpu_device *adev,
 	struct ras_err_data *err_data = (struct ras_err_data *)data;
 	uint64_t umc_reg_offset =
 		get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst);
+	uint32_t socket_id = 0;
 
 	mc_umc_status_addr =
 		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
@@ -357,8 +359,13 @@ static int umc_v12_0_query_error_address(struct amdgpu_device *adev,
 
 		err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
 
+		if (!adev->aid_mask &&
+		    adev->smuio.funcs &&
+		    adev->smuio.funcs->get_socket_id)
+			socket_id = adev->smuio.funcs->get_socket_id(adev);
+
 		umc_v12_0_convert_error_address(adev, err_data, err_addr,
-					ch_inst, umc_inst, node_inst);
+					ch_inst, umc_inst, node_inst, socket_id);
 	}
 
 	/* clear umc status */
@@ -450,7 +457,8 @@ static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *ade
 					err_data, err_addr,
 					MCA_IPID_LO_2_UMC_CH(InstanceIdLo),
 					MCA_IPID_LO_2_UMC_INST(InstanceIdLo),
-					err_info->mcm_info.die_id);
+					err_info->mcm_info.die_id,
+					err_info->mcm_info.socket_id);
 			}
 
 			/* Delete error address node from list and free memory */
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] drm/amdgpu: simplify convert_error_address interface for UMC v12
  2024-03-21  3:30 [PATCH 1/2] drm/amdgpu: add socket id parameter for psp query address cmd Tao Zhou
@ 2024-03-21  3:30 ` Tao Zhou
  2024-03-21  9:33   ` Yang, Stanley
  0 siblings, 1 reply; 3+ messages in thread
From: Tao Zhou @ 2024-03-21  3:30 UTC (permalink / raw)
  To: amd-gfx; +Cc: Tao Zhou

Replace separate parameters with struct ta_ras_query_address_input.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 57 ++++++++++++++------------
 1 file changed, 30 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index 0a9cc87e98d0..d0fcfcb3404f 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -266,26 +266,19 @@ static void umc_v12_0_mca_addr_to_pa(struct amdgpu_device *adev,
 }
 
 static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
-					    struct ras_err_data *err_data, uint64_t err_addr,
-					    uint32_t ch_inst, uint32_t umc_inst,
-					    uint32_t node_inst, uint32_t socket_id)
+					struct ras_err_data *err_data,
+					struct ta_ras_query_address_input *addr_in)
 {
 	uint32_t col, row, row_xor, bank, channel_index;
-	uint64_t soc_pa, retired_page, column;
-	struct ta_ras_query_address_input addr_in;
+	uint64_t soc_pa, retired_page, column, err_addr;
 	struct ta_ras_query_address_output addr_out;
 
-	addr_in.addr_type = TA_RAS_MCA_TO_PA;
-	addr_in.ma.err_addr = err_addr;
-	addr_in.ma.ch_inst = ch_inst;
-	addr_in.ma.umc_inst = umc_inst;
-	addr_in.ma.node_inst = node_inst;
-	addr_in.ma.socket_id = socket_id;
-
-	if (psp_ras_query_address(&adev->psp, &addr_in, &addr_out))
+	err_addr = addr_in->ma.err_addr;
+	addr_in->addr_type = TA_RAS_MCA_TO_PA;
+	if (psp_ras_query_address(&adev->psp, addr_in, &addr_out))
 		/* fallback to old path if fail to get pa from psp */
-		umc_v12_0_mca_addr_to_pa(adev, err_addr, ch_inst, umc_inst,
-				node_inst, &addr_out);
+		umc_v12_0_mca_addr_to_pa(adev, err_addr, addr_in->ma.ch_inst,
+				addr_in->ma.umc_inst, addr_in->ma.node_inst, &addr_out);
 
 	soc_pa = addr_out.pa.pa;
 	bank = addr_out.pa.bank;
@@ -310,7 +303,7 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
 			"Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n",
 			retired_page, row, col, bank, channel_index);
 		amdgpu_umc_fill_error_record(err_data, err_addr,
-			retired_page, channel_index, umc_inst);
+			retired_page, channel_index, addr_in->ma.umc_inst);
 
 		/* shift R13 bit */
 		retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT);
@@ -318,7 +311,7 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
 			"Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n",
 			retired_page, row_xor, col, bank, channel_index);
 		amdgpu_umc_fill_error_record(err_data, err_addr,
-			retired_page, channel_index, umc_inst);
+			retired_page, channel_index, addr_in->ma.umc_inst);
 	}
 }
 
@@ -326,13 +319,13 @@ static int umc_v12_0_query_error_address(struct amdgpu_device *adev,
 					uint32_t node_inst, uint32_t umc_inst,
 					uint32_t ch_inst, void *data)
 {
+	struct ras_err_data *err_data = (struct ras_err_data *)data;
+	struct ta_ras_query_address_input addr_in;
 	uint64_t mc_umc_status_addr;
 	uint64_t mc_umc_status, err_addr;
 	uint64_t mc_umc_addrt0;
-	struct ras_err_data *err_data = (struct ras_err_data *)data;
 	uint64_t umc_reg_offset =
 		get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst);
-	uint32_t socket_id = 0;
 
 	mc_umc_status_addr =
 		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
@@ -362,10 +355,16 @@ static int umc_v12_0_query_error_address(struct amdgpu_device *adev,
 		if (!adev->aid_mask &&
 		    adev->smuio.funcs &&
 		    adev->smuio.funcs->get_socket_id)
-			socket_id = adev->smuio.funcs->get_socket_id(adev);
+			addr_in.ma.socket_id = adev->smuio.funcs->get_socket_id(adev);
+		else
+			addr_in.ma.socket_id = 0;
+
+		addr_in.ma.err_addr = err_addr;
+		addr_in.ma.ch_inst = ch_inst;
+		addr_in.ma.umc_inst = umc_inst;
+		addr_in.ma.node_inst = node_inst;
 
-		umc_v12_0_convert_error_address(adev, err_data, err_addr,
-					ch_inst, umc_inst, node_inst, socket_id);
+		umc_v12_0_convert_error_address(adev, err_data, &addr_in);
 	}
 
 	/* clear umc status */
@@ -425,12 +424,16 @@ static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *ade
 	struct ras_err_info *err_info;
 	struct ras_err_addr *mca_err_addr, *tmp;
 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+	struct ta_ras_query_address_input addr_in;
 
 	for_each_ras_error(err_node, err_data) {
 		err_info = &err_node->err_info;
 		if (list_empty(&err_info->err_addr_list))
 			continue;
 
+		addr_in.ma.node_inst = err_info->mcm_info.die_id;
+		addr_in.ma.socket_id = err_info->mcm_info.socket_id;
+
 		list_for_each_entry_safe(mca_err_addr, tmp, &err_info->err_addr_list, node) {
 			mc_umc_status = mca_err_addr->err_status;
 			if (mc_umc_status &&
@@ -446,6 +449,10 @@ static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *ade
 							MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
 				InstanceIdLo = REG_GET_FIELD(mca_ipid, MCMP1_IPIDT0, InstanceIdLo);
 
+				addr_in.ma.err_addr = err_addr;
+				addr_in.ma.ch_inst = MCA_IPID_LO_2_UMC_CH(InstanceIdLo);
+				addr_in.ma.umc_inst = MCA_IPID_LO_2_UMC_INST(InstanceIdLo);
+
 				dev_info(adev->dev, "UMC:IPID:0x%llx, aid:%d, inst:%d, ch:%d, err_addr:0x%llx\n",
 					mca_ipid,
 					err_info->mcm_info.die_id,
@@ -454,11 +461,7 @@ static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *ade
 					err_addr);
 
 				umc_v12_0_convert_error_address(adev,
-					err_data, err_addr,
-					MCA_IPID_LO_2_UMC_CH(InstanceIdLo),
-					MCA_IPID_LO_2_UMC_INST(InstanceIdLo),
-					err_info->mcm_info.die_id,
-					err_info->mcm_info.socket_id);
+					err_data, &addr_in);
 			}
 
 			/* Delete error address node from list and free memory */
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* RE: [PATCH 2/2] drm/amdgpu: simplify convert_error_address interface for UMC v12
  2024-03-21  3:30 ` [PATCH 2/2] drm/amdgpu: simplify convert_error_address interface for UMC v12 Tao Zhou
@ 2024-03-21  9:33   ` Yang, Stanley
  0 siblings, 0 replies; 3+ messages in thread
From: Yang, Stanley @ 2024-03-21  9:33 UTC (permalink / raw)
  To: Zhou1, Tao, amd-gfx@lists.freedesktop.org; +Cc: Zhou1, Tao

[AMD Official Use Only - General]

The series is Reviewed-by: Stanley.Yang <Stanley.Yang@amd.com>

Regards,
Stanley
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Tao
> Zhou
> Sent: Thursday, March 21, 2024 11:30 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Zhou1, Tao <Tao.Zhou1@amd.com>
> Subject: [PATCH 2/2] drm/amdgpu: simplify convert_error_address interface
> for UMC v12
>
> Replace separate parameters with struct ta_ras_query_address_input.
>
> Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 57 ++++++++++++++---------
> ---
>  1 file changed, 30 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
> b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
> index 0a9cc87e98d0..d0fcfcb3404f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
> @@ -266,26 +266,19 @@ static void umc_v12_0_mca_addr_to_pa(struct
> amdgpu_device *adev,  }
>
>  static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
> -                                         struct ras_err_data *err_data,
> uint64_t err_addr,
> -                                         uint32_t ch_inst, uint32_t
> umc_inst,
> -                                         uint32_t node_inst, uint32_t
> socket_id)
> +                                     struct ras_err_data *err_data,
> +                                     struct ta_ras_query_address_input
> *addr_in)
>  {
>       uint32_t col, row, row_xor, bank, channel_index;
> -     uint64_t soc_pa, retired_page, column;
> -     struct ta_ras_query_address_input addr_in;
> +     uint64_t soc_pa, retired_page, column, err_addr;
>       struct ta_ras_query_address_output addr_out;
>
> -     addr_in.addr_type = TA_RAS_MCA_TO_PA;
> -     addr_in.ma.err_addr = err_addr;
> -     addr_in.ma.ch_inst = ch_inst;
> -     addr_in.ma.umc_inst = umc_inst;
> -     addr_in.ma.node_inst = node_inst;
> -     addr_in.ma.socket_id = socket_id;
> -
> -     if (psp_ras_query_address(&adev->psp, &addr_in, &addr_out))
> +     err_addr = addr_in->ma.err_addr;
> +     addr_in->addr_type = TA_RAS_MCA_TO_PA;
> +     if (psp_ras_query_address(&adev->psp, addr_in, &addr_out))
>               /* fallback to old path if fail to get pa from psp */
> -             umc_v12_0_mca_addr_to_pa(adev, err_addr, ch_inst,
> umc_inst,
> -                             node_inst, &addr_out);
> +             umc_v12_0_mca_addr_to_pa(adev, err_addr, addr_in-
> >ma.ch_inst,
> +                             addr_in->ma.umc_inst, addr_in-
> >ma.node_inst, &addr_out);
>
>       soc_pa = addr_out.pa.pa;
>       bank = addr_out.pa.bank;
> @@ -310,7 +303,7 @@ static void umc_v12_0_convert_error_address(struct
> amdgpu_device *adev,
>                       "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x
> Bank:0x%x Channel:0x%x\n",
>                       retired_page, row, col, bank, channel_index);
>               amdgpu_umc_fill_error_record(err_data, err_addr,
> -                     retired_page, channel_index, umc_inst);
> +                     retired_page, channel_index, addr_in->ma.umc_inst);
>
>               /* shift R13 bit */
>               retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); @@ -
> 318,7 +311,7 @@ static void umc_v12_0_convert_error_address(struct
> amdgpu_device *adev,
>                       "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x
> Bank:0x%x Channel:0x%x\n",
>                       retired_page, row_xor, col, bank, channel_index);
>               amdgpu_umc_fill_error_record(err_data, err_addr,
> -                     retired_page, channel_index, umc_inst);
> +                     retired_page, channel_index, addr_in->ma.umc_inst);
>       }
>  }
>
> @@ -326,13 +319,13 @@ static int umc_v12_0_query_error_address(struct
> amdgpu_device *adev,
>                                       uint32_t node_inst, uint32_t
> umc_inst,
>                                       uint32_t ch_inst, void *data)
>  {
> +     struct ras_err_data *err_data = (struct ras_err_data *)data;
> +     struct ta_ras_query_address_input addr_in;
>       uint64_t mc_umc_status_addr;
>       uint64_t mc_umc_status, err_addr;
>       uint64_t mc_umc_addrt0;
> -     struct ras_err_data *err_data = (struct ras_err_data *)data;
>       uint64_t umc_reg_offset =
>               get_umc_v12_0_reg_offset(adev, node_inst, umc_inst,
> ch_inst);
> -     uint32_t socket_id = 0;
>
>       mc_umc_status_addr =
>               SOC15_REG_OFFSET(UMC, 0,
> regMCA_UMC_UMC0_MCUMC_STATUST0); @@ -362,10 +355,16 @@ static
> int umc_v12_0_query_error_address(struct amdgpu_device *adev,
>               if (!adev->aid_mask &&
>                   adev->smuio.funcs &&
>                   adev->smuio.funcs->get_socket_id)
> -                     socket_id = adev->smuio.funcs->get_socket_id(adev);
> +                     addr_in.ma.socket_id = adev->smuio.funcs-
> >get_socket_id(adev);
> +             else
> +                     addr_in.ma.socket_id = 0;
> +
> +             addr_in.ma.err_addr = err_addr;
> +             addr_in.ma.ch_inst = ch_inst;
> +             addr_in.ma.umc_inst = umc_inst;
> +             addr_in.ma.node_inst = node_inst;
>
> -             umc_v12_0_convert_error_address(adev, err_data, err_addr,
> -                                     ch_inst, umc_inst, node_inst,
> socket_id);
> +             umc_v12_0_convert_error_address(adev, err_data,
> &addr_in);
>       }
>
>       /* clear umc status */
> @@ -425,12 +424,16 @@ static void
> umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *ade
>       struct ras_err_info *err_info;
>       struct ras_err_addr *mca_err_addr, *tmp;
>       struct ras_err_data *err_data = (struct ras_err_data
> *)ras_error_status;
> +     struct ta_ras_query_address_input addr_in;
>
>       for_each_ras_error(err_node, err_data) {
>               err_info = &err_node->err_info;
>               if (list_empty(&err_info->err_addr_list))
>                       continue;
>
> +             addr_in.ma.node_inst = err_info->mcm_info.die_id;
> +             addr_in.ma.socket_id = err_info->mcm_info.socket_id;
> +
>               list_for_each_entry_safe(mca_err_addr, tmp, &err_info-
> >err_addr_list, node) {
>                       mc_umc_status = mca_err_addr->err_status;
>                       if (mc_umc_status &&
> @@ -446,6 +449,10 @@ static void
> umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *ade
>
>       MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
>                               InstanceIdLo = REG_GET_FIELD(mca_ipid,
> MCMP1_IPIDT0, InstanceIdLo);
>
> +                             addr_in.ma.err_addr = err_addr;
> +                             addr_in.ma.ch_inst =
> MCA_IPID_LO_2_UMC_CH(InstanceIdLo);
> +                             addr_in.ma.umc_inst =
> MCA_IPID_LO_2_UMC_INST(InstanceIdLo);
> +
>                               dev_info(adev->dev, "UMC:IPID:0x%llx,
> aid:%d, inst:%d, ch:%d, err_addr:0x%llx\n",
>                                       mca_ipid,
>                                       err_info->mcm_info.die_id,
> @@ -454,11 +461,7 @@ static void
> umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *ade
>                                       err_addr);
>
>                               umc_v12_0_convert_error_address(adev,
> -                                     err_data, err_addr,
> -
>       MCA_IPID_LO_2_UMC_CH(InstanceIdLo),
> -
>       MCA_IPID_LO_2_UMC_INST(InstanceIdLo),
> -                                     err_info->mcm_info.die_id,
> -                                     err_info->mcm_info.socket_id);
> +                                     err_data, &addr_in);
>                       }
>
>                       /* Delete error address node from list and free
> memory */
> --
> 2.34.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-03-21  9:33 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2024-03-21  3:30 [PATCH 1/2] drm/amdgpu: add socket id parameter for psp query address cmd Tao Zhou
2024-03-21  3:30 ` [PATCH 2/2] drm/amdgpu: simplify convert_error_address interface for UMC v12 Tao Zhou
2024-03-21  9:33   ` Yang, Stanley

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.