All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/5] opensm/include/iba/ib_types.h: Fix shadow declaration warnings
@ 2014-02-03 11:05 Alex Netes
       [not found] ` <1391425516-14462-1-git-send-email-alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 11+ messages in thread
From: Alex Netes @ 2014-02-03 11:05 UTC (permalink / raw
  To: hal-VPRAkNaXOzVWk0Htik3J/w, linux-rdma-u79uwXL29TY76Z2rM5mHXA; +Cc: Alex Netes

Calling cl_ntoh32(cl_ntoh32()) causes shadow parameters redefinition.
The fix is to split the cl_ntoh32() calls.

Signed-off-by: Alex Netes <alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
 include/iba/ib_types.h |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/include/iba/ib_types.h b/include/iba/ib_types.h
index 86fa07b..a5136d4 100644
--- a/include/iba/ib_types.h
+++ b/include/iba/ib_types.h
@@ -8164,9 +8164,10 @@ static inline void OSM_API
 ib_inform_info_set_qpn(IN ib_inform_info_t * p_ii, IN ib_net32_t const qpn)
 {
 	uint32_t tmp = cl_ntoh32(p_ii->g_or_v.generic.qpn_resp_time_val);
+	uint32_t qpn_h = cl_ntoh32(qpn);
 
 	p_ii->g_or_v.generic.qpn_resp_time_val =
-	    cl_hton32((tmp & 0x000000ff) | ((cl_ntoh32(qpn) << 8) & 0xffffff00)
+	    cl_hton32((tmp & 0x000000ff) | ((qpn_h << 8) & 0xffffff00)
 	    );
 }
 
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/5] opensm: change discovery order of switch data
       [not found] ` <1391425516-14462-1-git-send-email-alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
@ 2014-02-03 11:05   ` Alex Netes
       [not found]     ` <1391425516-14462-2-git-send-email-alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
  2014-02-03 11:05   ` [PATCH 3/5] opensm: Better handle topology changes in the fabric Alex Netes
                     ` (3 subsequent siblings)
  4 siblings, 1 reply; 11+ messages in thread
From: Alex Netes @ 2014-02-03 11:05 UTC (permalink / raw
  To: hal-VPRAkNaXOzVWk0Htik3J/w, linux-rdma-u79uwXL29TY76Z2rM5mHXA; +Cc: Alex Netes

Previously upon receiving GetResp(NodeInfo) of a switch, SM sent
Get(SwitchInfo) and Get(PortInfo) to all its ports in parallel. Upon receiving
GetResp(PortInfo) SM sends Get(PkeyTable). The problem is that we need
SwitchInfo.PartEnforceCap value to calculate max Pkeys block, so in case
one of the GetResp(PortInfo) arrives prior to GetResp(SwitchInfo) this
value won't be set.
The fix is to change the discovery order. Upon receiving GetResp(NodeInfo),
SM sends Get(SwitchInfo). Upon receiving GetResp(SwitchInfo), SM sends
Get(PortInfo port0). If we don't get GetResp(PortInfo port=0), SM will
drop the switch, otherwise SM sends Get(PortInfo ExtPorts).
Moreover, now SM queries for ExtPortInfo and Pkeys only for non-Down
ports.

Signed-off-by: Alex Netes <alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
 opensm/osm_node_info_rcv.c |   43 ++++-------
 opensm/osm_port_info_rcv.c |  165 +++++++++++++++++++++++++++++++++-----------
 opensm/osm_sw_info_rcv.c   |   43 ++++++++++++
 3 files changed, 183 insertions(+), 68 deletions(-)

diff --git a/opensm/osm_node_info_rcv.c b/opensm/osm_node_info_rcv.c
index 4242924..b4e00f3 100644
--- a/opensm/osm_node_info_rcv.c
+++ b/opensm/osm_node_info_rcv.c
@@ -283,19 +283,13 @@ static void ni_rcv_get_port_info(IN osm_sm_t * sm, IN osm_node_t * node,
 	osm_madw_context_t context;
 	osm_physp_t *physp;
 	ib_node_info_t *ni;
-	unsigned port, num_ports;
+	unsigned port;
 	ib_api_status_t status;
 	int mlnx_epi_supported = 0;
 
 	ni = ib_smp_get_payload_ptr(osm_madw_get_smp_ptr(madw));
 
-	if (ni->node_type == IB_NODE_TYPE_SWITCH) {
-		port = 0;
-		num_ports = osm_node_get_num_physp(node);
-	} else {
-		port = ib_node_info_get_local_port_num(ni);
-		num_ports = port + 1;
-	}
+	port = ib_node_info_get_local_port_num(ni);
 
 	if (sm->p_subn->opt.fdr10)
 		mlnx_epi_supported = is_mlnx_ext_port_info_supported(ni->device_id);
@@ -309,25 +303,23 @@ static void ni_rcv_get_port_info(IN osm_sm_t * sm, IN osm_node_t * node,
 	context.pi_context.active_transition = FALSE;
 	context.pi_context.client_rereg = FALSE;
 
-	for (; port < num_ports; port++) {
-		status = osm_req_get(sm, osm_physp_get_dr_path_ptr(physp),
-				     IB_MAD_ATTR_PORT_INFO, cl_hton32(port),
+	status = osm_req_get(sm, osm_physp_get_dr_path_ptr(physp),
+			     IB_MAD_ATTR_PORT_INFO, cl_hton32(port),
+			     TRUE, 0, CL_DISP_MSGID_NONE, &context);
+	if (status != IB_SUCCESS)
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD02: "
+			"Failure initiating PortInfo request (%s)\n",
+			ib_get_err_str(status));
+	if (mlnx_epi_supported) {
+		status = osm_req_get(sm,
+				     osm_physp_get_dr_path_ptr(physp),
+				     IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO,
+				     cl_hton32(port),
 				     TRUE, 0, CL_DISP_MSGID_NONE, &context);
 		if (status != IB_SUCCESS)
-			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD02: "
-				"Failure initiating PortInfo request (%s)\n",
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D0B: "
+				"Failure initiating MLNX ExtPortInfo request (%s)\n",
 				ib_get_err_str(status));
-		if (mlnx_epi_supported) {
-			status = osm_req_get(sm,
-					     osm_physp_get_dr_path_ptr(physp),
-					     IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO,
-					     cl_hton32(port), TRUE, 0,
-					     CL_DISP_MSGID_NONE, &context);
-			if (status != IB_SUCCESS)
-				OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D0B: "
-					"Failure initiating MLNX ExtPortInfo request (%s)\n",
-					ib_get_err_str(status));
-		}
 	}
 }
 
@@ -566,9 +558,6 @@ static void ni_rcv_process_switch(IN osm_sm_t * sm, IN osm_node_t * p_node,
 			"Failure initiating SwitchInfo request (%s)\n",
 			ib_get_err_str(status));
 
-	if (p_node->discovery_count == 1)
-		ni_rcv_get_port_info(sm, p_node, p_madw);
-
 	OSM_LOG_EXIT(sm->p_log);
 }
 
diff --git a/opensm/osm_port_info_rcv.c b/opensm/osm_port_info_rcv.c
index 5c12989..b3d4bd3 100644
--- a/opensm/osm_port_info_rcv.c
+++ b/opensm/osm_port_info_rcv.c
@@ -199,18 +199,75 @@ static void pi_rcv_process_endport(IN osm_sm_t * sm, IN osm_physp_t * p_physp,
 /**********************************************************************
  The plock must be held before calling this function.
 **********************************************************************/
-static void pi_rcv_process_switch_port(IN osm_sm_t * sm, IN osm_node_t * p_node,
-				       IN osm_physp_t * p_physp,
-				       IN ib_port_info_t * p_pi)
+static void pi_rcv_process_switch_port0(IN osm_sm_t * sm,
+					IN osm_node_t * p_node,
+					IN osm_physp_t * p_physp,
+					IN ib_port_info_t * p_pi)
+{
+	ib_api_status_t status;
+	osm_madw_context_t context;
+	uint8_t port, num_ports;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	num_ports = osm_node_get_num_physp(p_node);
+
+	context.pi_context.node_guid = osm_node_get_node_guid(p_node);
+	context.pi_context.port_guid = osm_physp_get_port_guid(p_physp);
+	context.pi_context.set_method = FALSE;
+	context.pi_context.light_sweep = FALSE;
+	context.pi_context.active_transition = FALSE;
+	context.pi_context.client_rereg = FALSE;
+
+	for (port = 1; port < num_ports; port++) {
+		status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp),
+				     IB_MAD_ATTR_PORT_INFO, cl_hton32(port),
+				     FALSE,
+				     ib_port_info_get_m_key(&p_physp->port_info),
+				     CL_DISP_MSGID_NONE, &context);
+		if (status != IB_SUCCESS)
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F16: "
+				"Failure initiating PortInfo request (%s)\n",
+				ib_get_err_str(status));
+	}
+
+	if (p_physp->need_update)
+		sm->p_subn->ignore_existing_lfts = TRUE;
+
+	pi_rcv_check_and_fix_lid(sm->p_log, p_pi, p_physp);
+
+	/*
+	   Update the PortInfo attribute.
+	 */
+	osm_physp_set_port_info(p_physp, p_pi, sm);
+
+	/* Determine if base switch port 0 */
+	if (p_node->sw &&
+	    !ib_switch_info_is_enhanced_port0(&p_node->sw->switch_info))
+		/* PortState is not used on BSP0 but just in case it is DOWN */
+		p_physp->port_info = *p_pi;
+	pi_rcv_process_endport(sm, p_physp, p_pi);
+	OSM_LOG_EXIT(sm->p_log);
+}
+
+/**********************************************************************
+ The plock must be held before calling this function.
+**********************************************************************/
+static void pi_rcv_process_switch_ext_port(IN osm_sm_t * sm,
+					   IN osm_node_t * p_node,
+					   IN osm_physp_t * p_physp,
+					   IN ib_port_info_t * p_pi)
 {
 	ib_api_status_t status = IB_SUCCESS;
 	osm_madw_context_t context;
-	osm_physp_t *p_remote_physp;
+	osm_physp_t *p_remote_physp, *physp0;
 	osm_node_t *p_remote_node;
+	ib_net64_t m_key;
 	unsigned data_vls;
 	uint8_t port_num;
 	uint8_t remote_port_num;
 	osm_dr_path_t path;
+	int mlnx_epi_supported = 0;
 
 	OSM_LOG_ENTER(sm->p_log);
 
@@ -220,10 +277,14 @@ static void pi_rcv_process_switch_port(IN osm_sm_t * sm, IN osm_node_t * p_node,
 	   then ask for NodeInfo.  Ignore the switch management port.
 	 */
 	port_num = osm_physp_get_port_num(p_physp);
+
+	if (sm->p_subn->opt.fdr10)
+		mlnx_epi_supported = is_mlnx_ext_port_info_supported(p_node->node_info.device_id);
+
 	/* if in_sweep_hop_0 is TRUE, then this means the SM is on the switch,
 	   and we got switchInfo of our local switch. Do not continue
 	   probing through the switch. */
-	if (port_num != 0 && sm->p_subn->in_sweep_hop_0 == FALSE) {
+	if (sm->p_subn->in_sweep_hop_0 == FALSE) {
 		switch (ib_port_info_get_port_state(p_pi)) {
 		case IB_LINK_DOWN:
 			p_remote_physp = osm_physp_get_remote(p_physp);
@@ -259,6 +320,26 @@ static void pi_rcv_process_switch_port(IN osm_sm_t * sm, IN osm_node_t * p_node,
 		case IB_LINK_INIT:
 		case IB_LINK_ARMED:
 		case IB_LINK_ACTIVE:
+			physp0 = osm_node_get_physp_ptr(p_node, 0);
+			if (mlnx_epi_supported) {
+				m_key = ib_port_info_get_m_key(&physp0->port_info);
+
+				context.pi_context.node_guid = osm_node_get_node_guid(p_node);
+				context.pi_context.port_guid = osm_physp_get_port_guid(p_physp);
+				context.pi_context.set_method = FALSE;
+				context.pi_context.light_sweep = FALSE;
+				context.pi_context.active_transition = FALSE;
+				context.pi_context.client_rereg = FALSE;
+				status = osm_req_get(sm,
+						     osm_physp_get_dr_path_ptr(p_physp),
+						     IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO,
+						     cl_hton32(port_num), FALSE, m_key,
+						     CL_DISP_MSGID_NONE, &context);
+				if (status != IB_SUCCESS)
+					OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F11: "
+						"Failure initiating MLNX ExtPortInfo request (%s)\n",
+						ib_get_err_str(status));
+		        }
 			/*
 			   To avoid looping forever, only probe the port if it
 			   is NOT the port that responded to the SMP.
@@ -316,46 +397,35 @@ static void pi_rcv_process_switch_port(IN osm_sm_t * sm, IN osm_node_t * p_node,
 	}
 
 	if (ib_port_info_get_port_state(p_pi) > IB_LINK_INIT && p_node->sw &&
-	    p_node->sw->need_update == 1 && port_num != 0)
+	    p_node->sw->need_update == 1)
 		p_node->sw->need_update = 0;
 
 	if (p_physp->need_update)
 		sm->p_subn->ignore_existing_lfts = TRUE;
 
-	if (port_num == 0)
-		pi_rcv_check_and_fix_lid(sm->p_log, p_pi, p_physp);
-
 	/*
 	   Update the PortInfo attribute.
 	 */
 	osm_physp_set_port_info(p_physp, p_pi, sm);
 
-	if (port_num == 0) {
-		/* Determine if base switch port 0 */
-		if (p_node->sw &&
-		    !ib_switch_info_is_enhanced_port0(&p_node->sw->switch_info))
-			/* PortState is not used on BSP0 but just in case it is DOWN */
-			p_physp->port_info = *p_pi;
-		pi_rcv_process_endport(sm, p_physp, p_pi);
-	} else {
-		if (ib_port_info_get_port_state(p_pi) == IB_LINK_DOWN)
-			goto Exit;
+	if (ib_port_info_get_port_state(p_pi) == IB_LINK_DOWN)
+		goto Exit;
 
-		p_remote_physp = osm_physp_get_remote(p_physp);
-		if (p_remote_physp) {
-			p_remote_node = osm_physp_get_node_ptr(p_remote_physp);
-			if (p_remote_node->sw) {
-				data_vls = 1U << (ib_port_info_get_op_vls(p_pi) - 1);
-				if (data_vls >= IB_MAX_NUM_VLS)
-					data_vls = IB_MAX_NUM_VLS - 1;
-				if ((uint8_t)data_vls < sm->p_subn->min_sw_data_vls) {
-					OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
-						"Setting switch port minimal data VLs to:%u defined by node:0x%"
-						PRIx64 ", port:%u\n", data_vls,
-						cl_ntoh64(osm_node_get_node_guid(p_node)),
-						port_num);
-					sm->p_subn->min_sw_data_vls = data_vls;
-				}
+	p_remote_physp = osm_physp_get_remote(p_physp);
+	if (p_remote_physp) {
+		p_remote_node = osm_physp_get_node_ptr(p_remote_physp);
+		if (p_remote_node->sw) {
+			data_vls = 1U << (ib_port_info_get_op_vls(p_pi) - 1);
+			if (data_vls >= IB_MAX_NUM_VLS)
+				data_vls = IB_MAX_NUM_VLS - 1;
+			if ((uint8_t)data_vls < sm->p_subn->min_sw_data_vls) {
+				OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+					"Setting switch port minimal data VLs "
+					"to:%u defined by node:0x%"
+					PRIx64 ", port:%u\n", data_vls,
+					cl_ntoh64(osm_node_get_node_guid(p_node)),
+					port_num);
+				sm->p_subn->min_sw_data_vls = data_vls;
 			}
 		}
 	}
@@ -469,6 +539,18 @@ static void pi_rcv_get_pkey_slvl_vla_tables(IN osm_sm_t * sm,
 	OSM_LOG_EXIT(sm->p_log);
 }
 
+static int osm_pi_rcv_update_self(IN osm_sm_t *sm, IN osm_physp_t *p_physp,
+				  IN ib_port_info_t *p_pi)
+{
+	if (ib_port_info_get_port_state(p_pi) == IB_LINK_DOWN)
+		return 0;
+
+	if (sm->p_subn->need_update || p_physp->need_update > 1)
+		return 1;
+
+	return 0;
+}
+
 static void pi_rcv_process_set(IN osm_sm_t * sm, IN osm_node_t * p_node,
 			       IN uint8_t port_num, IN osm_madw_t * p_madw)
 {
@@ -654,11 +736,7 @@ void osm_pi_rcv_process(IN void *context, IN void *data)
 		osm_dr_path_init(p_dr_path, p_smp->hop_count,
 				 p_smp->initial_path);
 
-		/* if port just inited or reached INIT state (external reset)
-		   request update for port related tables */
-		p_physp->need_update =
-		    (ib_port_info_get_port_state(p_pi) == IB_LINK_INIT ||
-		     p_physp->need_update > 1) ? 1 : 0;
+		p_physp->need_update = osm_pi_rcv_update_self(sm, p_physp, p_pi);
 
 		switch (osm_node_get_type(p_node)) {
 		case IB_NODE_TYPE_CA:
@@ -675,7 +753,12 @@ void osm_pi_rcv_process(IN void *context, IN void *data)
 				p_port->discovery_count++;
 				p_node->physp_discovered[port_num] = 1;
 			}
-			pi_rcv_process_switch_port(sm, p_node, p_physp, p_pi);
+			if (port_num == 0)
+				pi_rcv_process_switch_port0(sm, p_node,
+							    p_physp, p_pi);
+			else
+				pi_rcv_process_switch_ext_port(sm, p_node,
+							       p_physp, p_pi);
 			break;
 		default:
 			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F07: "
@@ -688,7 +771,7 @@ void osm_pi_rcv_process(IN void *context, IN void *data)
 		/*
 		   Get the tables on the physp.
 		 */
-		if (p_physp->need_update || sm->p_subn->need_update)
+		if (p_physp->need_update)
 			pi_rcv_get_pkey_slvl_vla_tables(sm, p_node, p_physp);
 
 	}
diff --git a/opensm/osm_sw_info_rcv.c b/opensm/osm_sw_info_rcv.c
index 02f6ab2..98f7b81 100644
--- a/opensm/osm_sw_info_rcv.c
+++ b/opensm/osm_sw_info_rcv.c
@@ -333,6 +333,48 @@ static boolean_t si_rcv_process_existing(IN osm_sm_t * sm,
 	return is_change_detected;
 }
 
+static void si_rcv_get_sp0_info(IN osm_sm_t * sm, IN osm_node_t * node)
+{
+	osm_madw_context_t context;
+	osm_physp_t *physp;
+	ib_api_status_t status;
+	int mlnx_epi_supported = 0;
+
+	physp = osm_node_get_physp_ptr(node, 0);
+
+	context.pi_context.node_guid = osm_node_get_node_guid(node);
+	context.pi_context.port_guid = osm_physp_get_port_guid(physp);
+	context.pi_context.set_method = FALSE;
+	context.pi_context.light_sweep = FALSE;
+	context.pi_context.active_transition = FALSE;
+	context.pi_context.client_rereg = FALSE;
+
+	status = osm_req_get(sm, osm_physp_get_dr_path_ptr(physp),
+			     IB_MAD_ATTR_PORT_INFO, 0, TRUE, 0,
+			     CL_DISP_MSGID_NONE, &context);
+	if (status != IB_SUCCESS)
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3611: "
+			"Failure initiating PortInfo request (%s)\n",
+			ib_get_err_str(status));
+
+	if (ib_switch_info_is_enhanced_port0(&node->sw->switch_info) &&
+	    sm->p_subn->opt.fdr10) {
+		mlnx_epi_supported = is_mlnx_ext_port_info_supported(node->node_info.device_id);
+		if (mlnx_epi_supported) {
+			status = osm_req_get(sm,
+					     osm_physp_get_dr_path_ptr(physp),
+					     IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO,
+					     0, TRUE, 0,
+					     CL_DISP_MSGID_NONE, &context);
+			if (status != IB_SUCCESS)
+				OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3616: "
+					"Failure initiating MLNX ExtPortInfo request (%s)\n",
+					ib_get_err_str(status));
+		}
+	}
+
+}
+
 void osm_si_rcv_process(IN void *context, IN void *data)
 {
 	osm_sm_t *sm = context;
@@ -390,6 +432,7 @@ void osm_si_rcv_process(IN void *context, IN void *data)
 		/* we might get back a request for signaling change was detected */
 		sm->p_subn->force_heavy_sweep = TRUE;
 
+	si_rcv_get_sp0_info(sm, p_node);
 	CL_PLOCK_RELEASE(sm->p_lock);
 Exit:
 	OSM_LOG_EXIT(sm->p_log);
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 3/5] opensm: Better handle topology changes in the fabric
       [not found] ` <1391425516-14462-1-git-send-email-alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
  2014-02-03 11:05   ` [PATCH 2/5] opensm: change discovery order of switch data Alex Netes
@ 2014-02-03 11:05   ` Alex Netes
       [not found]     ` <1391425516-14462-3-git-send-email-alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
  2014-02-03 11:05   ` [PATCH 4/5] opensm/osm_port_info_rcv.c: Reread pkeys from SP0 if switch rebooted during a sweep Alex Netes
                     ` (2 subsequent siblings)
  4 siblings, 1 reply; 11+ messages in thread
From: Alex Netes @ 2014-02-03 11:05 UTC (permalink / raw
  To: hal-VPRAkNaXOzVWk0Htik3J/w, linux-rdma-u79uwXL29TY76Z2rM5mHXA; +Cc: Alex Netes

The patch tries to solve the following problem:
When a newly discovered switch is rebooted during the configuration cycle,
the SM ends up setting all Initialized ports to Active, but the configuration
on the switch, such as Pkey tables, QoS, etc., might be incorrect.

The fix solves this in two steps. First, turn on the need_update flag when
a switch's StateChange bit is detected ON or a CA's neighbor switch has its
StateChange bit ON. Second, clear the StateChange bit on the switches before
any configuration is done. This ensures that we don't miss changes in
the fabric. If a switch was rebooted during a sweep, we will detect it
in a subsequent sweep and configure all its neighbors from scratch.

Signed-off-by: Alex Netes <alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
 include/iba/ib_types.h     |   60 ++++++++++++++++++++++++++++++++++++++++
 opensm/osm_port_info_rcv.c |   27 +++++++++++++++++-
 opensm/osm_state_mgr.c     |   66 ++++++++++++++++++++++++++++++++++++++++++++
 opensm/osm_ucast_mgr.c     |   15 ++-------
 4 files changed, 156 insertions(+), 12 deletions(-)

diff --git a/include/iba/ib_types.h b/include/iba/ib_types.h
index a5136d4..249ee16 100644
--- a/include/iba/ib_types.h
+++ b/include/iba/ib_types.h
@@ -6507,6 +6507,34 @@ ib_switch_info_clear_state_change(IN ib_switch_info_t * const p_si)
 * SEE ALSO
 *********/
 
+/****f* IBA Base: Types/ib_switch_info_set_state_change
+* NAME
+*	ib_switch_info_set_state_change
+*
+* DESCRIPTION
+*	Sets the switch's state change bit.
+*
+* SYNOPSIS
+*/
+static inline void OSM_API
+ib_switch_info_set_state_change(IN ib_switch_info_t * const p_si)
+{
+	p_si->life_state = (uint8_t) ((p_si->life_state & ~IB_SWITCH_PSC) | IB_SWITCH_PSC);
+}
+
+/*
+* PARAMETERS
+*	p_si
+*		[in] Pointer to a SwitchInfo attribute.
+*
+* RETURN VALUES
+*	None
+*
+* NOTES
+*
+* SEE ALSO
+*********/
+
 /****f* IBA Base: Types/ib_switch_info_get_opt_sl2vlmapping
 * NAME
 *	ib_switch_info_get_state_opt_sl2vlmapping
@@ -6535,6 +6563,38 @@ ib_switch_info_get_opt_sl2vlmapping(IN const ib_switch_info_t * const p_si)
 * SEE ALSO
 *********/
 
+/****f* IBA Base: Types/ib_switch_info_set_life_time
+* NAME
+*	ib_switch_info_set_life_time
+*
+* DESCRIPTION
+*	Sets the value of LifeTimeValue.
+*
+* SYNOPSIS
+*/
+static inline void OSM_API
+ib_switch_info_set_life_time(IN ib_switch_info_t * const p_si,
+			     IN const uint8_t life_time_val)
+{
+	p_si->life_state = (p_si->life_state & 0x1f) |
+			   (life_time_val << 3);
+}
+
+/*
+* PARAMETERS
+*	p_si
+*		[in] Pointer to a SwitchInfo attribute.
+*	life_time_val
+*		[in] LifeTimeValue.
+*
+* RETURN VALUES
+*	None.
+*
+* NOTES
+*
+* SEE ALSO
+*********/
+
 /****f* IBA Base: Types/ib_switch_info_is_enhanced_port0
 * NAME
 *	ib_switch_info_is_enhanced_port0
diff --git a/opensm/osm_port_info_rcv.c b/opensm/osm_port_info_rcv.c
index b3d4bd3..d813f1a 100644
--- a/opensm/osm_port_info_rcv.c
+++ b/opensm/osm_port_info_rcv.c
@@ -397,6 +397,7 @@ static void pi_rcv_process_switch_ext_port(IN osm_sm_t * sm,
 	}
 
 	if (ib_port_info_get_port_state(p_pi) > IB_LINK_INIT && p_node->sw &&
+	    !ib_switch_info_get_state_change(&p_node->sw->switch_info) &&
 	    p_node->sw->need_update == 1)
 		p_node->sw->need_update = 0;
 
@@ -545,7 +546,8 @@ static int osm_pi_rcv_update_self(IN osm_sm_t *sm, IN osm_physp_t *p_physp,
 	if (ib_port_info_get_port_state(p_pi) == IB_LINK_DOWN)
 		return 0;
 
-	if (sm->p_subn->need_update || p_physp->need_update > 1)
+	if (sm->p_subn->need_update || p_physp->need_update > 1 ||
+	    ib_port_info_get_port_state(p_pi) == IB_LINK_INIT)
 		return 1;
 
 	return 0;
@@ -608,6 +610,28 @@ static void pi_rcv_process_set(IN osm_sm_t * sm, IN osm_node_t * p_node,
 	OSM_LOG_EXIT(sm->p_log);
 }
 
+static int osm_pi_rcv_update_neighbor(IN osm_physp_t *p_physp)
+{
+	osm_physp_t *p_rem_physp = p_physp->p_remote_physp;
+	osm_node_t *p_node;
+
+	/*
+	 * Our own port - this is the only case where CA port
+	 * is discovered before its' neighbor port
+	 */
+	if (!p_rem_physp)
+		return p_physp->need_update;
+
+	p_node = osm_physp_get_node_ptr(p_rem_physp);
+	CL_ASSERT(p_node);
+
+	/* CA/RTR to CA/RTR connection */
+	if (!p_node->sw)
+		return p_physp->need_update;
+
+	return (ib_switch_info_get_state_change(&p_node->sw->switch_info) ? 1 : p_physp->need_update);
+}
+
 void osm_pi_rcv_process(IN void *context, IN void *data)
 {
 	osm_sm_t *sm = context;
@@ -745,6 +769,7 @@ void osm_pi_rcv_process(IN void *context, IN void *data)
 				p_port->discovery_count++;
 				p_node->physp_discovered[port_num] = 1;
 			}
+			p_physp->need_update = osm_pi_rcv_update_neighbor(p_physp);
 			pi_rcv_process_ca_or_router_port(sm, p_node, p_physp,
 							 p_pi);
 			break;
diff --git a/opensm/osm_state_mgr.c b/opensm/osm_state_mgr.c
index c86627d..5080b22 100644
--- a/opensm/osm_state_mgr.c
+++ b/opensm/osm_state_mgr.c
@@ -531,6 +531,60 @@ static void query_sm_info(cl_map_item_t * item, void *cxt)
 			ib_get_err_str(ret));
 }
 
+static void state_mgr_reset_state_change_bit(IN cl_map_item_t * obj,
+					     IN void *context)
+{
+	osm_madw_context_t mad_context;
+	osm_switch_t *p_sw = (osm_switch_t *) obj;
+	osm_sm_t *sm = context;
+	osm_node_t *p_node;
+	osm_physp_t *p_physp;
+	osm_dr_path_t *p_path;
+	ib_api_status_t status;
+	ib_switch_info_t si;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	CL_ASSERT(p_sw);
+
+	p_node = p_sw->p_node;
+
+	CL_ASSERT(p_node);
+
+	p_physp = osm_node_get_physp_ptr(p_node, 0);
+	p_path = osm_physp_get_dr_path_ptr(p_physp);
+
+	if (!ib_switch_info_get_state_change(&p_sw->switch_info))
+		goto exit;
+
+	si = p_sw->switch_info;
+
+	ib_switch_info_set_state_change(&si);
+
+	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+		"Resetting PortStateChange on switch GUID 0x%016" PRIx64 "\n",
+		cl_ntoh64(osm_node_get_node_guid(p_node)));
+
+	mad_context.si_context.light_sweep = FALSE;
+	mad_context.si_context.node_guid = osm_node_get_node_guid(p_node);
+	mad_context.si_context.set_method = TRUE;
+	mad_context.si_context.lft_top_change = FALSE;
+
+	status = osm_req_set(sm, p_path, (uint8_t *) &si,
+			     sizeof(si), IB_MAD_ATTR_SWITCH_INFO,
+			     0, FALSE,
+			     ib_port_info_get_m_key(&p_physp->port_info),
+			     CL_DISP_MSGID_NONE, &mad_context);
+
+	if (status != IB_SUCCESS)
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 332A: "
+			"Sending SwitchInfo attribute failed (%s)\n",
+			ib_get_err_str(status));
+
+exit:
+	OSM_LOG_EXIT(sm->p_log);
+}
+
 static void state_mgr_update_node_desc(IN cl_map_item_t * obj, IN void *context)
 {
 	osm_madw_context_t mad_context;
@@ -576,6 +630,14 @@ exit:
 	OSM_LOG_EXIT(sm->p_log);
 }
 
+void osm_reset_switch_state_change_bit(IN osm_opensm_t *osm)
+{
+	CL_PLOCK_ACQUIRE(&osm->lock);
+	cl_qmap_apply_func(&osm->subn.sw_guid_tbl, state_mgr_reset_state_change_bit,
+			   &osm->sm);
+	CL_PLOCK_RELEASE(&osm->lock);
+}
+
 void osm_update_node_desc(IN osm_opensm_t *osm)
 {
 	CL_PLOCK_ACQUIRE(&osm->lock);
@@ -1340,6 +1402,10 @@ repeat_discovery:
 	if (sm->p_subn->sm_state == IB_SMINFO_STATE_DISCOVERING)
 		osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_DISCOVERY_COMPLETED);
 
+	osm_reset_switch_state_change_bit(sm->p_subn->p_osm);
+	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
+		return;
+
 	osm_pkey_mgr_process(sm->p_subn->p_osm);
 
 	/* try to restore SA DB (this should be before lid_mgr
diff --git a/opensm/osm_ucast_mgr.c b/opensm/osm_ucast_mgr.c
index f53e288..8194307 100644
--- a/opensm/osm_ucast_mgr.c
+++ b/opensm/osm_ucast_mgr.c
@@ -938,18 +938,11 @@ static void ucast_mgr_set_fwd_top(IN cl_map_item_t * p_map_item,
 	} else
 		context.si_context.lft_top_change = FALSE;
 
-	/* check to see if the change state bit is on. If it is - then we
-	   need to clear it. */
-	if (ib_switch_info_get_state_change(&si))
-		life_state = ((p_mgr->p_subn->opt.packet_life_time << 3)
-			      | (si.life_state & IB_SWITCH_PSC)) & 0xfc;
-	else
-		life_state = (p_mgr->p_subn->opt.packet_life_time << 3) & 0xf8;
-
-	if (life_state != si.life_state || ib_switch_info_get_state_change(&si)) {
+	life_state = si.life_state;
+	ib_switch_info_set_life_time(&si, p_mgr->p_subn->opt.packet_life_time);
+
+	if (life_state != si.life_state)
 		set_swinfo_require = TRUE;
-		si.life_state = life_state;
-	}
 
 	if (set_swinfo_require) {
 		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 4/5] opensm/osm_port_info_rcv.c: Reread pkeys from SP0 if switch rebooted during a sweep
       [not found] ` <1391425516-14462-1-git-send-email-alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
  2014-02-03 11:05   ` [PATCH 2/5] opensm: change discovery order of switch data Alex Netes
  2014-02-03 11:05   ` [PATCH 3/5] opensm: Better handle topology changes in the fabric Alex Netes
@ 2014-02-03 11:05   ` Alex Netes
       [not found]     ` <1391425516-14462-4-git-send-email-alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
  2014-02-03 11:05   ` [PATCH 5/5] opensm: Resend LFTs/VLArb/SL2VL MADs in case of error Alex Netes
  2014-02-03 19:42   ` [PATCH 1/5] opensm/include/iba/ib_types.h: Fix shadow declaration warnings Hal Rosenstock
  4 siblings, 1 reply; 11+ messages in thread
From: Alex Netes @ 2014-02-03 11:05 UTC (permalink / raw
  To: hal-VPRAkNaXOzVWk0Htik3J/w, linux-rdma-u79uwXL29TY76Z2rM5mHXA; +Cc: Alex Netes

SM may miss switch reboot because of a long sweep. In that case it won't
reread pkey tables from Switch Port 0.

Signed-off-by: Alex Netes <alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
 opensm/osm_port_info_rcv.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/opensm/osm_port_info_rcv.c b/opensm/osm_port_info_rcv.c
index d813f1a..eb68dbf 100644
--- a/opensm/osm_port_info_rcv.c
+++ b/opensm/osm_port_info_rcv.c
@@ -796,7 +796,8 @@ void osm_pi_rcv_process(IN void *context, IN void *data)
 		/*
 		   Get the tables on the physp.
 		 */
-		if (p_physp->need_update)
+		if (p_physp->need_update || (p_node->sw &&
+					     p_node->sw->need_update))
 			pi_rcv_get_pkey_slvl_vla_tables(sm, p_node, p_physp);
 
 	}
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 5/5] opensm: Resend LFTs/VLArb/SL2VL MADs in case of error
       [not found] ` <1391425516-14462-1-git-send-email-alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
                     ` (2 preceding siblings ...)
  2014-02-03 11:05   ` [PATCH 4/5] opensm/osm_port_info_rcv.c: Reread pkeys from SP0 if switch rebooted during a sweep Alex Netes
@ 2014-02-03 11:05   ` Alex Netes
       [not found]     ` <1391425516-14462-5-git-send-email-alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
  2014-02-03 19:42   ` [PATCH 1/5] opensm/include/iba/ib_types.h: Fix shadow declaration warnings Hal Rosenstock
  4 siblings, 1 reply; 11+ messages in thread
From: Alex Netes @ 2014-02-03 11:05 UTC (permalink / raw
  To: hal-VPRAkNaXOzVWk0Htik3J/w, linux-rdma-u79uwXL29TY76Z2rM5mHXA; +Cc: Alex Netes

There are several MADs that we only SET during the sweep (and never
GET).
Zero the stored block so that, if the MAD ends up with an error,
we will resend it during the next sweep.

Signed-off-by: Alex Netes <alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
 opensm/osm_qos.c       |   13 +++++++++++++
 opensm/osm_ucast_mgr.c |    7 +++++++
 2 files changed, 20 insertions(+), 0 deletions(-)

diff --git a/opensm/osm_qos.c b/opensm/osm_qos.c
index a301803..473e3c8 100644
--- a/opensm/osm_qos.c
+++ b/opensm/osm_qos.c
@@ -183,6 +183,13 @@ static ib_api_status_t vlarb_update_table_block(osm_sm_t * sm,
 	if (!p_mad)
 		return IB_INSUFFICIENT_MEMORY;
 
+	/*
+	 * Zero the stored VL Arbitration block, so in case the MAD will
+	 * end up with error, we will resend it in the next sweep.
+	 */
+	memset(&p->vl_arb[block_num], 0,
+	       block_length * sizeof(block.vl_entry[0]));
+
 	cl_qlist_insert_tail(mad_list, &p_mad->list_item);
 
 	return IB_SUCCESS;
@@ -272,6 +279,12 @@ static ib_api_status_t sl2vl_update_table(osm_sm_t * sm, osm_physp_t * p,
 	if (!p_mad)
 		return IB_INSUFFICIENT_MEMORY;
 
+	/*
+	 * Zero the stored SL2VL block, so in case the MAD will
+	 * end up with error, we will resend it in the next sweep.
+	 */
+	memset(p_tbl, 0, sizeof(tbl));
+
 	cl_qlist_insert_tail(mad_list, &p_mad->list_item);
 	return IB_SUCCESS;
 }
diff --git a/opensm/osm_ucast_mgr.c b/opensm/osm_ucast_mgr.c
index 8194307..c8a7360 100644
--- a/opensm/osm_ucast_mgr.c
+++ b/opensm/osm_ucast_mgr.c
@@ -1002,6 +1002,13 @@ static int set_lft_block(IN osm_switch_t *p_sw, IN osm_ucast_mgr_t *p_mgr,
 		    IB_SMP_DATA_SIZE))
 		return 0;
 
+	/*
+	 * Reset the stored LFT block to OSM_NO_PATH, so that if the MAD
+	 * ends up with an error, we will resend it in the next sweep.
+	 */
+	memset(p_sw->lft + block_id_ho * IB_SMP_DATA_SIZE, OSM_NO_PATH,
+	       IB_SMP_DATA_SIZE);
+
 	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
 		"Writing FT block %u to switch 0x%" PRIx64 "\n", block_id_ho,
 		cl_ntoh64(context.lft_context.node_guid));
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/5] opensm/include/iba/ib_types.h: Fix shadow declaration warnings
       [not found] ` <1391425516-14462-1-git-send-email-alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
                     ` (3 preceding siblings ...)
  2014-02-03 11:05   ` [PATCH 5/5] opensm: Resend LFTs/VLArb/SL2VL MADs in case of error Alex Netes
@ 2014-02-03 19:42   ` Hal Rosenstock
  4 siblings, 0 replies; 11+ messages in thread
From: Hal Rosenstock @ 2014-02-03 19:42 UTC (permalink / raw
  To: Alex Netes; +Cc: hal-VPRAkNaXOzVWk0Htik3J/w, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 2/3/2014 6:05 AM, Alex Netes wrote:
> Calling cl_ntoh32(cl_ntoh32()) causes shadow parameters redefinition.
> The fix is to split the cl_ntoh32() calls.
> 
> Signed-off-by: Alex Netes <alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

Thanks. Applied.

-- Hal
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/5] opensm: change discovery order of switch data
       [not found]     ` <1391425516-14462-2-git-send-email-alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
@ 2014-02-03 19:42       ` Hal Rosenstock
  0 siblings, 0 replies; 11+ messages in thread
From: Hal Rosenstock @ 2014-02-03 19:42 UTC (permalink / raw
  To: Alex Netes; +Cc: hal-VPRAkNaXOzVWk0Htik3J/w, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 2/3/2014 6:05 AM, Alex Netes wrote:
> Previously upon receiving GetResp(NodeInfo) of a switch, SM sent
> Get(SwitchInfo) and Get(PortInfo) to all its ports in parallel. Upon receiving
> GetResp(PortInfo) SM sends Get(PkeyTable). The problem is that we need
> SwitchInfo.PartEnforceCap value to calculate max Pkeys block, so in case
> one of the GetResp(PortInfo) arrives prior to GetResp(SwitchInfo) this
> value won't be set.
> The fix is to change the discovery order. Upon receiving GetResp(NodeInfo),
> SM sends Get(SwitchInfo). Upon receiving GetResp(SwitchInfo), SM sends
> Get(PortInfo port0). If we don't get GetResp(PortInfo port=0), SM will
> drop the switch, otherwise SM sends Get(PortInfo ExtPorts).
> Moreover, now SM queries for ExtPortInfo and Pkeys only for non-Down
> ports.
> 
> Signed-off-by: Alex Netes <alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

Thanks. Applied.

-- Hal
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/5] opensm: Better handle topology changes in the fabric
       [not found]     ` <1391425516-14462-3-git-send-email-alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
@ 2014-02-03 19:42       ` Hal Rosenstock
  0 siblings, 0 replies; 11+ messages in thread
From: Hal Rosenstock @ 2014-02-03 19:42 UTC (permalink / raw
  To: Alex Netes; +Cc: hal-VPRAkNaXOzVWk0Htik3J/w, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 2/3/2014 6:05 AM, Alex Netes wrote:
> The patch tries to solve the following problem:
> When a newly discovered switch is rebooted during the configuration cycle,
> SM ends up setting all Initialized ports to Active, but the configuration
> on the switch such as Pkey tables, QoS, etc. might be incorrect.
> 
> The fix solves this in two steps. First, turn on the need_update flag when
> a switch's StateChange bit is detected ON or a CA's neighbor switch has
> StateChange bit ON. Second, clear the StateChange bit on the switches before
> any configuration is done. This ensures that we don't miss changes in
> the fabric. If a switch was rebooted during a sweep, we will detect it
> in a subsequent sweep and configure all its neighbors from scratch.
> 
> Signed-off-by: Alex Netes <alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

Thanks. Applied.

-- Hal
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 4/5] opensm/osm_port_info_rcv.c: Reread pkeys from SP0 if switch rebooted during a sweep
       [not found]     ` <1391425516-14462-4-git-send-email-alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
@ 2014-02-03 19:42       ` Hal Rosenstock
  0 siblings, 0 replies; 11+ messages in thread
From: Hal Rosenstock @ 2014-02-03 19:42 UTC (permalink / raw
  To: Alex Netes; +Cc: hal-VPRAkNaXOzVWk0Htik3J/w, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 2/3/2014 6:05 AM, Alex Netes wrote:
> SM may miss switch reboot because of a long sweep. In that case it won't
> reread pkey tables from Switch Port 0.
> 
> Signed-off-by: Alex Netes <alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

Thanks. Applied.

-- Hal
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 5/5] opensm: Resend LFTs/VLArb/SL2VL MADs in case of error
       [not found]     ` <1391425516-14462-5-git-send-email-alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
@ 2014-02-03 19:43       ` Hal Rosenstock
  2014-02-04  5:54       ` Jens Domke
  1 sibling, 0 replies; 11+ messages in thread
From: Hal Rosenstock @ 2014-02-03 19:43 UTC (permalink / raw
  To: Alex Netes; +Cc: hal-VPRAkNaXOzVWk0Htik3J/w, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 2/3/2014 6:05 AM, Alex Netes wrote:
> There are several MADs that we only SET during the sweep (and never
> GET).
> Zero the stored block, so that if the MAD ends up with an error,
> we will resend it during the next sweep.
> 
> Signed-off-by: Alex Netes <alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

Thanks. Applied.

-- Hal
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 5/5] opensm: Resend LFTs/VLArb/SL2VL MADs in case of error
       [not found]     ` <1391425516-14462-5-git-send-email-alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
  2014-02-03 19:43       ` Hal Rosenstock
@ 2014-02-04  5:54       ` Jens Domke
  1 sibling, 0 replies; 11+ messages in thread
From: Jens Domke @ 2014-02-04  5:54 UTC (permalink / raw
  To: Alex Netes; +Cc: hal-VPRAkNaXOzVWk0Htik3J/w, linux-rdma-u79uwXL29TY76Z2rM5mHXA

[-- Attachment #1: Type: text/plain, Size: 2384 bytes --]

Dear Alex,

the memset call in sl2vl_update_table causes segmentation faults if 
force_update=1, since p_tbl won't get anything assigned and remains NULL.
Please, find a possible fix attached.

Regards,
Jens

On 03.02.14 20:05, Alex Netes wrote:
> There are several MADs that we only SET during the sweep (and never
> GET).
> Zero the stored block, so that if the MAD ends up with an error,
> we will resend it during the next sweep.
>
> Signed-off-by: Alex Netes <alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> ---
>   opensm/osm_qos.c       |   13 +++++++++++++
>   opensm/osm_ucast_mgr.c |    7 +++++++
>   2 files changed, 20 insertions(+), 0 deletions(-)
>
> diff --git a/opensm/osm_qos.c b/opensm/osm_qos.c
> index a301803..473e3c8 100644
> --- a/opensm/osm_qos.c
> +++ b/opensm/osm_qos.c
> @@ -183,6 +183,13 @@ static ib_api_status_t vlarb_update_table_block(osm_sm_t * sm,
>   	if (!p_mad)
>   		return IB_INSUFFICIENT_MEMORY;
>
> +	/*
> +	 * Zero the stored VL Arbitration block, so in case the MAD will
> +	 * end up with error, we will resend it in the next sweep.
> +	 */
> +	memset(&p->vl_arb[block_num], 0,
> +	       block_length * sizeof(block.vl_entry[0]));
> +
>   	cl_qlist_insert_tail(mad_list, &p_mad->list_item);
>
>   	return IB_SUCCESS;
> @@ -272,6 +279,12 @@ static ib_api_status_t sl2vl_update_table(osm_sm_t * sm, osm_physp_t * p,
>   	if (!p_mad)
>   		return IB_INSUFFICIENT_MEMORY;
>
> +	/*
> +	 * Zero the stored SL2VL block, so in case the MAD will
> +	 * end up with error, we will resend it in the next sweep.
> +	 */
> +	memset(p_tbl, 0, sizeof(tbl));
> +
>   	cl_qlist_insert_tail(mad_list, &p_mad->list_item);
>   	return IB_SUCCESS;
>   }
> diff --git a/opensm/osm_ucast_mgr.c b/opensm/osm_ucast_mgr.c
> index 8194307..c8a7360 100644
> --- a/opensm/osm_ucast_mgr.c
> +++ b/opensm/osm_ucast_mgr.c
> @@ -1002,6 +1002,13 @@ static int set_lft_block(IN osm_switch_t *p_sw, IN osm_ucast_mgr_t *p_mgr,
>   		    IB_SMP_DATA_SIZE))
>   		return 0;
>
> +	/*
> +	 * Zero the stored LFT block, so in case the MAD will end up
> +	 * with error, we will resend it in the next sweep.
> +	 */
> +	memset(p_sw->lft + block_id_ho * IB_SMP_DATA_SIZE, OSM_NO_PATH,
> +	       IB_SMP_DATA_SIZE);
> +
>   	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
>   		"Writing FT block %u to switch 0x%" PRIx64 "\n", block_id_ho,
>   		cl_ntoh64(context.lft_context.node_guid));
>

[-- Attachment #2: 0001-osm_qos.c-fix-potential-segmentation-fault.patch --]
[-- Type: text/plain, Size: 1234 bytes --]

>From 3cbe8f10c4ab7d83c5898b67e42d9e99be355c05 Mon Sep 17 00:00:00 2001
From: Jens Domke <domke.j.aa@m.titech.ac.jp>
Date: Tue, 4 Feb 2014 14:47:44 +0900
Subject: [PATCH 1/1] osm_qos.c: fix potential segmentation fault

if force_update=1, then p_tbl remains NULL and therefore memset
crashes

Signed-off-by: Jens Domke <domke.j.aa@m.titech.ac.jp>
---
 opensm/osm_qos.c |    5 +++--
 1 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/opensm/osm_qos.c b/opensm/osm_qos.c
index 473e3c8..76f0ff6 100644
--- a/opensm/osm_qos.c
+++ b/opensm/osm_qos.c
@@ -252,7 +252,7 @@ static ib_api_status_t sl2vl_update_table(osm_sm_t * sm, osm_physp_t * p,
 					  const ib_slvl_table_t * sl2vl_table,
 					  cl_qlist_t *mad_list)
 {
-	ib_slvl_table_t tbl, *p_tbl;
+	ib_slvl_table_t tbl, *p_tbl = NULL;
 	unsigned vl_mask;
 	uint8_t vl1, vl2;
 	int i;
@@ -283,7 +283,8 @@ static ib_api_status_t sl2vl_update_table(osm_sm_t * sm, osm_physp_t * p,
 	 * Zero the stored SL2VL block, so in case the MAD will
 	 * end up with error, we will resend it in the next sweep.
 	 */
-	memset(p_tbl, 0, sizeof(tbl));
+	if (p_tbl)
+		memset(p_tbl, 0, sizeof(tbl));
 
 	cl_qlist_insert_tail(mad_list, &p_mad->list_item);
 	return IB_SUCCESS;
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2014-02-04  5:54 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-02-03 11:05 [PATCH 1/5] opensm/include/iba/ib_types.h: Fix shadow declaration warnings Alex Netes
     [not found] ` <1391425516-14462-1-git-send-email-alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-02-03 11:05   ` [PATCH 2/5] opensm: change discovery order of switch data Alex Netes
     [not found]     ` <1391425516-14462-2-git-send-email-alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-02-03 19:42       ` Hal Rosenstock
2014-02-03 11:05   ` [PATCH 3/5] opensm: Better handle topology changes in the fabric Alex Netes
     [not found]     ` <1391425516-14462-3-git-send-email-alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-02-03 19:42       ` Hal Rosenstock
2014-02-03 11:05   ` [PATCH 4/5] opensm/osm_port_info_rcv.c: Reread pkeys from SP0 if switch rebooted during a sweep Alex Netes
     [not found]     ` <1391425516-14462-4-git-send-email-alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-02-03 19:42       ` Hal Rosenstock
2014-02-03 11:05   ` [PATCH 5/5] opensm: Resend LFTs/VLArb/SL2VL MADs in case of error Alex Netes
     [not found]     ` <1391425516-14462-5-git-send-email-alexne-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-02-03 19:43       ` Hal Rosenstock
2014-02-04  5:54       ` Jens Domke
2014-02-03 19:42   ` [PATCH 1/5] opensm/include/iba/ib_types.h: Fix shadow declaration warnings Hal Rosenstock

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.