All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next v3 0/3] tcp: make trace of reset logic complete
@ 2024-03-29  3:42 Jason Xing
  2024-03-29  3:42 ` [PATCH net-next v3 1/3] trace: adjust TP_STORE_ADDR_PORTS_SKB() parameters Jason Xing
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Jason Xing @ 2024-03-29  3:42 UTC (permalink / raw)
  To: edumazet, mhiramat, mathieu.desnoyers, rostedt, kuba, pabeni,
	davem
  Cc: netdev, linux-trace-kernel, kerneljasonxing, Jason Xing

From: Jason Xing <kernelxing@tencent.com>

Before this series, we missed some cases where the TCP layer could send
an RST but we could not trace it. So I decided to complete it :)

v3
1. fix a format problem in patch [3/3]

v2
1. fix spelling mistakes

Jason Xing (3):
  trace: adjust TP_STORE_ADDR_PORTS_SKB() parameters
  trace: tcp: fully support trace_tcp_send_reset
  tcp: add location into reset trace process

 include/trace/events/tcp.h | 67 ++++++++++++++++++++++++++++++--------
 net/ipv4/tcp_ipv4.c        |  4 +--
 net/ipv4/tcp_output.c      |  2 +-
 net/ipv6/tcp_ipv6.c        |  3 +-
 4 files changed, 59 insertions(+), 17 deletions(-)

-- 
2.37.3


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH net-next v3 1/3] trace: adjust TP_STORE_ADDR_PORTS_SKB() parameters
  2024-03-29  3:42 [PATCH net-next v3 0/3] tcp: make trace of reset logic complete Jason Xing
@ 2024-03-29  3:42 ` Jason Xing
  2024-03-29  8:49   ` Eric Dumazet
  2024-03-29  3:42 ` [PATCH net-next v3 2/3] trace: tcp: fully support trace_tcp_send_reset Jason Xing
  2024-03-29  3:42 ` [PATCH net-next v3 3/3] tcp: add location into reset trace process Jason Xing
  2 siblings, 1 reply; 10+ messages in thread
From: Jason Xing @ 2024-03-29  3:42 UTC (permalink / raw)
  To: edumazet, mhiramat, mathieu.desnoyers, rostedt, kuba, pabeni,
	davem
  Cc: netdev, linux-trace-kernel, kerneljasonxing, Jason Xing

From: Jason Xing <kernelxing@tencent.com>

Introducing entry_saddr and entry_daddr parameters in this macro
for later use can help us record the reverse 4-tuple by analyzing
the 4-tuple of the incoming skb when receiving.

Signed-off-by: Jason Xing <kernelxing@tencent.com>
---
 include/trace/events/tcp.h | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 3c08a0846c47..194425f69642 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -273,15 +273,15 @@ TRACE_EVENT(tcp_probe,
 		  __entry->skbaddr, __entry->skaddr)
 );
 
-#define TP_STORE_ADDR_PORTS_SKB_V4(__entry, skb)			\
+#define TP_STORE_ADDR_PORTS_SKB_V4(skb, entry_saddr, entry_daddr)	\
 	do {								\
 		const struct tcphdr *th = (const struct tcphdr *)skb->data; \
-		struct sockaddr_in *v4 = (void *)__entry->saddr;	\
+		struct sockaddr_in *v4 = (void *)entry_saddr;		\
 									\
 		v4->sin_family = AF_INET;				\
 		v4->sin_port = th->source;				\
 		v4->sin_addr.s_addr = ip_hdr(skb)->saddr;		\
-		v4 = (void *)__entry->daddr;				\
+		v4 = (void *)entry_daddr;				\
 		v4->sin_family = AF_INET;				\
 		v4->sin_port = th->dest;				\
 		v4->sin_addr.s_addr = ip_hdr(skb)->daddr;		\
@@ -289,29 +289,30 @@ TRACE_EVENT(tcp_probe,
 
 #if IS_ENABLED(CONFIG_IPV6)
 
-#define TP_STORE_ADDR_PORTS_SKB(__entry, skb)				\
+#define TP_STORE_ADDR_PORTS_SKB(skb, entry_saddr, entry_daddr)		\
 	do {								\
 		const struct iphdr *iph = ip_hdr(skb);			\
 									\
 		if (iph->version == 6) {				\
 			const struct tcphdr *th = (const struct tcphdr *)skb->data; \
-			struct sockaddr_in6 *v6 = (void *)__entry->saddr; \
+			struct sockaddr_in6 *v6 = (void *)entry_saddr;	\
 									\
 			v6->sin6_family = AF_INET6;			\
 			v6->sin6_port = th->source;			\
 			v6->sin6_addr = ipv6_hdr(skb)->saddr;		\
-			v6 = (void *)__entry->daddr;			\
+			v6 = (void *)entry_daddr;			\
 			v6->sin6_family = AF_INET6;			\
 			v6->sin6_port = th->dest;			\
 			v6->sin6_addr = ipv6_hdr(skb)->daddr;		\
 		} else							\
-			TP_STORE_ADDR_PORTS_SKB_V4(__entry, skb);	\
+			TP_STORE_ADDR_PORTS_SKB_V4(skb, entry_saddr,	\
+						   entry_daddr); \
 	} while (0)
 
 #else
 
-#define TP_STORE_ADDR_PORTS_SKB(__entry, skb)		\
-	TP_STORE_ADDR_PORTS_SKB_V4(__entry, skb)
+#define TP_STORE_ADDR_PORTS_SKB(skb, entry_saddr, entry_daddr)		\
+	TP_STORE_ADDR_PORTS_SKB_V4(skb, entry_saddr, entry_daddr)
 
 #endif
 
@@ -336,7 +337,7 @@ DECLARE_EVENT_CLASS(tcp_event_skb,
 		memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
 		memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
 
-		TP_STORE_ADDR_PORTS_SKB(__entry, skb);
+		TP_STORE_ADDR_PORTS_SKB(skb, __entry->saddr, __entry->daddr);
 	),
 
 	TP_printk("skbaddr=%p src=%pISpc dest=%pISpc",
-- 
2.37.3


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH net-next v3 2/3] trace: tcp: fully support trace_tcp_send_reset
  2024-03-29  3:42 [PATCH net-next v3 0/3] tcp: make trace of reset logic complete Jason Xing
  2024-03-29  3:42 ` [PATCH net-next v3 1/3] trace: adjust TP_STORE_ADDR_PORTS_SKB() parameters Jason Xing
@ 2024-03-29  3:42 ` Jason Xing
  2024-03-29  9:07   ` Eric Dumazet
  2024-03-29  3:42 ` [PATCH net-next v3 3/3] tcp: add location into reset trace process Jason Xing
  2 siblings, 1 reply; 10+ messages in thread
From: Jason Xing @ 2024-03-29  3:42 UTC (permalink / raw)
  To: edumazet, mhiramat, mathieu.desnoyers, rostedt, kuba, pabeni,
	davem
  Cc: netdev, linux-trace-kernel, kerneljasonxing, Jason Xing

From: Jason Xing <kernelxing@tencent.com>

Prior to this patch, what we can see by enabling trace_tcp_send_reset
only covers two circumstances:
1) active rst mode
2) non-active rst mode and based on the full socket

That means an inconsistency occurs if we use tcpdump and the trace
simultaneously to see how an RST happens.

It's necessary to take other cases into consideration as well,
say:
1) time-wait socket
2) no socket
...

By parsing the incoming skb and reversing its 4-tuple, we can
know the exact 'flow', which might not exist.

Sample output after applying this patch:
1. tcp_send_reset: skbaddr=XXX skaddr=XXX src=ip:port dest=ip:port
state=TCP_ESTABLISHED
2. tcp_send_reset: skbaddr=000...000 skaddr=XXX src=ip:port dest=ip:port
state=UNKNOWN
Note:
1) UNKNOWN means we cannot extract the right information from skb.
2) skbaddr/skaddr could be 0

Signed-off-by: Jason Xing <kernelxing@tencent.com>
---
 include/trace/events/tcp.h | 39 ++++++++++++++++++++++++++++++++++++--
 net/ipv4/tcp_ipv4.c        |  4 ++--
 net/ipv6/tcp_ipv6.c        |  3 ++-
 3 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 194425f69642..289438c54227 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -78,11 +78,46 @@ DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb,
  * skb of trace_tcp_send_reset is the skb that caused RST. In case of
  * active reset, skb should be NULL
  */
-DEFINE_EVENT(tcp_event_sk_skb, tcp_send_reset,
+TRACE_EVENT(tcp_send_reset,
 
 	TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
 
-	TP_ARGS(sk, skb)
+	TP_ARGS(sk, skb),
+
+	TP_STRUCT__entry(
+		__field(const void *, skbaddr)
+		__field(const void *, skaddr)
+		__field(int, state)
+		__array(__u8, saddr, sizeof(struct sockaddr_in6))
+		__array(__u8, daddr, sizeof(struct sockaddr_in6))
+	),
+
+	TP_fast_assign(
+		__entry->skbaddr = skb;
+		__entry->skaddr = sk;
+		/* Zero means unknown state. */
+		__entry->state = sk ? sk->sk_state : 0;
+
+		memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
+		memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
+
+		if (sk && sk_fullsock(sk)) {
+			const struct inet_sock *inet = inet_sk(sk);
+
+			TP_STORE_ADDR_PORTS(__entry, inet, sk);
+		} else {
+			/*
+			 * We should reverse the 4-tuple of skb, so later
+			 * it can print the right flow direction of rst.
+			 */
+			TP_STORE_ADDR_PORTS_SKB(skb, entry->daddr, entry->saddr);
+		}
+	),
+
+	TP_printk("skbaddr=%p skaddr=%p src=%pISpc dest=%pISpc state=%s",
+		  __entry->skbaddr, __entry->skaddr,
+		  __entry->saddr, __entry->daddr,
+		  __entry->state ? show_tcp_state_name(__entry->state) : "UNKNOWN")
 );
 
 /*
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a22ee5838751..d5c4a969c066 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -868,10 +868,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 	 */
 	if (sk) {
 		arg.bound_dev_if = sk->sk_bound_dev_if;
-		if (sk_fullsock(sk))
-			trace_tcp_send_reset(sk, skb);
 	}
 
+	trace_tcp_send_reset(sk, skb);
+
 	BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
 		     offsetof(struct inet_timewait_sock, tw_bound_dev_if));
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3f4cba49e9ee..8e9c59b6c00c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1113,7 +1113,6 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
 	if (sk) {
 		oif = sk->sk_bound_dev_if;
 		if (sk_fullsock(sk)) {
-			trace_tcp_send_reset(sk, skb);
 			if (inet6_test_bit(REPFLOW, sk))
 				label = ip6_flowlabel(ipv6h);
 			priority = READ_ONCE(sk->sk_priority);
@@ -1129,6 +1128,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
 			label = ip6_flowlabel(ipv6h);
 	}
 
+	trace_tcp_send_reset(sk, skb);
+
 	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
 			     ipv6_get_dsfield(ipv6h), label, priority, txhash,
 			     &key);
-- 
2.37.3


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH net-next v3 3/3] tcp: add location into reset trace process
  2024-03-29  3:42 [PATCH net-next v3 0/3] tcp: make trace of reset logic complete Jason Xing
  2024-03-29  3:42 ` [PATCH net-next v3 1/3] trace: adjust TP_STORE_ADDR_PORTS_SKB() parameters Jason Xing
  2024-03-29  3:42 ` [PATCH net-next v3 2/3] trace: tcp: fully support trace_tcp_send_reset Jason Xing
@ 2024-03-29  3:42 ` Jason Xing
  2024-03-29  9:13   ` Eric Dumazet
  2 siblings, 1 reply; 10+ messages in thread
From: Jason Xing @ 2024-03-29  3:42 UTC (permalink / raw)
  To: edumazet, mhiramat, mathieu.desnoyers, rostedt, kuba, pabeni,
	davem
  Cc: netdev, linux-trace-kernel, kerneljasonxing, Jason Xing

From: Jason Xing <kernelxing@tencent.com>

In addition to knowing the 4-tuple of the flow which generates the RST,
the reason why it does so is very important, because there are several
cases where an RST is sent and we have no clue which one exactly
triggered it.

Adding the location of the reset process can help us more, like what
trace_kfree_skb does.

Signed-off-by: Jason Xing <kernelxing@tencent.com>
---
 include/trace/events/tcp.h | 13 +++++++++----
 net/ipv4/tcp_ipv4.c        |  2 +-
 net/ipv4/tcp_output.c      |  2 +-
 net/ipv6/tcp_ipv6.c        |  2 +-
 4 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 289438c54227..7a6dc525bfc7 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -80,13 +80,16 @@ DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb,
  */
 TRACE_EVENT(tcp_send_reset,
 
-	TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
+	TP_PROTO(const struct sock *sk,
+		 const struct sk_buff *skb,
+		 void *location),
 
-	TP_ARGS(sk, skb),
+	TP_ARGS(sk, skb, location),
 
 	TP_STRUCT__entry(
 		__field(const void *, skbaddr)
 		__field(const void *, skaddr)
+		__field(void *, location)
 		__field(int, state)
 		__array(__u8, saddr, sizeof(struct sockaddr_in6))
 		__array(__u8, daddr, sizeof(struct sockaddr_in6))
@@ -112,12 +115,14 @@ TRACE_EVENT(tcp_send_reset,
 			 */
 			TP_STORE_ADDR_PORTS_SKB(skb, entry->daddr, entry->saddr);
 		}
+		__entry->location = location;
 	),
 
-	TP_printk("skbaddr=%p skaddr=%p src=%pISpc dest=%pISpc state=%s",
+	TP_printk("skbaddr=%p skaddr=%p src=%pISpc dest=%pISpc state=%s location=%pS",
 		  __entry->skbaddr, __entry->skaddr,
 		  __entry->saddr, __entry->daddr,
-		  __entry->state ? show_tcp_state_name(__entry->state) : "UNKNOWN")
+		  __entry->state ? show_tcp_state_name(__entry->state) : "UNKNOWN",
+		  __entry->location)
 );
 
 /*
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d5c4a969c066..fec54cfc4fb3 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -870,7 +870,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 		arg.bound_dev_if = sk->sk_bound_dev_if;
 	}
 
-	trace_tcp_send_reset(sk, skb);
+	trace_tcp_send_reset(sk, skb,  __builtin_return_address(0));
 
 	BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
 		     offsetof(struct inet_timewait_sock, tw_bound_dev_if));
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e3167ad96567..fb613582817e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3608,7 +3608,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 	/* skb of trace_tcp_send_reset() keeps the skb that caused RST,
 	 * skb here is different to the troublesome skb, so use NULL
 	 */
-	trace_tcp_send_reset(sk, NULL);
+	trace_tcp_send_reset(sk, NULL,  __builtin_return_address(0));
 }
 
 /* Send a crossed SYN-ACK during socket establishment.
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 8e9c59b6c00c..7eba9c3d69f1 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1128,7 +1128,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
 			label = ip6_flowlabel(ipv6h);
 	}
 
-	trace_tcp_send_reset(sk, skb);
+	trace_tcp_send_reset(sk, skb,  __builtin_return_address(0));
 
 	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
 			     ipv6_get_dsfield(ipv6h), label, priority, txhash,
-- 
2.37.3


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next v3 1/3] trace: adjust TP_STORE_ADDR_PORTS_SKB() parameters
  2024-03-29  3:42 ` [PATCH net-next v3 1/3] trace: adjust TP_STORE_ADDR_PORTS_SKB() parameters Jason Xing
@ 2024-03-29  8:49   ` Eric Dumazet
  0 siblings, 0 replies; 10+ messages in thread
From: Eric Dumazet @ 2024-03-29  8:49 UTC (permalink / raw)
  To: Jason Xing
  Cc: mhiramat, mathieu.desnoyers, rostedt, kuba, pabeni, davem, netdev,
	linux-trace-kernel, Jason Xing

On Fri, Mar 29, 2024 at 4:43 AM Jason Xing <kerneljasonxing@gmail.com> wrote:
>
> From: Jason Xing <kernelxing@tencent.com>
>
> Introducing entry_saddr and entry_daddr parameters in this macro
> for later use can help us record the reverse 4-tuple by analyzing
> the 4-tuple of the incoming skb when receiving.
>
> Signed-off-by: Jason Xing <kernelxing@tencent.com>

Reviewed-by: Eric Dumazet <edumazet@google.com>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next v3 2/3] trace: tcp: fully support trace_tcp_send_reset
  2024-03-29  3:42 ` [PATCH net-next v3 2/3] trace: tcp: fully support trace_tcp_send_reset Jason Xing
@ 2024-03-29  9:07   ` Eric Dumazet
  2024-03-29 10:22     ` Jason Xing
  0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2024-03-29  9:07 UTC (permalink / raw)
  To: Jason Xing
  Cc: mhiramat, mathieu.desnoyers, rostedt, kuba, pabeni, davem, netdev,
	linux-trace-kernel, Jason Xing

On Fri, Mar 29, 2024 at 4:43 AM Jason Xing <kerneljasonxing@gmail.com> wrote:
>
> From: Jason Xing <kernelxing@tencent.com>
>
> Prior to this patch, what we can see by enabling trace_tcp_send is
> only happening under two circumstances:
> 1) active rst mode
> 2) non-active rst mode and based on the full socket
>
> That means the inconsistency occurs if we use tcpdump and trace
> simultaneously to see how rst happens.
>
> It's necessary that we should take into other cases into considerations,
> say:
> 1) time-wait socket
> 2) no socket
> ...
>
> By parsing the incoming skb and reversing its 4-tuple can
> we know the exact 'flow' which might not exist.
>
> Samples after applied this patch:
> 1. tcp_send_reset: skbaddr=XXX skaddr=XXX src=ip:port dest=ip:port
> state=TCP_ESTABLISHED
> 2. tcp_send_reset: skbaddr=000...000 skaddr=XXX src=ip:port dest=ip:port
> state=UNKNOWN
> Note:
> 1) UNKNOWN means we cannot extract the right information from skb.
> 2) skbaddr/skaddr could be 0
>
> Signed-off-by: Jason Xing <kernelxing@tencent.com>
> ---
>  include/trace/events/tcp.h | 39 ++++++++++++++++++++++++++++++++++++--
>  net/ipv4/tcp_ipv4.c        |  4 ++--
>  net/ipv6/tcp_ipv6.c        |  3 ++-
>  3 files changed, 41 insertions(+), 5 deletions(-)
>
> diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
> index 194425f69642..289438c54227 100644
> --- a/include/trace/events/tcp.h
> +++ b/include/trace/events/tcp.h
> @@ -78,11 +78,46 @@ DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb,
>   * skb of trace_tcp_send_reset is the skb that caused RST. In case of
>   * active reset, skb should be NULL
>   */
> -DEFINE_EVENT(tcp_event_sk_skb, tcp_send_reset,
> +TRACE_EVENT(tcp_send_reset,
>
>         TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
>
> -       TP_ARGS(sk, skb)
> +       TP_ARGS(sk, skb),
> +
> +       TP_STRUCT__entry(
> +               __field(const void *, skbaddr)
> +               __field(const void *, skaddr)
> +               __field(int, state)
> +               __array(__u8, saddr, sizeof(struct sockaddr_in6))
> +               __array(__u8, daddr, sizeof(struct sockaddr_in6))
> +       ),
> +
> +       TP_fast_assign(
> +               __entry->skbaddr = skb;
> +               __entry->skaddr = sk;
> +               /* Zero means unknown state. */
> +               __entry->state = sk ? sk->sk_state : 0;
> +
> +               memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
> +               memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
> +
> +               if (sk && sk_fullsock(sk)) {
> +                       const struct inet_sock *inet = inet_sk(sk);
> +
> +                       TP_STORE_ADDR_PORTS(__entry, inet, sk);
> +               } else {

To be on the safe side, I would test if (skb) here.
We have one caller with skb == NULL, we might have more in the future.

> +                       /*
> +                        * We should reverse the 4-tuple of skb, so later
> +                        * it can print the right flow direction of rst.
> +                        */
> +                       TP_STORE_ADDR_PORTS_SKB(skb, entry->daddr, entry->saddr);
> +               }
> +       ),
> +
> +       TP_printk("skbaddr=%p skaddr=%p src=%pISpc dest=%pISpc state=%s",
> +                 __entry->skbaddr, __entry->skaddr,
> +                 __entry->saddr, __entry->daddr,
> +                 __entry->state ? show_tcp_state_name(__entry->state) : "UNKNOWN")
>  );
>
>  /*
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index a22ee5838751..d5c4a969c066 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -868,10 +868,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
>          */
>         if (sk) {
>                 arg.bound_dev_if = sk->sk_bound_dev_if;
> -               if (sk_fullsock(sk))
> -                       trace_tcp_send_reset(sk, skb);
>         }

Remove the { } ?


>
> +       trace_tcp_send_reset(sk, skb);
> +
>         BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
>                      offsetof(struct inet_timewait_sock, tw_bound_dev_if));
>
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index 3f4cba49e9ee..8e9c59b6c00c 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -1113,7 +1113,6 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
>         if (sk) {
>                 oif = sk->sk_bound_dev_if;
>                 if (sk_fullsock(sk)) {
> -                       trace_tcp_send_reset(sk, skb);
>                         if (inet6_test_bit(REPFLOW, sk))
>                                 label = ip6_flowlabel(ipv6h);
>                         priority = READ_ONCE(sk->sk_priority);
> @@ -1129,6 +1128,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
>                         label = ip6_flowlabel(ipv6h);
>         }
>
> +       trace_tcp_send_reset(sk, skb);
> +
>         tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
>                              ipv6_get_dsfield(ipv6h), label, priority, txhash,
>                              &key);
> --
> 2.37.3
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next v3 3/3] tcp: add location into reset trace process
  2024-03-29  3:42 ` [PATCH net-next v3 3/3] tcp: add location into reset trace process Jason Xing
@ 2024-03-29  9:13   ` Eric Dumazet
  2024-03-29 10:40     ` Jason Xing
  0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2024-03-29  9:13 UTC (permalink / raw)
  To: Jason Xing
  Cc: mhiramat, mathieu.desnoyers, rostedt, kuba, pabeni, davem, netdev,
	linux-trace-kernel, Jason Xing

On Fri, Mar 29, 2024 at 4:43 AM Jason Xing <kerneljasonxing@gmail.com> wrote:
>
> From: Jason Xing <kernelxing@tencent.com>
>
> In addition to knowing the 4-tuple of the flow which generates RST,
> the reason why it does so is very important because we have some
> cases where the RST should be sent and have no clue which one
> exactly.
>
> Adding location of reset process can help us more, like what
> trace_kfree_skb does.

Well, I would prefer a drop_reason here, even if there is no 'dropped' packet.

This would be more stable than something based on function names that
could be changed.

tracepoints do not have to get ugly, we can easily get stack traces if needed.

perf record -a -g  -e tcp:tcp_send_reset ...

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next v3 2/3] trace: tcp: fully support trace_tcp_send_reset
  2024-03-29  9:07   ` Eric Dumazet
@ 2024-03-29 10:22     ` Jason Xing
  2024-03-29 10:43       ` Eric Dumazet
  0 siblings, 1 reply; 10+ messages in thread
From: Jason Xing @ 2024-03-29 10:22 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: mhiramat, mathieu.desnoyers, rostedt, kuba, pabeni, davem, netdev,
	linux-trace-kernel, Jason Xing

On Fri, Mar 29, 2024 at 5:07 PM Eric Dumazet <edumazet@google.com> wrote:
>
> On Fri, Mar 29, 2024 at 4:43 AM Jason Xing <kerneljasonxing@gmail.com> wrote:
> >
> > From: Jason Xing <kernelxing@tencent.com>
> >
> > Prior to this patch, what we can see by enabling trace_tcp_send is
> > only happening under two circumstances:
> > 1) active rst mode
> > 2) non-active rst mode and based on the full socket
> >
> > That means the inconsistency occurs if we use tcpdump and trace
> > simultaneously to see how rst happens.
> >
> > It's necessary that we should take into other cases into considerations,
> > say:
> > 1) time-wait socket
> > 2) no socket
> > ...
> >
> > By parsing the incoming skb and reversing its 4-tuple can
> > we know the exact 'flow' which might not exist.
> >
> > Samples after applied this patch:
> > 1. tcp_send_reset: skbaddr=XXX skaddr=XXX src=ip:port dest=ip:port
> > state=TCP_ESTABLISHED
> > 2. tcp_send_reset: skbaddr=000...000 skaddr=XXX src=ip:port dest=ip:port
> > state=UNKNOWN
> > Note:
> > 1) UNKNOWN means we cannot extract the right information from skb.
> > 2) skbaddr/skaddr could be 0
> >
> > Signed-off-by: Jason Xing <kernelxing@tencent.com>
> > ---
> >  include/trace/events/tcp.h | 39 ++++++++++++++++++++++++++++++++++++--
> >  net/ipv4/tcp_ipv4.c        |  4 ++--
> >  net/ipv6/tcp_ipv6.c        |  3 ++-
> >  3 files changed, 41 insertions(+), 5 deletions(-)
> >
> > diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
> > index 194425f69642..289438c54227 100644
> > --- a/include/trace/events/tcp.h
> > +++ b/include/trace/events/tcp.h
> > @@ -78,11 +78,46 @@ DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb,
> >   * skb of trace_tcp_send_reset is the skb that caused RST. In case of
> >   * active reset, skb should be NULL
> >   */
> > -DEFINE_EVENT(tcp_event_sk_skb, tcp_send_reset,
> > +TRACE_EVENT(tcp_send_reset,
> >
> >         TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
> >
> > -       TP_ARGS(sk, skb)
> > +       TP_ARGS(sk, skb),
> > +
> > +       TP_STRUCT__entry(
> > +               __field(const void *, skbaddr)
> > +               __field(const void *, skaddr)
> > +               __field(int, state)
> > +               __array(__u8, saddr, sizeof(struct sockaddr_in6))
> > +               __array(__u8, daddr, sizeof(struct sockaddr_in6))
> > +       ),
> > +
> > +       TP_fast_assign(
> > +               __entry->skbaddr = skb;
> > +               __entry->skaddr = sk;
> > +               /* Zero means unknown state. */
> > +               __entry->state = sk ? sk->sk_state : 0;
> > +
> > +               memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
> > +               memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
> > +
> > +               if (sk && sk_fullsock(sk)) {
> > +                       const struct inet_sock *inet = inet_sk(sk);
> > +
> > +                       TP_STORE_ADDR_PORTS(__entry, inet, sk);
> > +               } else {
>
> To be on the safe side, I would test if (skb) here.
> We have one caller with skb == NULL, we might have more in the future.

Thanks for the review.

How about changing '} else {' to '} else if (skb) {', then if we go
into this else-if branch, we will print nothing, right? I'll test it
in this case.

>
> > +                       /*
> > +                        * We should reverse the 4-tuple of skb, so later
> > +                        * it can print the right flow direction of rst.
> > +                        */
> > +                       TP_STORE_ADDR_PORTS_SKB(skb, entry->daddr, entry->saddr);
> > +               }
> > +       ),
> > +
> > +       TP_printk("skbaddr=%p skaddr=%p src=%pISpc dest=%pISpc state=%s",
> > +                 __entry->skbaddr, __entry->skaddr,
> > +                 __entry->saddr, __entry->daddr,
> > +                 __entry->state ? show_tcp_state_name(__entry->state) : "UNKNOWN")
> >  );
> >
> >  /*
> > diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> > index a22ee5838751..d5c4a969c066 100644
> > --- a/net/ipv4/tcp_ipv4.c
> > +++ b/net/ipv4/tcp_ipv4.c
> > @@ -868,10 +868,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
> >          */
> >         if (sk) {
> >                 arg.bound_dev_if = sk->sk_bound_dev_if;
> > -               if (sk_fullsock(sk))
> > -                       trace_tcp_send_reset(sk, skb);
> >         }
>
> Remove the { } ?

Yes, I forgot to remove them.

Thanks,
Jason

>
>
> >
> > +       trace_tcp_send_reset(sk, skb);
> > +
> >         BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
> >                      offsetof(struct inet_timewait_sock, tw_bound_dev_if));
> >
> > diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> > index 3f4cba49e9ee..8e9c59b6c00c 100644
> > --- a/net/ipv6/tcp_ipv6.c
> > +++ b/net/ipv6/tcp_ipv6.c
> > @@ -1113,7 +1113,6 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
> >         if (sk) {
> >                 oif = sk->sk_bound_dev_if;
> >                 if (sk_fullsock(sk)) {
> > -                       trace_tcp_send_reset(sk, skb);
> >                         if (inet6_test_bit(REPFLOW, sk))
> >                                 label = ip6_flowlabel(ipv6h);
> >                         priority = READ_ONCE(sk->sk_priority);
> > @@ -1129,6 +1128,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
> >                         label = ip6_flowlabel(ipv6h);
> >         }
> >
> > +       trace_tcp_send_reset(sk, skb);
> > +
> >         tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
> >                              ipv6_get_dsfield(ipv6h), label, priority, txhash,
> >                              &key);
> > --
> > 2.37.3
> >

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next v3 3/3] tcp: add location into reset trace process
  2024-03-29  9:13   ` Eric Dumazet
@ 2024-03-29 10:40     ` Jason Xing
  0 siblings, 0 replies; 10+ messages in thread
From: Jason Xing @ 2024-03-29 10:40 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: mhiramat, mathieu.desnoyers, rostedt, kuba, pabeni, davem, netdev,
	linux-trace-kernel, Jason Xing

On Fri, Mar 29, 2024 at 5:13 PM Eric Dumazet <edumazet@google.com> wrote:
>
> On Fri, Mar 29, 2024 at 4:43 AM Jason Xing <kerneljasonxing@gmail.com> wrote:
> >
> > From: Jason Xing <kernelxing@tencent.com>
> >
> > In addition to knowing the 4-tuple of the flow which generates RST,
> > the reason why it does so is very important because we have some
> > cases where the RST should be sent and have no clue which one
> > exactly.
> >
> > Adding location of reset process can help us more, like what
> > trace_kfree_skb does.
>
> Well, I would prefer a drop_reason here, even if there is no 'dropped' packet.

Good idea really. Then we can accurately diagnose which kind of reason
exactly causes the RST behavior.

I'm not sure if we can reuse the drop_reason here, like adding/using
some reasons in enum skb_drop_reason {}? The name is a little bit
strange.

Oh, I can just print the string of reason directly instead of really
using enum skb_drop_reason {}...

>
> This would be more stable than something based on function names that
> could be changed.
>
> tracepoints do not have to get ugly, we can easily get stack traces if needed.
>
> perf record -a -g  -e tcp:tcp_send_reset ...

Ah, yes, I blindly mimicked what trace_kfree_skb() and
trace_consume_skb() do. Introducing some RST reasons is more
reasonable and easier to detect, since it's not hard to add just four
or five reasons.

Thanks,
Jason

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next v3 2/3] trace: tcp: fully support trace_tcp_send_reset
  2024-03-29 10:22     ` Jason Xing
@ 2024-03-29 10:43       ` Eric Dumazet
  0 siblings, 0 replies; 10+ messages in thread
From: Eric Dumazet @ 2024-03-29 10:43 UTC (permalink / raw)
  To: Jason Xing
  Cc: mhiramat, mathieu.desnoyers, rostedt, kuba, pabeni, davem, netdev,
	linux-trace-kernel, Jason Xing

On Fri, Mar 29, 2024 at 11:23 AM Jason Xing <kerneljasonxing@gmail.com> wrote:
>
> On Fri, Mar 29, 2024 at 5:07 PM Eric Dumazet <edumazet@google.com> wrote:
> >
> > On Fri, Mar 29, 2024 at 4:43 AM Jason Xing <kerneljasonxing@gmail.com> wrote:
> > >
> > > From: Jason Xing <kernelxing@tencent.com>
> > >
> > > > Prior to this patch, what we can see by enabling
> > > > trace_tcp_send_reset only happens under two circumstances:
> > > 1) active rst mode
> > > 2) non-active rst mode and based on the full socket
> > >
> > > That means the inconsistency occurs if we use tcpdump and trace
> > > simultaneously to see how rst happens.
> > >
> > > > It's necessary to take other cases into consideration,
> > > say:
> > > 1) time-wait socket
> > > 2) no socket
> > > ...
> > >
> > > > By parsing the incoming skb and reversing its 4-tuple, we can
> > > > know the exact 'flow', which might not exist.
> > >
> > > > Samples after applying this patch:
> > > 1. tcp_send_reset: skbaddr=XXX skaddr=XXX src=ip:port dest=ip:port
> > > state=TCP_ESTABLISHED
> > > 2. tcp_send_reset: skbaddr=000...000 skaddr=XXX src=ip:port dest=ip:port
> > > state=UNKNOWN
> > > Note:
> > > 1) UNKNOWN means we cannot extract the right information from skb.
> > > 2) skbaddr/skaddr could be 0
> > >
> > > Signed-off-by: Jason Xing <kernelxing@tencent.com>
> > > ---
> > >  include/trace/events/tcp.h | 39 ++++++++++++++++++++++++++++++++++++--
> > >  net/ipv4/tcp_ipv4.c        |  4 ++--
> > >  net/ipv6/tcp_ipv6.c        |  3 ++-
> > >  3 files changed, 41 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
> > > index 194425f69642..289438c54227 100644
> > > --- a/include/trace/events/tcp.h
> > > +++ b/include/trace/events/tcp.h
> > > @@ -78,11 +78,46 @@ DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb,
> > >   * skb of trace_tcp_send_reset is the skb that caused RST. In case of
> > >   * active reset, skb should be NULL
> > >   */
> > > -DEFINE_EVENT(tcp_event_sk_skb, tcp_send_reset,
> > > +TRACE_EVENT(tcp_send_reset,
> > >
> > >         TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
> > >
> > > -       TP_ARGS(sk, skb)
> > > +       TP_ARGS(sk, skb),
> > > +
> > > +       TP_STRUCT__entry(
> > > +               __field(const void *, skbaddr)
> > > +               __field(const void *, skaddr)
> > > +               __field(int, state)
> > > +               __array(__u8, saddr, sizeof(struct sockaddr_in6))
> > > +               __array(__u8, daddr, sizeof(struct sockaddr_in6))
> > > +       ),
> > > +
> > > +       TP_fast_assign(
> > > +               __entry->skbaddr = skb;
> > > +               __entry->skaddr = sk;
> > > +               /* Zero means unknown state. */
> > > +               __entry->state = sk ? sk->sk_state : 0;
> > > +
> > > +               memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
> > > +               memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
> > > +
> > > +               if (sk && sk_fullsock(sk)) {
> > > +                       const struct inet_sock *inet = inet_sk(sk);
> > > +
> > > +                       TP_STORE_ADDR_PORTS(__entry, inet, sk);
> > > +               } else {
> >
> > To be on the safe side, I would test if (skb) here.
> > We have one caller with skb == NULL, we might have more in the future.
>
> Thanks for the review.
>
> How about changing '} else {' to '} else if (skb) {', then if we go
> into this else-if branch, we will print nothing, right? I'll test it
> in this case.

Right, the fields are cleared before this else

+               memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
+               memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));

>
> >
> > > +                       /*
> > > +                        * We should reverse the 4-tuple of skb, so later
> > > +                        * it can print the right flow direction of rst.
> > > +                        */
> > > +                       TP_STORE_ADDR_PORTS_SKB(skb, entry->daddr, entry->saddr);
> > > +               }
> > > +       ),
> > > +
> > > +       TP_printk("skbaddr=%p skaddr=%p src=%pISpc dest=%pISpc state=%s",
> > > +                 __entry->skbaddr, __entry->skaddr,
> > > +                 __entry->saddr, __entry->daddr,
> > > +                 __entry->state ? show_tcp_state_name(__entry->state) : "UNKNOWN")
> > >  );
> > >
> > >  /*
> > > diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> > > index a22ee5838751..d5c4a969c066 100644
> > > --- a/net/ipv4/tcp_ipv4.c
> > > +++ b/net/ipv4/tcp_ipv4.c
> > > @@ -868,10 +868,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
> > >          */
> > >         if (sk) {
> > >                 arg.bound_dev_if = sk->sk_bound_dev_if;
> > > -               if (sk_fullsock(sk))
> > > -                       trace_tcp_send_reset(sk, skb);
> > >         }
> >
> > Remove the { } ?
>
> Yes, I forgot to remove them.

No problem.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2024-03-29 10:43 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-03-29  3:42 [PATCH net-next v3 0/3] tcp: make trace of reset logic complete Jason Xing
2024-03-29  3:42 ` [PATCH net-next v3 1/3] trace: adjust TP_STORE_ADDR_PORTS_SKB() parameters Jason Xing
2024-03-29  8:49   ` Eric Dumazet
2024-03-29  3:42 ` [PATCH net-next v3 2/3] trace: tcp: fully support trace_tcp_send_reset Jason Xing
2024-03-29  9:07   ` Eric Dumazet
2024-03-29 10:22     ` Jason Xing
2024-03-29 10:43       ` Eric Dumazet
2024-03-29  3:42 ` [PATCH net-next v3 3/3] tcp: add location into reset trace process Jason Xing
2024-03-29  9:13   ` Eric Dumazet
2024-03-29 10:40     ` Jason Xing

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.