| From: |
| Stephen Hemminger <shemminger@osdl.org> |
| To: |
| "David S. Miller" <davem@davemloft.net> |
| Subject: |
| [PATCH] TSO and cwnd |
| Date: |
| Wed, 2 Nov 2005 11:35:46 -0800 |
| Cc: |
| netdev@vger.kernel.org |
Resend of earlier patch with better explanation.
TCP performance with TSO over networks with delay is awful.
On a 100Mbit link with 150ms delay, we get 4Mbits/sec with TSO and
50Mbits/sec without TSO. The root cause is that TSO intentionally
does not keep the window full, so the congestion window doesn't
get updated often enough.
We can update the congestion window in response to each ACK
according to RFC2581. The downside of doing this is that if an application
starts off sending slowly, it can build up an artificially large
congestion window.
The following patch looks bigger than it really is. It just
eliminates the in_flight < snd_cwnd test in each congestion
handler, and since in_flight is then no longer needed, it can be removed
from the API.
Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Index: tcp-2.6/include/net/tcp.h
===================================================================
--- tcp-2.6.orig/include/net/tcp.h
+++ tcp-2.6/include/net/tcp.h
@@ -678,8 +678,7 @@ struct tcp_congestion_ops {
/* lower bound for congestion window (optional) */
u32 (*min_cwnd)(struct sock *sk);
/* do new cwnd calculation (required) */
- void (*cong_avoid)(struct sock *sk, u32 ack,
- u32 rtt, u32 in_flight, int good_ack);
+ void (*cong_avoid)(struct sock *sk, u32 ack, u32 rtt, int good_ack);
/* round trip time sample per acked packet (optional) */
void (*rtt_sample)(struct sock *sk, u32 usrtt);
/* call before changing ca_state (optional) */
@@ -708,8 +707,7 @@ extern int tcp_set_congestion_control(st
extern struct tcp_congestion_ops tcp_init_congestion_ops;
extern u32 tcp_reno_ssthresh(struct sock *sk);
-extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack,
- u32 rtt, u32 in_flight, int flag);
+extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, int flag);
extern u32 tcp_reno_min_cwnd(struct sock *sk);
extern struct tcp_congestion_ops tcp_reno;
Index: tcp-2.6/net/ipv4/tcp_bic.c
===================================================================
--- tcp-2.6.orig/net/ipv4/tcp_bic.c
+++ tcp-2.6/net/ipv4/tcp_bic.c
@@ -209,17 +209,13 @@ static inline void bictcp_low_utilizatio
}
-static void bictcp_cong_avoid(struct sock *sk, u32 ack,
- u32 seq_rtt, u32 in_flight, int data_acked)
+static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 seq_rtt, int data_acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
bictcp_low_utilization(sk, data_acked);
- if (in_flight < tp->snd_cwnd)
- return;
-
if (tp->snd_cwnd <= tp->snd_ssthresh) {
/* In "safe" area, increase. */
if (tp->snd_cwnd < tp->snd_cwnd_clamp)
Index: tcp-2.6/net/ipv4/tcp_cong.c
===================================================================
--- tcp-2.6.orig/net/ipv4/tcp_cong.c
+++ tcp-2.6/net/ipv4/tcp_cong.c
@@ -181,14 +181,10 @@ int tcp_set_congestion_control(struct so
/* This is Jacobson's slow start and congestion avoidance.
* SIGCOMM '88, p. 328.
*/
-void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
- int flag)
+void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (in_flight < tp->snd_cwnd)
- return;
-
if (tp->snd_cwnd <= tp->snd_ssthresh) {
/* In "safe" area, increase. */
if (tp->snd_cwnd < tp->snd_cwnd_clamp)
Index: tcp-2.6/net/ipv4/tcp_highspeed.c
===================================================================
--- tcp-2.6.orig/net/ipv4/tcp_highspeed.c
+++ tcp-2.6/net/ipv4/tcp_highspeed.c
@@ -110,15 +110,11 @@ static void hstcp_init(struct sock *sk)
tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);
}
-static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt,
- u32 in_flight, int good)
+static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt, int good)
{
struct tcp_sock *tp = tcp_sk(sk);
struct hstcp *ca = inet_csk_ca(sk);
- if (in_flight < tp->snd_cwnd)
- return;
-
if (tp->snd_cwnd <= tp->snd_ssthresh) {
if (tp->snd_cwnd < tp->snd_cwnd_clamp)
tp->snd_cwnd++;
Index: tcp-2.6/net/ipv4/tcp_htcp.c
===================================================================
--- tcp-2.6.orig/net/ipv4/tcp_htcp.c
+++ tcp-2.6/net/ipv4/tcp_htcp.c
@@ -201,15 +201,11 @@ static u32 htcp_recalc_ssthresh(struct s
return max((tp->snd_cwnd * ca->beta) >> 7, 2U);
}
-static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
- u32 in_flight, int data_acked)
+static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, int data_acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk);
- if (in_flight < tp->snd_cwnd)
- return;
-
if (tp->snd_cwnd <= tp->snd_ssthresh) {
/* In "safe" area, increase. */
if (tp->snd_cwnd < tp->snd_cwnd_clamp)
Index: tcp-2.6/net/ipv4/tcp_hybla.c
===================================================================
--- tcp-2.6.orig/net/ipv4/tcp_hybla.c
+++ tcp-2.6/net/ipv4/tcp_hybla.c
@@ -86,8 +86,7 @@ static inline u32 hybla_fraction(u32 odd
* o Give cwnd a new value based on the model proposed
* o remember increments <1
*/
-static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
- u32 in_flight, int flag)
+static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt, int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
struct hybla *ca = inet_csk_ca(sk);
@@ -101,10 +100,7 @@ static void hybla_cong_avoid(struct sock
}
if (!ca->hybla_en)
- return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag);
-
- if (in_flight < tp->snd_cwnd)
- return;
+ return tcp_reno_cong_avoid(sk, ack, rtt, flag);
if (ca->rho == 0)
hybla_recalc_param(sk);
Index: tcp-2.6/net/ipv4/tcp_input.c
===================================================================
--- tcp-2.6.orig/net/ipv4/tcp_input.c
+++ tcp-2.6/net/ipv4/tcp_input.c
@@ -1977,11 +1977,10 @@ static inline void tcp_ack_update_rtt(st
tcp_ack_no_tstamp(sk, seq_rtt, usrtt, flag);
}
-static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
- u32 in_flight, int good)
+static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, int good)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good);
+ icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, good);
tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
}
@@ -2297,7 +2296,6 @@ static int tcp_ack(struct sock *sk, stru
u32 prior_snd_una = tp->snd_una;
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
- u32 prior_in_flight;
s32 seq_rtt;
s32 seq_usrtt = 0;
int prior_packets;
@@ -2349,8 +2347,6 @@ static int tcp_ack(struct sock *sk, stru
if (!prior_packets)
goto no_queue;
- prior_in_flight = tcp_packets_in_flight(tp);
-
/* See if we can take anything off of the retransmit queue. */
flag |= tcp_clean_rtx_queue(sk, &seq_rtt,
icsk->icsk_ca_ops->rtt_sample ? &seq_usrtt : NULL);
@@ -2361,11 +2357,11 @@ static int tcp_ack(struct sock *sk, stru
if (tcp_ack_is_dubious(sk, flag)) {
/* Advanve CWND, if state allows this. */
if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
- tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0);
+ tcp_cong_avoid(sk, ack, seq_rtt, 0);
tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag);
} else {
if ((flag & FLAG_DATA_ACKED))
- tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 1);
+ tcp_cong_avoid(sk, ack, seq_rtt, 1);
}
if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP))
Index: tcp-2.6/net/ipv4/tcp_scalable.c
===================================================================
--- tcp-2.6.orig/net/ipv4/tcp_scalable.c
+++ tcp-2.6/net/ipv4/tcp_scalable.c
@@ -16,12 +16,9 @@
#define TCP_SCALABLE_AI_CNT 50U
#define TCP_SCALABLE_MD_SCALE 3
-static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
- u32 in_flight, int flag)
+static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 rtt, int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (in_flight < tp->snd_cwnd)
- return;
if (tp->snd_cwnd <= tp->snd_ssthresh) {
tp->snd_cwnd++;
Index: tcp-2.6/net/ipv4/tcp_vegas.c
===================================================================
--- tcp-2.6.orig/net/ipv4/tcp_vegas.c
+++ tcp-2.6/net/ipv4/tcp_vegas.c
@@ -162,14 +162,13 @@ static void tcp_vegas_cwnd_event(struct
tcp_vegas_init(sk);
}
-static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
- u32 seq_rtt, u32 in_flight, int flag)
+static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 seq_rtt, int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
struct vegas *vegas = inet_csk_ca(sk);
if (!vegas->doing_vegas_now)
- return tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag);
+ return tcp_reno_cong_avoid(sk, ack, seq_rtt, flag);
/* The key players are v_beg_snd_una and v_beg_snd_nxt.
*
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html