Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
ea42bd8
Update kernel.yml
minuscat Aug 8, 2023
2ca19e9
RTT_ref window version
minuscat Aug 9, 2023
61b56d7
RTT_ref window version
minuscat Aug 9, 2023
07c9c18
Update kernel.yml
minuscat Aug 11, 2023
5f9d6d0
Update kernel.yml
minuscat Aug 11, 2023
4708fab
Merge pull request #20 from minuscat/testing
minuscat Sep 14, 2023
13ad496
Revert experiment code and fix typo in workflow
minuscat Sep 14, 2023
a8d9adf
Update tcp_ecn in ip-sysctl.rst
bbriscoe Sep 14, 2023
dbab85e
Update tcp_ecn in ip-sysctl.rst
bbriscoe Sep 14, 2023
9d02d31
Merge branch 'l4steam' of https://github.com/minuscat/L4STeam_linux i…
minuscat Sep 14, 2023
68908bf
Remove experiment code and update workflow
minuscat Sep 15, 2023
6a56c9c
Update ratebased control code
minuscat Sep 18, 2023
0c6e07b
Revert ratebased control code and update workflow
minuscat Sep 18, 2023
8b07043
Commit ratebased control code
minuscat Sep 18, 2023
60b07a2
Update the control part of ratebase code
minuscat Sep 22, 2023
eab30d0
Fix one typo
minuscat Sep 22, 2023
8f15b59
Use dst_mtu to replace mtu_cookie
minuscat Sep 23, 2023
b748e18
Update with mtu_cache
minuscat Sep 24, 2023
fbd66a9
Revert some changes
minuscat Oct 23, 2023
1452e0e
Revert some changes
minuscat Oct 23, 2023
1a97483
Fix issues for rate-base and add mode switching criterion
minuscat Dec 5, 2023
b55c9fa
Modify to align AccECN draft
minuscat Jan 3, 2024
b06ab7c
Modify how tcp_ecn_option_beacon works
minuscat Jan 19, 2024
ae3f31d
Modification for ACCECN draft
minuscat Jan 19, 2024
bb25bcf
Update kernel.yml
minuscat Jan 19, 2024
12a84bc
Update tcp_minisocks.c
minuscat Jan 19, 2024
230e7ae
Modification for ACCECN draft
minuscat Jan 19, 2024
472b228
Revert "Modification for ACCECN draft"
minuscat Jan 20, 2024
2fc01fe
Modification for ACCECN draft
minuscat Jan 20, 2024
04b6c98
Modification for ACCECN draft
minuscat Jan 20, 2024
a449deb
Modification for ACCECN draft
minuscat Jan 21, 2024
0e900e5
Modification for ACCECN draft
minuscat Jan 21, 2024
24da58c
Fix typo in tcp.c
minuscat Jan 21, 2024
7a6a4ee
Update retx counter for synack
minuscat Jan 21, 2024
3941f6d
Add is_rtx flag
minuscat Jan 21, 2024
3ff07b9
Fix missing SYN retx in AccECN draft
minuscat Jan 22, 2024
15a3b7c
Update syn_ack_rcv for challenge ack
minuscat Jan 22, 2024
08367ec
Update syn_ect_rcv for retx SYN
minuscat Jan 23, 2024
b9fd5d0
Modification for ACCECN draft
minuscat Jan 23, 2024
a974c3b
Modify update_cwr and clean duplicate code
minuscat Jan 23, 2024
3734354
Parametrize the mode
minuscat Jan 23, 2024
8780b89
Include the case of 3.2.3.2.2. for packet loss carrying AccECN option
minuscat Jan 25, 2024
821d92c
Fix typo
minuscat Jan 25, 2024
08b1f99
Fix typo and comments
minuscat Jan 30, 2024
4538174
Fix typo
minuscat Jan 30, 2024
bde09fe
Fix typo
minuscat Jan 30, 2024
5d809aa
Add ACE check on first data ACK
minuscat Feb 3, 2024
ebae93f
Add ACE check on first data ACK
minuscat Feb 5, 2024
623cd47
Update kernel.yml
minuscat Feb 7, 2024
64441b4
Merge pull request #26 from minuscat/AccECN-2024
minuscat Feb 7, 2024
804cadc
Merge pull request #27 from L4STeam/ratebase
koen0607 Mar 4, 2024
4579ffb
Merge pull request #28 from L4STeam/AccECN-2023
koen0607 Mar 5, 2024
da73e6f
Merge pull request #30 from L4STeam/testing
minuscat May 16, 2024
dc111c5
Update for fractional congestion window
minuscat May 16, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions .github/workflows/kernel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,11 @@ jobs:
path: debian_build

release:
name: Release build artifacts for the testing branch
name: Release build artifacts for the branch
runs-on: ubuntu-20.04
needs: build
permissions: write-all
if: github.ref == 'refs/heads/testing'
if: ${{ github.ref == 'refs/heads/testing' || github.ref == 'refs/heads/ratebase' || github.ref == 'refs/heads/AccECN-2023'}}
steps:
- name: Get artifact
uses: actions/download-artifact@v3
Expand All @@ -74,15 +74,19 @@ jobs:
wget https://github.com/L4STeam/iproute2/releases/download/master-build/iproute2-l4s.zip
unzip iproute2-l4s
mv -t . iproute2-l4s/*.deb
- name: Extract branch name
shell: bash
run: echo "branch=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> $GITHUB_OUTPUT
id: extract_branch
- name: Zip artifacts
run: |
mkdir debian_build
mv *.deb debian_build
zip -r l4s-testing.zip debian_build
zip -r l4s-${{ steps.extract_branch.outputs.branch }}.zip debian_build
- name: Release tip build
uses: pyTooling/Actions/releaser@main
with:
token: ${{ secrets.GITHUB_TOKEN }}
tag: testing-build
tag: ${{ steps.extract_branch.outputs.branch }}-build
files: |
l4s-testing.zip
l4s-${{ steps.extract_branch.outputs.branch }}.zip
39 changes: 23 additions & 16 deletions Documentation/networking/ip-sysctl.rst
Original file line number Diff line number Diff line change
Expand Up @@ -387,27 +387,28 @@ tcp_early_retrans - INTEGER

tcp_ecn - INTEGER
Control use of Explicit Congestion Notification (ECN) by TCP.
ECN is used only when both ends of the TCP connection indicate
support for it. This feature is useful in avoiding losses due
to congestion by allowing supporting routers to signal
congestion before having to drop packets. The highest variant
of ECN feedback that both peers support is chosen by the ECN
negotiation (Accurate ECN, ECN, or no ECN).
ECN is used only when both ends of the TCP connection indicate support
for it. This feature is useful in avoiding losses due to congestion by
allowing supporting routers to signal congestion before having to drop
packets. A host that supports ECN both sends ECN at the IP layer and
feeds back ECN at the TCP layer. The highest variant of ECN feedback
that both peers support is chosen by the ECN negotiation (Accurate ECN,
ECN, or no ECN).

The highest negotiated variant for incoming connection requests
and the highest variant requested by outgoing connection
attempts:

= ==================== ====================
Incoming connections Outgoing connections
= ==================== ====================
0 No ECN No ECN
1 ECN ECN
2 ECN No ECN
3 AccECN AccECN
4 AccECN ECN
5 AccECN No ECN
= ==================== ====================
===== ==================== ====================
Value Incoming connections Outgoing connections
===== ==================== ====================
0 No ECN No ECN
1 ECN ECN
2 ECN No ECN
3 AccECN AccECN
4 AccECN ECN
5 AccECN No ECN
===== ==================== ====================

Default: 2

Expand All @@ -430,6 +431,12 @@ tcp_ecn_option - INTEGER

Default: 2

tcp_ecn_option_beacon - INTEGER
Control how often the Accurate ECN (AccECN) option is sent per RTT. This
setting takes effect only when tcp_ecn_option is set to 2.

Default: 1 (the AccECN option will be sent at least once per RTT)

tcp_ecn_fallback - BOOLEAN
If the kernel detects that ECN connection misbehaves, enable fall
back to non-ECN. Currently, this knob implements the fallback
Expand Down
4 changes: 4 additions & 0 deletions include/linux/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ struct tcp_sock {
u32 snd_wnd; /* The window we expect to receive */
u32 max_window; /* Maximal window ever seen from peer */
u32 mss_cache; /* Cached effective mss, not including SACKS */
bool mss_cache_set_by_ca;

u32 window_clamp; /* Maximal window to advertise */
u32 rcv_ssthresh; /* Current window clamp */
Expand All @@ -233,6 +234,9 @@ struct tcp_sock {
syn_ect_snt:2, /* AccECN ECT memory, only */
syn_ect_rcv:2, /* ... needed durign 3WHS + first seqno */
ecn_fail:1; /* ECN reflector detected path mangling */
u8 accecn_no_respond:1, /* AccECN no response on feedback */
accecn_no_options:1, /* AccECN no options send out */
first_data_ack:1; /* Check for first data ack */
u8 saw_accecn_opt:2, /* An AccECN option was seen */
fast_ack_mode:2, /* which fast ack mode ? */
unused:4;
Expand Down
4 changes: 3 additions & 1 deletion include/net/request_sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ struct request_sock {
u16 mss;
u8 num_retrans; /* number of retransmits */
u8 syncookie:1; /* syncookie: encode tcpopts in timestamp */
u8 num_timeout:7; /* number of timeouts */
u8 num_timeout:7,
is_rtx:1; /* number of timeouts */
u32 ts_recent;
struct timer_list rsk_timer;
const struct request_sock_ops *rsk_ops;
Expand Down Expand Up @@ -105,6 +106,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
sk_tx_queue_clear(req_to_sk(req));
req->saved_syn = NULL;
req->num_timeout = 0;
req->is_rtx = 0;
req->num_retrans = 0;
req->sk = NULL;
refcount_set(&req->rsk_refcnt, 0);
Expand Down
1 change: 0 additions & 1 deletion include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCP_ACCECN_MAXSIZE (TCPOLEN_ACCECN_BASE + \
TCPOLEN_ACCECN_PERFIELD * \
TCP_ACCECN_NUMFIELDS)
#define TCP_ACCECN_BEACON_FREQ_SHIFT 2 /* Send option at least 2^2 times per RTT */
#define TCP_ACCECN_SAFETY_SHIFT 1 /* SAFETY_FACTOR in accecn draft */

/* tp->saw_accecn_opt states */
Expand Down
3 changes: 1 addition & 2 deletions include/uapi/linux/inet_diag.h
Original file line number Diff line number Diff line change
Expand Up @@ -236,11 +236,10 @@ struct tcp_bbr_info {

struct tcp_prague_info {
__u64 prague_alpha;
__u64 prague_ai_ack_increase;
__u64 prague_frac_cwnd;
__u64 prague_rate_bytes;
__u32 prague_max_burst;
__u32 prague_round;
__u32 prague_rtt_indep;
__u32 prague_rtt_target;
bool prague_enabled;
};
Expand Down
1 change: 1 addition & 0 deletions net/ipv4/inet_connection_sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,7 @@ static void syn_ack_recalc(struct request_sock *req,

int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req)
{
req->is_rtx = 1;
int err = req->rsk_ops->rtx_syn_ack(parent, req);

if (!err)
Expand Down
4 changes: 4 additions & 0 deletions net/ipv4/tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,7 @@ void tcp_init_sock(struct sock *sk)
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
tp->snd_cwnd_clamp = ~0;
tp->mss_cache = TCP_MSS_DEFAULT;
tp->mss_cache_set_by_ca = false;

tp->reordering = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering);
tcp_assign_congestion_control(sk);
Expand Down Expand Up @@ -3033,6 +3034,9 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->delivered_ce = 0;
tp->saw_accecn_opt = 0;
tp->ecn_fail = 0;
tp->accecn_no_respond = 0;
tp->accecn_no_options = 0;
tp->first_data_ack = 0;
tcp_accecn_init_counters(tp);
tp->prev_ecnfield = 0;
tp->accecn_opt_tstamp = 0;
Expand Down
63 changes: 57 additions & 6 deletions net/ipv4/tcp_input.c
Original file line number Diff line number Diff line change
Expand Up @@ -444,11 +444,31 @@ static void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb,
tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
break;
case 0x1:
case 0x5:
if (tcp_ca_no_fallback_rfc3168(sk))
tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
else if (tcp_ecn_mode_pending(tp))
tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
else
tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
break;
/* [CY] 3.1.2. Backward Compatibility - If a TCP Client has sent a SYN requesting AccECN feedback with (AE,CWR,ECE) =
* (1,1,1) then receives a SYN/ACK with the currently reserved combination (AE,CWR,ECE) = (1,0,1) but it does not
* have logic specific to such a combination, the Client MUST enable AccECN mode as if the SYN/ACK confirmed that the
* Server supported AccECN and as if it fed back that the IP-ECN field on the SYN had arrived unchanged.
*/
case 0x5:
if (tcp_ecn_mode_pending(tp)) {
tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
tp->syn_ect_rcv = ip_dsfield & INET_ECN_MASK;
if (tp->rx_opt.accecn &&
tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) {
tp->saw_accecn_opt = tcp_accecn_option_init(skb,
tp->rx_opt.accecn);
tp->accecn_opt_demand = 2;
}
if (INET_ECN_is_ce(ip_dsfield)) {
tp->received_ce++;
tp->received_ce_pending++;
}
}
break;
default:
tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
Expand Down Expand Up @@ -575,7 +595,7 @@ static bool tcp_accecn_process_option(struct tcp_sock *tp,
bool order1, res;
unsigned int i;

if (tp->saw_accecn_opt == TCP_ACCECN_OPT_FAIL)
if (tp->saw_accecn_opt == TCP_ACCECN_OPT_FAIL || tp->accecn_no_respond)
return false;

if (!(flag & FLAG_SLOWPATH) || !tp->rx_opt.accecn) {
Expand Down Expand Up @@ -683,6 +703,22 @@ static u32 __tcp_accecn_process(struct sock *sk, const struct sk_buff *skb,
if (flag & FLAG_SYN_ACKED)
return 0;

/* [CY] 3.2.2.4. Testing for Zeroing of the ACE Field - If AccECN has been successfully negotiated, the Data Sender
* MAY check the value of the ACE counter in the first feedback packet (with or without data) that arrives after the
* 3-way handshake. If the value of this ACE field is found to be zero (0b000), for the remainder of the half-
* connection the Data Sender ought to send non-ECN-capable packets and it is advised not to respond to any feedback
* of CE markings.
*/
if (!tp->first_data_ack) {
tp->first_data_ack = 1;
if (tcp_accecn_ace(tcp_hdr(skb)) == 0x0) {
tp->ecn_fail = 1;
INET_ECN_dontxmit(sk);
tp->accecn_no_respond = 1;
return 0;
}
}

if (tp->received_ce_pending >= TCP_ACCECN_ACE_MAX_DELTA)
inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;

Expand Down Expand Up @@ -4873,8 +4909,18 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
* DSACK state and change the txhash to re-route speculatively.
*/
if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq &&
sk_rethink_txhash(sk))
sk_rethink_txhash(sk)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH);
/* [CY] 3.2.3.2.2. Testing for Loss of Packets Carrying the AccECN Option - If a middlebox is dropping
* packets with options it does not recognize, a host that is sending little or no data but mostly pure
* ACKs will not inherently detect such losses. Such a host MAY detect loss of ACKs carrying the AccECN
* Option by detecting whether the acknowledged data always reappears as a retransmission. In such cases,
* the host SHOULD disable the sending of the AccECN Option for this half-connection.
*/
if (tcp_ecn_mode_accecn(tcp_sk(sk)))
tcp_sk(sk)->accecn_no_options = 1;

}
}

static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
Expand Down Expand Up @@ -6215,6 +6261,11 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
if (th->syn) {
if (tcp_ecn_mode_accecn(tp)) {
send_accecn_reflector = true;
/* [CY] 3.1.5. Implications of AccECN Mode - A host in AccECN mode that is feeding back the IP-ECN
* field on a SYN or SYN/ACK: MUST feed back the IP-ECN field on the latest valid SYN or acceptable
* SYN/ACK to arrive.
*/
tp->syn_ect_rcv = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK;
if (tp->rx_opt.accecn &&
tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) {
tp->saw_accecn_opt = tcp_accecn_option_init(skb,
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/tcp_ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -3142,7 +3142,7 @@ static int __net_init tcp_sk_init(struct net *net)

net->ipv4.sysctl_tcp_ecn = 2;
net->ipv4.sysctl_tcp_ecn_option = 2;
net->ipv4.sysctl_tcp_ecn_option_beacon = 1;
net->ipv4.sysctl_tcp_ecn_option_beacon = 3;
net->ipv4.sysctl_tcp_ecn_fallback = 1;
net->ipv4.sysctl_tcp_ecn_unsafe_cep = 0;

Expand Down
30 changes: 28 additions & 2 deletions net/ipv4/tcp_minisocks.c
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,13 @@ void tcp_accecn_third_ack(struct sock *sk, const struct sk_buff *skb,

switch (ace) {
case 0x0:
/* [CY] 3.2.2.1. ACE Field on the ACK of the SYN/ACK - If the Server is in AccECN mode and in SYN-RCVD
* state, and if it receives a value of zero on a pure ACK with SYN=0 and no SACK blocks, for the rest
* of the connection the Server MUST NOT set ECT on outgoing packets and MUST NOT respond to AccECN
* feedback. Nonetheless, as a Data Receiver it MUST NOT disable AccECN feedback.
*/
tp->ecn_fail = 1;
tp->accecn_no_respond = 1;
break;
case 0x7:
case 0x5:
Expand All @@ -432,6 +438,10 @@ static void tcp_ecn_openreq_child(struct sock *sk,
const struct tcp_request_sock *treq = tcp_rsk(req);
struct tcp_sock *tp = tcp_sk(sk);

/* [CY] 3.1.5. Implications of AccECN Mode - A TCP Server in AccECN mode: MUST NOT set ECT on
* any packet for the rest of the connection, if it has received or sent at least one valid
* SYN or Acceptable SYN/ACK with (AE,CWR,ECE) = (0,0,0) during the handshake.
*/
if (treq->accecn_ok) {
const struct tcphdr *th = (const struct tcphdr *)skb->data;
tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
Expand Down Expand Up @@ -694,9 +704,24 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
*/
if (!tcp_oow_rate_limited(sock_net(sk), skb,
LINUX_MIB_TCPACKSKIPPEDSYNRECV,
&tcp_rsk(req)->last_oow_ack_time) &&
&tcp_rsk(req)->last_oow_ack_time)) {

if (tcp_rsk(req)->accecn_ok) {
/* [CY] 3.1.5. Implications of AccECN Mode - A host in AccECN mode that is feeding back the IP-ECN
* field on a SYN or SYN/ACK: MUST feed back the IP-ECN field on the latest valid SYN or acceptable
* SYN/ACK to arrive.
*/
tcp_rsk(req)->syn_ect_rcv = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK;
if (tcp_accecn_ace(tcp_hdr(skb)) == 0x0) {
/* [CY] 3.1.5. Implications of AccECN Mode - A TCP Server in AccECN mode: MUST NOT set ECT on
* any packet for the rest of the connection, if it has received or sent at least one valid
* SYN or Acceptable SYN/ACK with (AE,CWR,ECE) = (0,0,0) during the handshake
*/
tcp_sk(sk)->ecn_fail = 1;
}
}

!inet_rtx_syn_ack(sk, req)) {
if (!inet_rtx_syn_ack(sk, req)) {
unsigned long expires = jiffies;

expires += min(TCP_TIMEOUT_INIT << req->num_timeout,
Expand All @@ -705,6 +730,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
mod_timer_pending(&req->rsk_timer, expires);
else
req->rsk_timer.expires = expires;
}
}
return NULL;
}
Expand Down
Loading