Last active
July 28, 2024 04:17
-
-
Save love4taylor/111d56cd2b1dc149cba6d80f617f47b1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From 4c04d48064e064ffc5dbba0078564257389ceaaf Mon Sep 17 00:00:00 2001 | |
From: love4taylor <[email protected]> | |
Date: Sun, 28 Jul 2024 13:13:00 +0900 | |
Subject: [PATCH] net-tcp_brutal: make it as a built-in kernel module | |
upstream: https://github.com/apernet/tcp-brutal | |
--- | |
net/ipv4/Kconfig | 9 ++ | |
net/ipv4/Makefile | 1 + | |
net/ipv4/tcp_brutal.c | 316 ++++++++++++++++++++++++++++++++++++++++++ | |
3 files changed, 326 insertions(+) | |
create mode 100644 net/ipv4/tcp_brutal.c | |
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig | |
index 8e94ed7c5..c58e7de8b 100644 | |
--- a/net/ipv4/Kconfig | |
+++ b/net/ipv4/Kconfig | |
@@ -678,6 +678,15 @@ config TCP_CONG_BBR | |
AQM schemes that do not provide a delay signal. It requires the fq | |
("Fair Queue") pacing packet scheduler. | |
+config TCP_CONG_BRUTAL
+	tristate "TCP Brutal"
+	depends on IPV6 || IPV6=n
+	help
+	  TCP Brutal is Hysteria's congestion control algorithm ported to TCP, as
+	  a Linux kernel module. Information about Brutal itself can be found in
+	  the Hysteria documentation. It cannot be built in while IPv6 is modular,
+	  because tcp_brutal.c references tcpv6_prot whenever IPv6 is enabled.
+
choice | |
prompt "Default TCP congestion control" | |
default DEFAULT_CUBIC | |
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile | |
index ec36d2ec0..3ecda04e1 100644 | |
--- a/net/ipv4/Makefile | |
+++ b/net/ipv4/Makefile | |
@@ -48,6 +48,7 @@ obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o | |
obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o | |
obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o | |
obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o | |
+obj-$(CONFIG_TCP_CONG_BRUTAL) += tcp_brutal.o | |
obj-$(CONFIG_TCP_CONG_DCTCP) += tcp_dctcp.o | |
obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o | |
obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o | |
diff --git a/net/ipv4/tcp_brutal.c b/net/ipv4/tcp_brutal.c | |
new file mode 100644 | |
index 000000000..070538551 | |
--- /dev/null | |
+++ b/net/ipv4/tcp_brutal.c | |
@@ -0,0 +1,316 @@ | |
+#include <linux/module.h> | |
+#include <linux/version.h> | |
+#include <net/tcp.h> | |
+#include <linux/math64.h> | |
+ | |
+#if IS_ENABLED(CONFIG_IPV6) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0) | |
+#include <net/transp_v6.h> | |
+#else | |
+#warning IPv6 support is disabled. Brutal will only work with IPv4. \ | |
+ Please ensure you have enabled CONFIG_IPV6 in your kernel config \ | |
+ and your kernel version is greater than 5.8. | |
+#endif | |
+ | |
+#define INIT_PACING_RATE 125000 // 1 Mbps | |
+#define INIT_CWND_GAIN 20 | |
+ | |
+#define MIN_PACING_RATE 62500 // 500 Kbps | |
+#define MIN_CWND_GAIN 5 | |
+#define MAX_CWND_GAIN 80 | |
+#define MIN_CWND 4 | |
+ | |
+#ifndef ICSK_CA_PRIV_SIZE | |
+#error "ICSK_CA_PRIV_SIZE not defined" | |
+#else | |
+// This is the size of the private data area in struct inet_connection_sock | |
+// The size varies between Linux versions | |
+// We use it to calculate the number of slots in the packet info array | |
+#define RAW_PKT_INFO_SLOTS ((ICSK_CA_PRIV_SIZE - 2 * sizeof(u64)) / sizeof(struct brutal_pkt_info)) | |
+#define PKT_INFO_SLOTS (RAW_PKT_INFO_SLOTS < 3 ? 3 : (RAW_PKT_INFO_SLOTS > 5 ? 5 : RAW_PKT_INFO_SLOTS)) | |
+#endif | |
+ | |
+#define MIN_PKT_INFO_SAMPLES 50 | |
+#define MIN_ACK_RATE_PERCENT 80 | |
+ | |
+#define TCP_BRUTAL_PARAMS 23301 | |
+ | |
+// Return a timestamp in whole seconds, used to bucket packet statistics.
+// The source of the timestamp depends on the kernel version:
+//   >= 4.13: tp->tcp_mstamp divided by USEC_PER_SEC
+//   4.12:    tp->tcp_mstamp.stamp_us divided by USEC_PER_SEC
+//   older:   the jiffies-based tcp_time_stamp divided by HZ
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0)
+static u64 tcp_sock_get_sec(const struct tcp_sock *tp)
+{
+    return div_u64(tp->tcp_mstamp, USEC_PER_SEC);
+}
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 12, 0)
+// see https://github.com/torvalds/linux/commit/9a568de4818dea9a05af141046bd3e589245ab83
+static u64 tcp_sock_get_sec(const struct tcp_sock *tp)
+{
+    return div_u64(tp->tcp_mstamp.stamp_us, USEC_PER_SEC);
+}
+#else
+#include <linux/jiffies.h>
+static u64 tcp_sock_get_sec(const struct tcp_sock *tp)
+{
+    // jiffies_to_usecs() returns a 32-bit unsigned int, so converting to
+    // microseconds first made the seconds value wrap roughly every 71
+    // minutes. Dividing the tick count by HZ avoids that intermediate.
+    return div_u64(tcp_time_stamp, HZ);
+}
+#endif
+ | |
+// Per-second delivery statistics; one entry of the ring kept in struct brutal.
+struct brutal_pkt_info {
+    u64 sec;    // Second (as returned by tcp_sock_get_sec) this entry covers
+    u32 acked;  // Sum of rs->acked_sacked recorded during that second
+    u32 losses; // Sum of rs->losses recorded during that second
+};
+ | |
+// Per-socket state, stored in the area returned by inet_csk_ca().
+// brutal_register() checks at build time that it fits in ICSK_CA_PRIV_SIZE.
+struct brutal {
+    u64 rate;      // Target send rate, copied from brutal_params.rate
+    u32 cwnd_gain; // CWND gain, copied from brutal_params.cwnd_gain
+
+    // Ring of per-second statistics, indexed by second modulo PKT_INFO_SLOTS
+    struct brutal_pkt_info slots[PKT_INFO_SLOTS];
+};
+ | |
+// Payload of the TCP_BRUTAL_PARAMS socket option. The layout is packed and
+// is read directly from the caller's buffer, so field order and sizes must
+// not change.
+struct brutal_params {
+    u64 rate;      // Send rate in bytes per second
+    u32 cwnd_gain; // CWND gain in tenths (10=1.0)
+} __packed;
+ | |
+// Copies of the stock TCP proto tables with setsockopt redirected to Brutal.
+// They are filled in by brutal_register() and installed per socket by
+// brutal_init().
+static struct proto tcp_prot_override __ro_after_init;
+#if defined(_TRANSP_V6_H)
+static struct proto tcpv6_prot_override __ro_after_init;
+#endif // _TRANSP_V6_H
+ | |
+// Apply a TCP_BRUTAL_PARAMS request to a socket.
+//
+// Copies a struct brutal_params from the caller's buffer, validates it and
+// stores rate and cwnd_gain in the socket's congestion control private area.
+//
+// Returns 0 on success, -EINVAL if optlen is smaller than the struct, the
+// rate is below MIN_PACING_RATE or cwnd_gain is outside
+// [MIN_CWND_GAIN, MAX_CWND_GAIN], and -EFAULT if the copy fails.
+//
+// NOTE(review): the fields are written without taking the socket lock in
+// this function - confirm whether callers serialize against brutal_main().
+#ifdef _LINUX_SOCKPTR_H
+static int brutal_set_params(struct sock *sk, sockptr_t optval, unsigned int optlen)
+#else
+static int brutal_set_params(struct sock *sk, char __user *optval, unsigned int optlen)
+#endif
+{
+    struct brutal *brutal = inet_csk_ca(sk);
+    struct brutal_params params;
+
+    if (optlen < sizeof(params))
+        return -EINVAL;
+
+    // The destination must be the address of the local struct: "&params".
+#ifdef _LINUX_SOCKPTR_H
+    if (copy_from_sockptr(&params, optval, sizeof(params)))
+        return -EFAULT;
+#else
+    if (copy_from_user(&params, optval, sizeof(params)))
+        return -EFAULT;
+#endif
+
+    // Sanity checks
+    if (params.rate < MIN_PACING_RATE)
+        return -EINVAL;
+    if (params.cwnd_gain < MIN_CWND_GAIN || params.cwnd_gain > MAX_CWND_GAIN)
+        return -EINVAL;
+
+    brutal->rate = params.rate;
+    brutal->cwnd_gain = params.cwnd_gain;
+
+    return 0;
+}
+ | |
+// setsockopt hook installed on IPv4 sockets using Brutal: TCP_BRUTAL_PARAMS
+// at level IPPROTO_TCP is handled by brutal_set_params(); every other option
+// is passed on to tcp_prot.setsockopt unchanged.
+#ifdef _LINUX_SOCKPTR_H
+static int brutal_tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen)
+#else
+static int brutal_tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen)
+#endif
+{
+    const bool is_brutal_opt = (level == IPPROTO_TCP) && (optname == TCP_BRUTAL_PARAMS);
+
+    if (!is_brutal_opt)
+        return tcp_prot.setsockopt(sk, level, optname, optval, optlen);
+
+    return brutal_set_params(sk, optval, optlen);
+}
+ | |
+#ifdef _TRANSP_V6_H
+// setsockopt hook installed on IPv6 sockets using Brutal: TCP_BRUTAL_PARAMS
+// at level IPPROTO_TCP is handled by brutal_set_params(); every other option
+// is passed on to tcpv6_prot.setsockopt unchanged.
+#ifdef _LINUX_SOCKPTR_H
+static int brutal_tcpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen)
+#else // _LINUX_SOCKPTR_H
+static int brutal_tcpv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen)
+#endif // _LINUX_SOCKPTR_H
+{
+    const bool is_brutal_opt = (level == IPPROTO_TCP) && (optname == TCP_BRUTAL_PARAMS);
+
+    if (!is_brutal_opt)
+        return tcpv6_prot.setsockopt(sk, level, optname, optval, optlen);
+
+    return brutal_set_params(sk, optval, optlen);
+}
+#endif // _TRANSP_V6_H
+ | |
+// Congestion control .init hook: prepare a socket for Brutal.
+//
+// Swaps the socket's proto table for the override whose setsockopt accepts
+// TCP_BRUTAL_PARAMS, sets ssthresh to TCP_INFINITE_SSTHRESH, loads the
+// initial rate and gain, clears the statistics ring and (on 4.13+) requests
+// pacing.
+static void brutal_init(struct sock *sk)
+{
+    struct tcp_sock *tp = tcp_sk(sk);
+    struct brutal *brutal = inet_csk_ca(sk);
+
+    switch (sk->sk_family)
+    {
+    case AF_INET:
+        sk->sk_prot = &tcp_prot_override;
+        break;
+#ifdef _TRANSP_V6_H
+    case AF_INET6:
+        sk->sk_prot = &tcpv6_prot_override;
+        break;
+#endif // _TRANSP_V6_H
+    default:
+        // Unexpected family: warn once instead of halting the kernel with
+        // BUG(). sk_prot is left untouched, so TCP_BRUTAL_PARAMS cannot be
+        // set on this socket and the initial rate and gain stay in effect.
+        WARN_ON_ONCE(1);
+        break;
+    }
+
+    tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
+
+    brutal->rate = INIT_PACING_RATE;
+    brutal->cwnd_gain = INIT_CWND_GAIN;
+
+    memset(brutal->slots, 0, sizeof(brutal->slots));
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0)
+    // Pacing is REQUIRED for Brutal to work, but Linux only has internal pacing after 4.13.
+    // For kernels prior to 4.13, you MUST add fq pacing manually (e.g. "tc qdisc add dev eth0 root fq pacing")
+    // or rate control will be broken.
+    // See https://github.com/torvalds/linux/commit/218af599fa635b107cfe10acf3249c4dfe5e4123 for details.
+    cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED);
+#endif
+}
+ | |
+// Read the congestion window of a TCP socket.
+// Local copy of the tcp.h accessor, kept for compatibility reasons.
+static inline u32 brutal_tcp_snd_cwnd(const struct tcp_sock *tp)
+{
+    const u32 cwnd = tp->snd_cwnd;
+
+    return cwnd;
+}
+ | |
+// Store a new congestion window, warning once if the value is zero or would
+// be negative when viewed as an int.
+// Local copy of the tcp.h accessor, kept for compatibility reasons.
+static inline void brutal_tcp_snd_cwnd_set(struct tcp_sock *tp, u32 val)
+{
+    const int as_signed = (int)val;
+
+    WARN_ON_ONCE(as_signed <= 0);
+    tp->snd_cwnd = val;
+}
+ | |
+// Recompute the congestion window and pacing rate of a Brutal socket.
+//
+// The configured rate is divided by the ack rate measured over the recent
+// statistics slots, so that the delivered rate approaches the configured
+// one despite losses. The window is then sized as
+// rate * RTT / MSS * cwnd_gain / 10, kept at or above MIN_CWND and capped
+// by tp->snd_cwnd_clamp; the pacing rate is capped by sk_max_pacing_rate.
+static void brutal_update_rate(struct sock *sk)
+{
+    struct tcp_sock *tp = tcp_sk(sk);
+    struct brutal *brutal = inet_csk_ca(sk);
+
+    u64 sec = tcp_sock_get_sec(tp);
+    // Oldest second that still counts. Guard the subtraction so that it
+    // cannot wrap around while sec is smaller than the number of slots.
+    u64 min_sec = sec > PKT_INFO_SLOTS ? sec - PKT_INFO_SLOTS : 0;
+    u32 acked = 0, losses = 0;
+    u32 ack_rate; // Scaled by 100 (100=1.00) as kernel doesn't support float
+    u64 rate = brutal->rate;
+    u64 cwnd; // 64-bit so that rate / 1000 * rtt_ms cannot be truncated to 32 bits
+    unsigned int i;
+
+    u32 mss = tp->mss_cache;
+    u32 rtt_ms = (tp->srtt_us >> 3) / USEC_PER_MSEC;
+    if (!rtt_ms)
+        rtt_ms = 1;
+
+    for (i = 0; i < PKT_INFO_SLOTS; i++)
+    {
+        if (brutal->slots[i].sec >= min_sec)
+        {
+            acked += brutal->slots[i].acked;
+            losses += brutal->slots[i].losses;
+        }
+    }
+    if (acked + losses < MIN_PKT_INFO_SAMPLES)
+        ack_rate = 100; // Too few samples to trust the measurement
+    else
+    {
+        ack_rate = acked * 100 / (acked + losses);
+        if (ack_rate < MIN_ACK_RATE_PERCENT)
+            ack_rate = MIN_ACK_RATE_PERCENT;
+    }
+
+    // Compensate for losses: send faster by the inverse of the ack rate
+    rate *= 100;
+    rate = div_u64(rate, ack_rate);
+
+    // Divide before multiplying to keep intermediates small; every step is
+    // done in 64 bits and the result is only narrowed after clamping.
+    cwnd = div_u64(rate, MSEC_PER_SEC);
+    cwnd *= rtt_ms;
+    cwnd = div_u64(cwnd, mss);
+    cwnd *= brutal->cwnd_gain;
+    cwnd = div_u64(cwnd, 10);
+    cwnd = max_t(u64, cwnd, MIN_CWND);
+    cwnd = min_t(u64, cwnd, tp->snd_cwnd_clamp);
+
+    // cwnd is now at most snd_cwnd_clamp (a u32), so the cast is lossless
+    brutal_tcp_snd_cwnd_set(tp, (u32)cwnd);
+
+    WRITE_ONCE(sk->sk_pacing_rate, min_t(u64, rate, READ_ONCE(sk->sk_max_pacing_rate)));
+}
+ | |
+// Congestion control .cong_control hook: record the rate sample in the
+// statistics slot of the current second, then refresh cwnd and pacing rate.
+// The hook signature gained the ack and flag arguments in Linux 6.10.
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 10, 0)
+static void brutal_main(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs)
+#else
+static void brutal_main(struct sock *sk, const struct rate_sample *rs)
+#endif
+{
+    struct brutal *brutal = inet_csk_ca(sk);
+    struct brutal_pkt_info *bucket;
+    u64 now_sec;
+    u32 idx;
+
+    // Ignore invalid rate samples
+    if (rs->delivered < 0 || rs->interval_us <= 0)
+        return;
+
+    // The slot index is the current second modulo the number of slots
+    now_sec = tcp_sock_get_sec(tcp_sk(sk));
+    div_u64_rem(now_sec, PKT_INFO_SLOTS, &idx);
+    bucket = &brutal->slots[idx];
+
+    if (bucket->sec != now_sec)
+    {
+        // Uninitialized or expired slot: restart it for this second
+        bucket->sec = now_sec;
+        bucket->acked = 0;
+        bucket->losses = 0;
+    }
+
+    bucket->acked += rs->acked_sacked;
+    bucket->losses += rs->losses;
+
+    brutal_update_rate(sk);
+}
+ | |
+// Congestion control .undo_cwnd hook: report the current window unchanged.
+static u32 brutal_undo_cwnd(struct sock *sk)
+{
+    const struct tcp_sock *tp = tcp_sk(sk);
+
+    return brutal_tcp_snd_cwnd(tp);
+}
+ | |
+// Congestion control .ssthresh hook: report the stored threshold unchanged.
+static u32 brutal_ssthresh(struct sock *sk)
+{
+    const struct tcp_sock *tp = tcp_sk(sk);
+
+    return tp->snd_ssthresh;
+}
+ | |
+// Congestion control descriptor registered under the name "brutal".
+static struct tcp_congestion_ops tcp_brutal_ops = {
+    .name = "brutal",
+    .owner = THIS_MODULE,
+    .flags = TCP_CONG_NON_RESTRICTED,
+
+    .init = brutal_init,
+    .cong_control = brutal_main,
+    .ssthresh = brutal_ssthresh,
+    .undo_cwnd = brutal_undo_cwnd,
+};
+ | |
+// Module init: build the proto override tables and register the algorithm.
+// Returns the result of tcp_register_congestion_control().
+static int __init brutal_register(void)
+{
+    // Build-time checks: at least one statistics slot, and the private
+    // state must fit in the congestion control area of the socket.
+    BUILD_BUG_ON(PKT_INFO_SLOTS < 1);
+    BUILD_BUG_ON(sizeof(struct brutal) > ICSK_CA_PRIV_SIZE);
+
+#ifdef _TRANSP_V6_H
+    // IPv6: copy the stock table, then redirect setsockopt
+    tcpv6_prot_override = tcpv6_prot;
+    tcpv6_prot_override.setsockopt = brutal_tcpv6_setsockopt;
+#endif // _TRANSP_V6_H
+
+    // IPv4: copy the stock table, then redirect setsockopt
+    tcp_prot_override = tcp_prot;
+    tcp_prot_override.setsockopt = brutal_tcp_setsockopt;
+
+    return tcp_register_congestion_control(&tcp_brutal_ops);
+}
+ | |
+// Module exit: remove "brutal" from the registered congestion controls.
+static void __exit brutal_unregister(void)
+{
+    struct tcp_congestion_ops *ops = &tcp_brutal_ops;
+
+    tcp_unregister_congestion_control(ops);
+}
+ | |
+// Module metadata
+MODULE_DESCRIPTION("TCP Brutal");
+MODULE_AUTHOR("Aperture Internet Laboratory");
+MODULE_VERSION("1.0.2");
+MODULE_LICENSE("GPL");
+
+// Entry and exit points
+module_init(brutal_register);
+module_exit(brutal_unregister);
-- | |
2.45.2 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment