xdp-tutorial packet03-redirectingをやってみた
前提
alias t='sudo ../testenv/testenv.sh'
Assignment1
ICMP echoサーバーを実装するというもの。
テスト環境構築
t setup --name test --legacy-ip
コード修正
まずは以下を実施する。
swap_src_dst_mac()
ã®å®è£swap_src_dst_ipv6()
ã®å®è£swap_src_dst_ipv4()
ã®å®è£
--- a/packet03-redirecting/xdp_prog_kern.c +++ b/packet03-redirecting/xdp_prog_kern.c @@ -34,16 +34,28 @@ static __always_inline void swap_src_dst_mac(struct ethhdr *eth) { /* Assignment 1: swap source and destination addresses in the eth. * For simplicity you can use the memcpy macro defined above */ + unsigned char tmp[ETH_ALEN]; + memcpy(tmp, eth->h_dest, sizeof(eth->h_dest)); + memcpy(eth->h_dest, eth->h_source, sizeof(eth->h_dest)); + memcpy(eth->h_source, tmp, sizeof(eth->h_dest)); } static __always_inline void swap_src_dst_ipv6(struct ipv6hdr *ipv6) { /* Assignment 1: swap source and destination addresses in the iphv6dr */ + struct in6_addr tmp; + memcpy(&tmp, &ipv6->daddr, sizeof(struct in6_addr)); + memcpy(&ipv6->daddr, &ipv6->saddr, sizeof(struct in6_addr)); + memcpy(&ipv6->saddr, &tmp, sizeof(struct in6_addr)); } static __always_inline void swap_src_dst_ipv4(struct iphdr *iphdr) { /* Assignment 1: swap source and destination addresses in the iphdr */ + __be32 tmp; + memcpy(&tmp, &iphdr->daddr, sizeof(iphdr->daddr)); + memcpy(&iphdr->daddr, &iphdr->saddr, sizeof(iphdr->daddr)); + memcpy(&iphdr->saddr, &tmp, sizeof(iphdr->daddr)); }
続いて以下を実装する。
- icmp typeフィールドの変更
- icmp header checksumの更新
@@ -105,6 +117,16 @@ int xdp_icmp_echo_func(struct xdp_md *ctx) /* Assignment 1: patch the packet and update the checksum. You can use * the echo_reply variable defined above to fix the ICMP Type field. */ + struct icmphdr_common before = *icmphdr; + icmphdr->type = echo_reply; + __u32 size = sizeof(struct icmphdr); + if ((void *)icmphdr + 32 > data_end) { + goto out; + } + __u32 sum = bpf_csum_diff((__be32 *)&before, size, (__be32 *)icmphdr, size, 0); + sum = (sum & 0xffff) + (sum >> 16); + __u16 new_checksum = ~sum; + icmphdr->cksum = new_checksum; bpf_printk("echo_reply: %d", echo_reply);
checksumの計算はbpf_csum_diff()
を使う。ただ、これだと32bitのチェックサムになるので、これを16bitチェックサムに縮める必要がある。
これについてはRFC1071のサンプルコードに従ってやればいい。
TEST
XDPプログラムをロードする。
sudo ./xdp-loader unload test --all t exec -n test -- ./xdp-loader load --prog-name xdp_pass_func veth0 xdp_prog_kern.o t load -n test -- --prog-name xdp_icmp_echo_func xdp_prog_kern.o
pingでicmp replyが返るか検証する。
$sudo ../testenv/testenv.sh enter --name veth-packet03 $ping 10.11.1.1 -c 3
icmp replyパケットがXDPプログラムで作られたものであることを確認するためにbpf_printk()
の出力を確認する。
$cat /sys/kernel/debug/tracing/trace_pipe ping-5561 [001] ..s21 47419.968928: bpf_trace_printk: echo_reply: 0 ping-5561 [001] ..s21 47420.982531: bpf_trace_printk: echo_reply: 0 ping-5561 [001] ..s21 47422.006479: bpf_trace_printk: echo_reply: 0
Assignment2
テスト環境構築
以ä¸ã®ãããªå½¢ãä½ã£ã¦ãããã
t setup --name left t setup --name right
コード修正
宛先MACを書き換えて、egressのifindexを指定してbpf_redirect()
を呼び出すだけ。
--- a/packet03-redirecting/xdp_prog_kern.c +++ b/packet03-redirecting/xdp_prog_kern.c @@ -146,8 +146,8 @@ int xdp_redirect_func(struct xdp_md *ctx) struct ethhdr *eth; int eth_type; int action = XDP_PASS; - /* unsigned char dst[ETH_ALEN] = {} */ /* Assignment 2: fill in with the MAC address of the left inner interface */ - /* unsigned ifindex = 0; */ /* Assignment 2: fill in with the ifindex of the left interface */ + unsigned char dst[ETH_ALEN] = {0x56, 0x78, 0x59, 0x76, 0xf0, 0x33}; + unsigned int ifindex = 5; /* These keep track of the next header type and iterator pointer */ nh.pos = data; @@ -157,8 +157,8 @@ int xdp_redirect_func(struct xdp_md *ctx) if (eth_type == -1) goto out; - /* Assignment 2: set a proper destination address and call the - * bpf_redirect() with proper parameters, action = bpf_redirect(...) */ + memcpy(eth->h_dest, dst, ETH_ALEN); + action = bpf_redirect(ifindex, 0); out: return xdp_stats_record_action(ctx, action);
TEST
XDPプログラムをrightインターフェースにロードしておく。
t load -n right -- --prog-name xdp_redirect_func xdp_prog_kern.o
right namespaceã®veth0ããleft namespaceã®veth0ã¸pingãå®è¡ãã¦ããã
t enter --name right # ping -6 fc00:dead:cafe:1::2
以ä¸ã確èªãã¦ããã°ããã
bpf_redirect()
ã®è¿ãå¤ãXDP_REDIRECTã«ãªã£ã¦ããã- left network namespaceã®veth0ã«å°éãããã±ããã®source MACãright network namespaceã®veth0ã«ãªã£ã¦ããã
bpf_redirect()ã®è¿ãå¤ã«ã¤ãã¦ã¯åé¡æã«ããéãxdp_statsã§ç¢ºèªãã¦ãã£ã¦ãããããbpf_printkã§ããªã³ããããã°ãã¦ãç°¡åã«ç¢ºèªã§ããã
ãã±ããã«ã¤ãã¦ã¯ä»¥ä¸ã®ç»åã®ããã«sorce macãright network namespaceã®veth0ã®ãã®ã«ãªã£ã¦ãããã¨ã確èªã§ããã
å·®åã®ããã«XDPããã°ã©ã ããã¼ãããªãç¶æ ãã±ããã以ä¸ã«è¼ãã¦ãããå½ããåã ããsource macã¯host namespaceã®leftã®ãã®ã«ãªã£ã¦ããã
Assignment3
Assignment2ではinterface numberとMACをハードコードしたが、そうしないようにしていく。
といっても、これはカーネルの情報を参照するようにするみたいな話ではなくて（それはAssignment4でやる）、ebpf mapから値を参照するようにするというもの。
テスト環境構築
構成はAssignment2と同じ
t setup --name left t setup --name right
コード修正
userland側の修正。
--- a/packet03-redirecting/xdp_prog_user.c +++ b/packet03-redirecting/xdp_prog_user.c @@ -27,6 +27,7 @@ static const char *__doc__ = "XDP redirect helper\n" #include "../common/xdp_stats_kern_user.h" static const struct option_wrapper long_options[] = { {{"help", no_argument, NULL, 'h' }, @@ -52,9 +53,13 @@ static const struct option_wrapper long_options[] = { static int parse_mac(char *str, unsigned char mac[ETH_ALEN]) { - /* Assignment 3: parse a MAC address in this function and place the - * result in the mac array */ - + int i, j; + char tmp[3]; + tmp[2] = '\0'; + for (i = 0, j = 0; i < strlen(str); i+=3, j++) { + memcpy(tmp, str + i, 2); + mac[j] = strtoul(tmp ,NULL, 16); + } return 0; } @@ -126,6 +131,10 @@ int main(int argc, char **argv) /* Assignment 3: open the tx_port map corresponding to the cfg.ifname interface */ map_fd = -1; + map_fd = open_bpf_map_file(pin_dir, "tx_port", NULL); + if (map_fd < 0) { + return EXIT_FAIL_BPF; + } printf("map dir: %s\n", pin_dir); @@ -137,6 +146,10 @@ int main(int argc, char **argv) /* Assignment 3: open the redirect_params map corresponding to the cfg.ifname interface */ map_fd = -1; + map_fd = open_bpf_map_file(pin_dir, "redirect_params", NULL); + if (map_fd < 0) { + return EXIT_FAIL_BPF; + } /* Setup the mapping containing MAC addresses */ if (write_iface_params(map_fd, src, dest) < 0) {
open_bpf_map_file()
はbasic04でも出てきたpinningされたbpf_mapファイルをopenする関数。
TEST
t load -n left -- --prog-name xdp_redirect_map_func xdp_prog_kern.o t load -n right -- --prog-name xdp_redirect_map_func xdp_prog_kern.o t exec -n left -- ./xdp-loader load --prog-name xdp_pass_func veth0 xdp_prog_kern.o t exec -n right -- ./xdp-loader load --prog-name xdp_pass_func veth0 xdp_prog_kern.o t redirect right left
../testenv/testenv.sh redirect
で実行される内容は以下の通り。
local src="$1" local dest="$2" local src_mac=$(ip netns exec $src cat /sys/class/net/veth0/address) local dest_mac=$(ip netns exec $dest cat /sys/class/net/veth0/address) # set bidirectional forwarding ./xdp_prog_user -d $src -r $dest --src-mac $src_mac --dest-mac $dest_mac ./xdp_prog_user -d $dest -r $src --src-mac $dest_mac --dest-mac $src_mac
この状態でrightのveth0からleftのveth0にpingを打ってやる。
# ping -6 fc00:dead:cafe:1::2
xdp_statsを確認してやると、rightでもleftでもXDP_REDIRECTしていることがわかる。
âsudo ./xdp_stats -d left
ã®åºå
XDP-action XDP_ABORTED 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000369 XDP_DROP 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000370 XDP_PASS 2 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000370 XDP_TX 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000370 XDP_REDIRECT 6 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000370
âsudo ./xdp_stats -d right
ã®åºå
XDP-action XDP_ABORTED 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000545 XDP_DROP 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000544 XDP_PASS 3 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000544 XDP_TX 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000545 XDP_REDIRECT 6 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000545
Assignment4
Assignment3ではxdp_prog_userを使って対象のIFとMACを手入力していたので、これではハードコーディングとさして変わらない。
Assignment4ではパケットの宛先アドレスをkeyにしてkernelのFIBから、redirectするifindex、src mac/dst macを適切に解決するようにする。
テスト環境構築
t setup -n uno --legacy-ip t setup -n dos --legacy-ip t setup -n tres --legacy-ip
コード修正
xdp_router_func()
を実装していく。原則、コメントに則って行う。
ip_decrease_ttl()
のチェックサムの計算のところはkernelのsample codeのやつ（参考リンク3）をほぼコピペしているだけ。
--- a/packet03-redirecting/xdp_prog_kern.c +++ b/packet03-redirecting/xdp_prog_kern.c @@ -180,7 +180,10 @@ out: /* from include/net/ip.h */ static __always_inline int ip_decrease_ttl(struct iphdr *iph) { - /* Assignment 4: see samples/bpf/xdp_fwd_kern.c from the kernel */ + __u32 check = (__u32)iph->check; + + check += (__u32)bpf_htons(0x0100); + iph->check = (__u16)(check + (check >= 0xFFFF)); return --iph->ttl; } @@ -218,10 +221,13 @@ int xdp_router_func(struct xdp_md *ctx) goto out; /* Assignment 4: fill the fib_params structure for the AF_INET case */ + fib_params.family = AF_INET; + fib_params.l4_protocol = iph->protocol; + fib_params.ipv4_src = iph->saddr; + fib_params.ipv4_dst = iph->daddr; } else if (h_proto == bpf_htons(ETH_P_IPV6)) { - /* These pointers can be used to assign structures instead of executing memcpy: */ - /* struct in6_addr *src = (struct in6_addr *) fib_params.ipv6_src; */ - /* struct in6_addr *dst = (struct in6_addr *) fib_params.ipv6_dst; */ + struct in6_addr *src = (struct in6_addr *) fib_params.ipv6_src; + struct in6_addr *dst = (struct in6_addr *) fib_params.ipv6_dst; ip6h = data + nh_off; if (ip6h + 1 > data_end) { @@ -233,6 +239,10 @@ int xdp_router_func(struct xdp_md *ctx) goto out; /* Assignment 4: fill the fib_params structure for the AF_INET6 case */ + fib_params.family = AF_INET6; + fib_params.l4_protocol = ip6h->nexthdr; + *src = ip6h->saddr; + *dst = ip6h->daddr; } else { goto out; } @@ -247,11 +257,9 @@ int xdp_router_func(struct xdp_md *ctx) else if (h_proto == bpf_htons(ETH_P_IPV6)) ip6h->hop_limit--; - /* Assignment 4: fill in the eth destination and source - * addresses and call the bpf_redirect function */ - /* memcpy(eth->h_dest, ???, ETH_ALEN); */ - /* memcpy(eth->h_source, ???, ETH_ALEN); */ - /* action = bpf_redirect(???, 0); */ + memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN); + memcpy(eth->h_source, fib_params.smac, ETH_ALEN); + action = bpf_redirect(fib_params.ifindex, 0); break; case BPF_FIB_LKUP_RET_BLACKHOLE: 
/* dest is blackholed; can be dropped */ case BPF_FIB_LKUP_RET_UNREACHABLE: /* dest is unreachable; can be dropped */
TEST
XDPプログラムをロードする。
t load -n uno -- --prog-name xdp_router_func xdp_prog_kern.o t load -n dos -- --prog-name xdp_router_func xdp_prog_kern.o t load -n tres -- --prog-name xdp_router_func xdp_prog_kern.o t exec -n uno -- ./xdp-loader load --prog-name xdp_pass_func veth0 xdp_prog_kern.o t exec -n dos -- ./xdp-loader load --prog-name xdp_pass_func veth0 xdp_prog_kern.o t exec -n tres -- ./xdp-loader load --prog-name xdp_pass_func veth0 xdp_prog_kern.o
各インターフェースでxdp_statsを動かす。
sudo ./xdp_stats -d uno sudo ./xdp_stats -d dos sudo ./xdp_stats -d tres
各namespaceのvethから別のnamespaceにあるvethにpingを打つと、それぞれのIFでXDP_REDIRECTがカウントアップされる。
たとえばunoのvethからtresのvethにpingを打つ。
$ sudo ../testenv/testenv.sh enter --name uno # ping 10.11.3.2 -c 1
すると、unoとtresのそれぞれでXDP_REDIRECTがカウントアップされるのを確認できる。
âsudo ./xdp_stats -d uno
ã®åºå
XDP-action XDP_ABORTED 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000415 XDP_DROP 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000414 XDP_PASS 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000414 XDP_TX 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000414 XDP_REDIRECT 1 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000414
âsudo ./xdp_stats -d tres
ã®åºå
XDP-action XDP_ABORTED 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000365 XDP_DROP 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000365 XDP_PASS 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000366 XDP_TX 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000366 XDP_REDIRECT 1 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000366
補足
packet03ãè¡ãã«å½ããxdp-tutorialã«ä»¥ä¸ï¼ã¤ã®ä¿®æ£ãè¡ã£ãã
参考リンク
xdp-tutorial packet02-rewritingをやってみた
bpf_xdp_adjust_head()
はパケットの先頭ポインタをずらすことができる。
ã¤ã¾ãããã±ããé·ãé·ããããããã±ããé·ãçãããã¨ãã«ä½¿ãããï¼ä¾ãã°ãencapå¦çãdecapå¦çï¼bpf_xdp_adjust_headã®ãããã©ã¡ã¼ã¿ã«æ£ã®å¤ãä¸ããã¨ããã±ããé·ãçããªããè² ã®å¤ãä¸ããã¨ãã±ããé·ãé·ããªãã
イメージとしてはこんな感じ。
098 | 099 | 100 | 101 | ... | 500 ^ ^ - <--- | ---> + - <--- | ---> + (start-pointer) (end-pointer)
パケットの末尾のポインタをずらすbpf_xdp_adjust_tail()
ってのもある。
Assignment1
TCP/UDPパケットのdst portを-1する。
テスト環境構築
sudo ../testenv/testenv.sh setup --name veth-packet02
コード修正
- ã³ã¼ãä¸ã®ã³ã¡ã³ããè¦ãã¨ã
xdp_port_rewrite_func()
ã«å¦çãå®è£ ãããããã xdp_port_rewrite_func()
ã«ã¯packet01-pasingã®ã¨ãã«å®è£ ããæ©è½ãè¨è¼ããã¦ãããã¤ã¾ããIP headerã¾ã§ã®ãã¼ã¹ã«ã¯ãã®é¢æ°ã®å¦çã®ä¸é¨ããã®ã¾ã¾ã³ããããã°ããã- ãããã³ã«ãã¨ã®ãã¼ã¹å¦çã¯packet01-parsingã®ã¨ãã¯èªåã§ãã¹ã¦å®è£
ãã¦ããããä»åã¯ãããæä¾ããã¦ãããå®è£
ã¯
common/parsing_helpers.h
ã«åå¨ãã¦ããããããã³ã«ãã¨ã®ãã¼ã¹å¦çã¯ååãããå©ç¨ãããâ»ç§ãããã§å®è£ ãããã®ã¨ã¯å¾®å¦ã«å·®ç°ããããã注æã
--- a/packet02-rewriting/xdp_prog_kern.c +++ b/packet02-rewriting/xdp_prog_kern.c @@ -57,7 +57,54 @@ static __always_inline int vlan_tag_push(struct xdp_md *ctx, SEC("xdp") int xdp_port_rewrite_func(struct xdp_md *ctx) { - return XDP_PASS; + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + + /* Default action XDP_PASS, imply everything we couldn't parse, or that + * we don't want to deal with, we just pass up the stack and let the + * kernel deal with it. + */ + __u32 action = XDP_PASS; /* Default action */ + + /* These keep track of the next header type and iterator pointer */ + struct hdr_cursor nh; + int nh_type; + nh.pos = data; + + struct ethhdr *eth; + + /* Packet parsing in steps: Get each header one at a time, aborting if + * parsing fails. Each helper function does sanity checking (is the + * header type in the packet correct?), and bounds checking. + */ + nh_type = parse_ethhdr(&nh, data_end, ð); + + if (nh_type == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ip6h; + nh_type = parse_ip6hdr(&nh, data_end, &ip6h); + } else if (nh_type == bpf_htons(ETH_P_IP)) { + struct iphdr *iph; + nh_type = parse_iphdr(&nh, data_end, &iph); + } + if (nh_type == IPPROTO_TCP) { + struct tcphdr *tcph; + int hdrlen; + if ((hdrlen = parse_tcphdr(&nh, data_end, &tcph)) < 0) { + goto out; + } + tcph->dest = bpf_htons(bpf_ntohs(tcph->dest) - 1); + //return XDP_PASS; + } else if (nh_type == IPPROTO_UDP) { + struct udphdr *udph; + int hdrlen; + if ((hdrlen = parse_udphdr(&nh, data_end, &udph)) < 0) { + goto out; + } + udph->dest = bpf_htons(bpf_ntohs(udph->dest) - 1); + //return XDP_PASS; + } + out: + return xdp_stats_record_action(ctx, action); } /* VLAN swapper; will pop outermost VLAN tag if it exists, otherwise push a new
TEST
XDPプログラムをロードする。
make sudo ./xdp-loader unload veth-packet02 --all sudo ./xdp-loader load --prog-name xdp_port_rewrite_func --mode skb veth-packet02 xdp_prog_kern.o
dst portを2000に指定してudpパケットを送信する。
sudo ../testenv/testenv.sh exec -- socat - 'udp6:[fc00:dead:cafe:1::1]:2000'
veth-packet02でパケットをキャプチャするとdst portが2000から1999に変わっていることが分かる。
Assignment2
一番外側にあるVLANタグを取り外す。
テスト環境構築
sudo ../testenv/testenv.sh setup --name veth-packet02 --vlan
コード修正
vlan_tag_pop()
が定義されているので、処理はここに実装する。
--- a/packet02-rewriting/xdp_prog_kern.c +++ b/packet02-rewriting/xdp_prog_kern.c @@ -3,6 +3,8 @@ #include <linux/in.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> +#include <pcap/vlan.h> +#include <string.h> // The parsing helper functions from the packet01 lesson have moved here #include "../common/parsing_helpers.h" @@ -16,30 +18,38 @@ */ static __always_inline int vlan_tag_pop(struct xdp_md *ctx, struct ethhdr *eth) { - /* void *data_end = (void *)(long)ctx->data_end; struct ethhdr eth_cpy; - struct vlan_hdr *vlh; + struct vlan_tag *vlh; __be16 h_proto; - */ int vlid = -1; - /* Check if there is a vlan tag to pop */ - /* Still need to do bounds checking */ /* Save vlan ID for returning, h_proto for updating Ethernet header */ + memcpy(&vlh, (void *)eth + sizeof(eth->h_dest) + sizeof(eth->h_source), sizeof(struct vlan_tag)); + vlid = bpf_ntohs(vlh->vlan_tci) & 0x0fff; + h_proto = *(__be16 *)((void *)eth + sizeof(eth->h_dest) + sizeof(eth->h_source) + sizeof(struct vlan_tag)); /* Make a copy of the outer Ethernet header before we cut it off */ + memcpy(ð_cpy, eth, sizeof(struct ethhdr)); /* Actually adjust the head pointer */ + if (bpf_xdp_adjust_head(ctx, sizeof(struct vlan_tag))) + return -1; /* Need to re-evaluate data *and* data_end and do new bounds checking * after adjusting head */ + eth = (void *)(long)ctx->data; + data_end = (void *)(long)ctx->data_end; + if (eth + 1 > data_end) + return -1; /* Copy back the old Ethernet header and update the proto type */ + memcpy(eth, ð_cpy, sizeof(struct ethhdr)); + eth->h_proto = h_proto; return vlid; }
TEST
XDPプログラムをロードする。
$ make $ sudo ./xdp-loader unload veth-packet02 --all $ sudo ./xdp-loader load --prog-name xdp_vlan_swap_func --mode skb veth-packet02 xdp_prog_kern.o
$ sudo ../testenv/testenv.sh ping --vlan Running ping from inside test environment: PING fc00:dead:cafe:1001::1(fc00:dead:cafe:1001::1) 56 data bytes 64 bytes from fc00:dead:cafe:1001::1: icmp_seq=1 ttl=64 time=0.044 ms 64 bytes from fc00:dead:cafe:1001::1: icmp_seq=2 ttl=64 time=0.111 ms ^C --- fc00:dead:cafe:1001::1 ping statistics --- 2 packets transmitted, 2 received, 0% packet loss, time 1027ms rtt min/avg/max/mdev = 0.044/0.077/0.111/0.033 ms
veth-packet02でキャプチャすると以下のようになる。
icmp echo requestからvlanタグが消えているのが分かる。
Assignment3
VLANタグがついてなかったらVLANタグを付与する。
テスト環境構築
sudo ../testenv/testenv.sh setup --name veth-packet02
コード修正
vlan_tag_push()
に処理を実装する。
--- a/packet02-rewriting/xdp_prog_kern.c +++ b/packet02-rewriting/xdp_prog_kern.c @@ -60,9 +60,31 @@ static __always_inline int vlan_tag_pop(struct xdp_md *ctx, struct ethhdr *eth) /* Pushes a new VLAN tag after the Ethernet header. Returns 0 on success, * -1 on failure. */ -static __always_inline int vlan_tag_push(struct xdp_md *ctx, - struct ethhdr *eth, int vlid) -{ +static __always_inline int vlan_tag_push(struct xdp_md *ctx, struct ethhdr *eth, + int vlid) { + void *data_end = (void *)(long)ctx->data_end; + struct ethhdr eth_cpy; + struct vlan_tag vlh = { + .vlan_tpid = bpf_htons(ETH_P_8021Q), + .vlan_tci = bpf_htons(vlid), + }; + + if (eth + 1 > data_end) { + return -1; + } + memcpy(ð_cpy, eth, sizeof(struct ethhdr)); + if (bpf_xdp_adjust_head(ctx, -(int)sizeof(struct vlan_tag))) + return -1; + eth = (void *)(long)ctx->data; + data_end = (void *)(long)ctx->data_end; + if ((void *)eth + sizeof(struct ethhdr) + sizeof(struct vlan_tag) > data_end) { + return -1; + } + memcpy(eth, ð_cpy, sizeof(struct ethhdr)); + memcpy(ð->h_proto, &vlh, sizeof(struct vlan_tag)); + void *p = (void *)ð + sizeof(eth->h_dest) + sizeof(eth->h_source) + sizeof(struct vlan_tag); + memcpy(p, ð_cpy.h_proto, sizeof(eth_cpy.h_proto)); + return 0; }
TEST
XDPプログラムをロードする。
$ make $ sudo ./xdp-loader unload veth-packet02 --all $ sudo ./xdp-loader load --prog-name xdp_vlan_swap_func --mode skb veth-packet02 xdp_prog_kern.o
pingする。
$ sudo ../testenv/testenv.sh ping
veth-packet02でキャプチャすると以下のようになる。
icmp echo requestにvlanタグが付与されているのが分かる。
補足
testenv.shコピペ用コマンドメモ
sudo ../testenv/testenv.sh setup --name veth-packet02 sudo ../testenv/testenv.sh setup --name veth-packet02 --vlan sudo ../testenv/testenv.sh setup --name veth-packet02 --vlan --legacy-ip sudo ../testenv/testenv.sh enter --name veth-packet02 sudo ../testenv/testenv.sh teardown
xdp-loaderコピペ用コマンドメモ
sudo ./xdp-loader load --prog-name xdp_port_rewrite_func --mode skb veth-packet02 xdp_prog_kern.o sudo ./xdp-loader load --prog-name xdp_vlan_swap_func --mode skb veth-packet02 xdp_prog_kern.o sudo ./xdp-loader unload veth-packet02 --all sudo ./xdp-loader status
xdp-tutorial packet01-parsingをやってみた
packet01-parsingでは以下を行う。
- 指定したICMPパケットを許可し、それ以外のICMPパケットはドロップする。
- ICMPパケットでないものは許可。
Assignment1
parse_ethhdr()
のbounds checkのバグフィックスをする。
テスト環境構築
sudo ../testenv/testenv.sh setup --name veth-packet01
コード修正
--- a/packet01-parsing/xdp_prog_kern.c +++ b/packet01-parsing/xdp_prog_kern.c @@ -36,7 +36,7 @@ static __always_inline int parse_ethhdr(struct hdr_cursor *nh, /* Byte-count bounds check; check if current pointer + size of header * is after data_end. */ - if (nh->pos + 1 > data_end) + if (nh->pos + hdrsize > data_end) return -1; nh->pos += hdrsize;
Assignment2
parse_ip6hdr()
を実装して、IPv6パケットをパースする。
テスト環境構築
sudo ../testenv/testenv.sh setup --name veth-packet01
コード修正
--- a/packet01-parsing/xdp_prog_kern.c +++ b/packet01-parsing/xdp_prog_kern.c @@ -46,11 +46,18 @@ static __always_inline int parse_ethhdr(struct hdr_cursor *nh, } /* Assignment 2: Implement and use this */ -/*static __always_inline int parse_ip6hdr(struct hdr_cursor *nh, +static __always_inline int parse_ip6hdr(struct hdr_cursor *nh, void *data_end, struct ipv6hdr **ip6hdr) { -}*/ + struct ipv6hdr *ip6h = nh->pos; + + if (ip6h + 1 > data_end) + return -1; + nh->pos = ip6h + 1; + *ip6hdr = ip6h; + return ip6h->nexthdr; +} /* Assignment 3: Implement and use this */ /*static __always_inline int parse_icmp6hdr(struct hdr_cursor *nh, @@ -65,6 +72,7 @@ int xdp_parser_func(struct xdp_md *ctx) void *data_end = (void *)(long)ctx->data_end; void *data = (void *)(long)ctx->data; struct ethhdr *eth; + struct ipv6hdr *ipv6; /* Default action XDP_PASS, imply everything we couldn't parse, or that * we don't want to deal with, we just pass up the stack and let the @@ -86,6 +94,9 @@ int xdp_parser_func(struct xdp_md *ctx) nh_type = parse_ethhdr(&nh, data_end, ð); if (nh_type != bpf_htons(ETH_P_IPV6)) goto out; + nh_type = parse_ip6hdr(&nh, data_end, &ipv6); + if (nh_type != IPPROTO_ICMPV6) + goto out; /* Assignment additions go below here */
Assignment3
parse_icmp6hdr()
を実装して、sequenceが奇数のICMPパケットのみを許可する。
コード修正
--- a/packet01-parsing/xdp_prog_kern.c +++ b/packet01-parsing/xdp_prog_kern.c @@ -60,11 +60,18 @@ static __always_inline int parse_ip6hdr(struct hdr_cursor *nh, } /* Assignment 3: Implement and use this */ -/*static __always_inline int parse_icmp6hdr(struct hdr_cursor *nh, +static __always_inline int parse_icmp6hdr(struct hdr_cursor *nh, void *data_end, struct icmp6hdr **icmp6hdr) { -}*/ + struct icmp6hdr *icmp6h = nh->pos; + + if (icmp6h + 1 > data_end) + return -1; + nh->pos = icmp6h + 1; + *icmp6hdr = icmp6h; + return bpf_ntohs(icmp6h->icmp6_sequence); +} SEC("xdp") int xdp_parser_func(struct xdp_md *ctx) @@ -73,6 +80,8 @@ int xdp_parser_func(struct xdp_md *ctx) void *data = (void *)(long)ctx->data; struct ethhdr *eth; struct ipv6hdr *ipv6; + struct icmp6hdr *icmp6; + int icmp_seq; /* Default action XDP_PASS, imply everything we couldn't parse, or that * we don't want to deal with, we just pass up the stack and let the @@ -97,6 +106,9 @@ int xdp_parser_func(struct xdp_md *ctx) nh_type = parse_ip6hdr(&nh, data_end, &ipv6); if (nh_type != IPPROTO_ICMPV6) goto out; + icmp_seq = parse_icmp6hdr(&nh, data_end, &icmp6); + if (icmp_seq % 2 == 1) + goto out; /* Assignment additions go below here */
TEST
$ make $ sudo ./xdp-loader unload veth-packet01 --all $ sudo ./xdp-loader load --prog-name xdp_parser_func --mode skb veth-packet01 xdp_prog_kern.o
$ ping -6 fc00:dead:cafe:1::2 PING fc00:dead:cafe:1::2(fc00:dead:cafe:1::2) 56 data bytes 64 bytes from fc00:dead:cafe:1::2: icmp_seq=1 ttl=64 time=0.057 ms 64 bytes from fc00:dead:cafe:1::2: icmp_seq=3 ttl=64 time=0.082 ms 64 bytes from fc00:dead:cafe:1::2: icmp_seq=5 ttl=64 time=0.075 ms ^C --- fc00:dead:cafe:1::2 ping statistics --- 5 packets transmitted, 3 received, 40% packet loss, time 4084ms rtt min/avg/max/mdev = 0.057/0.071/0.082/0.010 ms
奇数シーケンスのパケットのみpingが通っている
Assignment4
VLANをサポートさせる。 また、通常のタグパケットだけでなく、ダブルタグVLANも考慮する。
テスト環境構築
以下コマンドでvlan環境を構築する。
sudo ../testenv/testenv.sh setup --name veth-packet01 --vlan
ä¸å³ã®ãããªç°å¢ãåºæ¥ä¸ããã
試しにhost namespaceからveth0.2のアドレスに向けてpingを打ってみる。
$ ping -6 fc00:dead:cafe:2001::2 -I veth-packet01.2 -c 2 PING fc00:dead:cafe:2001::2(fc00:dead:cafe:2001::2) from fc00:dead:cafe:2001::1 veth-packet01.2: 56 data bytes 64 bytes from fc00:dead:cafe:2001::2: icmp_seq=1 ttl=64 time=0.097 ms 64 bytes from fc00:dead:cafe:2001::2: icmp_seq=2 ttl=64 time=0.099 ms --- fc00:dead:cafe:2001::2 ping statistics --- 2 packets transmitted, 2 received, 0% packet loss, time 1008ms rtt min/avg/max/mdev = 0.097/0.098/0.099/0.001 ms
veth-packet01.2で取得したキャプチャ
veth-packet01で取得したキャプチャ
veth-packet01.2（VLANインターフェース）でvlan tagの付与・取り外しがちゃんと行われている。
コード修正
--- a/packet01-parsing/xdp_prog_kern.c +++ b/packet01-parsing/xdp_prog_kern.c @@ -8,6 +8,7 @@ #include <linux/icmpv6.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> +#include <pcap/vlan.h> /* Defines xdp_stats_map from packet04 */ #include "../common/xdp_stats_kern_user.h" #include "../common/xdp_stats_kern.h" @@ -17,6 +18,10 @@ struct hdr_cursor { void *pos; }; +static __always_inline int proto_is_vlan(__u16 h_proto) +{ + return !!(h_proto == bpf_htons(ETH_P_8021Q) || h_proto == bpf_htons(ETH_P_8021AD)); +} /* Packet parsing helpers. * * Each helper parses a packet header, including doing bounds checking, and @@ -31,18 +36,38 @@ static __always_inline int parse_ethhdr(struct hdr_cursor *nh, struct ethhdr **ethhdr) { struct ethhdr *eth = nh->pos; - int hdrsize = sizeof(*eth); + __u16 *proto = NULL; + struct vlan_tag *vtag; /* Byte-count bounds check; check if current pointer + size of header * is after data_end. */ - if (nh->pos + hdrsize > data_end) + if ((nh->pos + sizeof(eth->h_dest)) > data_end) return -1; - - nh->pos += hdrsize; + nh->pos += sizeof(eth->h_dest); + if ((nh->pos + sizeof(eth->h_source)) > data_end) + return -1; + nh->pos += sizeof(eth->h_source); + + int i = 0; + #pragma unroll + for (i = 0; i < 2; i++) { + if ((nh->pos + sizeof(vtag->vlan_tpid)) > data_end) + return -1; + if (!proto_is_vlan(*(__u16 *)nh->pos)) + break; + if ((nh->pos + sizeof(struct vlan_tag)) > data_end) + return -1; + nh->pos += sizeof(struct vlan_tag); + } + + if ((nh->pos + sizeof(eth->h_proto)) > data_end) + return -1; + proto = (__u16 *)nh->pos; + nh->pos += sizeof(eth->h_proto); *ethhdr = eth; - return eth->h_proto; /* network-byte-order */ + return *proto; /* network-byte-order */ } /* Assignment 2: Implement and use this */
TEST
$ make $ sudo ./xdp-loader unload veth-packet01 --all $ sudo ./xdp-loader load --prog-name xdp_parser_func --mode skb veth-packet01 xdp_prog_kern.o
$ ping -6 fc00:dead:cafe:2001::2 PING fc00:dead:cafe:2001::2(fc00:dead:cafe:2001::2) 56 data bytes 64 bytes from fc00:dead:cafe:2001::2: icmp_seq=1 ttl=64 time=0.081 ms 64 bytes from fc00:dead:cafe:2001::2: icmp_seq=3 ttl=64 time=0.092 ms 64 bytes from fc00:dead:cafe:2001::2: icmp_seq=5 ttl=64 time=0.088 ms 64 bytes from fc00:dead:cafe:2001::2: icmp_seq=7 ttl=64 time=0.091 ms 64 bytes from fc00:dead:cafe:2001::2: icmp_seq=9 ttl=64 time=0.087 ms ^C --- fc00:dead:cafe:2001::2 ping statistics --- 9 packets transmitted, 5 received, 44.4444% packet loss, time 8180ms rtt min/avg/max/mdev = 0.081/0.087/0.092/0.003 ms
Assignment5
IPv4をサポートさせる。
テスト環境構築
以下コマンドでipv4環境を構築する。
sudo ../testenv/testenv.sh setup --name veth-packet01 --vlan --legacy-ip
確認した所、vlan interfaceにIPアドレスがアサインされてないようなので、アサインする。
$ sudo ip addr add 192.168.101.1/24 dev veth-packet01.1 $ sudo ip addr add 192.168.102.1/24 dev veth-packet01.2 $ sudo ip netns exec veth-packet01 ip addr add 192.168.101.2/24 dev veth0.1 $ sudo ip netns exec veth-packet01 ip addr add 192.168.102.2/24 dev veth0.2
試しにhost namespaceからveth0.2のアドレスに向けてpingを打ってみる。
$ ping 192.168.102.2 -c 2 PING 192.168.102.2 (192.168.102.2) 56(84) bytes of data. 64 bytes from 192.168.102.2: icmp_seq=1 ttl=64 time=0.240 ms 64 bytes from 192.168.102.2: icmp_seq=2 ttl=64 time=0.091 ms --- 192.168.102.2 ping statistics --- 2 packets transmitted, 2 received, 0% packet loss, time 1001ms rtt min/avg/max/mdev = 0.091/0.165/0.240/0.074 ms
veth-packet01.2で取得したキャプチャ
veth-packet01で取得したキャプチャ
コード修正
--- a/packet01-parsing/xdp_prog_kern.c +++ b/packet01-parsing/xdp_prog_kern.c @@ -4,7 +4,9 @@ #include <linux/in.h> #include <linux/if_ether.h> #include <linux/if_packet.h> +#include <linux/ip.h> #include <linux/ipv6.h> +#include <linux/icmp.h> #include <linux/icmpv6.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> @@ -70,6 +72,22 @@ static __always_inline int parse_ethhdr(struct hdr_cursor *nh, return *proto; /* network-byte-order */ } +static __always_inline int parse_iphdr(struct hdr_cursor *nh, + void *data_end, + struct iphdr **iphdr) +{ + struct iphdr *iph = nh->pos; + + if (iph + 1 > data_end) + return -1; + int hdrsize = iph->ihl * 4; + if (nh->pos + hdrsize > data_end) + return -1; + nh->pos += hdrsize; + *iphdr = iph; + return iph->protocol; +} + /* Assignment 2: Implement and use this */ static __always_inline int parse_ip6hdr(struct hdr_cursor *nh, void *data_end, @@ -84,6 +102,19 @@ static __always_inline int parse_ip6hdr(struct hdr_cursor *nh, return ip6h->nexthdr; } +static __always_inline int parse_icmphdr(struct hdr_cursor *nh, + void *data_end, + struct icmphdr **icmphdr) +{ + struct icmphdr *icmph = nh->pos; + + if (icmph + 1 > data_end) + return -1; + nh->pos = icmph + 1; + *icmphdr = icmph; + return bpf_ntohs(icmph->un.echo.sequence); +} + /* Assignment 3: Implement and use this */ static __always_inline int parse_icmp6hdr(struct hdr_cursor *nh, void *data_end, @@ -104,7 +135,9 @@ int xdp_parser_func(struct xdp_md *ctx) void *data_end = (void *)(long)ctx->data_end; void *data = (void *)(long)ctx->data; struct ethhdr *eth; + struct iphdr *ip; struct ipv6hdr *ipv6; + struct icmphdr *icmp; struct icmp6hdr *icmp6; int icmp_seq; @@ -126,12 +159,19 @@ int xdp_parser_func(struct xdp_md *ctx) * header type in the packet correct?), and bounds checking. 
*/ nh_type = parse_ethhdr(&nh, data_end, &eth); - if (nh_type != bpf_htons(ETH_P_IPV6)) - goto out; - nh_type = parse_ip6hdr(&nh, data_end, &ipv6); - if (nh_type != IPPROTO_ICMPV6) + if (nh_type == bpf_htons(ETH_P_IP)) { + nh_type = parse_iphdr(&nh, data_end, &ip); + if (nh_type != IPPROTO_ICMP) + goto out; + icmp_seq = parse_icmphdr(&nh, data_end, &icmp); + } else if (nh_type == bpf_htons(ETH_P_IPV6)) { + nh_type = parse_ip6hdr(&nh, data_end, &ipv6); + if (nh_type != IPPROTO_ICMPV6) + goto out; + icmp_seq = parse_icmp6hdr(&nh, data_end, &icmp6); + } else { goto out; - icmp_seq = parse_icmp6hdr(&nh, data_end, &icmp6); + } if (icmp_seq % 2 == 1) goto out;
TEST
$ make $ sudo ./xdp-loader unload veth-packet01 --all $ sudo ./xdp-loader load --prog-name xdp_parser_func --mode skb veth-packet01 xdp_prog_kern.o
$ ping 192.168.102.2 PING 192.168.102.2 (192.168.102.2) 56(84) bytes of data. 64 bytes from 192.168.102.2: icmp_seq=1 ttl=64 time=0.204 ms 64 bytes from 192.168.102.2: icmp_seq=3 ttl=64 time=0.104 ms 64 bytes from 192.168.102.2: icmp_seq=5 ttl=64 time=0.091 ms 64 bytes from 192.168.102.2: icmp_seq=7 ttl=64 time=0.087 ms 64 bytes from 192.168.102.2: icmp_seq=9 ttl=64 time=0.083 ms 64 bytes from 192.168.102.2: icmp_seq=11 ttl=64 time=0.086 ms --- 192.168.102.2 ping statistics --- 11 packets transmitted, 6 received, 45.4545% packet loss, time 10223ms rtt min/avg/max/mdev = 0.083/0.109/0.204/0.042 ms
$ ping -6 fc00:dead:cafe:2001::2 PING fc00:dead:cafe:2001::2(fc00:dead:cafe:2001::2) 56 data bytes 64 bytes from fc00:dead:cafe:2001::2: icmp_seq=1 ttl=64 time=0.074 ms 64 bytes from fc00:dead:cafe:2001::2: icmp_seq=3 ttl=64 time=0.095 ms 64 bytes from fc00:dead:cafe:2001::2: icmp_seq=5 ttl=64 time=0.088 ms 64 bytes from fc00:dead:cafe:2001::2: icmp_seq=7 ttl=64 time=0.088 ms 64 bytes from fc00:dead:cafe:2001::2: icmp_seq=9 ttl=64 time=0.090 ms --- fc00:dead:cafe:2001::2 ping statistics --- 10 packets transmitted, 5 received, 50% packet loss, time 9201ms rtt min/avg/max/mdev = 0.074/0.087/0.095/0.007 ms
補足
testenv.sh
コピペ用コマンドメモ
sudo ../testenv/testenv.sh setup --name veth-packet01 sudo ../testenv/testenv.sh setup --name veth-packet01 --vlan sudo ../testenv/testenv.sh setup --name veth-packet01 --vlan --legacy-ip sudo ../testenv/testenv.sh enter --name veth-packet01 sudo ../testenv/testenv.sh teardown
xdp-loader
コピペ用コマンドメモ
sudo ./xdp-loader load --prog-name xdp_parser_func --mode skb veth-packet01 xdp_prog_kern.o sudo ./xdp-loader unload veth-packet01 --all sudo ./xdp-loader status
xdp-tutorial basic04-pinning-mapsをやってみた
Assignment1
eBPFプログラムが再ロードされた場合、xdp_statsプログラムでそれを検知して、mapを再取得するというもの。
解いてみた
誤ったファイルディスクリプタを使っていないかbpfマップを検証する関数を実装する
問題文に書いてある通り、新しいbpfプログラムが再ロードされたら、bpfマップのidが変わるので、それを利用して、誤ったファイルディスクリプタを使っていないか検証できる。
xdp_stats.c
@@ -180,6 +180,16 @@ static bool map_collect(int fd, __u32 map_type, __u32 key, struct record *rec) return true; } +static int is_wrong_fd(__u32 id, char * pin_dir) { + struct bpf_map_info info = { 0 }; + int map_fd = open_bpf_map_file(pin_dir, "xdp_stats_map", &info); + if (map_fd < 0) { + return -1; + } + close(map_fd); + return id != info.id; +} + static void stats_collect(int map_fd, __u32 map_type, struct stats_record *stats_rec) {
stats_pollã®ã«ã¼ãã®ä¸ã§bpfããããæ¤è¨¼ããã
stats_pollã®ã«ã¼ãã®ä¸ã§bpfããããæ¤è¨¼ããããã«ä¿®æ£ãè¡ãã
open_bpf_map_fileã§ã¨ã©ã¼ãèµ·ããå ´åã«ã¯xdp_statsãEXIT_FAILã§çµäºããã¦ããã
EXIT_FAILはcommon_defines.hで定義されている終了値コードである。使わなくても良いが、既存のコードに合わせて使用している。
xdp_stats.c
@@ -191,7 +201,7 @@ static void stats_collect(int map_fd, __u32 map_type, } } -static void stats_poll(int map_fd, __u32 map_type, int interval) +static int stats_poll(int map_fd, __u32 map_type, int interval, __u32 id, char * pin_dir) { struct stats_record prev, record = { 0 }; @@ -203,6 +213,11 @@ static void stats_poll(int map_fd, __u32 map_type, int interval) usleep(1000000/4); while (1) { + int result; + result = is_wrong_fd(id, pin_dir); + if (result != 0) { + return result; + } prev = record; /* struct copy */ stats_collect(map_fd, map_type, &record); stats_print(&record, &prev); @@ -247,29 +262,38 @@ int main(int argc, char **argv) return EXIT_FAIL_OPTION; } - stats_map_fd = open_bpf_map_file(pin_dir, "xdp_stats_map", &info); - if (stats_map_fd < 0) { - return EXIT_FAIL_BPF; - } - - /* check map info, e.g. datarec is expected size */ - map_expect.key_size = sizeof(__u32); - map_expect.value_size = sizeof(struct datarec); - map_expect.max_entries = XDP_ACTION_MAX; - err = check_map_fd_info(&info, &map_expect); - if (err) { - fprintf(stderr, "ERR: map via FD not compatible\n"); - return err; - } - if (verbose) { - printf("\nCollecting stats from BPF map\n"); - printf(" - BPF map (bpf_map_type:%d) id:%d name:%s" - " key_size:%d value_size:%d max_entries:%d\n", - info.type, info.id, info.name, - info.key_size, info.value_size, info.max_entries - ); + while (1) { + stats_map_fd = open_bpf_map_file(pin_dir, "xdp_stats_map", &info); + if (stats_map_fd < 0) { + return EXIT_FAIL_BPF; + } + + /* check map info, e.g. 
datarec is expected size */ + map_expect.key_size = sizeof(__u32); + map_expect.value_size = sizeof(struct datarec); + map_expect.max_entries = XDP_ACTION_MAX; + err = check_map_fd_info(&info, &map_expect); + if (err) { + fprintf(stderr, "ERR: map via FD not compatible\n"); + return err; + } + if (verbose) { + printf("\nCollecting stats from BPF map\n"); + printf(" - BPF map (bpf_map_type:%d) id:%d name:%s" + " key_size:%d value_size:%d max_entries:%d\n", + info.type, info.id, info.name, + info.key_size, info.value_size, info.max_entries + ); + } + + int result; + result = stats_poll(stats_map_fd, info.type, interval, info.id, pin_dir); + if (result == -1) { + goto ERROR_CASE; + } + printf("bpf map will be reloaded.\n"); } - - stats_poll(stats_map_fd, info.type, interval); return EXIT_OK; +ERROR_CASE: + return EXIT_FAIL; }
試ãã¦ã¿ã
注意点としてbasic04では以下のようにprognameを明示的に指定する必要がある。
ã¾ããattach_modeã¨ãã¦ããã©ã«ãã§XDP_MODE_NATIVEãæå®ããã¦ããã
ç§ã®ç°å¢ã¯XDP_MODE_NATIVEãå®è¡ã§ããç°å¢ã§ã¯ãªãã®ã§ã-Aãæå®ãã¦SKBã¢ã¼ãã§åä½ããã¦ããã
$ sudo ./xdp_loader --progname=xdp_pass_func --dev=lo -A $ sudo ./xdp_stats --dev=lo
ä¸è¨ãå®è¡å¾ãå¥ã¦ã£ã³ãã¦ã§xdpããã°ã©ã ã以ä¸ã®ããã«åãã¼ãããã
$ sudo ./xdp_loader --progname=xdp_pass_func --dev=lo -A
xdp_statsã®åºåã¯ä»¥ä¸ã¨ãªã£ãã
Collecting stats from BPF map - BPF map (bpf_map_type:6) id:71 name:xdp_stats_map key_size:4 value_size:16 max_entries:5 XDP-action XDP_ABORTED 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:0.250328 XDP_DROP 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:0.250258 XDP_PASS 2 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:0.250259 XDP_TX 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:0.250259 XDP_REDIRECT 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:0.250260 XDP-action XDP_ABORTED 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000448 XDP_DROP 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000450 XDP_PASS 2 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000450 XDP_TX 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000450 XDP_REDIRECT 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000450 XDP-action XDP_ABORTED 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000432 XDP_DROP 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000432 XDP_PASS 2 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000432 XDP_TX 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000432 XDP_REDIRECT 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000432 XDP-action XDP_ABORTED 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000417 XDP_DROP 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000417 XDP_PASS 2 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000417 XDP_TX 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000417 XDP_REDIRECT 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000417 XDP-action XDP_ABORTED 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000586 XDP_DROP 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000586 XDP_PASS 2 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000586 XDP_TX 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000586 XDP_REDIRECT 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000586 XDP-action XDP_ABORTED 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000425 XDP_DROP 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000425 XDP_PASS 2 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000425 XDP_TX 0 pkts ( 0 
pps) 0 Kbytes ( 0 Mbits/s) period:2.000425 XDP_REDIRECT 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:2.000425 bpf map will be reloaded. Collecting stats from BPF map - BPF map (bpf_map_type:6) id:79 name:xdp_stats_map key_size:4 value_size:16 max_entries:5 XDP-action XDP_ABORTED 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:0.250216 XDP_DROP 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:0.250216 XDP_PASS 1 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:0.250216 XDP_TX 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:0.250216 XDP_REDIRECT 0 pkts ( 0 pps) 0 Kbytes ( 0 Mbits/s) period:0.250217
xdp_loaderを再実行したタイミングでbpf map will be reloaded
ã¨ãããã°ãåºãã®ã確èªã§ããã
Assignment2
ãã³ã©ããããbpfããããåå©ç¨ããããã«ä¿®æ£ããã
åå©ç¨ã®ããæ¹ã¯åé¡æãè¼ã£ã¦ãããã¼ã¸ã«ã»ã¼è¼ã£ã¦ããã
解ãã¦ã¿ã
åé¡æãè¼ã£ã¦ãããã¼ã¸ã®ãã³ãã«ãããã£ã¦è§£ã
ãã³ã©ããããbpfããããããã¨ãããã®ã¾ã¾ã®ç¶æ ã ã¨ãããã¯ãªã¢ãã¦ãããããããã¯ãªã¢ããã«åå©ç¨ãããã
xdp_loader.c
@@ -69,7 +69,7 @@ const char *pin_basedir = "/sys/fs/bpf"; const char *map_name = "xdp_stats_map"; /* Pinning maps under /sys/fs/bpf in subdir */ -int pin_maps_in_bpf_object(struct bpf_object *bpf_obj, const char *subdir) +int pin_maps_in_bpf_object(struct bpf_object *bpf_obj, const char *subdir, char *filename) { char map_filename[PATH_MAX]; char pin_dir[PATH_MAX]; @@ -90,16 +90,11 @@ int pin_maps_in_bpf_object(struct bpf_object *bpf_obj, const char *subdir) /* Existing/previous XDP prog might not have cleaned up */ if (access(map_filename, F_OK ) != -1 ) { - if (verbose) - printf(" - Unpinning (remove) prev maps in %s/\n", - pin_dir); - - /* Basically calls unlink(3) on map_filename */ - err = bpf_object__unpin_maps(bpf_obj, pin_dir); - if (err) { - fprintf(stderr, "ERR: UNpinning maps in %s\n", pin_dir); - return EXIT_FAIL_BPF; - } + int pinned_map_fd = bpf_obj_get(map_filename); + struct bpf_object *obj = bpf_object__open(filename); + struct bpf_map *map = bpf_object__find_map_by_name(obj, map_name); + bpf_map__reuse_fd(map, pinned_map_fd); + bpf_object__load(obj); } if (verbose) printf(" - Pinning maps in %s/\n", pin_dir); @@ -150,7 +145,7 @@ int main(int argc, char **argv) } /* Use the --dev name as subdir for exporting/pinning maps */ - err = pin_maps_in_bpf_object(xdp_program__bpf_obj(program), cfg.ifname); + err = pin_maps_in_bpf_object(xdp_program__bpf_obj(program), cfg.ifname, cfg.filename); if (err) { fprintf(stderr, "ERR: pinning maps\n"); return err;
xdp-tutorial basic03-map-counterããã£ã¦ã¿ã
Assignment 1
現状の実装にbyteカウンタの機能を追加するというもの。
解ãã¦ã¿ã
xdp_stats_mapã®ã¨ã³ããªã®æ§é ä½ã«byteã«ã¦ã³ã¿ã®ãã£ã¼ã«ãã追å ãã
eBPFãããã¯ä»¥ä¸ã®ããã«å®ç¾©ããã¦ããã
struct { __uint(type, BPF_MAP_TYPE_ARRAY); __type(key, __u32); __type(value, struct datarec); __uint(max_entries, XDP_ACTION_MAX); } xdp_stats_map SEC(".maps");
ã¨ã³ããªã®æ§é ä½ã§ããdatarec
ã«rx_bytes
ã¨ããbyteã«ã¦ã³ã¿ã®ãã£ã¼ã«ãã追å ããã
file: basic03-map-counter/common_kern_user.h
@@ -7,6 +7,7 @@ /* This is the data record stored in the map */ struct datarec { __u64 rx_packets; + __u64 rx_bytes; /* Assignment#1: Add byte counters */ };
packetã®é·ããæ±ãã¦ããããä¸ã§ä½ã£ãrx_bytes
ã¨ãããã£ã¼ã«ãã«å ç®ããã
xdp programã®ctxã«æå®ããã¦ããxdp_md
というやつから、packetの内容が読める。
/* user accessible metadata for XDP packet hook * new fields must be added to the end of this structure */ struct xdp_md { __u32 data; __u32 data_end; __u32 data_meta; /* Below access go through struct xdp_rxq_info */ __u32 ingress_ifindex; /* rxq->dev->ifindex */ __u32 rx_queue_index; /* rxq->queue_index */ __u32 egress_ifindex; /* txq->dev->ifindex */ };
xdp_md
ã¯ã«ã¼ãã«ã«ãã£ã¦ãªããããããå¾ã®å½¢ã§ãå®éã¯xdp_buff
ã¨xdp_rxq_info
ã®å½¢ã«ãªã£ã¦ããããã ãï¼åé¡æã®èª¬æã«ããæ¸ãã¦ãã£ããï¼
struct xdp_buff { void *data; void *data_end; void *data_meta; void *data_hard_start; unsigned long handle; struct xdp_rxq_info *rxq; } __attribute__((preserve_access_index)); struct xdp_rxq_info { /* Structure does not need to contain all entries, * as "preserve_access_index" will use BTF to fix this... */ struct net_device *dev; __u32 queue_index; } __attribute__((preserve_access_index));
dataがパケットの先頭のアドレスでdata_endがパケットの末尾のアドレスであるようだ。
つまりはdataとdata_endの距離がパケット長になる。
ということで以下のように修正。
basic03-map-counter/xdp_prog_kern.c
@@ -25,8 +25,8 @@ struct { SEC("xdp") int xdp_stats1_func(struct xdp_md *ctx) { - // void *data_end = (void *)(long)ctx->data_end; - // void *data = (void *)(long)ctx->data; + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; struct datarec *rec; __u32 key = XDP_PASS; /* XDP_PASS = 2 */ @@ -39,13 +39,14 @@ int xdp_stats1_func(struct xdp_md *ctx) if (!rec) return XDP_ABORTED; + /* Multiple CPUs can access data record. Thus, the accounting needs to * use an atomic operation. */ lock_xadd(&rec->rx_packets, 1); - /* Assignment#1: Add byte counters - * - Hint look at struct xdp_md *ctx (copied below) - * + __u64 bytes = data_end - data; + lock_xadd(&rec->rx_bytes, bytes); + /* * Assignment#3: Avoid the atomic operation * - Hint there is a map type named BPF_MAP_TYPE_PERCPU_ARRAY */
ã¦ã¼ã¶ç©ºéã®ããã°ã©ã å´ã§mapä¸ã®byteã«ã¦ã³ã¿ã®å¤ãåå¾ãã
rx_packets
ã¨åæ§ã«rx_bytes
ã®å¤ãåå¾ããããã«ããã
xdp_load_and_stats.c
@@ -181,6 +187,7 @@ static bool map_collect(int fd, __u32 map_type, __u32 key, struct record * rec) /* Assignment#1: Add byte counters */ rec->total.rx_packets = value.rx_packets; + rec->total.rx_bytes = value.rx_bytes; return true; }
ã¦ã¼ã¶ç©ºéã®ããã°ã©ã å´ã§mapä¸ã®byteã«ã¦ã³ã¿ã®æ å ±ãåºåãã
ãããrx_packets
ã¨åæ§ã«rx_bytes
の値の情報を出力する。
(コード中のコメントに従えば、Mbpsを出力させる意図があるようだが、この後の検証でそんなにたくさんのデータを流す予定はないので、bpsに変更している。)
xdp_load_and_stats.c
@@ -118,12 +118,15 @@ static void stats_print(struct stats_record *stats_rec, struct record *rec, *prev; double period; __u64 packets; + __u64 bytes; + __u64 bits; double pps; /* packets per sec */ + double bps; /* bits per sec */ /* Assignment#2: Print other XDP actions stats */ { char *fmt = "%-12s %'11lld pkts (%'10.0f pps)" - //" %'11lld Kbytes (%'6.0f Mbits/s)" + " %'11lld bytes (%'6.0f bits/s)" " period:%f\n"; const char *action = action2str(XDP_PASS); rec = &stats_rec->stats[0]; @@ -135,8 +138,11 @@ static void stats_print(struct stats_record *stats_rec, packets = rec->total.rx_packets - prev->total.rx_packets; pps = packets / period; + bytes = rec->total.rx_bytes - prev->total.rx_bytes; + bits = bytes << 3; + bps = bits / period; - printf(fmt, action, rec->total.rx_packets, pps, period); + printf(fmt, action, rec->total.rx_packets, pps, rec->total.rx_bytes, bps, period); } }
æ¤è¨¼
$ make CC xdp_load_and_stats CLANG xdp_prog_kern.o LLC xdp_prog_kern.o $ sudo ./xdp_load_and_stats -d lo libbpf: elf: skipping unrecognized data section(7) xdp_metadata libbpf: elf: skipping unrecognized data section(7) xdp_metadata libbpf: elf: skipping unrecognized data section(7) xdp_metadata libbpf: elf: skipping unrecognized data section(7) xdp_metadata Success: Loaded BPF-object(xdp_prog_kern.o) and used section(xdp_stats1_func) - XDP prog id:211 attached on device:lo(ifindex:1) Collecting stats from BPF map - BPF map (bpf_map_type:2) id:75 name:xdp_stats_map key_size:4 value_size:16 max_entries:5 XDP-action XDP_PASS 1 pkts ( 4 pps) 90 bytes ( 2877 bits/s) period:0.250223 XDP_PASS 2 pkts ( 0 pps) 180 bytes ( 360 bits/s) period:2.000256 XDP_PASS 2 pkts ( 0 pps) 180 bytes ( 0 bits/s) period:2.000286 XDP_PASS 2 pkts ( 0 pps) 180 bytes ( 0 bits/s) period:2.000314 XDP_PASS 2 pkts ( 0 pps) 180 bytes ( 0 bits/s) period:2.000300 XDP_PASS 4 pkts ( 1 pps) 376 bytes ( 784 bits/s) period:2.000297 XDP_PASS 4 pkts ( 0 pps) 376 bytes ( 0 bits/s) period:2.000259 XDP_PASS 4 pkts ( 0 pps) 376 bytes ( 0 bits/s) period:2.000316
packetã®ã«ã¦ã³ã¿ã2pktsãã4pktsã«ä¸ãã£ã¦ããé¨åãããã
ããã®ã¨ããã§å®ã¯ä»¥ä¸ã®éããpingãæã£ãã
$ping 127.0.0.1 -c 1
ãã®é¨åã®bytesã«ã¦ã³ã¿ãè¦ãã¨180ãã376ã«ä¸ãã£ã¦ããã ã¤ã¾ãããã®ã¨ããloãééããicmpãã±ããåè¨ã196 bytes(376-180)ã§ããã°ãæ£ããå®è£ ã§ããã¨ãããã¨ã
ããã§åå¾ããpcapãè¦ã¦ã¿ãã
requestとreplyがそれぞれ98 bytesで合計196 bytesなのでうまくいっている。
Assignment 2
user 空éå´ã®ããã°ã©ã ãä¿®æ£ãã¦xdp_action
ãã¨ã«statsãéè¨ããããã«ããã¨ãããã®ã
xdp_action
ã¨ã¯ä»¥ä¸ã®ãã¨ã
linux/bpf.h
/* User return codes for XDP prog type. * A valid XDP program must return one of these defined values. All other * return codes are reserved for future use. Unknown return codes will * result in packet drops and a warning via bpf_warn_invalid_xdp_action(). */ enum xdp_action { XDP_ABORTED = 0, XDP_DROP, XDP_PASS, XDP_TX, XDP_REDIRECT, };
解ãã¦ã¿ã
xdp_stats_mapã®æ§é ãè¦ã¦ã¿ã
xdp_stats_mapã¯ä»¥ä¸ã®ããã«ãªã£ã¦ããã
xdp_prog_kern.c
struct { __uint(type, BPF_MAP_TYPE_ARRAY); __type(key, __u32); __type(value, struct datarec); __uint(max_entries, XDP_ACTION_MAX); } xdp_stats_map SEC(".maps");
XDP_ACTION_MAXã¯ä»¥ä¸ã®ããã«å®ç¾©ããã¦ããã
common_kern_user.h
#ifndef XDP_ACTION_MAX #define XDP_ACTION_MAX (XDP_REDIRECT + 1) #endif
ã¤ã¾ããxdp_stats_mapã¯xdp_action
ã®ç¨®é¡åã ãã¨ã³ããªãããã¨ãããã¨ã
ããã¦ãxdp_stats_mapã®keyã«ã¯xdp_action
ãæå®ãããã
xdp_prog_kern.c
__u32 key = XDP_PASS; /* XDP_PASS = 2 */ /* Lookup in kernel BPF-side return pointer to actual data record */ rec = bpf_map_lookup_elem(&xdp_stats_map, &key);
è¤æ°ã®ã¨ã³ããªãæ±ããããã«æ¹è¯ãã
xdp_load_and_statsプログラムではxdp_stats_mapのエントリをstats_recordという構造体に入れて扱う。
stats_recordはxdp_stats_mapのエントリの生の構造体であるdatarecをラップしたものになっている。
xdp_load_and_stats.c
struct record { __u64 timestamp; struct datarec total; /* defined in common_kern_user.h */ }; struct stats_record { struct record stats[1]; };
stats_recordãxdp_stats_mapã®ã¨ã³ããªãxdp_actionåæ°åä¿åã§ããããã«ä¿®æ£ããã
mapããxdp_actionã®åæ°åã ããã¨ã³ããªãåãåºãã
xdp_load_and_stats.c
@@ -97,7 +97,7 @@ struct record { }; struct stats_record { - struct record stats[1]; /* Assignment#2: Hint */ + struct record stats[XDP_ACTION_MAX]; };
xdp_load_and_stats.c
@@ -194,10 +194,11 @@ static bool map_collect(int fd, __u32 map_type, __u32 key, struct record *rec) static void stats_collect(int map_fd, __u32 map_type, struct stats_record *stats_rec) { - /* Assignment#2: Collect other XDP actions stats */ - __u32 key = XDP_PASS; - - map_collect(map_fd, map_type, key, &stats_rec->stats[0]); + map_collect(map_fd, map_type, XDP_ABORTED, &stats_rec->stats[XDP_ABORTED]); + map_collect(map_fd, map_type, XDP_DROP, &stats_rec->stats[XDP_DROP]); + map_collect(map_fd, map_type, XDP_PASS, &stats_rec->stats[XDP_PASS]); + map_collect(map_fd, map_type, XDP_TX, &stats_rec->stats[XDP_TX]); + map_collect(map_fd, map_type, XDP_REDIRECT, &stats_rec->stats[XDP_REDIRECT]); } static void stats_poll(int map_fd, __u32 map_type, int interval)
statsã®åºåãä¿®æ£ãã
xdp_load_and_stats.c
@@ -123,14 +123,14 @@ static void stats_print(struct stats_record *stats_rec, double pps; /* packets per sec */ double bps; /* bits per sec */ - /* Assignment#2: Print other XDP actions stats */ - { + int xdpact = 0; + for (xdpact = 0; xdpact < XDP_ACTION_MAX; xdpact++) { char *fmt = "%-12s %'11lld pkts (%'10.0f pps)" " %'11lld bytes (%'6.0f bits/s)" " period:%f\n"; - const char *action = action2str(XDP_PASS); - rec = &stats_rec->stats[0]; - prev = &stats_prev->stats[0]; + const char *action = action2str(xdpact); + rec = &stats_rec->stats[xdpact]; + prev = &stats_prev->stats[xdpact]; period = calc_period(rec, prev); if (period == 0)
åããã¦ã¿ã
xdp_actionãã¨ã«statsãåºåãããããã ããkernelå´ã§åãããã°ã©ã ã«ã¯ã¿ãããã¦ãªãã®ã§ãXDP_PASSããã«ã¦ã³ã¿ã¯ä¸ãããªãã
$ sudo ./xdp_load_and_stats --dev=lo libbpf: elf: skipping unrecognized data section(7) xdp_metadata libbpf: elf: skipping unrecognized data section(7) xdp_metadata libbpf: elf: skipping unrecognized data section(7) xdp_metadata libbpf: elf: skipping unrecognized data section(7) xdp_metadata Success: Loaded BPF-object(xdp_prog_kern.o) and used section(xdp_stats1_func) - XDP prog id:52 attached on device:lo(ifindex:1) Collecting stats from BPF map - BPF map (bpf_map_type:2) id:11 name:xdp_stats_map key_size:4 value_size:16 max_entries:5 XDP-action XDP_ABORTED 0 pkts ( 0 pps) 0 bytes ( 0 bits/s) period:0.250372 XDP_DROP 0 pkts ( 0 pps) 0 bytes ( 0 bits/s) period:0.250382 XDP_PASS 1 pkts ( 4 pps) 90 bytes ( 2876 bits/s) period:0.250382 XDP_TX 0 pkts ( 0 pps) 0 bytes ( 0 bits/s) period:0.250382 XDP_REDIRECT 0 pkts ( 0 pps) 0 bytes ( 0 bits/s) period:0.250382 XDP_ABORTED 0 pkts ( 0 pps) 0 bytes ( 0 bits/s) period:2.000472 XDP_DROP 0 pkts ( 0 pps) 0 bytes ( 0 bits/s) period:2.000472 XDP_PASS 2 pkts ( 0 pps) 180 bytes ( 360 bits/s) period:2.000472 XDP_TX 0 pkts ( 0 pps) 0 bytes ( 0 bits/s) period:2.000472 XDP_REDIRECT 0 pkts ( 0 pps) 0 bytes ( 0 bits/s) period:2.000472 XDP_ABORTED 0 pkts ( 0 pps) 0 bytes ( 0 bits/s) period:2.000388 XDP_DROP 0 pkts ( 0 pps) 0 bytes ( 0 bits/s) period:2.000387 XDP_PASS 2 pkts ( 0 pps) 180 bytes ( 0 bits/s) period:2.000387 XDP_TX 0 pkts ( 0 pps) 0 bytes ( 0 bits/s) period:2.000386 XDP_REDIRECT 0 pkts ( 0 pps) 0 bytes ( 0 bits/s) period:2.000386
Assignment3
atomicな加算処理はコストが高いので、それをやめて、BPF_MAP_TYPE_PERCPU_ARRAYのeBPFマップを使うというもの。
atomicãªå ç®ã¨ã¯ä½ãï¼
__sync_fetch_and_addã¯ã³ã³ãã¤ã©ã§æä¾ãããçµã¿è¾¼ã¿é¢æ°ã§ããã使ãã¨atomicãªå ç®ãå®ç¾ã§ããã
#ifndef lock_xadd #define lock_xadd(ptr, val) ((void) __sync_fetch_and_add(ptr, val)) #endif
BPF_MAP_TYPE_PERCPU_ARRAYã¨ã¯ä½ãï¼
CPUæ¯ã«ã¡ã¢ãªé åãå²ãå½ã¦ã¦ãããBPF_MAP_TYPE_ARRAYã
CPUæ¯ã«ã¡ã¢ãªé åãããããåå¨ãããããå¤ã®åãåºããããã¨ãã¯ä»¥ä¸ã®ããã«cpuãæå®ãã¦ããå¿ è¦ãããã
void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu)
解ãã¦ã¿ã
MAPã®ç¨®é¡ãå¤æ´ãã
xdp_prog_kern.c
@@ -9,7 +9,7 @@ * - The idea is to keep stats per (enum) xdp_action */ struct { - __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __type(key, __u32); __type(value, struct datarec); __uint(max_entries, XDP_ACTION_MAX);
atomicãªå ç®ãããã¦ãæ®éã«å ç®ãã
xdp_prog_kern.c
@@ -43,13 +43,9 @@ int xdp_stats1_func(struct xdp_md *ctx) /* Multiple CPUs can access data record. Thus, the accounting needs to * use an atomic operation. */ - lock_xadd(&rec->rx_packets, 1); + rec->rx_packets++; __u64 bytes = data_end - data; - lock_xadd(&rec->rx_bytes, bytes); - /* - * Assignment#3: Avoid the atomic operation - * - Hint there is a map type named BPF_MAP_TYPE_PERCPU_ARRAY - */ + rec->rx_bytes += bytes; return XDP_PASS; }
ã¦ã¼ã¶ç©ºéã§ã®å¤ã®éè¨
(åé¡æã§cpuãã¨ã®å¤ã®éè¨ãè¡ãã³ã¼ããæä¾ããã¦ããã®ã§ãããããã®ã¾ã¾ä½¿ã£ã¦ããã)
xdp_load_and_stats.c
@@ -158,11 +158,26 @@ void map_get_value_array(int fd, __u32 key, struct datarec *value) /* BPF_MAP_TYPE_PERCPU_ARRAY */ void map_get_value_percpu_array(int fd, __u32 key, struct datarec *value) { - /* For percpu maps, userspace gets a value per possible CPU */ - // unsigned int nr_cpus = libbpf_num_possible_cpus(); - // struct datarec values[nr_cpus]; + /* For percpu maps, user space gets a value per possible CPU */ + unsigned int nr_cpus = libbpf_num_possible_cpus(); + struct datarec values[nr_cpus]; + __u64 sum_bytes = 0; + __u64 sum_pkts = 0; + int i; + + if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { + fprintf(stderr, + "ERR: bpf_map_lookup_elem failed key:0x%X\n", key); + return; + } - fprintf(stderr, "ERR: %s() not impl. see assignment#3", __func__); + /* Sum values from each CPU */ + for (i = 0; i < nr_cpus; i++) { + sum_pkts += values[i].rx_packets; + sum_bytes += values[i].rx_bytes; + } + value->rx_packets = sum_pkts; + value->rx_bytes = sum_bytes; } static bool map_collect(int fd, __u32 map_type, __u32 key, struct record *rec) @@ -177,7 +192,8 @@ static bool map_collect(int fd, __u32 map_type, __u32 key, struct record * rec) map_get_value_array(fd, key, &value); break; case BPF_MAP_TYPE_PERCPU_ARRAY: - /* fall-through */ + map_get_value_percpu_array(fd, key, &value); + break; default: fprintf(stderr, "ERR: Unknown map_type(%u) cannot handle\n", map_type);
解説
ã«ã¼ãã«ç©ºé
- é©å½ãªCPUã§ããããæ´æ°ããã
ã¦ã¼ã¶ç©ºé
- åCPUã«åå¥ã«å²å½ãã¦ããããããæã¤ã«ã¦ã³ã¿ãå ¨ã¦è¶³ãåããã¦ããããstatsã¨ãã¦è¡¨ç¤ºããã
åããã¦ã¿ã
$ sudo ./xdp_load_and_stats --dev=lo libbpf: elf: skipping unrecognized data section(7) xdp_metadata libbpf: elf: skipping unrecognized data section(7) xdp_metadata libbpf: elf: skipping unrecognized data section(7) xdp_metadata libbpf: elf: skipping unrecognized data section(7) xdp_metadata Success: Loaded BPF-object(xdp_prog_kern.o) and used section(xdp_stats1_func) - XDP prog id:74 attached on device:lo(ifindex:1) Collecting stats from BPF map - BPF map (bpf_map_type:6) id:19 name:xdp_stats_map key_size:4 value_size:16 max_entries:5 XDP-action XDP_ABORTED 0 pkts ( 0 pps) 0 bytes ( 0 bits/s) period:0.250211 XDP_DROP 0 pkts ( 0 pps) 0 bytes ( 0 bits/s) period:0.250183 XDP_PASS 1 pkts ( 4 pps) 90 bytes ( 2878 bits/s) period:0.250184 XDP_TX 0 pkts ( 0 pps) 0 bytes ( 0 bits/s) period:0.250184 XDP_REDIRECT 0 pkts ( 0 pps) 0 bytes ( 0 bits/s) period:0.250184 XDP_ABORTED 0 pkts ( 0 pps) 0 bytes ( 0 bits/s) period:2.000381 XDP_DROP 0 pkts ( 0 pps) 0 bytes ( 0 bits/s) period:2.000383
vim-lsp/clangd/bear/compdb
clangd
clangdã¯clangã®LSPãµã¼ãã
ä¾ãã°ãMakefileã§-Iãªãã·ã§ã³ã¨ãã使ã£ã¦ã¦ãããã«ãã£ã¦ã¨ãã£ã¿ãincludeãã¹ãæ£ããè¦ã¤ããããªãã¦ãã¨ã©ã¼ãåºãã¿ãããªåé¡ã解決ã§ããã
vim-lsp
LSPã¯ã©ã¤ã¢ã³ãã¯vim-lspãã¤ãã£ã¦ããã®è¨å®ã¯vim-lsp-settingsã«ä»»ãã¦ããã
bear/compdb
# bear -- <BUILD COMMAND> bear -- make
ä¸è¨ãå®è¡ããã¨compile_commands.jsonã¨ããã®ãã«ã¬ã³ããã£ã¬ã¯ããªã«çæãããã
compile_commands.jsonãè¦ãã³ã³ãã¤ã«æ
å ±ã®ãã¼ã¿ãã¼ã¹ã¨æãã°è¯ãã
そして、bearはmakeの実行内容を見てくれて、compile_commands.jsonを生成してくれる。
compile_commands.jsonã®ãããã£ã¬ã¯ããªä»¥ä¸ã§ã¯ã¨ãã£ã¿ã¯clangdçµç±ã§é©åã«includeãã¹çã解決ã§ããããã«ãªãã
ããããã¡ã¤ã«ã«ã¤ãã¦LSPã§é¢åã¿ã¦ãããããã«ã¯compdbã使ããã 以ä¸ãå®è¡ããã¨compile_commands.jsonã«ããããã¡ã¤ã«ã®æ å ±ã追è¨ãããã
compdb list | sponge compile_commands.json
ãããã¼ãã¡ã¤ã«ä¸ã«æªå®ç¾©ã®åãããåé¡
ãããã¼ãã¡ã¤ã«ã¨ãã ã¨ããããã¼ãã¡ã¤ã«åä½ã§ã¯æªå®ç¾©ã®åã¨ããããå ´åããã(ãã®ããããã¡ã¤ã«ãä¾åããä»ã®ãããã¼ãã¡ã¤ã«ãincludeãã¦ãªãå ´åç)ã ãã®ç¶æ ã§æçµçãªãã«ãã§ã¯åé¡ãªãã¦ããvimã§ãããããããã¼ãã¡ã¤ã«ãéãã¨LSPãã¨ã©ã¼ãåºãã¦ãã¾ãã ããã«å¯¾ãããã¾ã解決æ¹æ³ãããããªãã®ã§ãç¾ç¶ã ã¨ãcompile_commands.jsonã®è©²å½ã®ãããã¼ãã¡ã¤ã«ã«é¢ããè¨è¿°ã®é¨åã«gccã®-includeãªãã·ã§ã³ãæåã§è¿½è¨ãã¦ç¡çããä¾åã解決ããã¦ããã
perfããã¾ããããªãã£ãã¡ã¢
xdp_exceptionã®ã¤ãã³ãã確èªãããã¦ãperfãå®è¡ãããã以ä¸ã®ããã«ã¨ã©ã¼ã«ééããã
sudo perf record -a -e xdp:xdp_exception sleep 4 event syntax error: 'xdp:xdp_exception' \___ unsupported tracepoint libtraceevent is necessary for tracepoint support Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events
以ä¸ã確èªãããã¨
- perf listã®åºåã«ã¯xdp:xdp_exceptionã¯ã¡ããã¨è¼ã£ã¦ããã
- libtraceeventããªããã¨æã£ã¦ã¿ã¦èª¿ã¹ãããã¡ããã¨libtraceevent-devããã±ã¼ã¸ã¯ã¤ã³ã¹ãã¼ã«ããã¦ããã
åå ããããªãã®ã§ãã¯ã¼ã¯ã¢ã©ã¦ã³ãã¨ãã¦ä»¥ä¸ã§xdp_exceptionã®ã¤ãã³ãã確èªãããã¨ã«ããã
sudo trace-cmd record -e 'xdp:xdp_exception' sleep 4 sudo trace-cmd report