diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Kconfig | 8 | ||||
-rw-r--r-- | lib/Kconfig.debug | 9 | ||||
-rw-r--r-- | lib/Makefile | 2 | ||||
-rw-r--r-- | lib/dim/Makefile | 9 | ||||
-rw-r--r-- | lib/dim/dim.c | 83 | ||||
-rw-r--r-- | lib/dim/net_dim.c | 190 | ||||
-rw-r--r-- | lib/objagg.c | 6 | ||||
-rw-r--r-- | lib/test_blackhole_dev.c | 100 |
8 files changed, 403 insertions, 4 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index 8c8eefc5e54c..52a7b2e6fb74 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -562,6 +562,14 @@ config SIGNATURE Digital signature verification. Currently only RSA is supported. Implementation is done using GnuPG MPI library +config DIMLIB + bool "DIM library" + default y + help + Dynamic Interrupt Moderation library. + Implements an algorithm for dynamically change CQ modertion values + according to run time performance. + # # libfdt files, only selected if needed. # diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b0c5f6ca9a5c..d4c8c9323aa4 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1929,6 +1929,15 @@ config TEST_BPF If unsure, say N. +config TEST_BLACKHOLE_DEV + tristate "Test blackhole netdev functionality" + depends on m && NET + help + This builds the "test_blackhole_dev" module that validates the + data path through this blackhole netdev. + + If unsure, say N. + config FIND_BIT_BENCHMARK tristate "Test find_bit functions" help diff --git a/lib/Makefile b/lib/Makefile index d3daedf93c5a..fdd56bc219b8 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -91,6 +91,7 @@ obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o obj-$(CONFIG_TEST_OBJAGG) += test_objagg.o obj-$(CONFIG_TEST_STACKINIT) += test_stackinit.o +obj-$(CONFIG_TEST_BLACKHOLE_DEV) += test_blackhole_dev.o obj-$(CONFIG_TEST_LIVEPATCH) += livepatch/ @@ -202,6 +203,7 @@ obj-$(CONFIG_GLOB) += glob.o obj-$(CONFIG_GLOB_SELFTEST) += globtest.o obj-$(CONFIG_MPILIB) += mpi/ +obj-$(CONFIG_DIMLIB) += dim/ obj-$(CONFIG_SIGNATURE) += digsig.o lib-$(CONFIG_CLZ_TAB) += clz_tab.o diff --git a/lib/dim/Makefile b/lib/dim/Makefile new file mode 100644 index 000000000000..160afe288df0 --- /dev/null +++ b/lib/dim/Makefile @@ -0,0 +1,9 @@ +# +# DIM Dynamic Interrupt Moderation library +# + +obj-$(CONFIG_DIMLIB) = net_dim.o + +net_dim-y = \ + dim.o \ + net_dim.o diff --git a/lib/dim/dim.c b/lib/dim/dim.c new file mode 100644 index 000000000000..439d641ec796 --- /dev/null +++ b/lib/dim/dim.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. + */ + +#include <linux/dim.h> + +bool dim_on_top(struct dim *dim) +{ + switch (dim->tune_state) { + case DIM_PARKING_ON_TOP: + case DIM_PARKING_TIRED: + return true; + case DIM_GOING_RIGHT: + return (dim->steps_left > 1) && (dim->steps_right == 1); + default: /* DIM_GOING_LEFT */ + return (dim->steps_right > 1) && (dim->steps_left == 1); + } +} +EXPORT_SYMBOL(dim_on_top); + +void dim_turn(struct dim *dim) +{ + switch (dim->tune_state) { + case DIM_PARKING_ON_TOP: + case DIM_PARKING_TIRED: + break; + case DIM_GOING_RIGHT: + dim->tune_state = DIM_GOING_LEFT; + dim->steps_left = 0; + break; + case DIM_GOING_LEFT: + dim->tune_state = DIM_GOING_RIGHT; + dim->steps_right = 0; + break; + } +} +EXPORT_SYMBOL(dim_turn); + +void dim_park_on_top(struct dim *dim) +{ + dim->steps_right = 0; + dim->steps_left = 0; + dim->tired = 0; + dim->tune_state = DIM_PARKING_ON_TOP; +} +EXPORT_SYMBOL(dim_park_on_top); + +void dim_park_tired(struct dim *dim) +{ + dim->steps_right = 0; + dim->steps_left = 0; + dim->tune_state = DIM_PARKING_TIRED; +} +EXPORT_SYMBOL(dim_park_tired); + +void dim_calc_stats(struct dim_sample *start, struct dim_sample *end, + struct dim_stats *curr_stats) +{ + /* u32 holds up to 71 minutes, should be enough */ + u32 delta_us = ktime_us_delta(end->time, start->time); + u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr); + u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr, + start->byte_ctr); + u32 ncomps = BIT_GAP(BITS_PER_TYPE(u32), end->comp_ctr, + start->comp_ctr); + + if (!delta_us) + return; + + curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us); + curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us); + curr_stats->epms = DIV_ROUND_UP(DIM_NEVENTS * USEC_PER_MSEC, + delta_us); + curr_stats->cpms = DIV_ROUND_UP(ncomps * USEC_PER_MSEC, delta_us); + if (curr_stats->epms != 0) + curr_stats->cpe_ratio = + (curr_stats->cpms * 100) / curr_stats->epms; + else + curr_stats->cpe_ratio = 0; + +} +EXPORT_SYMBOL(dim_calc_stats); diff --git a/lib/dim/net_dim.c b/lib/dim/net_dim.c new file mode 100644 index 000000000000..5bcc902c5388 --- /dev/null +++ b/lib/dim/net_dim.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + */ + +#include <linux/dim.h> + +struct dim_cq_moder +net_dim_get_rx_moderation(u8 cq_period_mode, int ix) +{ + struct dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix]; + + cq_moder.cq_period_mode = cq_period_mode; + return cq_moder; +} +EXPORT_SYMBOL(net_dim_get_rx_moderation); + +struct dim_cq_moder +net_dim_get_def_rx_moderation(u8 cq_period_mode) +{ + u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ? + NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE; + + return net_dim_get_rx_moderation(cq_period_mode, profile_ix); +} +EXPORT_SYMBOL(net_dim_get_def_rx_moderation); + +struct dim_cq_moder +net_dim_get_tx_moderation(u8 cq_period_mode, int ix) +{ + struct dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix]; + + cq_moder.cq_period_mode = cq_period_mode; + return cq_moder; +} +EXPORT_SYMBOL(net_dim_get_tx_moderation); + +struct dim_cq_moder +net_dim_get_def_tx_moderation(u8 cq_period_mode) +{ + u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ? + NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE; + + return net_dim_get_tx_moderation(cq_period_mode, profile_ix); +} +EXPORT_SYMBOL(net_dim_get_def_tx_moderation); + +static int net_dim_step(struct dim *dim) +{ + if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2)) + return DIM_TOO_TIRED; + + switch (dim->tune_state) { + case DIM_PARKING_ON_TOP: + case DIM_PARKING_TIRED: + break; + case DIM_GOING_RIGHT: + if (dim->profile_ix == (NET_DIM_PARAMS_NUM_PROFILES - 1)) + return DIM_ON_EDGE; + dim->profile_ix++; + dim->steps_right++; + break; + case DIM_GOING_LEFT: + if (dim->profile_ix == 0) + return DIM_ON_EDGE; + dim->profile_ix--; + dim->steps_left++; + break; + } + + dim->tired++; + return DIM_STEPPED; +} + +static void net_dim_exit_parking(struct dim *dim) +{ + dim->tune_state = dim->profile_ix ? DIM_GOING_LEFT : DIM_GOING_RIGHT; + net_dim_step(dim); +} + +static int net_dim_stats_compare(struct dim_stats *curr, + struct dim_stats *prev) +{ + if (!prev->bpms) + return curr->bpms ? DIM_STATS_BETTER : DIM_STATS_SAME; + + if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms)) + return (curr->bpms > prev->bpms) ? DIM_STATS_BETTER : + DIM_STATS_WORSE; + + if (!prev->ppms) + return curr->ppms ? DIM_STATS_BETTER : + DIM_STATS_SAME; + + if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms)) + return (curr->ppms > prev->ppms) ? DIM_STATS_BETTER : + DIM_STATS_WORSE; + + if (!prev->epms) + return DIM_STATS_SAME; + + if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms)) + return (curr->epms < prev->epms) ? DIM_STATS_BETTER : + DIM_STATS_WORSE; + + return DIM_STATS_SAME; +} + +static bool net_dim_decision(struct dim_stats *curr_stats, struct dim *dim) +{ + int prev_state = dim->tune_state; + int prev_ix = dim->profile_ix; + int stats_res; + int step_res; + + switch (dim->tune_state) { + case DIM_PARKING_ON_TOP: + stats_res = net_dim_stats_compare(curr_stats, + &dim->prev_stats); + if (stats_res != DIM_STATS_SAME) + net_dim_exit_parking(dim); + break; + + case DIM_PARKING_TIRED: + dim->tired--; + if (!dim->tired) + net_dim_exit_parking(dim); + break; + + case DIM_GOING_RIGHT: + case DIM_GOING_LEFT: + stats_res = net_dim_stats_compare(curr_stats, + &dim->prev_stats); + if (stats_res != DIM_STATS_BETTER) + dim_turn(dim); + + if (dim_on_top(dim)) { + dim_park_on_top(dim); + break; + } + + step_res = net_dim_step(dim); + switch (step_res) { + case DIM_ON_EDGE: + dim_park_on_top(dim); + break; + case DIM_TOO_TIRED: + dim_park_tired(dim); + break; + } + + break; + } + + if (prev_state != DIM_PARKING_ON_TOP || + dim->tune_state != DIM_PARKING_ON_TOP) + dim->prev_stats = *curr_stats; + + return dim->profile_ix != prev_ix; +} + +void net_dim(struct dim *dim, struct dim_sample end_sample) +{ + struct dim_stats curr_stats; + u16 nevents; + + switch (dim->state) { + case DIM_MEASURE_IN_PROGRESS: + nevents = BIT_GAP(BITS_PER_TYPE(u16), + end_sample.event_ctr, + dim->start_sample.event_ctr); + if (nevents < DIM_NEVENTS) + break; + dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats); + if (net_dim_decision(&curr_stats, dim)) { + dim->state = DIM_APPLY_NEW_PROFILE; + schedule_work(&dim->work); + break; + } + /* fall through */ + case DIM_START_MEASURE: + dim_update_sample(end_sample.event_ctr, end_sample.pkt_ctr, + end_sample.byte_ctr, &dim->start_sample); + dim->state = DIM_MEASURE_IN_PROGRESS; + break; + case DIM_APPLY_NEW_PROFILE: + break; + } +} +EXPORT_SYMBOL(net_dim); diff --git a/lib/objagg.c b/lib/objagg.c index 576be22e86de..55621fb82e0a 100644 --- a/lib/objagg.c +++ b/lib/objagg.c @@ -605,12 +605,10 @@ const struct objagg_stats *objagg_stats_get(struct objagg *objagg) { struct objagg_stats *objagg_stats; struct objagg_obj *objagg_obj; - size_t alloc_size; int i; - alloc_size = sizeof(*objagg_stats) + - sizeof(objagg_stats->stats_info[0]) * objagg->obj_count; - objagg_stats = kzalloc(alloc_size, GFP_KERNEL); + objagg_stats = kzalloc(struct_size(objagg_stats, stats_info, + objagg->obj_count), GFP_KERNEL); if (!objagg_stats) return ERR_PTR(-ENOMEM); diff --git a/lib/test_blackhole_dev.c b/lib/test_blackhole_dev.c new file mode 100644 index 000000000000..4c40580a99a3 --- /dev/null +++ b/lib/test_blackhole_dev.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This module tests the blackhole_dev that is created during the + * net subsystem initialization. The test this module performs is + * by injecting an skb into the stack with skb->dev as the + * blackhole_dev and expects kernel to behave in a sane manner + * (in other words, *not crash*)! + * + * Copyright (c) 2018, Mahesh Bandewar <maheshb@google.com> + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/printk.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/udp.h> +#include <linux/ipv6.h> + +#include <net/dst.h> + +#define SKB_SIZE 256 +#define HEAD_SIZE (14+40+8) /* Ether + IPv6 + UDP */ +#define TAIL_SIZE 32 /* random tail-room */ + +#define UDP_PORT 1234 + +static int __init test_blackholedev_init(void) +{ + struct ipv6hdr *ip6h; + struct sk_buff *skb; + struct ethhdr *ethh; + struct udphdr *uh; + int data_len; + int ret; + + skb = alloc_skb(SKB_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + /* Reserve head-room for the headers */ + skb_reserve(skb, HEAD_SIZE); + + /* Add data to the skb */ + data_len = SKB_SIZE - (HEAD_SIZE + TAIL_SIZE); + memset(__skb_put(skb, data_len), 0xf, data_len); + + /* Add protocol data */ + /* (Transport) UDP */ + uh = (struct udphdr *)skb_push(skb, sizeof(struct udphdr)); + skb_set_transport_header(skb, 0); + uh->source = uh->dest = htons(UDP_PORT); + uh->len = htons(data_len); + uh->check = 0; + /* (Network) IPv6 */ + ip6h = (struct ipv6hdr *)skb_push(skb, sizeof(struct ipv6hdr)); + skb_set_network_header(skb, 0); + ip6h->hop_limit = 32; + ip6h->payload_len = data_len + sizeof(struct udphdr); + ip6h->nexthdr = IPPROTO_UDP; + ip6h->saddr = in6addr_loopback; + ip6h->daddr = in6addr_loopback; + /* Ether */ + ethh = (struct ethhdr *)skb_push(skb, sizeof(struct ethhdr)); + skb_set_mac_header(skb, 0); + + skb->protocol = htons(ETH_P_IPV6); + skb->pkt_type = PACKET_HOST; + skb->dev = blackhole_netdev; + + /* Now attempt to send the packet */ + ret = dev_queue_xmit(skb); + + switch (ret) { + case NET_XMIT_SUCCESS: + pr_warn("dev_queue_xmit() returned NET_XMIT_SUCCESS\n"); + break; + case NET_XMIT_DROP: + pr_warn("dev_queue_xmit() returned NET_XMIT_DROP\n"); + break; + case NET_XMIT_CN: + pr_warn("dev_queue_xmit() returned NET_XMIT_CN\n"); + break; + default: + pr_err("dev_queue_xmit() returned UNKNOWN(%d)\n", ret); + } + + return 0; +} + +static void __exit test_blackholedev_exit(void) +{ + pr_warn("test_blackholedev module terminating.\n"); +} + +module_init(test_blackholedev_init); +module_exit(test_blackholedev_exit); + +MODULE_AUTHOR("Mahesh Bandewar <maheshb@google.com>"); +MODULE_LICENSE("GPL"); |