From 75d65a425c0163d3ec476ddc12b51087217a070c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 19 May 2011 13:50:07 -0700 Subject: hlist: remove software prefetching in hlist iterators They not only increase the code footprint, they actually make things slower rather than faster. On internationally acclaimed benchmarks ("make -j16" on an already fully built kernel source tree) the hlist prefetching slows down the build by up to 1%. (Almost all of it comes from hlist_for_each_entry_rcu() as used by avc_has_perm_noaudit(), which is very hot due to all the pathname lookups to see if there is anything to do). The cause seems to be two-fold: - on at least some Intel cores, prefetch(NULL) ends up with some microarchitectural stall due to the TLB miss that it incurs. The hlist case triggers this very commonly, since the NULL pointer is the last entry in the list. - the prefetch appears to cause more D$ activity, probably because it prefetches hash list entries that are never actually used (because we ended the search early due to a hit). Regardless, the numbers clearly say that the implicit prefetching is simply a bad idea. If some _particular_ user of the hlist iterators wants to prefetch the next list entry, they can do so themselves explicitly, rather than depend on all list iterators doing so implicitly. Acked-by: Ingo Molnar Acked-by: David S. Miller Cc: linux-arch@vger.kernel.org Signed-off-by: Linus Torvalds --- include/linux/rculist.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux/rculist.h') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 2dea94fc4402..900a97a44769 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -427,7 +427,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, #define __hlist_for_each_rcu(pos, head) \ for (pos = rcu_dereference(hlist_first_rcu(head)); \ - pos && ({ prefetch(pos->next); 1; }); \ + pos; \ pos = rcu_dereference(hlist_next_rcu(pos))) /** @@ -443,7 +443,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, */ #define hlist_for_each_entry_rcu(tpos, pos, head, member) \ for (pos = rcu_dereference_raw(hlist_first_rcu(head)); \ - pos && ({ prefetch(pos->next); 1; }) && \ + pos && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference_raw(hlist_next_rcu(pos))) @@ -460,7 +460,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, */ #define hlist_for_each_entry_rcu_bh(tpos, pos, head, member) \ for (pos = rcu_dereference_bh((head)->first); \ - pos && ({ prefetch(pos->next); 1; }) && \ + pos && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference_bh(pos->next)) @@ -472,7 +472,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, */ #define hlist_for_each_entry_continue_rcu(tpos, pos, member) \ for (pos = rcu_dereference((pos)->next); \ - pos && ({ prefetch(pos->next); 1; }) && \ + pos && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference(pos->next)) @@ -484,7 +484,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, */ #define hlist_for_each_entry_continue_rcu_bh(tpos, pos, member) \ for (pos = rcu_dereference_bh((pos)->next); \ - pos && ({ prefetch(pos->next); 1; }) && \ + pos && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference_bh(pos->next)) -- cgit v1.2.3