From 7ec88e4be461590b5a3817460c34603f76d9b3ae Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 28 Sep 2015 22:21:28 +0200 Subject: ntp/pps: use timespec64 for hardpps() There is only one user of the hardpps function in the kernel, so it makes sense to atomically change it over to using 64-bit timestamps for y2038 safety. In the hardpps implementation, we also need to change the pps_normtime structure, which is similar to struct timespec and also requires a 64-bit seconds portion. This introduces two temporary variables in pps_kc_event() to do the conversion; they will be removed again in the next step, which seemed preferable to having a larger patch changing it all at the same time. Acked-by: Richard Cochran Acked-by: David S. Miller Reviewed-by: Thomas Gleixner Signed-off-by: Arnd Bergmann Signed-off-by: John Stultz --- include/linux/timex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/timex.h b/include/linux/timex.h index 9d3f1a5b6178..39c25dbebfe8 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -152,7 +152,7 @@ extern unsigned long tick_nsec; /* SHIFTED_HZ period (nsec) */ #define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ) extern int do_adjtimex(struct timex *); -extern void hardpps(const struct timespec *, const struct timespec *); +extern void hardpps(const struct timespec64 *, const struct timespec64 *); int read_current_timer(unsigned long *timer_val); void ntp_notify_cmos_timer(void); -- cgit v1.2.3 From 071eee45b1650d53d21c636d344bdcebd4577ed2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 28 Sep 2015 22:21:29 +0200 Subject: ntp/pps: replace getnstime_raw_and_real with 64-bit version There is exactly one caller of getnstime_raw_and_real in the kernel, which is the pps_get_ts function. This changes the caller and the implementation to work on timespec64 types rather than timespec, to avoid the time_t overflow on 32-bit architectures. 
For consistency with the other new functions (ktime_get_seconds, ktime_get_real_*, ...), I'm renaming the function to ktime_get_raw_and_real_ts64. We still need to convert from the internal 64-bit type to 32 bit types in the caller, but this conversion is now pushed out from getnstime_raw_and_real to pps_get_ts. A follow-up patch changes the remaining pps code to completely avoid the conversion. Acked-by: Richard Cochran Acked-by: David S. Miller Reviewed-by: Thomas Gleixner Signed-off-by: Arnd Bergmann Signed-off-by: John Stultz --- include/linux/pps_kernel.h | 7 ++++++- include/linux/timekeeping.h | 4 ++-- kernel/time/timekeeping.c | 12 ++++++------ 3 files changed, 14 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/pps_kernel.h b/include/linux/pps_kernel.h index 1d2cd21242e8..b2fbd62ab18d 100644 --- a/include/linux/pps_kernel.h +++ b/include/linux/pps_kernel.h @@ -115,7 +115,12 @@ static inline void timespec_to_pps_ktime(struct pps_ktime *kt, static inline void pps_get_ts(struct pps_event_time *ts) { - getnstime_raw_and_real(&ts->ts_raw, &ts->ts_real); + struct timespec64 raw, real; + + ktime_get_raw_and_real_ts64(&raw, &real); + + ts->ts_raw = timespec64_to_timespec(raw); + ts->ts_real = timespec64_to_timespec(real); } #else /* CONFIG_NTP_PPS */ diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index ba0ae09cbb21..ec89d846324c 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -263,8 +263,8 @@ extern void timekeeping_inject_sleeptime64(struct timespec64 *delta); /* * PPS accessor */ -extern void getnstime_raw_and_real(struct timespec *ts_raw, - struct timespec *ts_real); +extern void ktime_get_raw_and_real_ts64(struct timespec64 *ts_raw, + struct timespec64 *ts_real); /* * Persistent clock related interfaces diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 177188b11a2e..274ed5e88456 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ 
-849,7 +849,7 @@ EXPORT_SYMBOL_GPL(ktime_get_real_seconds); #ifdef CONFIG_NTP_PPS /** - * getnstime_raw_and_real - get day and raw monotonic time in timespec format + * ktime_get_raw_and_real_ts64 - get day and raw monotonic time in timespec format * @ts_raw: pointer to the timespec to be set to raw monotonic time * @ts_real: pointer to the timespec to be set to the time of day * @@ -857,7 +857,7 @@ EXPORT_SYMBOL_GPL(ktime_get_real_seconds); * same time atomically and stores the resulting timestamps in timespec * format. */ -void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) +void ktime_get_raw_and_real_ts64(struct timespec64 *ts_raw, struct timespec64 *ts_real) { struct timekeeper *tk = &tk_core.timekeeper; unsigned long seq; @@ -868,7 +868,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) do { seq = read_seqcount_begin(&tk_core.seq); - *ts_raw = timespec64_to_timespec(tk->raw_time); + *ts_raw = tk->raw_time; ts_real->tv_sec = tk->xtime_sec; ts_real->tv_nsec = 0; @@ -877,10 +877,10 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) } while (read_seqcount_retry(&tk_core.seq, seq)); - timespec_add_ns(ts_raw, nsecs_raw); - timespec_add_ns(ts_real, nsecs_real); + timespec64_add_ns(ts_raw, nsecs_raw); + timespec64_add_ns(ts_real, nsecs_real); } -EXPORT_SYMBOL(getnstime_raw_and_real); +EXPORT_SYMBOL(ktime_get_raw_and_real_ts64); #endif /* CONFIG_NTP_PPS */ -- cgit v1.2.3 From ade1bdffe90e59cd257cb9bd4f5abe4de5f14911 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 28 Sep 2015 22:21:31 +0200 Subject: ntp/pps: use y2038 safe types in pps_event_time The pps_event_time uses two 'timespec' structures internally, which suffer from the y2038 problem. The uses of this structure are fairly self-contained in the pps code, so this replaces them all at once. Unfortunately, this includes the sfc ethernet driver aside from the pps subsystem, so we change that one as well. 
Both touch the same data structure, and there probably is no good way to split the patch into smaller units. Acked-by: Richard Cochran Acked-by: David S. Miller Reviewed-by: Thomas Gleixner Signed-off-by: Arnd Bergmann Signed-off-by: John Stultz --- drivers/net/ethernet/sfc/ptp.c | 16 ++++++++-------- drivers/pps/kapi.c | 4 ++-- drivers/pps/kc.c | 4 +--- include/linux/pps_kernel.h | 21 ++++++++------------- 4 files changed, 19 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index ad62615a93dc..fe849dbf9f80 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -646,28 +646,28 @@ static void efx_ptp_send_times(struct efx_nic *efx, struct pps_event_time *last_time) { struct pps_event_time now; - struct timespec limit; + struct timespec64 limit; struct efx_ptp_data *ptp = efx->ptp_data; - struct timespec start; + struct timespec64 start; int *mc_running = ptp->start.addr; pps_get_ts(&now); start = now.ts_real; limit = now.ts_real; - timespec_add_ns(&limit, SYNCHRONISE_PERIOD_NS); + timespec64_add_ns(&limit, SYNCHRONISE_PERIOD_NS); /* Write host time for specified period or until MC is done */ - while ((timespec_compare(&now.ts_real, &limit) < 0) && + while ((timespec64_compare(&now.ts_real, &limit) < 0) && ACCESS_ONCE(*mc_running)) { - struct timespec update_time; + struct timespec64 update_time; unsigned int host_time; /* Don't update continuously to avoid saturating the PCIe bus */ update_time = now.ts_real; - timespec_add_ns(&update_time, SYNCHRONISATION_GRANULARITY_NS); + timespec64_add_ns(&update_time, SYNCHRONISATION_GRANULARITY_NS); do { pps_get_ts(&now); - } while ((timespec_compare(&now.ts_real, &update_time) < 0) && + } while ((timespec64_compare(&now.ts_real, &update_time) < 0) && ACCESS_ONCE(*mc_running)); /* Synchronise NIC with single word of time only */ @@ -723,7 +723,7 @@ efx_ptp_process_times(struct efx_nic *efx, 
MCDI_DECLARE_STRUCT_PTR(synch_buf), struct efx_ptp_data *ptp = efx->ptp_data; u32 last_sec; u32 start_sec; - struct timespec delta; + struct timespec64 delta; ktime_t mc_time; if (number_readings == 0) diff --git a/drivers/pps/kapi.c b/drivers/pps/kapi.c index cdad4d95b20e..805c749ac1ad 100644 --- a/drivers/pps/kapi.c +++ b/drivers/pps/kapi.c @@ -179,8 +179,8 @@ void pps_event(struct pps_device *pps, struct pps_event_time *ts, int event, /* check event type */ BUG_ON((event & (PPS_CAPTUREASSERT | PPS_CAPTURECLEAR)) == 0); - dev_dbg(pps->dev, "PPS event at %ld.%09ld\n", - ts->ts_real.tv_sec, ts->ts_real.tv_nsec); + dev_dbg(pps->dev, "PPS event at %lld.%09ld\n", + (s64)ts->ts_real.tv_sec, ts->ts_real.tv_nsec); timespec_to_pps_ktime(&ts_real, ts->ts_real); diff --git a/drivers/pps/kc.c b/drivers/pps/kc.c index a16cea2ba980..e219db1f1c84 100644 --- a/drivers/pps/kc.c +++ b/drivers/pps/kc.c @@ -113,12 +113,10 @@ void pps_kc_event(struct pps_device *pps, struct pps_event_time *ts, int event) { unsigned long flags; - struct timespec64 real = timespec_to_timespec64(ts->ts_real); - struct timespec64 raw = timespec_to_timespec64(ts->ts_raw); /* Pass some events to kernel consumer if activated */ spin_lock_irqsave(&pps_kc_hardpps_lock, flags); if (pps == pps_kc_hardpps_dev && event & pps_kc_hardpps_mode) - hardpps(&real, &raw); + hardpps(&ts->ts_real, &ts->ts_raw); spin_unlock_irqrestore(&pps_kc_hardpps_lock, flags); } diff --git a/include/linux/pps_kernel.h b/include/linux/pps_kernel.h index b2fbd62ab18d..54bf1484d41f 100644 --- a/include/linux/pps_kernel.h +++ b/include/linux/pps_kernel.h @@ -48,9 +48,9 @@ struct pps_source_info { struct pps_event_time { #ifdef CONFIG_NTP_PPS - struct timespec ts_raw; + struct timespec64 ts_raw; #endif /* CONFIG_NTP_PPS */ - struct timespec ts_real; + struct timespec64 ts_real; }; /* The main struct */ @@ -105,7 +105,7 @@ extern void pps_event(struct pps_device *pps, struct pps_device *pps_lookup_dev(void const *cookie); static inline void 
timespec_to_pps_ktime(struct pps_ktime *kt, - struct timespec ts) + struct timespec64 ts) { kt->sec = ts.tv_sec; kt->nsec = ts.tv_nsec; @@ -115,29 +115,24 @@ static inline void timespec_to_pps_ktime(struct pps_ktime *kt, static inline void pps_get_ts(struct pps_event_time *ts) { - struct timespec64 raw, real; - - ktime_get_raw_and_real_ts64(&raw, &real); - - ts->ts_raw = timespec64_to_timespec(raw); - ts->ts_real = timespec64_to_timespec(real); + ktime_get_raw_and_real_ts64(&ts->ts_raw, &ts->ts_real); } #else /* CONFIG_NTP_PPS */ static inline void pps_get_ts(struct pps_event_time *ts) { - getnstimeofday(&ts->ts_real); + ktime_get_real_ts64(&ts->ts_real); } #endif /* CONFIG_NTP_PPS */ /* Subtract known time delay from PPS event time(s) */ -static inline void pps_sub_ts(struct pps_event_time *ts, struct timespec delta) +static inline void pps_sub_ts(struct pps_event_time *ts, struct timespec64 delta) { - ts->ts_real = timespec_sub(ts->ts_real, delta); + ts->ts_real = timespec64_sub(ts->ts_real, delta); #ifdef CONFIG_NTP_PPS - ts->ts_raw = timespec_sub(ts->ts_raw, delta); + ts->ts_raw = timespec64_sub(ts->ts_raw, delta); #endif } -- cgit v1.2.3 From d5c373eb5610686162ff50429f63f4c00c554799 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Wed, 14 Oct 2015 12:07:55 -0700 Subject: posix_cpu_timer: Convert cputimer->running to bool In the next patch in this series, a new field 'checking_timer' will be added to 'struct thread_group_cputimer'. Both this and the existing 'running' integer field are just used as boolean values. To save space in the structure, we can make both of these fields booleans. This is a preparatory patch to convert the existing running integer field to a boolean. Suggested-by: George Spelvin Signed-off-by: Jason Low Reviewed-by: George Spelvin Cc: Oleg Nesterov Cc: Paul E. 
McKenney Cc: Frederic Weisbecker Cc: Davidlohr Bueso Cc: Steven Rostedt Cc: hideaki.kimura@hpe.com Cc: terry.rudd@hpe.com Cc: scott.norton@hpe.com Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1444849677-29330-4-git-send-email-jason.low2@hp.com Signed-off-by: Thomas Gleixner --- include/linux/init_task.h | 2 +- include/linux/sched.h | 6 +++--- kernel/fork.c | 2 +- kernel/time/posix-cpu-timers.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index e38681f4912d..c43b80f3f875 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -59,7 +59,7 @@ extern struct fs_struct init_fs; .rlim = INIT_RLIMITS, \ .cputimer = { \ .cputime_atomic = INIT_CPUTIME_ATOMIC, \ - .running = 0, \ + .running = false, \ }, \ INIT_PREV_CPUTIME(sig) \ .cred_guard_mutex = \ diff --git a/include/linux/sched.h b/include/linux/sched.h index b7b9501b41af..6c8504ade2ba 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -617,15 +617,15 @@ struct task_cputime_atomic { /** * struct thread_group_cputimer - thread group interval timer counts * @cputime_atomic: atomic thread group interval timers. - * @running: non-zero when there are timers running and - * @cputime receives updates. + * @running: true when there are timers running and + * @cputime_atomic receives updates. * * This structure contains the version of task_cputime, above, that is * used for thread group CPU timer calculations. 
*/ struct thread_group_cputimer { struct task_cputime_atomic cputime_atomic; - int running; + bool running; }; #include diff --git a/kernel/fork.c b/kernel/fork.c index 2845623fb582..6ac894244d39 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1101,7 +1101,7 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig) cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); if (cpu_limit != RLIM_INFINITY) { sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit); - sig->cputimer.running = 1; + sig->cputimer.running = true; } /* The timer lists. */ diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 6f6e252ec761..2d58153074d9 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -249,7 +249,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) * but barriers are not required because update_gt_cputime() * can handle concurrent updates. */ - WRITE_ONCE(cputimer->running, 1); + WRITE_ONCE(cputimer->running, true); } sample_cputime_atomic(times, &cputimer->cputime_atomic); } @@ -918,7 +918,7 @@ static inline void stop_process_timers(struct signal_struct *sig) struct thread_group_cputimer *cputimer = &sig->cputimer; /* Turn off cputimer->running. This is done without locking. */ - WRITE_ONCE(cputimer->running, 0); + WRITE_ONCE(cputimer->running, false); } static u32 onecputick; -- cgit v1.2.3 From c8d75aa47dd585c9538a8205e9bb9847e12cfb84 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Wed, 14 Oct 2015 12:07:56 -0700 Subject: posix_cpu_timer: Reduce unnecessary sighand lock contention It was found while running a database workload on large systems that significant time was spent trying to acquire the sighand lock. The issue was that whenever an itimer expired, many threads ended up simultaneously trying to send the signal. 
Most of the time, nothing happened after acquiring the sighand lock because another thread had already sent the signal and updated the "next expire" time. The fastpath_timer_check() didn't help much since the "next expire" time was updated after the threads exited fastpath_timer_check(). This patch addresses this by having the thread_group_cputimer structure maintain a boolean to signify when a thread in the group is already checking for process wide timers, and adds extra logic in the fastpath to check the boolean. Signed-off-by: Jason Low Reviewed-by: Oleg Nesterov Reviewed-by: George Spelvin Cc: Paul E. McKenney Cc: Frederic Weisbecker Cc: Davidlohr Bueso Cc: Steven Rostedt Cc: hideaki.kimura@hpe.com Cc: terry.rudd@hpe.com Cc: scott.norton@hpe.com Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1444849677-29330-5-git-send-email-jason.low2@hp.com Signed-off-by: Thomas Gleixner --- include/linux/init_task.h | 1 + include/linux/sched.h | 3 +++ kernel/time/posix-cpu-timers.c | 26 ++++++++++++++++++++++++-- 3 files changed, 28 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index c43b80f3f875..810a34f60424 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -60,6 +60,7 @@ extern struct fs_struct init_fs; .cputimer = { \ .cputime_atomic = INIT_CPUTIME_ATOMIC, \ .running = false, \ + .checking_timer = false, \ }, \ INIT_PREV_CPUTIME(sig) \ .cred_guard_mutex = \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 6c8504ade2ba..f87559df5b75 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -619,6 +619,8 @@ struct task_cputime_atomic { * @cputime_atomic: atomic thread group interval timers. * @running: true when there are timers running and * @cputime_atomic receives updates. + * @checking_timer: true when a thread in the group is in the + * process of checking for thread group timers. 
* * This structure contains the version of task_cputime, above, that is * used for thread group CPU timer calculations. @@ -626,6 +628,7 @@ struct task_cputime_atomic { struct thread_group_cputimer { struct task_cputime_atomic cputime_atomic; bool running; + bool checking_timer; }; #include diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 2d58153074d9..f5e86d282d52 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -975,6 +975,12 @@ static void check_process_timers(struct task_struct *tsk, if (!READ_ONCE(tsk->signal->cputimer.running)) return; + /* + * Signify that a thread is checking for process timers. + * Write access to this field is protected by the sighand lock. + */ + sig->cputimer.checking_timer = true; + /* * Collect the current process totals. */ @@ -1029,6 +1035,8 @@ static void check_process_timers(struct task_struct *tsk, sig->cputime_expires.sched_exp = sched_expires; if (task_cputime_zero(&sig->cputime_expires)) stop_process_timers(sig); + + sig->cputimer.checking_timer = false; } /* @@ -1142,8 +1150,22 @@ static inline int fastpath_timer_check(struct task_struct *tsk) } sig = tsk->signal; - /* Check if cputimer is running. This is accessed without locking. */ - if (READ_ONCE(sig->cputimer.running)) { + /* + * Check if thread group timers expired when the cputimer is + * running and no other thread in the group is already checking + * for thread group cputimers. These fields are read without the + * sighand lock. However, this is fine because this is meant to + * be a fastpath heuristic to determine whether we should try to + * acquire the sighand lock to check/handle timers. + * + * In the worst case scenario, if 'running' or 'checking_timer' gets + * set but the current thread doesn't see the change yet, we'll wait + * until the next thread in the group gets a scheduler interrupt to + * handle the timer. 
This isn't an issue in practice because these + * types of delays with signals actually getting sent are expected. + */ + if (READ_ONCE(sig->cputimer.running) && + !READ_ONCE(sig->cputimer.checking_timer)) { struct task_cputime group_sample; sample_cputime_atomic(&group_sample, &sig->cputimer.cputime_atomic); -- cgit v1.2.3