From 0af02a8e356fc6d3b1eebb32fae7d35625127835 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2024 18:42:06 +0200 Subject: [PATCH 01/43] selftests/timers/posix_timers: Simplify error handling No point in returning to main() on fatal errors. Just exit right away. Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Reviewed-by: Anna-Maria Behnsen --- tools/testing/selftests/timers/posix_timers.c | 151 ++++++------------ 1 file changed, 52 insertions(+), 99 deletions(-) diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c index 07c81c0093c0..38db82cf2a3a 100644 --- a/tools/testing/selftests/timers/posix_timers.c +++ b/tools/testing/selftests/timers/posix_timers.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -19,6 +20,20 @@ #define DELAY 2 #define USECS_PER_SEC 1000000 +static void __fatal_error(const char *test, const char *name, const char *what) +{ + char buf[64]; + + strerror_r(errno, buf, sizeof(buf)); + + if (name && strlen(name)) + ksft_exit_fail_msg("%s %s %s %s\n", test, name, what, buf); + else + ksft_exit_fail_msg("%s %s %s\n", test, what, buf); +} + +#define fatal_error(name, what) __fatal_error(__func__, name, what) + static volatile int done; /* Busy loop in userspace to elapse ITIMER_VIRTUAL */ @@ -74,24 +89,13 @@ static int check_diff(struct timeval start, struct timeval end) return 0; } -static int check_itimer(int which) +static void check_itimer(int which, const char *name) { - const char *name; - int err; struct timeval start, end; struct itimerval val = { .it_value.tv_sec = DELAY, }; - if (which == ITIMER_VIRTUAL) - name = "ITIMER_VIRTUAL"; - else if (which == ITIMER_PROF) - name = "ITIMER_PROF"; - else if (which == ITIMER_REAL) - name = "ITIMER_REAL"; - else - return -1; - done = 0; if (which == ITIMER_VIRTUAL) @@ -101,17 +105,11 @@ static int check_itimer(int which) else if (which == ITIMER_REAL) signal(SIGALRM, sig_handler); - err = gettimeofday(&start, NULL); - if (err < 0) { - ksft_perror("Can't call gettimeofday()"); - return -1; - } + if (gettimeofday(&start, NULL) < 0) + fatal_error(name, "gettimeofday()"); - err = setitimer(which, &val, NULL); - if (err < 0) { - ksft_perror("Can't set timer"); - return -1; - } + if (setitimer(which, &val, NULL) < 0) + fatal_error(name, "setitimer()"); if (which == ITIMER_VIRTUAL) user_loop(); @@ -120,68 +118,41 @@ static int check_itimer(int which) else if (which == ITIMER_REAL) idle_loop(); - err = gettimeofday(&end, NULL); - if (err < 0) { - ksft_perror("Can't call gettimeofday()"); - return -1; - } + if (gettimeofday(&end, NULL) < 0) + fatal_error(name, "gettimeofday()"); ksft_test_result(check_diff(start, end) == 0, "%s\n", name); - - return 0; } -static int check_timer_create(int which) +static void check_timer_create(int which, const char *name) { - const char *type; - int err; - timer_t id; struct timeval start, end; struct itimerspec val = { .it_value.tv_sec = DELAY, }; - - if (which == CLOCK_THREAD_CPUTIME_ID) { - type = "thread"; - } else if (which == CLOCK_PROCESS_CPUTIME_ID) { - type = "process"; - } else { - ksft_print_msg("Unknown timer_create() type %d\n", which); - return -1; - } + timer_t id; done = 0; - err = timer_create(which, NULL, &id); - if (err < 0) { - ksft_perror("Can't create timer"); - return -1; - } - signal(SIGALRM, sig_handler); - err = gettimeofday(&start, NULL); - if (err < 0) { - ksft_perror("Can't call gettimeofday()"); - return -1; - } + if (timer_create(which, NULL, &id) < 0) + fatal_error(name, "timer_create()"); - err = timer_settime(id, 0, &val, NULL); - if (err < 0) { - ksft_perror("Can't set timer"); - return -1; - } + if (signal(SIGALRM, sig_handler) == SIG_ERR) + fatal_error(name, "signal()"); + + if (gettimeofday(&start, NULL) < 0) + fatal_error(name, "gettimeofday()"); + + if (timer_settime(id, 0, &val, NULL) < 0) + fatal_error(name, "timer_settime()"); user_loop(); - err = gettimeofday(&end, NULL); - if (err < 0) { - ksft_perror("Can't call gettimeofday()"); - return -1; - } + if (gettimeofday(&end, NULL) < 0) + fatal_error(name, "gettimeofday()"); ksft_test_result(check_diff(start, end) == 0, - "timer_create() per %s\n", type); - - return 0; + "timer_create() per %s\n", name); } static pthread_t ctd_thread; @@ -209,15 +180,14 @@ static void *ctd_thread_func(void *arg) ctd_count = 100; if (timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id)) - return "Can't create timer\n"; + fatal_error(NULL, "timer_create()"); if (timer_settime(id, 0, &val, NULL)) - return "Can't set timer\n"; - + fatal_error(NULL, "timer_settime()"); while (ctd_count > 0 && !ctd_failed) ; if (timer_delete(id)) - return "Can't delete timer\n"; + fatal_error(NULL, "timer_delete()"); return NULL; } @@ -225,19 +195,16 @@ static void *ctd_thread_func(void *arg) /* * Test that only the running thread receives the timer signal. */ -static int check_timer_distribution(void) +static void check_timer_distribution(void) { - const char *errmsg; + if (signal(SIGALRM, ctd_sighandler) == SIG_ERR) + fatal_error(NULL, "signal()"); - signal(SIGALRM, ctd_sighandler); - - errmsg = "Can't create thread\n"; if (pthread_create(&ctd_thread, NULL, ctd_thread_func, NULL)) - goto err; + fatal_error(NULL, "pthread_create()"); - errmsg = "Can't join thread\n"; - if (pthread_join(ctd_thread, (void **)&errmsg) || errmsg) - goto err; + if (pthread_join(ctd_thread, NULL)) + fatal_error(NULL, "pthread_join()"); if (!ctd_failed) ksft_test_result_pass("check signal distribution\n"); @@ -245,10 +212,6 @@ static int check_timer_distribution(void) ksft_test_result_fail("check signal distribution\n"); else ksft_test_result_skip("check signal distribution (old kernel)\n"); - return 0; -err: - ksft_print_msg("%s", errmsg); - return -1; } int main(int argc, char **argv) @@ -259,17 +222,10 @@ int main(int argc, char **argv) ksft_print_msg("Testing posix timers. False negative may happen on CPU execution \n"); ksft_print_msg("based timers if other threads run on the CPU...\n"); - if (check_itimer(ITIMER_VIRTUAL) < 0) - ksft_exit_fail(); - - if (check_itimer(ITIMER_PROF) < 0) - ksft_exit_fail(); - - if (check_itimer(ITIMER_REAL) < 0) - ksft_exit_fail(); - - if (check_timer_create(CLOCK_THREAD_CPUTIME_ID) < 0) - ksft_exit_fail(); + check_itimer(ITIMER_VIRTUAL, "ITIMER_VIRTUAL"); + check_itimer(ITIMER_PROF, "ITIMER_PROF"); + check_itimer(ITIMER_REAL, "ITIMER_REAL"); + check_timer_create(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID"); /* * It's unfortunately hard to reliably test a timer expiration @@ -280,11 +236,8 @@ int main(int argc, char **argv) * to ensure true parallelism. So test only one thread until we * find a better solution. */ - if (check_timer_create(CLOCK_PROCESS_CPUTIME_ID) < 0) - ksft_exit_fail(); - - if (check_timer_distribution() < 0) - ksft_exit_fail(); + check_timer_create(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID"); + check_timer_distribution(); ksft_finished(); } From 45c4225c3dcc7db2c0cdbf889cc7a9c72a53f742 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2024 18:42:07 +0200 Subject: [PATCH 02/43] selftests/timers/posix_timers: Add SIG_IGN test Add a test case to validate correct behaviour vs. SIG_IGN. The posix specification states: "Setting a signal action to SIG_IGN for a signal that is pending shall cause the pending signal to be discarded, whether or not it is blocked." The kernel implements this in the signal handling code, but due to the way how posix timers are handling SIG_IGN for periodic timers, the behaviour after installing a real handler again is inconsistent and suprising. The following sequence is expected to deliver a signal: install_handler(SIG); block_signal(SIG); timer_create(...); <- Should send SIG timer_settime(value=100ms, interval=100ms); sleep(1); <- Timer expires and queues signal, timer is not rearmed as that should happen in the signal delivery path ignore_signal(SIG); <- Discards queued signal install_handler(SIG); <- Restore handler, should rearm but does not sleep(1); unblock_signal(SIG); <- Should deliver one signal with overrun count set in siginfo This fails because nothing rearms the timer when the signal handler is restored. Add a test for this case which fails until the signal and posix timer code is fixed. Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker --- tools/testing/selftests/timers/posix_timers.c | 127 +++++++++++++++++- 1 file changed, 125 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c index 38db82cf2a3a..8a6139a1d27a 100644 --- a/tools/testing/selftests/timers/posix_timers.c +++ b/tools/testing/selftests/timers/posix_timers.c @@ -6,8 +6,9 @@ * * Kernel loop code stolen from Steven Rostedt */ - +#define _GNU_SOURCE #include +#include #include #include #include @@ -214,10 +215,129 @@ static void check_timer_distribution(void) ksft_test_result_skip("check signal distribution (old kernel)\n"); } +struct tmrsig { + int signals; + int overruns; +}; + +static void siginfo_handler(int sig, siginfo_t *si, void *uc) +{ + struct tmrsig *tsig = si ? si->si_ptr : NULL; + + if (tsig) { + tsig->signals++; + tsig->overruns += si->si_overrun; + } +} + +static void *ignore_thread(void *arg) +{ + unsigned int *tid = arg; + sigset_t set; + + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + if (sigprocmask(SIG_BLOCK, &set, NULL)) + fatal_error(NULL, "sigprocmask(SIG_BLOCK)"); + + *tid = gettid(); + sleep(100); + + if (sigprocmask(SIG_UNBLOCK, &set, NULL)) + fatal_error(NULL, "sigprocmask(SIG_UNBLOCK)"); + return NULL; +} + +static void check_sig_ign(int thread) +{ + struct tmrsig tsig = { }; + struct itimerspec its; + unsigned int tid = 0; + struct sigaction sa; + struct sigevent sev; + pthread_t pthread; + timer_t timerid; + sigset_t set; + + if (thread) { + if (pthread_create(&pthread, NULL, ignore_thread, &tid)) + fatal_error(NULL, "pthread_create()"); + sleep(1); + } + + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = siginfo_handler; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGUSR1, &sa, NULL)) + fatal_error(NULL, "sigaction()"); + + /* Block the signal */ + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + if (sigprocmask(SIG_BLOCK, &set, NULL)) + fatal_error(NULL, "sigprocmask(SIG_BLOCK)"); + + memset(&sev, 0, sizeof(sev)); + sev.sigev_notify = SIGEV_SIGNAL; + sev.sigev_signo = SIGUSR1; + sev.sigev_value.sival_ptr = &tsig; + if (thread) { + sev.sigev_notify = SIGEV_THREAD_ID; + sev._sigev_un._tid = tid; + } + + if (timer_create(CLOCK_MONOTONIC, &sev, &timerid)) + fatal_error(NULL, "timer_create()"); + + /* Start the timer to expire in 100ms and 100ms intervals */ + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 100000000; + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 100000000; + timer_settime(timerid, 0, &its, NULL); + + sleep(1); + + /* Set the signal to be ignored */ + if (signal(SIGUSR1, SIG_IGN) == SIG_ERR) + fatal_error(NULL, "signal(SIG_IGN)"); + + sleep(1); + + if (thread) { + /* Stop the thread first. No signal should be delivered to it */ + if (pthread_cancel(pthread)) + fatal_error(NULL, "pthread_cancel()"); + if (pthread_join(pthread, NULL)) + fatal_error(NULL, "pthread_join()"); + } + + /* Restore the handler */ + if (sigaction(SIGUSR1, &sa, NULL)) + fatal_error(NULL, "sigaction()"); + + sleep(1); + + /* Unblock it, which should deliver the signal in the !thread case*/ + if (sigprocmask(SIG_UNBLOCK, &set, NULL)) + fatal_error(NULL, "sigprocmask(SIG_UNBLOCK)"); + + if (timer_delete(timerid)) + fatal_error(NULL, "timer_delete()"); + + if (!thread) { + ksft_test_result(tsig.signals == 1 && tsig.overruns == 29, + "check_sig_ign SIGEV_SIGNAL\n"); + } else { + ksft_test_result(tsig.signals == 0 && tsig.overruns == 0, + "check_sig_ign SIGEV_THREAD_ID\n"); + } +} + int main(int argc, char **argv) { ksft_print_header(); - ksft_set_plan(6); + ksft_set_plan(8); ksft_print_msg("Testing posix timers. False negative may happen on CPU execution \n"); ksft_print_msg("based timers if other threads run on the CPU...\n"); @@ -239,5 +359,8 @@ int main(int argc, char **argv) check_timer_create(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID"); check_timer_distribution(); + check_sig_ign(0); + check_sig_ign(1); + ksft_finished(); } From e65bb03e442751ccf1631382516dcca5b3a20122 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2024 18:42:09 +0200 Subject: [PATCH 03/43] selftests/timers/posix_timers: Validate signal rules Add a test case to validate correct behaviour vs. timer reprogramming and deletion. The handling of queued signals in case of timer reprogramming or deletion is inconsistent at best. POSIX does not really specify the behaviour for that: - "The effect of disarming or resetting a timer with pending expiration notifications is unspecified." - "The disposition of pending signals for the deleted timer is unspecified." In both cases it is reasonable to expect that pending signals are discarded. Especially in the reprogramming case it does not make sense to account for previous overruns or to deliver a signal for a timer which has been disarmed. Add tests to validate that no unexpected signals are delivered. They fail for now until the signal and posix timer code is updated. Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker --- tools/testing/selftests/timers/posix_timers.c | 108 +++++++++++++++++- 1 file changed, 107 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c index 8a6139a1d27a..41b43d1327eb 100644 --- a/tools/testing/selftests/timers/posix_timers.c +++ b/tools/testing/selftests/timers/posix_timers.c @@ -334,10 +334,114 @@ static void check_sig_ign(int thread) } } +static void check_rearm(void) +{ + struct tmrsig tsig = { }; + struct itimerspec its; + struct sigaction sa; + struct sigevent sev; + timer_t timerid; + sigset_t set; + + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = siginfo_handler; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGUSR1, &sa, NULL)) + fatal_error(NULL, "sigaction()"); + + /* Block the signal */ + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + if (sigprocmask(SIG_BLOCK, &set, NULL)) + fatal_error(NULL, "sigprocmask(SIG_BLOCK)"); + + memset(&sev, 0, sizeof(sev)); + sev.sigev_notify = SIGEV_SIGNAL; + sev.sigev_signo = SIGUSR1; + sev.sigev_value.sival_ptr = &tsig; + if (timer_create(CLOCK_MONOTONIC, &sev, &timerid)) + fatal_error(NULL, "timer_create()"); + + /* Start the timer to expire in 100ms and 100ms intervals */ + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 100000000; + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 100000000; + if (timer_settime(timerid, 0, &its, NULL)) + fatal_error(NULL, "timer_settime()"); + + sleep(1); + + /* Reprogram the timer to single shot */ + its.it_value.tv_sec = 10; + its.it_value.tv_nsec = 0; + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 0; + if (timer_settime(timerid, 0, &its, NULL)) + fatal_error(NULL, "timer_settime()"); + + /* Unblock it, which should not deliver a signal */ + if (sigprocmask(SIG_UNBLOCK, &set, NULL)) + fatal_error(NULL, "sigprocmask(SIG_UNBLOCK)"); + + if (timer_delete(timerid)) + fatal_error(NULL, "timer_delete()"); + + ksft_test_result(!tsig.signals, "check_rearm\n"); +} + +static void check_delete(void) +{ + struct tmrsig tsig = { }; + struct itimerspec its; + struct sigaction sa; + struct sigevent sev; + timer_t timerid; + sigset_t set; + + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = siginfo_handler; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGUSR1, &sa, NULL)) + fatal_error(NULL, "sigaction()"); + + /* Block the signal */ + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + if (sigprocmask(SIG_BLOCK, &set, NULL)) + fatal_error(NULL, "sigprocmask(SIG_BLOCK)"); + + memset(&sev, 0, sizeof(sev)); + sev.sigev_notify = SIGEV_SIGNAL; + sev.sigev_signo = SIGUSR1; + sev.sigev_value.sival_ptr = &tsig; + if (timer_create(CLOCK_MONOTONIC, &sev, &timerid)) + fatal_error(NULL, "timer_create()"); + + /* Start the timer to expire in 100ms and 100ms intervals */ + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 100000000; + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 100000000; + if (timer_settime(timerid, 0, &its, NULL)) + fatal_error(NULL, "timer_settime()"); + + sleep(1); + + if (timer_delete(timerid)) + fatal_error(NULL, "timer_delete()"); + + /* Unblock it, which should not deliver a signal */ + if (sigprocmask(SIG_UNBLOCK, &set, NULL)) + fatal_error(NULL, "sigprocmask(SIG_UNBLOCK)"); + + ksft_test_result(!tsig.signals, "check_delete\n"); +} + int main(int argc, char **argv) { ksft_print_header(); - ksft_set_plan(8); + ksft_set_plan(10); ksft_print_msg("Testing posix timers. False negative may happen on CPU execution \n"); ksft_print_msg("based timers if other threads run on the CPU...\n"); @@ -361,6 +465,8 @@ int main(int argc, char **argv) check_sig_ign(0); check_sig_ign(1); + check_rearm(); + check_delete(); ksft_finished(); } From 2c2b56132bb74b6b801dcb82f57489ae1cf81a91 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2024 18:42:10 +0200 Subject: [PATCH 04/43] selftests/timers/posix-timers: Validate SIGEV_NONE Posix timers with a delivery mode of SIGEV_NONE deliver no signals but the remaining expiry time must be readable via timer_gettime() for both one shot and interval timers. That's implemented correctly for regular posix timers but broken for posix CPU timers. Add a self test so the fixes can be verified. Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker --- tools/testing/selftests/timers/posix_timers.c | 53 ++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c index 41b43d1327eb..097a132615f6 100644 --- a/tools/testing/selftests/timers/posix_timers.c +++ b/tools/testing/selftests/timers/posix_timers.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +21,7 @@ #define DELAY 2 #define USECS_PER_SEC 1000000 +#define NSECS_PER_SEC 1000000000 static void __fatal_error(const char *test, const char *name, const char *what) { @@ -438,10 +440,57 @@ static void check_delete(void) ksft_test_result(!tsig.signals, "check_delete\n"); } +static inline int64_t calcdiff_ns(struct timespec t1, struct timespec t2) +{ + int64_t diff; + + diff = NSECS_PER_SEC * (int64_t)((int) t1.tv_sec - (int) t2.tv_sec); + diff += ((int) t1.tv_nsec - (int) t2.tv_nsec); + return diff; +} + +static void check_sigev_none(int which, const char *name) +{ + struct timespec start, now; + struct itimerspec its; + struct sigevent sev; + timer_t timerid; + + memset(&sev, 0, sizeof(sev)); + sev.sigev_notify = SIGEV_NONE; + + if (timer_create(which, &sev, &timerid)) + fatal_error(name, "timer_create()"); + + /* Start the timer to expire in 100ms and 100ms intervals */ + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 100000000; + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 100000000; + timer_settime(timerid, 0, &its, NULL); + + if (clock_gettime(which, &start)) + fatal_error(name, "clock_gettime()"); + + do { + if (clock_gettime(which, &now)) + fatal_error(name, "clock_gettime()"); + } while (calcdiff_ns(now, start) < NSECS_PER_SEC); + + if (timer_gettime(timerid, &its)) + fatal_error(name, "timer_gettime()"); + + if (timer_delete(timerid)) + fatal_error(name, "timer_delete()"); + + ksft_test_result(its.it_value.tv_sec || its.it_value.tv_nsec, + "check_sigev_none %s\n", name); +} + int main(int argc, char **argv) { ksft_print_header(); - ksft_set_plan(10); + ksft_set_plan(12); ksft_print_msg("Testing posix timers. False negative may happen on CPU execution \n"); ksft_print_msg("based timers if other threads run on the CPU...\n"); @@ -467,6 +516,8 @@ int main(int argc, char **argv) check_sig_ign(1); check_rearm(); check_delete(); + check_sigev_none(CLOCK_MONOTONIC, "CLOCK_MONOTONIC"); + check_sigev_none(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID"); ksft_finished(); } From f924f868ed05d954d79db419e97ba11293110d52 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2024 18:42:11 +0200 Subject: [PATCH 05/43] selftests/timers/posix-timers: Validate timer_gettime() timer_gettime() must return the correct expiry time for interval timers even when the timer is not armed, which is the case when a signal is pending but blocked. Works correctly for regular posix timers, but not for posix CPU timers. Add a selftest to validate the fixes. Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker --- tools/testing/selftests/timers/posix_timers.c | 58 ++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c index 097a132615f6..4c993dbbdbf2 100644 --- a/tools/testing/selftests/timers/posix_timers.c +++ b/tools/testing/selftests/timers/posix_timers.c @@ -487,10 +487,63 @@ static void check_sigev_none(int which, const char *name) "check_sigev_none %s\n", name); } +static void check_gettime(int which, const char *name) +{ + struct itimerspec its, prev; + struct timespec start, now; + struct sigevent sev; + timer_t timerid; + int wraps = 0; + sigset_t set; + + /* Block the signal */ + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + if (sigprocmask(SIG_BLOCK, &set, NULL)) + fatal_error(name, "sigprocmask(SIG_BLOCK)"); + + memset(&sev, 0, sizeof(sev)); + sev.sigev_notify = SIGEV_SIGNAL; + sev.sigev_signo = SIGUSR1; + + if (timer_create(which, &sev, &timerid)) + fatal_error(name, "timer_create()"); + + /* Start the timer to expire in 100ms and 100ms intervals */ + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 100000000; + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 100000000; + if (timer_settime(timerid, 0, &its, NULL)) + fatal_error(name, "timer_settime()"); + + if (timer_gettime(timerid, &prev)) + fatal_error(name, "timer_gettime()"); + + if (clock_gettime(which, &start)) + fatal_error(name, "clock_gettime()"); + + do { + if (clock_gettime(which, &now)) + fatal_error(name, "clock_gettime()"); + if (timer_gettime(timerid, &its)) + fatal_error(name, "timer_gettime()"); + if (its.it_value.tv_nsec > prev.it_value.tv_nsec) + wraps++; + prev = its; + + } while (calcdiff_ns(now, start) < NSECS_PER_SEC); + + if (timer_delete(timerid)) + fatal_error(name, "timer_delete()"); + + ksft_test_result(wraps > 1, "check_gettime %s\n", name); +} + int main(int argc, char **argv) { ksft_print_header(); - ksft_set_plan(12); + ksft_set_plan(15); ksft_print_msg("Testing posix timers. False negative may happen on CPU execution \n"); ksft_print_msg("based timers if other threads run on the CPU...\n"); @@ -518,6 +571,9 @@ int main(int argc, char **argv) check_delete(); check_sigev_none(CLOCK_MONOTONIC, "CLOCK_MONOTONIC"); check_sigev_none(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID"); + check_gettime(CLOCK_MONOTONIC, "CLOCK_MONOTONIC"); + check_gettime(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID"); + check_gettime(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID"); ksft_finished(); } From 73339b82f86521eeadbb781d8d7e04813cbd0998 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2024 18:42:12 +0200 Subject: [PATCH 06/43] selftests/timers/posix-timers: Validate overrun after unblock When a timer signal is blocked and later unblocked then one signal should be delivered with the correct number of overruns since the timer was queued. Validate that behaviour. Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker --- tools/testing/selftests/timers/posix_timers.c | 61 ++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c index 4c993dbbdbf2..16bd49492efa 100644 --- a/tools/testing/selftests/timers/posix_timers.c +++ b/tools/testing/selftests/timers/posix_timers.c @@ -540,10 +540,66 @@ static void check_gettime(int which, const char *name) ksft_test_result(wraps > 1, "check_gettime %s\n", name); } +static void check_overrun(int which, const char *name) +{ + struct timespec start, now; + struct tmrsig tsig = { }; + struct itimerspec its; + struct sigaction sa; + struct sigevent sev; + timer_t timerid; + sigset_t set; + + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = siginfo_handler; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGUSR1, &sa, NULL)) + fatal_error(name, "sigaction()"); + + /* Block the signal */ + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + if (sigprocmask(SIG_BLOCK, &set, NULL)) + fatal_error(name, "sigprocmask(SIG_BLOCK)"); + + memset(&sev, 0, sizeof(sev)); + sev.sigev_notify = SIGEV_SIGNAL; + sev.sigev_signo = SIGUSR1; + sev.sigev_value.sival_ptr = &tsig; + if (timer_create(which, &sev, &timerid)) + fatal_error(name, "timer_create()"); + + /* Start the timer to expire in 100ms and 100ms intervals */ + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 100000000; + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 100000000; + if (timer_settime(timerid, 0, &its, NULL)) + fatal_error(name, "timer_settime()"); + + if (clock_gettime(which, &start)) + fatal_error(name, "clock_gettime()"); + + do { + if (clock_gettime(which, &now)) + fatal_error(name, "clock_gettime()"); + } while (calcdiff_ns(now, start) < NSECS_PER_SEC); + + /* Unblock it, which should deliver a signal */ + if (sigprocmask(SIG_UNBLOCK, &set, NULL)) + fatal_error(name, "sigprocmask(SIG_UNBLOCK)"); + + if (timer_delete(timerid)) + fatal_error(name, "timer_delete()"); + + ksft_test_result(tsig.signals == 1 && tsig.overruns == 9, + "check_overrun %s\n", name); +} + int main(int argc, char **argv) { ksft_print_header(); - ksft_set_plan(15); + ksft_set_plan(18); ksft_print_msg("Testing posix timers. False negative may happen on CPU execution \n"); ksft_print_msg("based timers if other threads run on the CPU...\n"); @@ -574,6 +630,9 @@ int main(int argc, char **argv) check_gettime(CLOCK_MONOTONIC, "CLOCK_MONOTONIC"); check_gettime(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID"); check_gettime(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID"); + check_overrun(CLOCK_MONOTONIC, "CLOCK_MONOTONIC"); + check_overrun(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID"); + check_overrun(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID"); ksft_finished(); } From d859704bf18519739c231403d53461e008eea4bf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2024 18:42:14 +0200 Subject: [PATCH 07/43] posix-cpu-timers: Split up posix_cpu_timer_get() In preparation for addressing issues in the timer_get() and timer_set() functions of posix CPU timers. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Reviewed-by: Anna-Maria Behnsen Acked-by: Peter Zijlstra (Intel) --- kernel/time/posix-cpu-timers.c | 51 ++++++++++++++++------------------ 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index e9c6f9d0e42c..558be8d53e47 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -785,33 +785,9 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, return ret; } -static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp) +static void __posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp, u64 now) { - clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); - struct cpu_timer *ctmr = &timer->it.cpu; - u64 now, expires = cpu_timer_getexpires(ctmr); - struct task_struct *p; - - rcu_read_lock(); - p = cpu_timer_task_rcu(timer); - if (!p) - goto out; - - /* - * Easy part: convert the reload time. - */ - itp->it_interval = ktime_to_timespec64(timer->it_interval); - - if (!expires) - goto out; - - /* - * Sample the clock to take the difference with the expiry time. - */ - if (CPUCLOCK_PERTHREAD(timer->it_clock)) - now = cpu_clock_sample(clkid, p); - else - now = cpu_clock_sample_group(clkid, p, false); + u64 expires = cpu_timer_getexpires(&timer->it.cpu); if (now < expires) { itp->it_value = ns_to_timespec64(expires - now); @@ -823,7 +799,28 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp itp->it_value.tv_nsec = 1; itp->it_value.tv_sec = 0; } -out: +} + +static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp) +{ + clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); + struct task_struct *p; + u64 now; + + rcu_read_lock(); + p = cpu_timer_task_rcu(timer); + if (p) { + itp->it_interval = ktime_to_timespec64(timer->it_interval); + + if (cpu_timer_getexpires(&timer->it.cpu)) { + if (CPUCLOCK_PERTHREAD(timer->it_clock)) + now = cpu_clock_sample(clkid, p); + else + now = cpu_clock_sample_group(clkid, p, false); + + __posix_cpu_timer_get(timer, itp, now); + } + } rcu_read_unlock(); } From b3e866b2dffbc36b31be7811ebded91ce82ecd10 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2024 18:42:15 +0200 Subject: [PATCH 08/43] posix-cpu-timers: Save interval only for armed timers There is no point to return the interval for timers which have been disarmed. Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) --- kernel/time/posix-cpu-timers.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 558be8d53e47..5aac0886bbc7 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -809,17 +809,15 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp rcu_read_lock(); p = cpu_timer_task_rcu(timer); - if (p) { + if (p && cpu_timer_getexpires(&timer->it.cpu)) { itp->it_interval = ktime_to_timespec64(timer->it_interval); - if (cpu_timer_getexpires(&timer->it.cpu)) { - if (CPUCLOCK_PERTHREAD(timer->it_clock)) - now = cpu_clock_sample(clkid, p); - else - now = cpu_clock_sample_group(clkid, p, false); + if (CPUCLOCK_PERTHREAD(timer->it_clock)) + now = cpu_clock_sample(clkid, p); + else + now = cpu_clock_sample_group(clkid, p, false); - __posix_cpu_timer_get(timer, itp, now); - } + __posix_cpu_timer_get(timer, itp, now); } rcu_read_unlock(); } From 1c5028425793ea98fcb403852331664662d97226 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2024 18:42:16 +0200 Subject: [PATCH 09/43] posix-cpu-timers: Handle interval timers correctly in timer_get() timer_gettime() must return the remaining time to the next expiry of a timer or 0 if the timer is not armed and no signal pending, but posix CPU timers fail to forward a timer which is already expired. Add the required logic to address that. Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) --- kernel/time/posix-cpu-timers.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 5aac0886bbc7..92222ca89f49 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -787,8 +787,24 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, static void __posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp, u64 now) { - u64 expires = cpu_timer_getexpires(&timer->it.cpu); + u64 expires, iv = timer->it_interval; + /* + * Make sure that interval timers are moved forward for the + * following cases: + * - Timers which expired, but the signal has not yet been + * delivered + */ + if (iv && (timer->it_requeue_pending & REQUEUE_PENDING)) + expires = bump_cpu_timer(timer, now); + else + expires = cpu_timer_getexpires(&timer->it.cpu); + + /* + * Expired interval timers cannot have a remaining time <= 0. + * The kernel has to move them forward so that the next + * timer expiry is > @now. + */ if (now < expires) { itp->it_value = ns_to_timespec64(expires - now); } else { From d786b8ba9f01b361817033d06766b2dadf619c60 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2024 18:42:17 +0200 Subject: [PATCH 10/43] posix-cpu-timers: Handle SIGEV_NONE timers correctly in timer_get() Expired SIGEV_NONE oneshot timers must return 0 nsec for the expiry time in timer_get(), but the posix CPU timer implementation returns 1 nsec. Add the missing conditional. Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) --- kernel/time/posix-cpu-timers.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 92222ca89f49..d70879d502f9 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -787,15 +787,17 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, static void __posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp, u64 now) { + bool sigev_none = timer->it_sigev_notify == SIGEV_NONE; u64 expires, iv = timer->it_interval; /* * Make sure that interval timers are moved forward for the * following cases: + * - SIGEV_NONE timers which are never armed * - Timers which expired, but the signal has not yet been * delivered */ - if (iv && (timer->it_requeue_pending & REQUEUE_PENDING)) + if (iv && ((timer->it_requeue_pending & REQUEUE_PENDING) || sigev_none)) expires = bump_cpu_timer(timer, now); else expires = cpu_timer_getexpires(&timer->it.cpu); @@ -809,11 +811,13 @@ static void __posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *i itp->it_value = ns_to_timespec64(expires - now); } else { /* - * The timer should have expired already, but the firing - * hasn't taken place yet. Say it's just about to expire. + * A single shot SIGEV_NONE timer must return 0, when it is + * expired! Timers which have a real signal delivery mode + * must return a remaining time greater than 0 because the + * signal has not yet been delivered. */ - itp->it_value.tv_nsec = 1; - itp->it_value.tv_sec = 0; + if (!sigev_none) + itp->it_value.tv_nsec = 1; } } From 5f9d4a1065949a64c107f93a89dc2b62f806c833 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Jun 2024 13:16:02 +0200 Subject: [PATCH 11/43] posix-cpu-timers: Handle SIGEV_NONE timers correctly in timer_set() Expired SIGEV_NONE oneshot timers must return 0 nsec for the expiry time in timer_get(), but the posix CPU timer implementation returns 1 nsec. Add the missing conditional. This will be cleaned up in a follow up patch. Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) --- kernel/time/posix-cpu-timers.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index d70879d502f9..cf8904423cba 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -623,6 +623,7 @@ static void cpu_timer_fire(struct k_itimer *timer) static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, struct itimerspec64 *new, struct itimerspec64 *old) { + bool sigev_none = timer->it_sigev_notify == SIGEV_NONE; clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); u64 old_expires, new_expires, old_incr, val; struct cpu_timer *ctmr = &timer->it.cpu; @@ -706,7 +707,16 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, old_expires = exp - val; old->it_value = ns_to_timespec64(old_expires); } else { - old->it_value.tv_nsec = 1; + /* + * A single shot SIGEV_NONE timer must return 0, when it is + * expired! Timers which have a real signal delivery mode + * must return a remaining time greater than 0 because the + * signal has not yet been delivered. + */ + if (sigev_none) + old->it_value.tv_nsec = 0; + else + old->it_value.tv_nsec = 1; old->it_value.tv_sec = 0; } } From d471ff397c3571cfa648a20e7018aa04f8873cb3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Jun 2024 13:17:08 +0200 Subject: [PATCH 12/43] posix-cpu-timers: Replace old expiry retrieval in posix_cpu_timer_set() Reuse the split out __posix_cpu_timer_get() function which does already the right thing. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Reviewed-by: Anna-Maria Behnsen Acked-by: Peter Zijlstra (Intel) --- kernel/time/posix-cpu-timers.c | 37 ++++++---------------------------- 1 file changed, 6 insertions(+), 31 deletions(-) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index cf8904423cba..040d5c36184e 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -614,6 +614,8 @@ static void cpu_timer_fire(struct k_itimer *timer) } } +static void __posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp, u64 now); + /* * Guts of sys_timer_settime for CPU timers. * This is called with the timer locked and interrupts disabled. @@ -623,7 +625,6 @@ static void cpu_timer_fire(struct k_itimer *timer) static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, struct itimerspec64 *new, struct itimerspec64 *old) { - bool sigev_none = timer->it_sigev_notify == SIGEV_NONE; clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); u64 old_expires, new_expires, old_incr, val; struct cpu_timer *ctmr = &timer->it.cpu; @@ -689,37 +690,11 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, else val = cpu_clock_sample_group(clkid, p, true); + /* Retrieve the previous expiry value if requested. */ if (old) { - if (old_expires == 0) { - old->it_value.tv_sec = 0; - old->it_value.tv_nsec = 0; - } else { - /* - * Update the timer in case it has overrun already. - * If it has, we'll report it as having overrun and - * with the next reloaded timer already ticking, - * though we are swallowing that pending - * notification here to install the new setting. - */ - u64 exp = bump_cpu_timer(timer, val); - - if (val < exp) { - old_expires = exp - val; - old->it_value = ns_to_timespec64(old_expires); - } else { - /* - * A single shot SIGEV_NONE timer must return 0, when it is - * expired! Timers which have a real signal delivery mode - * must return a remaining time greater than 0 because the - * signal has not yet been delivered. - */ - if (sigev_none) - old->it_value.tv_nsec = 0; - else - old->it_value.tv_nsec = 1; - old->it_value.tv_sec = 0; - } - } + old->it_value = (struct timespec64){ }; + if (old_expires) + __posix_cpu_timer_get(timer, old, val); } if (unlikely(ret)) { From bd29d773ea8d2c7fc36dc3f70d4c9d1cf404aa4b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2024 18:42:21 +0200 Subject: [PATCH 13/43] posix-cpu-timers: Do not arm SIGEV_NONE timers There is no point in arming SIGEV_NONE timers as they never deliver a signal. timer_gettime() is handling the expiry time correctly and that's all SIGEV_NONE timers care about. Prevent arming them and remove the expiry handler code which just disarms them. Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Reviewed-by: Anna-Maria Behnsen Acked-by: Peter Zijlstra (Intel) --- kernel/time/posix-cpu-timers.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 040d5c36184e..62e6b907c21d 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -584,12 +584,7 @@ static void cpu_timer_fire(struct k_itimer *timer) { struct cpu_timer *ctmr = &timer->it.cpu; - if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { - /* - * User don't want any signal. - */ - cpu_timer_setexpires(ctmr, 0); - } else if (unlikely(timer->sigq == NULL)) { + if (unlikely(timer->sigq == NULL)) { /* * This a special case for clock_nanosleep, * not a normal timer from sys_timer_create. @@ -625,6 +620,7 @@ static void __posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *i static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, struct itimerspec64 *new, struct itimerspec64 *old) { + bool sigev_none = timer->it_sigev_notify == SIGEV_NONE; clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); u64 old_expires, new_expires, old_incr, val; struct cpu_timer *ctmr = &timer->it.cpu; @@ -688,7 +684,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, if (CPUCLOCK_PERTHREAD(timer->it_clock)) val = cpu_clock_sample(clkid, p); else - val = cpu_clock_sample_group(clkid, p, true); + val = cpu_clock_sample_group(clkid, p, !sigev_none); /* Retrieve the previous expiry value if requested. */ if (old) { @@ -708,19 +704,20 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, goto out; } - if (new_expires != 0 && !(timer_flags & TIMER_ABSTIME)) { + /* Convert relative expiry time to absolute */ + if (new_expires && !(timer_flags & TIMER_ABSTIME)) new_expires += val; - } + + /* Set the new expiry time (might be 0) */ + cpu_timer_setexpires(ctmr, new_expires); /* - * Install the new expiry time (or zero). - * For a timer with no notification action, we don't actually - * arm the timer (we'll just fake it for timer_gettime). + * Arm the timer if it is not disabled, the new expiry value has + * not yet expired and the timer requires signal delivery. + * SIGEV_NONE timers are never armed. */ - cpu_timer_setexpires(ctmr, new_expires); - if (new_expires != 0 && val < new_expires) { + if (!sigev_none && new_expires && val < new_expires) arm_timer(timer, p); - } unlock_task_sighand(p, &flags); /* @@ -739,7 +736,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, timer->it_overrun_last = 0; timer->it_overrun = -1; - if (val >= new_expires) { + if (!sigev_none && val >= new_expires) { if (new_expires != 0) { /* * The designated time already passed, so we notify From c44462661e4c5967c5df451393af727d1886c8ea Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2024 18:42:22 +0200 Subject: [PATCH 14/43] posix-cpu-timers: Use @now instead of @val for clarity posix_cpu_timer_set() uses @val as variable for the current time. That's confusing at best. Use @now as anywhere else and rewrite the confusing comment about clock sampling. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) --- kernel/time/posix-cpu-timers.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 62e6b907c21d..b1a9a2ffddc9 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -622,7 +622,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, { bool sigev_none = timer->it_sigev_notify == SIGEV_NONE; clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); - u64 old_expires, new_expires, old_incr, val; + u64 old_expires, new_expires, old_incr, now; struct cpu_timer *ctmr = &timer->it.cpu; struct sighand_struct *sighand; struct task_struct *p; @@ -674,23 +674,19 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, } /* - * We need to sample the current value to convert the new - * value from to relative and absolute, and to convert the - * old value from absolute to relative. To set a process - * timer, we need a sample to balance the thread expiry - * times (in arm_timer). With an absolute time, we must - * check if it's already passed. In short, we need a sample. + * Sample the current clock for saving the previous setting + * and for rearming the timer. */ if (CPUCLOCK_PERTHREAD(timer->it_clock)) - val = cpu_clock_sample(clkid, p); + now = cpu_clock_sample(clkid, p); else - val = cpu_clock_sample_group(clkid, p, !sigev_none); + now = cpu_clock_sample_group(clkid, p, !sigev_none); /* Retrieve the previous expiry value if requested. */ if (old) { old->it_value = (struct timespec64){ }; if (old_expires) - __posix_cpu_timer_get(timer, old, val); + __posix_cpu_timer_get(timer, old, now); } if (unlikely(ret)) { @@ -706,7 +702,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, /* Convert relative expiry time to absolute */ if (new_expires && !(timer_flags & TIMER_ABSTIME)) - new_expires += val; + new_expires += now; /* Set the new expiry time (might be 0) */ cpu_timer_setexpires(ctmr, new_expires); @@ -716,7 +712,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, * not yet expired and the timer requires signal delivery. * SIGEV_NONE timers are never armed. */ - if (!sigev_none && new_expires && val < new_expires) + if (!sigev_none && new_expires && now < new_expires) arm_timer(timer, p); unlock_task_sighand(p, &flags); @@ -736,7 +732,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, timer->it_overrun_last = 0; timer->it_overrun = -1; - if (!sigev_none && val >= new_expires) { + if (!sigev_none && now >= new_expires) { if (new_expires != 0) { /* * The designated time already passed, so we notify From 286bfaccea76e0bd3805ac6e77c8ec4a18ecb3fe Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2024 18:42:23 +0200 Subject: [PATCH 15/43] posix-cpu-timers: Remove incorrect comment in posix_cpu_timer_set() A leftover from historical code which describes fiction. Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) --- kernel/time/posix-cpu-timers.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index b1a9a2ffddc9..eb28150c8d63 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -689,13 +689,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, __posix_cpu_timer_get(timer, old, now); } + /* Retry if the timer expiry is running concurrently */ if (unlikely(ret)) { - /* - * We are colliding with the timer actually firing. - * Punt after filling in the timer's old value, and - * disable this firing since we are already reporting - * it as an overrun (thanks to bump_cpu_timer above). - */ unlock_task_sighand(p, &flags); goto out; } From c20b99e3243f9e72b6fa0e260766adcba115f25b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2024 18:42:25 +0200 Subject: [PATCH 16/43] posix-cpu-timers: Simplify posix_cpu_timer_set() Avoid the late sighand lock/unlock dance when a timer is not armed to enforce reevaluation of the timer base so that the process wide CPU timer sampling can be disabled. Do it right at the point where the arming decision is made which already has sighand locked. Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Reviewed-by: Anna-Maria Behnsen Acked-by: Peter Zijlstra (Intel) --- kernel/time/posix-cpu-timers.c | 44 +++++++++++++--------------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index eb28150c8d63..c6fe017193b8 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -705,10 +705,16 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, /* * Arm the timer if it is not disabled, the new expiry value has * not yet expired and the timer requires signal delivery. - * SIGEV_NONE timers are never armed. + * SIGEV_NONE timers are never armed. In case the timer is not + * armed, enforce the reevaluation of the timer base so that the + * process wide cputime counter can be disabled eventually. */ - if (!sigev_none && new_expires && now < new_expires) - arm_timer(timer, p); + if (likely(!sigev_none)) { + if (new_expires && now < new_expires) + arm_timer(timer, p); + else + trigger_base_recalc_expires(timer, p); + } unlock_task_sighand(p, &flags); /* @@ -727,30 +733,14 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, timer->it_overrun_last = 0; timer->it_overrun = -1; - if (!sigev_none && now >= new_expires) { - if (new_expires != 0) { - /* - * The designated time already passed, so we notify - * immediately, even if the thread never runs to - * accumulate more time on this clock. - */ - cpu_timer_fire(timer); - } - - /* - * Make sure we don't keep around the process wide cputime - * counter or the tick dependency if they are not necessary. - */ - sighand = lock_task_sighand(p, &flags); - if (!sighand) - goto out; - - if (!cpu_timer_queued(ctmr)) - trigger_base_recalc_expires(timer, p); - - unlock_task_sighand(p, &flags); - } - out: + /* + * If the new expiry time was already in the past the timer was not + * queued. Fire it immediately even if the thread never runs to + * accumulate more time on this clock. + */ + if (!sigev_none && new_expires && now >= new_expires) + cpu_timer_fire(timer); +out: rcu_read_unlock(); if (old) old->it_interval = ns_to_timespec64(old_incr); From bfa408f03fc74bcfe8f275a434294bde06eabb00 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2024 18:42:26 +0200 Subject: [PATCH 17/43] posix-timers: Retrieve interval in common timer_settime() code No point in doing this all over the place. Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Reviewed-by: Anna-Maria Behnsen Acked-by: Peter Zijlstra (Intel) --- kernel/time/posix-cpu-timers.c | 10 ++-------- kernel/time/posix-timers.c | 5 ++++- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index c6fe017193b8..410797706ccf 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -622,8 +622,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, { bool sigev_none = timer->it_sigev_notify == SIGEV_NONE; clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); - u64 old_expires, new_expires, old_incr, now; struct cpu_timer *ctmr = &timer->it.cpu; + u64 old_expires, new_expires, now; struct sighand_struct *sighand; struct task_struct *p; unsigned long flags; @@ -660,10 +660,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, return -ESRCH; } - /* - * Disarm any old timer after extracting its expiry time. - */ - old_incr = timer->it_interval; + /* Retrieve the current expiry time before disarming the timer */ old_expires = cpu_timer_getexpires(ctmr); if (unlikely(timer->it.cpu.firing)) { @@ -742,9 +739,6 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, cpu_timer_fire(timer); out: rcu_read_unlock(); - if (old) - old->it_interval = ns_to_timespec64(old_incr); - return ret; } diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index b924f0f096fa..056966b9d7da 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -904,7 +904,7 @@ static int do_timer_settime(timer_t timer_id, int tmr_flags, const struct k_clock *kc; struct k_itimer *timr; unsigned long flags; - int error = 0; + int error; if (!timespec64_valid(&new_spec64->it_interval) || !timespec64_valid(&new_spec64->it_value)) @@ -918,6 +918,9 @@ retry: if (!timr) return -EINVAL; + if (old_spec64) + old_spec64->it_interval = ktime_to_timespec64(timr->it_interval); + kc = timr->kclock; if (WARN_ON_ONCE(!kc || !kc->timer_set)) error = -EINVAL; From aca1dc0ce128a9b12640c39c0e035266bf9c9fa5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2024 18:42:27 +0200 Subject: [PATCH 18/43] posix-timers: Clear overrun in common_timer_set() Keeping the overrun count of the previous setup around is just wrong. The new setting has nothing to do with the previous one and has to start from a clean slate. Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) --- kernel/time/posix-timers.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 056966b9d7da..53a993e4dbfa 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -881,6 +881,7 @@ int common_timer_set(struct k_itimer *timr, int flags, timr->it_requeue_pending = (timr->it_requeue_pending + 2) & ~REQUEUE_PENDING; timr->it_overrun_last = 0; + timr->it_overrun = -1LL; /* Switch off the timer when it_value is zero */ if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec) From 52dea0a15cc888e89f0144dca7b712fb56d12a28 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2024 18:42:28 +0200 Subject: [PATCH 19/43] posix-timers: Convert timer list to hlist No requirement for a real list. Spare a few bytes. Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) --- fs/proc/base.c | 6 +++--- include/linux/posix-timers.h | 2 +- include/linux/sched/signal.h | 2 +- init/init_task.c | 2 +- kernel/fork.c | 2 +- kernel/time/posix-timers.c | 19 ++++++++----------- 6 files changed, 15 insertions(+), 18 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 72a1acd03675..dd579332a7f8 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2456,13 +2456,13 @@ static void *timers_start(struct seq_file *m, loff_t *pos) if (!tp->sighand) return ERR_PTR(-ESRCH); - return seq_list_start(&tp->task->signal->posix_timers, *pos); + return seq_hlist_start(&tp->task->signal->posix_timers, *pos); } static void *timers_next(struct seq_file *m, void *v, loff_t *pos) { struct timers_private *tp = m->private; - return seq_list_next(v, &tp->task->signal->posix_timers, pos); + return seq_hlist_next(v, &tp->task->signal->posix_timers, pos); } static void timers_stop(struct seq_file *m, void *v) @@ -2491,7 +2491,7 @@ static int show_timer(struct seq_file *m, void *v) [SIGEV_THREAD] = "thread", }; - timer = list_entry((struct list_head *)v, struct k_itimer, list); + timer = hlist_entry((struct hlist_node *)v, struct k_itimer, list); notify = timer->it_sigev_notify; seq_printf(m, "ID: %d\n", timer->it_id); diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index dc7b738de299..453691710839 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -158,7 +158,7 @@ static inline void posix_cputimers_init_work(void) { } * @rcu: RCU head for freeing the timer. */ struct k_itimer { - struct list_head list; + struct hlist_node list; struct hlist_node t_hash; spinlock_t it_lock; const struct k_clock *kclock; diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 0a0e23c45406..622fdef78e14 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -137,7 +137,7 @@ struct signal_struct { /* POSIX.1b Interval Timers */ unsigned int next_posix_timer_id; - struct list_head posix_timers; + struct hlist_head posix_timers; /* ITIMER_REAL timer for the process */ struct hrtimer real_timer; diff --git a/init/init_task.c b/init/init_task.c index eeb110c65fe2..5d0399bc8d2f 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -29,7 +29,7 @@ static struct signal_struct init_signals = { .cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex), .exec_update_lock = __RWSEM_INITIALIZER(init_signals.exec_update_lock), #ifdef CONFIG_POSIX_TIMERS - .posix_timers = LIST_HEAD_INIT(init_signals.posix_timers), + .posix_timers = HLIST_HEAD_INIT, .cputimer = { .cputime_atomic = INIT_CPUTIME_ATOMIC, }, diff --git a/kernel/fork.c b/kernel/fork.c index cc760491f201..c1b343cba560 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1861,7 +1861,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) prev_cputime_init(&sig->prev_cputime); #ifdef CONFIG_POSIX_TIMERS - INIT_LIST_HEAD(&sig->posix_timers); + INIT_HLIST_HEAD(&sig->posix_timers); hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); sig->real_timer.function = it_real_fn; #endif diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 53a993e4dbfa..fa75e9493fd6 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -515,7 +515,7 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event, spin_lock_irq(¤t->sighand->siglock); /* This makes the timer valid in the hash table */ WRITE_ONCE(new_timer->it_signal, current->signal); - list_add(&new_timer->list, ¤t->signal->posix_timers); + hlist_add_head(&new_timer->list, ¤t->signal->posix_timers); spin_unlock_irq(¤t->sighand->siglock); /* * After unlocking sighand::siglock @new_timer is subject to @@ -1025,7 +1025,7 @@ retry_delete: } spin_lock(¤t->sighand->siglock); - list_del(&timer->list); + hlist_del(&timer->list); spin_unlock(¤t->sighand->siglock); /* * A concurrent lookup could check timer::it_signal lockless. It @@ -1075,7 +1075,7 @@ retry_delete: goto retry_delete; } - list_del(&timer->list); + hlist_del(&timer->list); /* * Setting timer::it_signal to NULL is technically not required @@ -1096,22 +1096,19 @@ retry_delete: */ void exit_itimers(struct task_struct *tsk) { - struct list_head timers; - struct k_itimer *tmr; + struct hlist_head timers; - if (list_empty(&tsk->signal->posix_timers)) + if (hlist_empty(&tsk->signal->posix_timers)) return; /* Protect against concurrent read via /proc/$PID/timers */ spin_lock_irq(&tsk->sighand->siglock); - list_replace_init(&tsk->signal->posix_timers, &timers); + hlist_move_list(&tsk->signal->posix_timers, &timers); spin_unlock_irq(&tsk->sighand->siglock); /* The timers are not longer accessible via tsk::signal */ - while (!list_empty(&timers)) { - tmr = list_first_entry(&timers, struct k_itimer, list); - itimer_delete(tmr); - } + while (!hlist_empty(&timers)) + itimer_delete(hlist_entry(timers.first, struct k_itimer, list)); } SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, From 20f13385b5836d00d64698748565fc6d3ce9b419 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2024 18:42:30 +0200 Subject: [PATCH 20/43] posix-timers: Consolidate timer setup hrtimer based and CPU timers have their own way to install the new interval and to reset overrun and signal handling related data. Create a helper function and do the same operation for all variants. This also makes the handling of the interval consistent. It's only stored when the timer is actually armed, i.e. timer->it_value != 0. Before that it was stored unconditionally for posix CPU timers and conditionally for the other posix timers. Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) --- kernel/time/posix-cpu-timers.c | 15 +-------------- kernel/time/posix-timers.c | 25 +++++++++++++++++++------ kernel/time/posix-timers.h | 1 + 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 410797706ccf..43cf3f63f31b 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -714,21 +714,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, } unlock_task_sighand(p, &flags); - /* - * Install the new reload setting, and - * set up the signal and overrun bookkeeping. - */ - timer->it_interval = timespec64_to_ktime(new->it_interval); - /* - * This acts as a modification timestamp for the timer, - * so any automatic reload attempt will punt on seeing - * that we have reset the timer manually. - */ - timer->it_requeue_pending = (timer->it_requeue_pending + 2) & - ~REQUEUE_PENDING; - timer->it_overrun_last = 0; - timer->it_overrun = -1; + posix_timer_set_common(timer, new); /* * If the new expiry time was already in the past the timer was not diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index fa75e9493fd6..679be902be7c 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -856,6 +856,23 @@ static struct k_itimer *timer_wait_running(struct k_itimer *timer, return lock_timer(timer_id, flags); } +/* + * Set up the new interval and reset the signal delivery data + */ +void posix_timer_set_common(struct k_itimer *timer, struct itimerspec64 *new_setting) +{ + if (new_setting->it_value.tv_sec || new_setting->it_value.tv_nsec) + timer->it_interval = timespec64_to_ktime(new_setting->it_interval); + else + timer->it_interval = 0; + + /* Prevent reloading in case there is a signal pending */ + timer->it_requeue_pending = (timer->it_requeue_pending + 2) & ~REQUEUE_PENDING; + /* Reset overrun accounting */ + timer->it_overrun_last = 0; + timer->it_overrun = -1LL; +} + /* Set a POSIX.1b interval timer. */ int common_timer_set(struct k_itimer *timr, int flags, struct itimerspec64 *new_setting, @@ -878,16 +895,12 @@ int common_timer_set(struct k_itimer *timr, int flags, return TIMER_RETRY; timr->it_active = 0; - timr->it_requeue_pending = (timr->it_requeue_pending + 2) & - ~REQUEUE_PENDING; - timr->it_overrun_last = 0; - timr->it_overrun = -1LL; + posix_timer_set_common(timr, new_setting); - /* Switch off the timer when it_value is zero */ + /* Keep timer disarmed when it_value is zero */ if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec) return 0; - timr->it_interval = timespec64_to_ktime(new_setting->it_interval); expires = timespec64_to_ktime(new_setting->it_value); if (flags & TIMER_ABSTIME) expires = timens_ktime_to_host(timr->it_clock, expires); diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h index f32a2ebba9b8..630a77b2ec6a 100644 --- a/kernel/time/posix-timers.h +++ b/kernel/time/posix-timers.h @@ -42,4 +42,5 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting); int common_timer_set(struct k_itimer *timr, int flags, struct itimerspec64 *new_setting, struct itimerspec64 *old_setting); +void posix_timer_set_common(struct k_itimer *timer, struct itimerspec64 *new_setting); int common_timer_del(struct k_itimer *timer); From 24aea4cc483240ead3fdf581045a636dc7ea1352 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2024 18:42:31 +0200 Subject: [PATCH 21/43] posix-cpu-timers: Make k_itimer::it_active consistent Posix CPU timers are not updating k_itimer::it_active which makes it impossible to base decisions in the common posix timer code on it. Update it when queueing or dequeueing posix CPU timers. Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Reviewed-by: Anna-Maria Behnsen Acked-by: Peter Zijlstra (Intel) --- kernel/time/posix-cpu-timers.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 43cf3f63f31b..bcc2b83e6666 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -453,6 +453,7 @@ static void disarm_timer(struct k_itimer *timer, struct task_struct *p) struct cpu_timer *ctmr = &timer->it.cpu; struct posix_cputimer_base *base; + timer->it_active = 0; if (!cpu_timer_dequeue(ctmr)) return; @@ -559,6 +560,7 @@ static void arm_timer(struct k_itimer *timer, struct task_struct *p) struct cpu_timer *ctmr = &timer->it.cpu; u64 newexp = cpu_timer_getexpires(ctmr); + timer->it_active = 1; if (!cpu_timer_enqueue(&base->tqhead, ctmr)) return; @@ -584,6 +586,7 @@ static void cpu_timer_fire(struct k_itimer *timer) { struct cpu_timer *ctmr = &timer->it.cpu; + timer->it_active = 0; if (unlikely(timer->sigq == NULL)) { /* * This a special case for clock_nanosleep, @@ -668,6 +671,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, ret = TIMER_RETRY; } else { cpu_timer_dequeue(ctmr); + timer->it_active = 0; } /* From 566e2d82536cf77b5ed7c03f887e7c36bf86feaa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2024 18:42:32 +0200 Subject: [PATCH 22/43] posix-timers: Consolidate signal queueing Rename posix_timer_event() to posix_timer_queue_signal() as this is what the function is about. Consolidate the requeue pending and deactivation updates into that function as there is no point in doing this in all incarnations of posix timers. Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) --- kernel/time/alarmtimer.c | 7 +------ kernel/time/posix-cpu-timers.c | 4 ++-- kernel/time/posix-timers.c | 21 +++++++++++---------- kernel/time/posix-timers.h | 2 +- 4 files changed, 15 insertions(+), 19 deletions(-) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 5abfa4390673..76bd4fda3472 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -574,15 +574,10 @@ static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm, it.alarm.alarmtimer); enum alarmtimer_restart result = ALARMTIMER_NORESTART; unsigned long flags; - int si_private = 0; spin_lock_irqsave(&ptr->it_lock, flags); - ptr->it_active = 0; - if (ptr->it_interval) - si_private = ++ptr->it_requeue_pending; - - if (posix_timer_event(ptr, si_private) && ptr->it_interval) { + if (posix_timer_queue_signal(ptr) && ptr->it_interval) { /* * Handle ignored signals and rearm the timer. This will go * away once we handle ignored signals proper. Ensure that diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index bcc2b83e6666..6bcee4704059 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -598,9 +598,9 @@ static void cpu_timer_fire(struct k_itimer *timer) /* * One-shot timer. Clear it as soon as it's fired. */ - posix_timer_event(timer, 0); + posix_timer_queue_signal(timer); cpu_timer_setexpires(ctmr, 0); - } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) { + } else if (posix_timer_queue_signal(timer)) { /* * The signal did not get queued because the signal * was ignored, so we won't get any callback to diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 679be902be7c..1cc830ef93a7 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -277,10 +277,17 @@ void posixtimer_rearm(struct kernel_siginfo *info) unlock_timer(timr, flags); } -int posix_timer_event(struct k_itimer *timr, int si_private) +int posix_timer_queue_signal(struct k_itimer *timr) { + int ret, si_private = 0; enum pid_type type; - int ret; + + lockdep_assert_held(&timr->it_lock); + + timr->it_active = 0; + if (timr->it_interval) + si_private = ++timr->it_requeue_pending; + /* * FIXME: if ->sigq is queued we can race with * dequeue_signal()->posixtimer_rearm(). @@ -309,19 +316,13 @@ int posix_timer_event(struct k_itimer *timr, int si_private) */ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) { + struct k_itimer *timr = container_of(timer, struct k_itimer, it.real.timer); enum hrtimer_restart ret = HRTIMER_NORESTART; - struct k_itimer *timr; unsigned long flags; - int si_private = 0; - timr = container_of(timer, struct k_itimer, it.real.timer); spin_lock_irqsave(&timr->it_lock, flags); - timr->it_active = 0; - if (timr->it_interval != 0) - si_private = ++timr->it_requeue_pending; - - if (posix_timer_event(timr, si_private)) { + if (posix_timer_queue_signal(timr)) { /* * The signal was not queued due to SIG_IGN. As a * consequence the timer is not going to be rearmed from diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h index 630a77b2ec6a..4784ea65f685 100644 --- a/kernel/time/posix-timers.h +++ b/kernel/time/posix-timers.h @@ -36,7 +36,7 @@ extern const struct k_clock clock_process; extern const struct k_clock clock_thread; extern const struct k_clock alarm_clock; -int posix_timer_event(struct k_itimer *timr, int si_private); +int posix_timer_queue_signal(struct k_itimer *timr); void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting); int common_timer_set(struct k_itimer *timr, int flags, From a2b80ce87a87fc18c594e74d13031d5e347b69cb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2024 18:42:33 +0200 Subject: [PATCH 23/43] signal: Remove task argument from dequeue_signal() The task pointer which is handed to dequeue_signal() is always current. The argument along with the first comment about signalfd in that function is confusing at best. Remove it and use current internally. Update the stale comment for dequeue_signal() while at it. Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Reviewed-by: Oleg Nesterov Acked-by: Peter Zijlstra (Intel) --- fs/signalfd.c | 4 ++-- include/linux/sched/signal.h | 5 ++--- kernel/signal.c | 23 ++++++++++------------- 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/fs/signalfd.c b/fs/signalfd.c index ec7b2da2477a..d0333bce015e 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -159,7 +159,7 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info DECLARE_WAITQUEUE(wait, current); spin_lock_irq(¤t->sighand->siglock); - ret = dequeue_signal(current, &ctx->sigmask, info, &type); + ret = dequeue_signal(&ctx->sigmask, info, &type); switch (ret) { case 0: if (!nonblock) @@ -174,7 +174,7 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info add_wait_queue(¤t->sighand->signalfd_wqh, &wait); for (;;) { set_current_state(TASK_INTERRUPTIBLE); - ret = dequeue_signal(current, &ctx->sigmask, info, &type); + ret = dequeue_signal(&ctx->sigmask, info, &type); if (ret != 0) break; if (signal_pending(current)) { diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 622fdef78e14..c8ed09ac29ac 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -276,8 +276,7 @@ static inline void signal_set_stop_flags(struct signal_struct *sig, extern void flush_signals(struct task_struct *); extern void ignore_signals(struct task_struct *); extern void flush_signal_handlers(struct task_struct *, int force_default); -extern int dequeue_signal(struct task_struct *task, sigset_t *mask, - kernel_siginfo_t *info, enum pid_type *type); +extern int dequeue_signal(sigset_t *mask, kernel_siginfo_t *info, enum pid_type *type); static inline int kernel_dequeue_signal(void) { @@ -287,7 +286,7 @@ static inline int kernel_dequeue_signal(void) int ret; spin_lock_irq(&task->sighand->siglock); - ret = dequeue_signal(task, &task->blocked, &__info, &__type); + ret = dequeue_signal(&task->blocked, &__info, &__type); spin_unlock_irq(&task->sighand->siglock); return ret; diff --git a/kernel/signal.c b/kernel/signal.c index 60c737e423a1..897765b254f9 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -618,20 +618,18 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, } /* - * Dequeue a signal and return the element to the caller, which is - * expected to free it. - * - * All callers have to hold the siglock. + * Try to dequeue a signal. If a deliverable signal is found fill in the + * caller provided siginfo and return the signal number. Otherwise return + * 0. */ -int dequeue_signal(struct task_struct *tsk, sigset_t *mask, - kernel_siginfo_t *info, enum pid_type *type) +int dequeue_signal(sigset_t *mask, kernel_siginfo_t *info, enum pid_type *type) { + struct task_struct *tsk = current; bool resched_timer = false; int signr; - /* We only dequeue private signals from ourselves, we don't let - * signalfd steal them - */ + lockdep_assert_held(&tsk->sighand->siglock); + *type = PIDTYPE_PID; signr = __dequeue_signal(&tsk->pending, mask, info, &resched_timer); if (!signr) { @@ -2793,8 +2791,7 @@ relock: type = PIDTYPE_PID; signr = dequeue_synchronous_signal(&ksig->info); if (!signr) - signr = dequeue_signal(current, ¤t->blocked, - &ksig->info, &type); + signr = dequeue_signal(¤t->blocked, &ksig->info, &type); if (!signr) break; /* will return 0 */ @@ -3648,7 +3645,7 @@ static int do_sigtimedwait(const sigset_t *which, kernel_siginfo_t *info, signotset(&mask); spin_lock_irq(&tsk->sighand->siglock); - sig = dequeue_signal(tsk, &mask, info, &type); + sig = dequeue_signal(&mask, info, &type); if (!sig && timeout) { /* * None ready, temporarily unblock those we're interested @@ -3667,7 +3664,7 @@ static int do_sigtimedwait(const sigset_t *which, kernel_siginfo_t *info, spin_lock_irq(&tsk->sighand->siglock); __set_task_blocked(tsk, &tsk->real_blocked); sigemptyset(&tsk->real_blocked); - sig = dequeue_signal(tsk, &mask, info, &type); + sig = dequeue_signal(&mask, info, &type); } spin_unlock_irq(&tsk->sighand->siglock); From 7f8af7bac5380f2d95a63a6f19964e22437166e1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2024 18:42:34 +0200 Subject: [PATCH 24/43] signal: Replace BUG_ON()s These really can be handled gracefully without killing the machine. Signed-off-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Reviewed-by: Oleg Nesterov Acked-by: Peter Zijlstra (Intel) --- kernel/signal.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index 897765b254f9..6f3a5aa39b09 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1938,10 +1938,11 @@ struct sigqueue *sigqueue_alloc(void) void sigqueue_free(struct sigqueue *q) { - unsigned long flags; spinlock_t *lock = ¤t->sighand->siglock; + unsigned long flags; - BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); + if (WARN_ON_ONCE(!(q->flags & SIGQUEUE_PREALLOC))) + return; /* * We must hold ->siglock while testing q->list * to serialize with collect_signal() or with @@ -1969,7 +1970,10 @@ int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type) unsigned long flags; int ret, result; - BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); + if (WARN_ON_ONCE(!(q->flags & SIGQUEUE_PREALLOC))) + return 0; + if (WARN_ON_ONCE(q->info.si_code != SI_TIMER)) + return 0; ret = -1; rcu_read_lock(); @@ -2004,7 +2008,6 @@ int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type) * If an SI_TIMER entry is already queue just increment * the overrun count. */ - BUG_ON(q->info.si_code != SI_TIMER); q->info.si_overrun++; result = TRACE_SIGNAL_ALREADY_PENDING; goto out; From 38cd4cee73a87c40a3e9ef31c0ca7b179fd282f0 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 12 Aug 2024 12:51:04 +0200 Subject: [PATCH 25/43] timers: Add sparse annotation for timer_sync_wait_running(). timer_sync_wait_running() first releases two locks and then acquires them again. This is unexpected and sparse complains about it. Add sparse annotation for timer_sync_wait_running() to note that the locking is expected. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240812105326.2240000-2-bigeasy@linutronix.de --- kernel/time/timer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 64b0d8a0aa0f..429232d8659a 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1561,6 +1561,8 @@ static inline void timer_base_unlock_expiry(struct timer_base *base) * the waiter to acquire the lock and make progress. */ static void timer_sync_wait_running(struct timer_base *base) + __releases(&base->lock) __releases(&base->expiry_lock) + __acquires(&base->expiry_lock) __acquires(&base->lock) { if (atomic_read(&base->timer_waiters)) { raw_spin_unlock_irq(&base->lock); From 330dd6d9c0fce69718b53ca0bc4f2e3920f7f600 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 12 Aug 2024 12:51:05 +0200 Subject: [PATCH 26/43] hrtimer: Annotate hrtimer_cpu_base_.*_expiry() for sparse. The two hrtimer_cpu_base_.*_expiry() functions are wrappers around the locking functions and sparse complains about the missing counterpart. Add sparse annotation to denote that this bevaviour is expected. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240812105326.2240000-3-bigeasy@linutronix.de --- kernel/time/hrtimer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index b8ee320208d4..f56ef23aad90 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1351,11 +1351,13 @@ static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) } static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) + __acquires(&base->softirq_expiry_lock) { spin_lock(&base->softirq_expiry_lock); } static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) + __releases(&base->softirq_expiry_lock) { spin_unlock(&base->softirq_expiry_lock); } From ed4fb6d7ef68111bb539283561953e5c6e9a6e38 Mon Sep 17 00:00:00 2001 From: Felix Moessbauer Date: Wed, 14 Aug 2024 14:10:32 +0200 Subject: [PATCH 27/43] hrtimer: Use and report correct timerslack values for realtime tasks The timerslack_ns setting is used to specify how much the hardware timers should be delayed, to potentially dispatch multiple timers in a single interrupt. This is a performance optimization. Timers of realtime tasks (having a realtime scheduling policy) should not be delayed. This logic was inconsitently applied to the hrtimers, leading to delays of realtime tasks which used timed waits for events (e.g. condition variables). Due to the downstream override of the slack for rt tasks, the procfs reported incorrect (non-zero) timerslack_ns values. This is changed by setting the timer_slack_ns task attribute to 0 for all tasks with a rt policy. By that, downstream users do not need to specially handle rt tasks (w.r.t. the slack), and the procfs entry shows the correct value of "0". Setting non-zero slack values (either via procfs or PR_SET_TIMERSLACK) on tasks with a rt policy is ignored, as stated in "man 2 PR_SET_TIMERSLACK": Timer slack is not applied to threads that are scheduled under a real-time scheduling policy (see sched_setscheduler(2)). The special handling of timerslack on rt tasks in downstream users is removed as well. Signed-off-by: Felix Moessbauer Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240814121032.368444-2-felix.moessbauer@siemens.com --- fs/proc/base.c | 9 +++++---- fs/select.c | 11 ++++------- kernel/sched/syscalls.c | 8 ++++++++ kernel/sys.c | 2 ++ kernel/time/hrtimer.c | 18 +++--------------- 5 files changed, 22 insertions(+), 26 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index dd579332a7f8..afe573e8d8f1 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2569,10 +2569,11 @@ static ssize_t timerslack_ns_write(struct file *file, const char __user *buf, } task_lock(p); - if (slack_ns == 0) - p->timer_slack_ns = p->default_timer_slack_ns; - else - p->timer_slack_ns = slack_ns; + if (task_is_realtime(p)) + slack_ns = 0; + else if (slack_ns == 0) + slack_ns = p->default_timer_slack_ns; + p->timer_slack_ns = slack_ns; task_unlock(p); out: diff --git a/fs/select.c b/fs/select.c index 9515c3fa1a03..ad171b7a5c11 100644 --- a/fs/select.c +++ b/fs/select.c @@ -77,19 +77,16 @@ u64 select_estimate_accuracy(struct timespec64 *tv) { u64 ret; struct timespec64 now; + u64 slack = current->timer_slack_ns; - /* - * Realtime tasks get a slack of 0 for obvious reasons. - */ - - if (rt_task(current)) + if (slack == 0) return 0; ktime_get_ts64(&now); now = timespec64_sub(*tv, now); ret = __estimate_accuracy(&now); - if (ret < current->timer_slack_ns) - return current->timer_slack_ns; + if (ret < slack) + return slack; return ret; } diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c index ae1b42775ef9..195d2f2834a9 100644 --- a/kernel/sched/syscalls.c +++ b/kernel/sched/syscalls.c @@ -406,6 +406,14 @@ static void __setscheduler_params(struct task_struct *p, else if (fair_policy(policy)) p->static_prio = NICE_TO_PRIO(attr->sched_nice); + /* rt-policy tasks do not have a timerslack */ + if (task_is_realtime(p)) { + p->timer_slack_ns = 0; + } else if (p->timer_slack_ns == 0) { + /* when switching back to non-rt policy, restore timerslack */ + p->timer_slack_ns = p->default_timer_slack_ns; + } + /* * __sched_setscheduler() ensures attr->sched_priority == 0 when * !rt_policy. Always setting this ensures that things like diff --git a/kernel/sys.c b/kernel/sys.c index 3a2df1bd9f64..e3c4cffb520c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2557,6 +2557,8 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, error = current->timer_slack_ns; break; case PR_SET_TIMERSLACK: + if (task_is_realtime(current)) + break; if (arg2 <= 0) current->timer_slack_ns = current->default_timer_slack_ns; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index f56ef23aad90..a023946f8558 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -2074,14 +2074,9 @@ long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, struct restart_block *restart; struct hrtimer_sleeper t; int ret = 0; - u64 slack; - - slack = current->timer_slack_ns; - if (rt_task(current)) - slack = 0; hrtimer_init_sleeper_on_stack(&t, clockid, mode); - hrtimer_set_expires_range_ns(&t.timer, rqtp, slack); + hrtimer_set_expires_range_ns(&t.timer, rqtp, current->timer_slack_ns); ret = do_nanosleep(&t, mode); if (ret != -ERESTART_RESTARTBLOCK) goto out; @@ -2251,7 +2246,7 @@ void __init hrtimers_init(void) /** * schedule_hrtimeout_range_clock - sleep until timeout * @expires: timeout value (ktime_t) - * @delta: slack in expires timeout (ktime_t) for SCHED_OTHER tasks + * @delta: slack in expires timeout (ktime_t) * @mode: timer mode * @clock_id: timer clock to be used */ @@ -2278,13 +2273,6 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, return -EINTR; } - /* - * Override any slack passed by the user if under - * rt contraints. - */ - if (rt_task(current)) - delta = 0; - hrtimer_init_sleeper_on_stack(&t, clock_id, mode); hrtimer_set_expires_range_ns(&t.timer, *expires, delta); hrtimer_sleeper_start_expires(&t, mode); @@ -2304,7 +2292,7 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock); /** * schedule_hrtimeout_range - sleep until timeout * @expires: timeout value (ktime_t) - * @delta: slack in expires timeout (ktime_t) for SCHED_OTHER tasks + * @delta: slack in expires timeout (ktime_t) * @mode: timer mode * * Make the current task sleep until the given expiry time has From 4381b895f544bb84b8cfb34ada64df67c9b2a4f0 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Thu, 29 Aug 2024 09:41:33 +0200 Subject: [PATCH 28/43] timers: Remove historical extra jiffie for timeout in msleep() msleep() and msleep_interruptible() add a jiffie to the requested timeout. This extra jiffie was introduced to ensure that the timeout will not happen earlier than specified. Since the rework of the timer wheel, the enqueue path already takes care of this. So the extra jiffie added by msleep*() is pointless now. Remove this extra jiffie in msleep() and msleep_interruptible(). Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/all/20240829074133.4547-1-anna-maria@linutronix.de --- kernel/time/timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 429232d8659a..d8eb368a5a5b 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -2732,7 +2732,7 @@ void __init init_timers(void) */ void msleep(unsigned int msecs) { - unsigned long timeout = msecs_to_jiffies(msecs) + 1; + unsigned long timeout = msecs_to_jiffies(msecs); while (timeout) timeout = schedule_timeout_uninterruptible(timeout); @@ -2746,7 +2746,7 @@ EXPORT_SYMBOL(msleep); */ unsigned long msleep_interruptible(unsigned int msecs) { - unsigned long timeout = msecs_to_jiffies(msecs) + 1; + unsigned long timeout = msecs_to_jiffies(msecs); while (timeout && !signal_pending(current)) timeout = schedule_timeout_interruptible(timeout); From 79f8b28e85f83563c86f528b91eff19c0c4d1177 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Thu, 29 Aug 2024 17:43:05 +0200 Subject: [PATCH 29/43] timers: Annotate possible non critical data race of next_expiry Global timers could be expired remotely when the target CPU is idle. After a remote timer expiry, the remote timer_base->next_expiry value is updated while holding the timer_base->lock. When the formerly idle CPU becomes active at the same time and checks whether timers need to expire, this check is done lockless as it is on the local CPU. This could lead to a data race, which was reported by sysbot: https://lore.kernel.org/r/000000000000916e55061f969e14@google.com When the value is read lockless but changed by the remote CPU, only two non critical scenarios could happen: 1) The already update value is read -> everything is perfect 2) The old value is read -> a superfluous timer soft interrupt is raised The same situation could happen when enqueueing a new first pinned timer by a remote CPU also with non critical scenarios: 1) The already update value is read -> everything is perfect 2) The old value is read -> when the CPU is idle, an IPI is executed nevertheless and when the CPU isn't idle, the updated value will be visible on the next tick and the timer might be late one jiffie. As this is very unlikely to happen, the overhead of doing the check under the lock is a way more effort, than a superfluous timer soft interrupt or a possible 1 jiffie delay of the timer. Document and annotate this non critical behavior in the code by using READ/WRITE_ONCE() pair when accessing timer_base->next_expiry. Reported-by: syzbot+bf285fcc0a048e028118@syzkaller.appspotmail.com Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/all/20240829154305.19259-1-anna-maria@linutronix.de Closes: https://lore.kernel.org/lkml/000000000000916e55061f969e14@google.com --- kernel/time/timer.c | 42 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index d8eb368a5a5b..311ea459b976 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -672,7 +672,7 @@ static void enqueue_timer(struct timer_base *base, struct timer_list *timer, * Set the next expiry time and kick the CPU so it * can reevaluate the wheel: */ - base->next_expiry = bucket_expiry; + WRITE_ONCE(base->next_expiry, bucket_expiry); base->timers_pending = true; base->next_expiry_recalc = false; trigger_dyntick_cpu(base, timer); @@ -1966,7 +1966,7 @@ static void next_expiry_recalc(struct timer_base *base) clk += adj; } - base->next_expiry = next; + WRITE_ONCE(base->next_expiry, next); base->next_expiry_recalc = false; base->timers_pending = !(next == base->clk + NEXT_TIMER_MAX_DELTA); } @@ -2020,7 +2020,7 @@ static unsigned long next_timer_interrupt(struct timer_base *base, * easy comparable to find out which base holds the first pending timer. */ if (!base->timers_pending) - base->next_expiry = basej + NEXT_TIMER_MAX_DELTA; + WRITE_ONCE(base->next_expiry, basej + NEXT_TIMER_MAX_DELTA); return base->next_expiry; } @@ -2464,8 +2464,40 @@ static void run_local_timers(void) hrtimer_run_queues(); for (int i = 0; i < NR_BASES; i++, base++) { - /* Raise the softirq only if required. */ - if (time_after_eq(jiffies, base->next_expiry) || + /* + * Raise the softirq only if required. + * + * timer_base::next_expiry can be written by a remote CPU while + * holding the lock. If this write happens at the same time than + * the lockless local read, sanity checker could complain about + * data corruption. + * + * There are two possible situations where + * timer_base::next_expiry is written by a remote CPU: + * + * 1. Remote CPU expires global timers of this CPU and updates + * timer_base::next_expiry of BASE_GLOBAL afterwards in + * next_timer_interrupt() or timer_recalc_next_expiry(). The + * worst outcome is a superfluous raise of the timer softirq + * when the not yet updated value is read. + * + * 2. A new first pinned timer is enqueued by a remote CPU + * and therefore timer_base::next_expiry of BASE_LOCAL is + * updated. When this update is missed, this isn't a + * problem, as an IPI is executed nevertheless when the CPU + * was idle before. When the CPU wasn't idle but the update + * is missed, then the timer would expire one jiffie late - + * bad luck. + * + * Those unlikely corner cases where the worst outcome is only a + * one jiffie delay or a superfluous raise of the softirq are + * not that expensive as doing the check always while holding + * the lock. + * + * Possible remote writers are using WRITE_ONCE(). Local reader + * uses therefore READ_ONCE(). + */ + if (time_after_eq(jiffies, READ_ONCE(base->next_expiry)) || (i == BASE_DEF && tmigr_requires_handle_remote())) { raise_softirq(TIMER_SOFTIRQ); return; From 0c87282074be532eedc8d14ff4420c585c5c57fe Mon Sep 17 00:00:00 2001 From: Detlev Casanova Date: Fri, 2 Aug 2024 17:45:35 -0400 Subject: [PATCH 30/43] dt-bindings: timer: rockchip: Add rk3576 compatible Add compatible string for Rockchip RK3576 timer. Signed-off-by: Detlev Casanova Acked-by: Krzysztof Kozlowski Acked-by: Heiko Stuebner Link: https://lore.kernel.org/r/20240802214612.434179-9-detlev.casanova@collabora.com Signed-off-by: Daniel Lezcano --- Documentation/devicetree/bindings/timer/rockchip,rk-timer.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/timer/rockchip,rk-timer.yaml b/Documentation/devicetree/bindings/timer/rockchip,rk-timer.yaml index 19e56b7577a0..6d0eb0014eee 100644 --- a/Documentation/devicetree/bindings/timer/rockchip,rk-timer.yaml +++ b/Documentation/devicetree/bindings/timer/rockchip,rk-timer.yaml @@ -24,6 +24,7 @@ properties: - rockchip,rk3228-timer - rockchip,rk3229-timer - rockchip,rk3368-timer + - rockchip,rk3576-timer - rockchip,rk3588-timer - rockchip,px30-timer - const: rockchip,rk3288-timer From a7456d7d1b8e781fdaa16c10947bd2363c6fd342 Mon Sep 17 00:00:00 2001 From: Zhang Zekun Date: Wed, 7 Aug 2024 15:46:55 +0800 Subject: [PATCH 31/43] clocksource/drivers/arm_arch_timer: Using for_each_available_child_of_node_scoped() for_each_available_child_of_node_scoped() can put the device_node automatically. So, using it to make the code logic more simple and remove the device_node clean up code. Signed-off-by: Zhang Zekun Link: https://lore.kernel.org/r/20240807074655.52157-1-zhangzekun11@huawei.com Signed-off-by: Daniel Lezcano --- drivers/clocksource/arm_arch_timer.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index aeafc74181f0..03733101e231 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -1594,7 +1594,6 @@ static int __init arch_timer_mem_of_init(struct device_node *np) { struct arch_timer_mem *timer_mem; struct arch_timer_mem_frame *frame; - struct device_node *frame_node; struct resource res; int ret = -EINVAL; u32 rate; @@ -1608,33 +1607,29 @@ static int __init arch_timer_mem_of_init(struct device_node *np) timer_mem->cntctlbase = res.start; timer_mem->size = resource_size(&res); - for_each_available_child_of_node(np, frame_node) { + for_each_available_child_of_node_scoped(np, frame_node) { u32 n; struct arch_timer_mem_frame *frame; if (of_property_read_u32(frame_node, "frame-number", &n)) { pr_err(FW_BUG "Missing frame-number.\n"); - of_node_put(frame_node); goto out; } if (n >= ARCH_TIMER_MEM_MAX_FRAMES) { pr_err(FW_BUG "Wrong frame-number, only 0-%u are permitted.\n", ARCH_TIMER_MEM_MAX_FRAMES - 1); - of_node_put(frame_node); goto out; } frame = &timer_mem->frame[n]; if (frame->valid) { pr_err(FW_BUG "Duplicated frame-number.\n"); - of_node_put(frame_node); goto out; } - if (of_address_to_resource(frame_node, 0, &res)) { - of_node_put(frame_node); + if (of_address_to_resource(frame_node, 0, &res)) goto out; - } + frame->cntbase = res.start; frame->size = resource_size(&res); From 56bd72e9cd8272687aa2a8eccddabc5526cd7845 Mon Sep 17 00:00:00 2001 From: Marek Maslanka Date: Mon, 12 Aug 2024 18:41:42 +0000 Subject: [PATCH 32/43] clocksource: acpi_pm: Add external callback for suspend/resume Provides the capability to register an external callback for the ACPI PM timer, which is called during the suspend and resume processes. Signed-off-by: Marek Maslanka Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20240812184150.1079924-1-mmaslanka@google.com Signed-off-by: Daniel Lezcano --- drivers/clocksource/acpi_pm.c | 32 ++++++++++++++++++++++++++++++++ include/linux/acpi_pmtmr.h | 13 +++++++++++++ 2 files changed, 45 insertions(+) diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c index 82338773602c..b4330a01a566 100644 --- a/drivers/clocksource/acpi_pm.c +++ b/drivers/clocksource/acpi_pm.c @@ -25,6 +25,10 @@ #include #include +static void *suspend_resume_cb_data; + +static void (*suspend_resume_callback)(void *data, bool suspend); + /* * The I/O port the PMTMR resides at. * The location is detected during setup_arch(), @@ -58,6 +62,32 @@ u32 acpi_pm_read_verified(void) return v2; } +void acpi_pmtmr_register_suspend_resume_callback(void (*cb)(void *data, bool suspend), void *data) +{ + suspend_resume_callback = cb; + suspend_resume_cb_data = data; +} +EXPORT_SYMBOL_GPL(acpi_pmtmr_register_suspend_resume_callback); + +void acpi_pmtmr_unregister_suspend_resume_callback(void) +{ + suspend_resume_callback = NULL; + suspend_resume_cb_data = NULL; +} +EXPORT_SYMBOL_GPL(acpi_pmtmr_unregister_suspend_resume_callback); + +static void acpi_pm_suspend(struct clocksource *cs) +{ + if (suspend_resume_callback) + suspend_resume_callback(suspend_resume_cb_data, true); +} + +static void acpi_pm_resume(struct clocksource *cs) +{ + if (suspend_resume_callback) + suspend_resume_callback(suspend_resume_cb_data, false); +} + static u64 acpi_pm_read(struct clocksource *cs) { return (u64)read_pmtmr(); @@ -69,6 +99,8 @@ static struct clocksource clocksource_acpi_pm = { .read = acpi_pm_read, .mask = (u64)ACPI_PM_MASK, .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .suspend = acpi_pm_suspend, + .resume = acpi_pm_resume, }; diff --git a/include/linux/acpi_pmtmr.h b/include/linux/acpi_pmtmr.h index 50d88bf1498d..0ded9220d379 100644 --- a/include/linux/acpi_pmtmr.h +++ b/include/linux/acpi_pmtmr.h @@ -26,6 +26,19 @@ static inline u32 acpi_pm_read_early(void) return acpi_pm_read_verified() & ACPI_PM_MASK; } +/** + * Register callback for suspend and resume event + * + * @cb Callback triggered on suspend and resume + * @data Data passed with the callback + */ +void acpi_pmtmr_register_suspend_resume_callback(void (*cb)(void *data, bool suspend), void *data); + +/** + * Remove registered callback for suspend and resume event + */ +void acpi_pmtmr_unregister_suspend_resume_callback(void); + #else static inline u32 acpi_pm_read_early(void) From e86c8186d03a6ba018e881ed45f0962ad553e861 Mon Sep 17 00:00:00 2001 From: Marek Maslanka Date: Mon, 12 Aug 2024 18:42:00 +0000 Subject: [PATCH 33/43] platform/x86:intel/pmc: Enable the ACPI PM Timer to be turned off when suspended Allow to disable ACPI PM Timer on suspend and enable on resume. A disabled timer helps optimise power consumption when the system is suspended. On resume the timer is only reactivated if it was activated prior to suspend, so unless the ACPI PM timer is enabled in the BIOS, this won't change anything. The ACPI PM timer is used by Intel's iTCO/wdat_wdt watchdog to drive the watchdog, so it doesn't need to run during suspend. Signed-off-by: Marek Maslanka Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20240812184208.1080710-1-mmaslanka@google.com Signed-off-by: Daniel Lezcano --- drivers/platform/x86/intel/pmc/adl.c | 2 ++ drivers/platform/x86/intel/pmc/cnp.c | 2 ++ drivers/platform/x86/intel/pmc/core.c | 45 +++++++++++++++++++++++++++ drivers/platform/x86/intel/pmc/core.h | 8 +++++ drivers/platform/x86/intel/pmc/icl.c | 2 ++ drivers/platform/x86/intel/pmc/mtl.c | 2 ++ drivers/platform/x86/intel/pmc/spt.c | 2 ++ drivers/platform/x86/intel/pmc/tgl.c | 2 ++ 8 files changed, 65 insertions(+) diff --git a/drivers/platform/x86/intel/pmc/adl.c b/drivers/platform/x86/intel/pmc/adl.c index e7878558fd90..9d9c07f44ff6 100644 --- a/drivers/platform/x86/intel/pmc/adl.c +++ b/drivers/platform/x86/intel/pmc/adl.c @@ -295,6 +295,8 @@ const struct pmc_reg_map adl_reg_map = { .ppfear_buckets = CNP_PPFEAR_NUM_ENTRIES, .pm_cfg_offset = CNP_PMC_PM_CFG_OFFSET, .pm_read_disable_bit = CNP_PMC_READ_DISABLE_BIT, + .acpi_pm_tmr_ctl_offset = SPT_PMC_ACPI_PM_TMR_CTL_OFFSET, + .acpi_pm_tmr_disable_bit = SPT_PMC_BIT_ACPI_PM_TMR_DISABLE, .ltr_ignore_max = ADL_NUM_IP_IGN_ALLOWED, .lpm_num_modes = ADL_LPM_NUM_MODES, .lpm_num_maps = ADL_LPM_NUM_MAPS, diff --git a/drivers/platform/x86/intel/pmc/cnp.c b/drivers/platform/x86/intel/pmc/cnp.c index dd72974bf71e..513c02670c5a 100644 --- a/drivers/platform/x86/intel/pmc/cnp.c +++ b/drivers/platform/x86/intel/pmc/cnp.c @@ -200,6 +200,8 @@ const struct pmc_reg_map cnp_reg_map = { .ppfear_buckets = CNP_PPFEAR_NUM_ENTRIES, .pm_cfg_offset = CNP_PMC_PM_CFG_OFFSET, .pm_read_disable_bit = CNP_PMC_READ_DISABLE_BIT, + .acpi_pm_tmr_ctl_offset = SPT_PMC_ACPI_PM_TMR_CTL_OFFSET, + .acpi_pm_tmr_disable_bit = SPT_PMC_BIT_ACPI_PM_TMR_DISABLE, .ltr_ignore_max = CNP_NUM_IP_IGN_ALLOWED, .etr3_offset = ETR3_OFFSET, }; diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c index 01ae71c6df59..c91c753b120e 100644 --- a/drivers/platform/x86/intel/pmc/core.c +++ b/drivers/platform/x86/intel/pmc/core.c @@ -11,6 +11,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -1208,6 +1209,38 @@ static bool pmc_core_is_pson_residency_enabled(struct pmc_dev *pmcdev) return val == 1; } +/** + * Enable or disable ACPI PM Timer + * + * This function is intended to be a callback for ACPI PM suspend/resume event. + * The ACPI PM Timer is enabled on resume only if it was enabled during suspend. + */ +static void pmc_core_acpi_pm_timer_suspend_resume(void *data, bool suspend) +{ + struct pmc_dev *pmcdev = data; + struct pmc *pmc = pmcdev->pmcs[PMC_IDX_MAIN]; + const struct pmc_reg_map *map = pmc->map; + bool enabled; + u32 reg; + + if (!map->acpi_pm_tmr_ctl_offset) + return; + + guard(mutex)(&pmcdev->lock); + + if (!suspend && !pmcdev->enable_acpi_pm_timer_on_resume) + return; + + reg = pmc_core_reg_read(pmc, map->acpi_pm_tmr_ctl_offset); + enabled = !(reg & map->acpi_pm_tmr_disable_bit); + if (suspend) + reg |= map->acpi_pm_tmr_disable_bit; + else + reg &= ~map->acpi_pm_tmr_disable_bit; + pmc_core_reg_write(pmc, map->acpi_pm_tmr_ctl_offset, reg); + + pmcdev->enable_acpi_pm_timer_on_resume = suspend && enabled; +} static void pmc_core_dbgfs_unregister(struct pmc_dev *pmcdev) { @@ -1404,6 +1437,7 @@ static int pmc_core_probe(struct platform_device *pdev) struct pmc_dev *pmcdev; const struct x86_cpu_id *cpu_id; int (*core_init)(struct pmc_dev *pmcdev); + const struct pmc_reg_map *map; struct pmc *primary_pmc; int ret; @@ -1462,6 +1496,11 @@ static int pmc_core_probe(struct platform_device *pdev) pm_report_max_hw_sleep(FIELD_MAX(SLP_S0_RES_COUNTER_MASK) * pmc_core_adjust_slp_s0_step(primary_pmc, 1)); + map = primary_pmc->map; + if (map->acpi_pm_tmr_ctl_offset) + acpi_pmtmr_register_suspend_resume_callback(pmc_core_acpi_pm_timer_suspend_resume, + pmcdev); + device_initialized = true; dev_info(&pdev->dev, " initialized\n"); @@ -1471,6 +1510,12 @@ static int pmc_core_probe(struct platform_device *pdev) static void pmc_core_remove(struct platform_device *pdev) { struct pmc_dev *pmcdev = platform_get_drvdata(pdev); + const struct pmc *pmc = pmcdev->pmcs[PMC_IDX_MAIN]; + const struct pmc_reg_map *map = pmc->map; + + if (map->acpi_pm_tmr_ctl_offset) + acpi_pmtmr_unregister_suspend_resume_callback(); + pmc_core_dbgfs_unregister(pmcdev); pmc_core_clean_structure(pdev); } diff --git a/drivers/platform/x86/intel/pmc/core.h b/drivers/platform/x86/intel/pmc/core.h index ea04de7eb9e8..4d37ef7113d7 100644 --- a/drivers/platform/x86/intel/pmc/core.h +++ b/drivers/platform/x86/intel/pmc/core.h @@ -68,6 +68,8 @@ struct telem_endpoint; #define SPT_PMC_LTR_SCC 0x3A0 #define SPT_PMC_LTR_ISH 0x3A4 +#define SPT_PMC_ACPI_PM_TMR_CTL_OFFSET 0x18FC + /* Sunrise Point: PGD PFET Enable Ack Status Registers */ enum ppfear_regs { SPT_PMC_XRAM_PPFEAR0A = 0x590, @@ -148,6 +150,8 @@ enum ppfear_regs { #define SPT_PMC_VRIC1_SLPS0LVEN BIT(13) #define SPT_PMC_VRIC1_XTALSDQDIS BIT(22) +#define SPT_PMC_BIT_ACPI_PM_TMR_DISABLE BIT(1) + /* Cannonlake Power Management Controller register offsets */ #define CNP_PMC_SLPS0_DBG_OFFSET 0x10B4 #define CNP_PMC_PM_CFG_OFFSET 0x1818 @@ -351,6 +355,8 @@ struct pmc_reg_map { const u8 *lpm_reg_index; const u32 pson_residency_offset; const u32 pson_residency_counter_step; + const u32 acpi_pm_tmr_ctl_offset; + const u32 acpi_pm_tmr_disable_bit; }; /** @@ -424,6 +430,8 @@ struct pmc_dev { u32 die_c6_offset; struct telem_endpoint *punit_ep; struct pmc_info *regmap_list; + + bool enable_acpi_pm_timer_on_resume; }; enum pmc_index { diff --git a/drivers/platform/x86/intel/pmc/icl.c b/drivers/platform/x86/intel/pmc/icl.c index 71b0fd6cb7d8..cbbd44054468 100644 --- a/drivers/platform/x86/intel/pmc/icl.c +++ b/drivers/platform/x86/intel/pmc/icl.c @@ -46,6 +46,8 @@ const struct pmc_reg_map icl_reg_map = { .ppfear_buckets = ICL_PPFEAR_NUM_ENTRIES, .pm_cfg_offset = CNP_PMC_PM_CFG_OFFSET, .pm_read_disable_bit = CNP_PMC_READ_DISABLE_BIT, + .acpi_pm_tmr_ctl_offset = SPT_PMC_ACPI_PM_TMR_CTL_OFFSET, + .acpi_pm_tmr_disable_bit = SPT_PMC_BIT_ACPI_PM_TMR_DISABLE, .ltr_ignore_max = ICL_NUM_IP_IGN_ALLOWED, .etr3_offset = ETR3_OFFSET, }; diff --git a/drivers/platform/x86/intel/pmc/mtl.c b/drivers/platform/x86/intel/pmc/mtl.c index c7d15d864039..91f2fa728f5c 100644 --- a/drivers/platform/x86/intel/pmc/mtl.c +++ b/drivers/platform/x86/intel/pmc/mtl.c @@ -462,6 +462,8 @@ const struct pmc_reg_map mtl_socm_reg_map = { .ppfear_buckets = MTL_SOCM_PPFEAR_NUM_ENTRIES, .pm_cfg_offset = CNP_PMC_PM_CFG_OFFSET, .pm_read_disable_bit = CNP_PMC_READ_DISABLE_BIT, + .acpi_pm_tmr_ctl_offset = SPT_PMC_ACPI_PM_TMR_CTL_OFFSET, + .acpi_pm_tmr_disable_bit = SPT_PMC_BIT_ACPI_PM_TMR_DISABLE, .lpm_num_maps = ADL_LPM_NUM_MAPS, .ltr_ignore_max = MTL_SOCM_NUM_IP_IGN_ALLOWED, .lpm_res_counter_step_x2 = TGL_PMC_LPM_RES_COUNTER_STEP_X2, diff --git a/drivers/platform/x86/intel/pmc/spt.c b/drivers/platform/x86/intel/pmc/spt.c index ab993a69e33e..2cd2b3c68e46 100644 --- a/drivers/platform/x86/intel/pmc/spt.c +++ b/drivers/platform/x86/intel/pmc/spt.c @@ -130,6 +130,8 @@ const struct pmc_reg_map spt_reg_map = { .ppfear_buckets = SPT_PPFEAR_NUM_ENTRIES, .pm_cfg_offset = SPT_PMC_PM_CFG_OFFSET, .pm_read_disable_bit = SPT_PMC_READ_DISABLE_BIT, + .acpi_pm_tmr_ctl_offset = SPT_PMC_ACPI_PM_TMR_CTL_OFFSET, + .acpi_pm_tmr_disable_bit = SPT_PMC_BIT_ACPI_PM_TMR_DISABLE, .ltr_ignore_max = SPT_NUM_IP_IGN_ALLOWED, .pm_vric1_offset = SPT_PMC_VRIC1_OFFSET, }; diff --git a/drivers/platform/x86/intel/pmc/tgl.c b/drivers/platform/x86/intel/pmc/tgl.c index e0580de18077..371b4e30f142 100644 --- a/drivers/platform/x86/intel/pmc/tgl.c +++ b/drivers/platform/x86/intel/pmc/tgl.c @@ -197,6 +197,8 @@ const struct pmc_reg_map tgl_reg_map = { .ppfear_buckets = ICL_PPFEAR_NUM_ENTRIES, .pm_cfg_offset = CNP_PMC_PM_CFG_OFFSET, .pm_read_disable_bit = CNP_PMC_READ_DISABLE_BIT, + .acpi_pm_tmr_ctl_offset = SPT_PMC_ACPI_PM_TMR_CTL_OFFSET, + .acpi_pm_tmr_disable_bit = SPT_PMC_BIT_ACPI_PM_TMR_DISABLE, .ltr_ignore_max = TGL_NUM_IP_IGN_ALLOWED, .lpm_num_maps = TGL_LPM_NUM_MAPS, .lpm_res_counter_step_x2 = TGL_PMC_LPM_RES_COUNTER_STEP_X2, From 414b2fb4bb5aa09f39262c24ed90b831d18ff984 Mon Sep 17 00:00:00 2001 From: Huan Yang Date: Tue, 20 Aug 2024 17:46:03 +0800 Subject: [PATCH 34/43] clocksource/drivers/ingenic: Use devm_clk_get_enabled() helpers The devm_clk_get_enabled() helpers: - call devm_clk_get() - call clk_prepare_enable() and register what is needed in order to call clk_disable_unprepare() when needed, as a managed resource. This simplifies the code and avoids the calls to clk_disable_unprepare(). Signed-off-by: Huan Yang Link: https://lore.kernel.org/r/20240820094603.103598-1-link@vivo.com Signed-off-by: Daniel Lezcano --- drivers/clocksource/ingenic-ost.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/clocksource/ingenic-ost.c b/drivers/clocksource/ingenic-ost.c index 9f7c280a1336..e0ec33307c84 100644 --- a/drivers/clocksource/ingenic-ost.c +++ b/drivers/clocksource/ingenic-ost.c @@ -93,14 +93,10 @@ static int __init ingenic_ost_probe(struct platform_device *pdev) return PTR_ERR(map); } - ost->clk = devm_clk_get(dev, "ost"); + ost->clk = devm_clk_get_enabled(dev, "ost"); if (IS_ERR(ost->clk)) return PTR_ERR(ost->clk); - err = clk_prepare_enable(ost->clk); - if (err) - return err; - /* Clear counter high/low registers */ if (soc_info->is64bit) regmap_write(map, TCU_REG_OST_CNTL, 0); @@ -129,7 +125,6 @@ static int __init ingenic_ost_probe(struct platform_device *pdev) err = clocksource_register_hz(cs, rate); if (err) { dev_err(dev, "clocksource registration failed"); - clk_disable_unprepare(ost->clk); return err; } From ca140a0dc0a18acd4653b56db211fec9b2339986 Mon Sep 17 00:00:00 2001 From: Ankit Agrawal Date: Sat, 13 Jul 2024 15:27:13 +0530 Subject: [PATCH 35/43] clocksource/drivers/qcom: Add missing iounmap() on errors in msm_dt_timer_init() Add the missing iounmap() when clock frequency fails to get read by the of_property_read_u32() call, or if the call to msm_timer_init() fails. Fixes: 6e3321631ac2 ("ARM: msm: Add DT support to msm_timer") Signed-off-by: Ankit Agrawal Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240713095713.GA430091@bnew-VirtualBox Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-qcom.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-qcom.c b/drivers/clocksource/timer-qcom.c index b4afe3a67583..eac4c95c6127 100644 --- a/drivers/clocksource/timer-qcom.c +++ b/drivers/clocksource/timer-qcom.c @@ -233,6 +233,7 @@ static int __init msm_dt_timer_init(struct device_node *np) } if (of_property_read_u32(np, "clock-frequency", &freq)) { + iounmap(cpu0_base); pr_err("Unknown frequency\n"); return -EINVAL; } @@ -243,7 +244,11 @@ static int __init msm_dt_timer_init(struct device_node *np) freq /= 4; writel_relaxed(DGT_CLK_CTL_DIV_4, source_base + DGT_CLK_CTL); - return msm_timer_init(freq, 32, irq, !!percpu_offset); + ret = msm_timer_init(freq, 32, irq, !!percpu_offset); + if (ret) + iounmap(cpu0_base); + + return ret; } TIMER_OF_DECLARE(kpss_timer, "qcom,kpss-timer", msm_dt_timer_init); TIMER_OF_DECLARE(scss_timer, "qcom,scss-timer", msm_dt_timer_init); From 6cc11b65e5e0a6f1487274d1ad91088f7ac01d18 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Sat, 3 Aug 2024 14:42:52 +0800 Subject: [PATCH 36/43] clocksource/drivers/asm9260: Add missing clk_disable_unprepare in asm9260_timer_init Add the missing clk_disable_unprepare() before return in asm9260_timer_init(). Signed-off-by: Gaosheng Cui Link: https://lore.kernel.org/r/20240803064253.331946-2-cuigaosheng1@huawei.com Signed-off-by: Daniel Lezcano --- drivers/clocksource/asm9260_timer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clocksource/asm9260_timer.c b/drivers/clocksource/asm9260_timer.c index 5b39d3701fa3..8f97ab0b01ec 100644 --- a/drivers/clocksource/asm9260_timer.c +++ b/drivers/clocksource/asm9260_timer.c @@ -210,6 +210,7 @@ static int __init asm9260_timer_init(struct device_node *np) DRIVER_NAME, &event_dev); if (ret) { pr_err("Failed to setup irq!\n"); + clk_disable_unprepare(clk); return ret; } From 2e02da1d86c97dfaa8ee083d98df1d069b28a526 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Sat, 3 Aug 2024 14:42:53 +0800 Subject: [PATCH 37/43] clocksource/drivers/cadence-ttc: Add missing clk_disable_unprepare in ttc_setup_clockevent Add the missing clk_disable_unprepare() before return in ttc_setup_clockevent(). Signed-off-by: Gaosheng Cui Reviewed-by: Michal Simek Link: https://lore.kernel.org/r/20240803064253.331946-3-cuigaosheng1@huawei.com Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-cadence-ttc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/timer-cadence-ttc.c b/drivers/clocksource/timer-cadence-ttc.c index ca7a06489c40..b8a1cf59b9d6 100644 --- a/drivers/clocksource/timer-cadence-ttc.c +++ b/drivers/clocksource/timer-cadence-ttc.c @@ -435,7 +435,7 @@ static int __init ttc_setup_clockevent(struct clk *clk, &ttcce->ttc.clk_rate_change_nb); if (err) { pr_warn("Unable to register clock notifier.\n"); - goto out_kfree; + goto out_clk_unprepare; } ttcce->ttc.freq = clk_get_rate(ttcce->ttc.clk); @@ -465,13 +465,15 @@ static int __init ttc_setup_clockevent(struct clk *clk, err = request_irq(irq, ttc_clock_event_interrupt, IRQF_TIMER, ttcce->ce.name, ttcce); if (err) - goto out_kfree; + goto out_clk_unprepare; clockevents_config_and_register(&ttcce->ce, ttcce->ttc.freq / PRESCALE, 1, 0xfffe); return 0; +out_clk_unprepare: + clk_disable_unprepare(ttcce->ttc.clk); out_kfree: kfree(ttcce); return err; From 69a9dcbd2d65217cf52f55dc57b50845dc9d8d3f Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 2 Sep 2024 12:48:04 +0200 Subject: [PATCH 38/43] clocksource/drivers/jcore: Use request_percpu_irq() Use request_percpu_irq() instead of request_irq() to solve the following sparse warning: jcore-pit.c:173:40: warning: incorrect type in argument 5 (different address spaces) jcore-pit.c:173:40: expected void *dev jcore-pit.c:173:40: got struct jcore_pit [noderef] __percpu *static [assigned] [toplevel] jcore_pit_percpu Compile tested only. Signed-off-by: Uros Bizjak Cc: Daniel Lezcano Cc: Thomas Gleixner Cc: Rich Felker Link: https://lore.kernel.org/r/20240902104810.21080-1-ubizjak@gmail.com Signed-off-by: Daniel Lezcano --- drivers/clocksource/jcore-pit.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/clocksource/jcore-pit.c b/drivers/clocksource/jcore-pit.c index a4a991101fa3..a3fe98cd3838 100644 --- a/drivers/clocksource/jcore-pit.c +++ b/drivers/clocksource/jcore-pit.c @@ -120,7 +120,7 @@ static int jcore_pit_local_init(unsigned cpu) static irqreturn_t jcore_timer_interrupt(int irq, void *dev_id) { - struct jcore_pit *pit = this_cpu_ptr(dev_id); + struct jcore_pit *pit = dev_id; if (clockevent_state_oneshot(&pit->ced)) jcore_pit_disable(pit); @@ -168,9 +168,8 @@ static int __init jcore_pit_init(struct device_node *node) return -ENOMEM; } - err = request_irq(pit_irq, jcore_timer_interrupt, - IRQF_TIMER | IRQF_PERCPU, - "jcore_pit", jcore_pit_percpu); + err = request_percpu_irq(pit_irq, jcore_timer_interrupt, + "jcore_pit", jcore_pit_percpu); if (err) { pr_err("pit irq request failed: %d\n", err); free_percpu(jcore_pit_percpu); From 2376d871f8552aadea19f5bc0b1370db54a3a5f2 Mon Sep 17 00:00:00 2001 From: Marek Maslanka Date: Wed, 4 Sep 2024 09:47:53 +0000 Subject: [PATCH 39/43] platform/x86:intel/pmc: Fix comment for the pmc_core_acpi_pm_timer_suspend_resume function Change incorrect kernel-doc comment to regular comment for the pmc_core_acpi_pm_timer_suspend_resume function. Signed-off-by: Marek Maslanka Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202409031410.a9beukFc-lkp@intel.com/ Link: https://lore.kernel.org/r/20240904094753.1615549-1-mmaslanka@google.com Signed-off-by: Daniel Lezcano --- drivers/platform/x86/intel/pmc/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c index c91c753b120e..e2b4c74ce7f6 100644 --- a/drivers/platform/x86/intel/pmc/core.c +++ b/drivers/platform/x86/intel/pmc/core.c @@ -1209,7 +1209,7 @@ static bool pmc_core_is_pson_residency_enabled(struct pmc_dev *pmcdev) return val == 1; } -/** +/* * Enable or disable ACPI PM Timer * * This function is intended to be a callback for ACPI PM suspend/resume event. From fe90c5ba88ad43d42acefb21b57df837be86a61a Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Wed, 4 Sep 2024 15:04:51 +0200 Subject: [PATCH 40/43] timers: Rename next_expiry_recalc() to be unique next_expiry_recalc is the name of a function as well as the name of a struct member of struct timer_base. This might lead to confusion. Rename next_expiry_recalc() to timer_recalc_next_expiry(). No functional change. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/all/20240904-devel-anna-maria-b4-timers-flseep-v1-1-e98760256370@linutronix.de --- kernel/time/timer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 311ea459b976..5e021a2d8d61 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1900,7 +1900,7 @@ static int next_pending_bucket(struct timer_base *base, unsigned offset, * * Store next expiry time in base->next_expiry. */ -static void next_expiry_recalc(struct timer_base *base) +static void timer_recalc_next_expiry(struct timer_base *base) { unsigned long clk, next, adj; unsigned lvl, offset = 0; @@ -2009,7 +2009,7 @@ static unsigned long next_timer_interrupt(struct timer_base *base, unsigned long basej) { if (base->next_expiry_recalc) - next_expiry_recalc(base); + timer_recalc_next_expiry(base); /* * Move next_expiry for the empty base into the future to prevent an @@ -2413,7 +2413,7 @@ static inline void __run_timers(struct timer_base *base) * jiffies to avoid endless requeuing to current jiffies. */ base->clk++; - next_expiry_recalc(base); + timer_recalc_next_expiry(base); while (levels--) expire_timers(base, heads + levels); From 662a1bfb907cd850da4de9b871640ad47a355bc0 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Wed, 4 Sep 2024 15:04:52 +0200 Subject: [PATCH 41/43] cpu: Use already existing usleep_range() usleep_range() is a wrapper arount usleep_range_state() which hands in TASK_UNTINTERRUPTIBLE as state argument. Use already exising wrapper usleep_range(). No functional change. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/all/20240904-devel-anna-maria-b4-timers-flseep-v1-2-e98760256370@linutronix.de --- kernel/cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index 1209ddaec026..031a2c15481b 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -330,7 +330,7 @@ static bool cpuhp_wait_for_sync_state(unsigned int cpu, enum cpuhp_sync_state st /* Poll for one millisecond */ arch_cpuhp_sync_state_poll(); } else { - usleep_range_state(USEC_PER_MSEC, 2 * USEC_PER_MSEC, TASK_UNINTERRUPTIBLE); + usleep_range(USEC_PER_MSEC, 2 * USEC_PER_MSEC); } sync = atomic_read(st); } From bd7c8ff9fef4b21a97f9b30a7364845ee6eaaf23 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Wed, 4 Sep 2024 15:04:53 +0200 Subject: [PATCH 42/43] treewide: Fix wrong singular form of jiffies in comments There are several comments all over the place, which uses a wrong singular form of jiffies. Replace 'jiffie' by 'jiffy'. No functional change. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Acked-by: Geert Uytterhoeven # m68k Link: https://lore.kernel.org/all/20240904-devel-anna-maria-b4-timers-flseep-v1-3-e98760256370@linutronix.de --- Documentation/admin-guide/media/vivid.rst | 2 +- Documentation/timers/timers-howto.rst | 2 +- .../sp_SP/scheduler/sched-design-CFS.rst | 2 +- arch/arm/mach-versatile/spc.c | 2 +- arch/m68k/q40/q40ints.c | 2 +- arch/x86/kernel/cpu/mce/dev-mcelog.c | 2 +- drivers/char/ipmi/ipmi_ssif.c | 2 +- drivers/dma-buf/st-dma-fence.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 2 +- drivers/gpu/drm/i915/gt/selftest_execlists.c | 4 ++-- drivers/gpu/drm/i915/i915_utils.c | 2 +- drivers/gpu/drm/v3d/v3d_bo.c | 2 +- drivers/isdn/mISDN/dsp_cmx.c | 2 +- drivers/net/ethernet/marvell/mvmdio.c | 2 +- fs/xfs/xfs_buf.h | 2 +- include/linux/jiffies.h | 2 +- include/linux/timekeeper_internal.h | 2 +- kernel/time/alarmtimer.c | 2 +- kernel/time/clockevents.c | 2 +- kernel/time/hrtimer.c | 2 +- kernel/time/posix-timers.c | 4 ++-- kernel/time/timer.c | 12 ++++++------ lib/Kconfig.debug | 2 +- net/batman-adv/types.h | 2 +- 24 files changed, 31 insertions(+), 31 deletions(-) diff --git a/Documentation/admin-guide/media/vivid.rst b/Documentation/admin-guide/media/vivid.rst index 1306f19ecb5a..c9d301ab46a3 100644 --- a/Documentation/admin-guide/media/vivid.rst +++ b/Documentation/admin-guide/media/vivid.rst @@ -328,7 +328,7 @@ and an HDMI input, one input for each input type. Those are described in more detail below. Special attention has been given to the rate at which new frames become -available. The jitter will be around 1 jiffie (that depends on the HZ +available. The jitter will be around 1 jiffy (that depends on the HZ configuration of your kernel, so usually 1/100, 1/250 or 1/1000 of a second), but the long-term behavior is exactly following the framerate. So a framerate of 59.94 Hz is really different from 60 Hz. If the framerate diff --git a/Documentation/timers/timers-howto.rst b/Documentation/timers/timers-howto.rst index 5c169e3d29a8..ef7a4652ccc9 100644 --- a/Documentation/timers/timers-howto.rst +++ b/Documentation/timers/timers-howto.rst @@ -19,7 +19,7 @@ it really need to delay in atomic context?" If so... ATOMIC CONTEXT: You must use the `*delay` family of functions. These - functions use the jiffie estimation of clock speed + functions use the jiffy estimation of clock speed and will busy wait for enough loop cycles to achieve the desired delay: diff --git a/Documentation/translations/sp_SP/scheduler/sched-design-CFS.rst b/Documentation/translations/sp_SP/scheduler/sched-design-CFS.rst index 90a153cad4e8..731c266beb1a 100644 --- a/Documentation/translations/sp_SP/scheduler/sched-design-CFS.rst +++ b/Documentation/translations/sp_SP/scheduler/sched-design-CFS.rst @@ -109,7 +109,7 @@ para que se ejecute, y la tarea en ejecución es interrumpida. ================================== CFS usa una granularidad de nanosegundos y no depende de ningún -jiffie o detalles como HZ. De este modo, el gestor de tareas CFS no tiene +jiffy o detalles como HZ. De este modo, el gestor de tareas CFS no tiene noción de "ventanas de tiempo" de la forma en que tenía el gestor de tareas previo, y tampoco tiene heurísticos. Únicamente hay un parámetro central ajustable (se ha de cambiar en CONFIG_SCHED_DEBUG): diff --git a/arch/arm/mach-versatile/spc.c b/arch/arm/mach-versatile/spc.c index 5e44170e1a9a..790092734cf6 100644 --- a/arch/arm/mach-versatile/spc.c +++ b/arch/arm/mach-versatile/spc.c @@ -73,7 +73,7 @@ /* * Even though the SPC takes max 3-5 ms to complete any OPP/COMMS - * operation, the operation could start just before jiffie is about + * operation, the operation could start just before jiffy is about * to be incremented. So setting timeout value of 20ms = 2jiffies@100Hz */ #define TIMEOUT_US 20000 diff --git a/arch/m68k/q40/q40ints.c b/arch/m68k/q40/q40ints.c index 10f1f294e91f..14b774b9d308 100644 --- a/arch/m68k/q40/q40ints.c +++ b/arch/m68k/q40/q40ints.c @@ -106,7 +106,7 @@ void __init q40_init_IRQ(void) * this stuff doesn't really belong here.. */ -int ql_ticks; /* 200Hz ticks since last jiffie */ +int ql_ticks; /* 200Hz ticks since last jiffy */ static int sound_ticks; #define SVOL 45 diff --git a/arch/x86/kernel/cpu/mce/dev-mcelog.c b/arch/x86/kernel/cpu/mce/dev-mcelog.c index a05ac0716ecf..a3aa0199222e 100644 --- a/arch/x86/kernel/cpu/mce/dev-mcelog.c +++ b/arch/x86/kernel/cpu/mce/dev-mcelog.c @@ -314,7 +314,7 @@ static ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf, /* * Need to give user space some time to set everything up, - * so do it a jiffie or two later everywhere. + * so do it a jiffy or two later everywhere. */ schedule_timeout(2); diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 96ad571d041a..e093028391af 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -980,7 +980,7 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result, ipmi_ssif_unlock_cond(ssif_info, flags); start_get(ssif_info); } else { - /* Wait a jiffie then request the next message */ + /* Wait a jiffy then request the next message */ ssif_info->waiting_alert = true; ssif_info->retries_left = SSIF_RECV_RETRIES; if (!ssif_info->stopping) diff --git a/drivers/dma-buf/st-dma-fence.c b/drivers/dma-buf/st-dma-fence.c index 6a1bfcd0cc21..cf2ce3744ce6 100644 --- a/drivers/dma-buf/st-dma-fence.c +++ b/drivers/dma-buf/st-dma-fence.c @@ -402,7 +402,7 @@ static int test_wait_timeout(void *arg) if (dma_fence_wait_timeout(wt.f, false, 2) == -ETIME) { if (timer_pending(&wt.timer)) { - pr_notice("Timer did not fire within the jiffie!\n"); + pr_notice("Timer did not fire within the jiffy!\n"); err = 0; /* not our fault! */ } else { pr_err("Wait reported incomplete after timeout\n"); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index d4b918fb11ce..1f55e62044a4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -266,7 +266,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns)) args->timeout_ns = 0; - /* Asked to wait beyond the jiffie/scheduler precision? */ + /* Asked to wait beyond the jiffy/scheduler precision? */ if (ret == -ETIME && args->timeout_ns) ret = -EAGAIN; } diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 4202df5b8c12..222ca7c44951 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -93,7 +93,7 @@ static int wait_for_reset(struct intel_engine_cs *engine, return -EINVAL; } - /* Give the request a jiffie to complete after flushing the worker */ + /* Give the request a jiffy to complete after flushing the worker */ if (i915_request_wait(rq, 0, max(0l, (long)(timeout - jiffies)) + 1) < 0) { pr_err("%s: hanging request %llx:%lld did not complete\n", @@ -3426,7 +3426,7 @@ static int live_preempt_timeout(void *arg) cpu_relax(); saved_timeout = engine->props.preempt_timeout_ms; - engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */ + engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffy */ i915_request_get(rq); i915_request_add(rq); diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c index 6f9e7b354b54..f2ba51c20e97 100644 --- a/drivers/gpu/drm/i915/i915_utils.c +++ b/drivers/gpu/drm/i915/i915_utils.c @@ -110,7 +110,7 @@ void set_timer_ms(struct timer_list *t, unsigned long timeout) * Paranoia to make sure the compiler computes the timeout before * loading 'jiffies' as jiffies is volatile and may be updated in * the background by a timer tick. All to reduce the complexity - * of the addition and reduce the risk of losing a jiffie. + * of the addition and reduce the risk of losing a jiffy. */ barrier(); diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c index a165cbcdd27b..9eafe53a8f41 100644 --- a/drivers/gpu/drm/v3d/v3d_bo.c +++ b/drivers/gpu/drm/v3d/v3d_bo.c @@ -279,7 +279,7 @@ v3d_wait_bo_ioctl(struct drm_device *dev, void *data, else args->timeout_ns = 0; - /* Asked to wait beyond the jiffie/scheduler precision? */ + /* Asked to wait beyond the jiffy/scheduler precision? */ if (ret == -ETIME && args->timeout_ns) ret = -EAGAIN; diff --git a/drivers/isdn/mISDN/dsp_cmx.c b/drivers/isdn/mISDN/dsp_cmx.c index 61cb45c5d0d8..53fad9487574 100644 --- a/drivers/isdn/mISDN/dsp_cmx.c +++ b/drivers/isdn/mISDN/dsp_cmx.c @@ -82,7 +82,7 @@ * - has multiple clocks. * - has no usable clock due to jitter or packet loss (VoIP). * In this case the system's clock is used. The clock resolution depends on - * the jiffie resolution. + * the jiffy resolution. * * If a member joins a conference: * diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c index 9190eff6c0bb..e1d003fdbc2e 100644 --- a/drivers/net/ethernet/marvell/mvmdio.c +++ b/drivers/net/ethernet/marvell/mvmdio.c @@ -104,7 +104,7 @@ static int orion_mdio_wait_ready(const struct orion_mdio_ops *ops, return 0; } else { /* wait_event_timeout does not guarantee a delay of at - * least one whole jiffie, so timeout must be no less + * least one whole jiffy, so timeout must be no less * than two. */ timeout = max(usecs_to_jiffies(MVMDIO_SMI_TIMEOUT), 2); diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index b1580644501f..209a389f2abc 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -210,7 +210,7 @@ struct xfs_buf { * success the write is considered to be failed permanently and the * iodone handler will take appropriate action. * - * For retry timeouts, we record the jiffie of the first failure. This + * For retry timeouts, we record the jiffy of the first failure. This * means that we can change the retry timeout for buffers already under * I/O and thus avoid getting stuck in a retry loop with a long timeout. * diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index d9f1435a5a13..1220f0fbe5bf 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -418,7 +418,7 @@ extern unsigned long preset_lpj; #define NSEC_CONVERSION ((unsigned long)((((u64)1 << NSEC_JIFFIE_SC) +\ TICK_NSEC -1) / (u64)TICK_NSEC)) /* - * The maximum jiffie value is (MAX_INT >> 1). Here we translate that + * The maximum jiffy value is (MAX_INT >> 1). Here we translate that * into seconds. The 64-bit case will overflow if we are not careful, * so use the messy SH_DIV macro to do it. Still all constants. */ diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 84ff2844df2a..902c20ef495a 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -73,7 +73,7 @@ struct tk_read_base { * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING) * * Note: For timespec(64) based interfaces wall_to_monotonic is what - * we need to add to xtime (or xtime corrected for sub jiffie times) + * we need to add to xtime (or xtime corrected for sub jiffy times) * to get to monotonic time. Monotonic is pegged at zero at system * boot time, so wall_to_monotonic will be negative, however, we will * ALWAYS keep the tv_nsec part positive so we can use the usual diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 76bd4fda3472..8bf888641694 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -493,7 +493,7 @@ static u64 __alarm_forward_now(struct alarm *alarm, ktime_t interval, bool throt * promised in the context of posix_timer_fn() never * materialized, but someone should really work on it. * - * To prevent DOS fake @now to be 1 jiffie out which keeps + * To prevent DOS fake @now to be 1 jiffy out which keeps * the overrun accounting correct but creates an * inconsistency vs. timer_gettime(2). */ diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 60a6484831b1..78c7bd64d0dd 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -190,7 +190,7 @@ int clockevents_tick_resume(struct clock_event_device *dev) #ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST -/* Limit min_delta to a jiffie */ +/* Limit min_delta to a jiffy */ #define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ) /** diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index a023946f8558..e834b2bd83df 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1177,7 +1177,7 @@ static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim, /* * CONFIG_TIME_LOW_RES indicates that the system has no way to return * granular time values. For relative timers we add hrtimer_resolution - * (i.e. one jiffie) to prevent short timeouts. + * (i.e. one jiffy) to prevent short timeouts. */ timer->is_rel = mode & HRTIMER_MODE_REL; if (timer->is_rel) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 1cc830ef93a7..4576aaed13b2 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -339,14 +339,14 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) * change to the signal handling code. * * For now let timers with an interval less than a - * jiffie expire every jiffie and recheck for a + * jiffy expire every jiffy and recheck for a * valid signal handler. * * This avoids interrupt starvation in case of a * very small interval, which would expire the * timer immediately again. * - * Moving now ahead of time by one jiffie tricks + * Moving now ahead of time by one jiffy tricks * hrtimer_forward() to expire the timer later, * while it still maintains the overrun accuracy * for the price of a slight inconsistency in the diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 5e021a2d8d61..2b38f3035a3e 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -365,7 +365,7 @@ static unsigned long round_jiffies_common(unsigned long j, int cpu, rem = j % HZ; /* - * If the target jiffie is just after a whole second (which can happen + * If the target jiffy is just after a whole second (which can happen * due to delays of the timer irq, long irq off times etc etc) then * we should round down to the whole second, not up. Use 1/4th second * as cutoff for this rounding as an extreme upper bound for this. @@ -1930,7 +1930,7 @@ static void timer_recalc_next_expiry(struct timer_base *base) * bits are zero, we look at the next level as is. If not we * need to advance it by one because that's going to be the * next expiring bucket in that level. base->clk is the next - * expiring jiffie. So in case of: + * expiring jiffy. So in case of: * * LVL5 LVL4 LVL3 LVL2 LVL1 LVL0 * 0 0 0 0 0 0 @@ -1995,7 +1995,7 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires) return basem; /* - * Round up to the next jiffie. High resolution timers are + * Round up to the next jiffy. High resolution timers are * off, so the hrtimers are expired in the tick and we need to * make sure that this tick really expires the timer to avoid * a ping pong of the nohz stop code. @@ -2254,7 +2254,7 @@ static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem, base_global, &tevt); /* - * If the next event is only one jiffie ahead there is no need to call + * If the next event is only one jiffy ahead there is no need to call * timer migration hierarchy related functions. The value for the next * global timer in @tevt struct equals then KTIME_MAX. This is also * true, when the timer base is idle. @@ -2486,11 +2486,11 @@ static void run_local_timers(void) * updated. When this update is missed, this isn't a * problem, as an IPI is executed nevertheless when the CPU * was idle before. When the CPU wasn't idle but the update - * is missed, then the timer would expire one jiffie late - + * is missed, then the timer would expire one jiffy late - * bad luck. * * Those unlikely corner cases where the worst outcome is only a - * one jiffie delay or a superfluous raise of the softirq are + * one jiffy delay or a superfluous raise of the softirq are * not that expensive as doing the check always while holding * the lock. * diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a30c03a66172..a40aa606cd04 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -97,7 +97,7 @@ config BOOT_PRINTK_DELAY using "boot_delay=N". It is likely that you would also need to use "lpj=M" to preset - the "loops per jiffie" value. + the "loops per jiffy" value. See a previous boot log for the "lpj" value to use for your system, and then set "lpj=M" before setting "boot_delay=N". NOTE: Using this option may adversely affect SMP systems. diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 00840d5784fe..04f6398b3a40 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -287,7 +287,7 @@ struct batadv_frag_table_entry { /** @lock: lock to protect the list of fragments */ spinlock_t lock; - /** @timestamp: time (jiffie) of last received fragment */ + /** @timestamp: time (jiffy) of last received fragment */ unsigned long timestamp; /** @seqno: sequence number of the fragments in the list */ From 35b603f8a78b0bd51566db277c4f7b56b3ff6bac Mon Sep 17 00:00:00 2001 From: Benjamin ROBIN Date: Sun, 8 Sep 2024 16:08:36 +0200 Subject: [PATCH 43/43] ntp: Make sure RTC is synchronized when time goes backwards sync_hw_clock() is normally called every 11 minutes when time is synchronized. This issue is that this periodic timer uses the REALTIME clock, so when time moves backwards (the NTP server jumps into the past), the timer expires late. If the timer expires late, which can be days later, the RTC will no longer be updated, which is an issue if the device is abruptly powered OFF during this period. When the device will restart (when powered ON), it will have the date prior to the ADJ_SETOFFSET call. A normal NTP server should not jump in the past like that, but it is possible... Another way of reproducing this issue is to use phc2sys to synchronize the REALTIME clock with, for example, an IRIG timecode with the source always starting at the same date (not synchronized). Also, if the time jump in the future by less than 11 minutes, the RTC may not be updated immediately (minor issue). Consider the following scenario: - Time is synchronized, and sync_hw_clock() was just called (the timer expires in 11 minutes). - A time jump is realized in the future by a couple of minutes. - The time is synchronized again. - Users may expect that RTC to be updated as soon as possible, and not after 11 minutes (for the same reason, if a power loss occurs in this period). Cancel periodic timer on any time jump (ADJ_SETOFFSET) greater than or equal to 1s. The timer will be relaunched at the end of do_adjtimex() if NTP is still considered synced. Otherwise the timer will be relaunched later when NTP is synced. This way, when the time is synchronized again, the RTC is updated after less than 2 seconds. Signed-off-by: Benjamin ROBIN Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240908140836.203911-1-dev@benjarobin.fr --- kernel/time/ntp.c | 10 +++++++++- kernel/time/ntp_internal.h | 4 ++-- kernel/time/timekeeping.c | 4 +++- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 8d2dd214ec68..802b336f4b8c 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -660,8 +660,16 @@ rearm: sched_sync_hw_clock(offset_nsec, res != 0); } -void ntp_notify_cmos_timer(void) +void ntp_notify_cmos_timer(bool offset_set) { + /* + * If the time jumped (using ADJ_SETOFFSET) cancels sync timer, + * which may have been running if the time was synchronized + * prior to the ADJ_SETOFFSET call. + */ + if (offset_set) + hrtimer_cancel(&sync_hrtimer); + /* * When the work is currently executed but has not yet the timer * rearmed this queues the work immediately again. No big issue, diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h index 23d1b74c3065..5a633dce9057 100644 --- a/kernel/time/ntp_internal.h +++ b/kernel/time/ntp_internal.h @@ -14,9 +14,9 @@ extern int __do_adjtimex(struct __kernel_timex *txc, extern void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts); #if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) -extern void ntp_notify_cmos_timer(void); +extern void ntp_notify_cmos_timer(bool offset_set); #else -static inline void ntp_notify_cmos_timer(void) { } +static inline void ntp_notify_cmos_timer(bool offset_set) { } #endif #endif /* _LINUX_NTP_INTERNAL_H */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 5391e4167d60..7e6f409bf311 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2553,6 +2553,7 @@ int do_adjtimex(struct __kernel_timex *txc) { struct timekeeper *tk = &tk_core.timekeeper; struct audit_ntp_data ad; + bool offset_set = false; bool clock_set = false; struct timespec64 ts; unsigned long flags; @@ -2575,6 +2576,7 @@ int do_adjtimex(struct __kernel_timex *txc) if (ret) return ret; + offset_set = delta.tv_sec != 0; audit_tk_injoffset(delta); } @@ -2608,7 +2610,7 @@ int do_adjtimex(struct __kernel_timex *txc) if (clock_set) clock_was_set(CLOCK_SET_WALL); - ntp_notify_cmos_timer(); + ntp_notify_cmos_timer(offset_set); return ret; }