linux/drivers/ptp/ptp_clock.c
Vladis Dronov a33121e548 ptp: fix the race between the release of ptp_clock and cdev
In a case when a ptp chardev (like /dev/ptp0) is open but an underlying
device is removed, closing this file leads to a race. This reproduces
easily in a kvm virtual machine:

ts# cat openptp0.c
int main() { ... fp = fopen("/dev/ptp0", "r"); ... sleep(10); }
ts# uname -r
5.5.0-rc3-46cf053e
ts# cat /proc/cmdline
... slub_debug=FZP
ts# modprobe ptp_kvm
ts# ./openptp0 &
[1] 670
opened /dev/ptp0, sleeping 10s...
ts# rmmod ptp_kvm
ts# ls /dev/ptp*
ls: cannot access '/dev/ptp*': No such file or directory
ts# ...woken up
[   48.010809] general protection fault: 0000 [#1] SMP
[   48.012502] CPU: 6 PID: 658 Comm: openptp0 Not tainted 5.5.0-rc3-46cf053e #25
[   48.014624] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ...
[   48.016270] RIP: 0010:module_put.part.0+0x7/0x80
[   48.017939] RSP: 0018:ffffb3850073be00 EFLAGS: 00010202
[   48.018339] RAX: 000000006b6b6b6b RBX: 6b6b6b6b6b6b6b6b RCX: ffff89a476c00ad0
[   48.018936] RDX: fffff65a08d3ea08 RSI: 0000000000000247 RDI: 6b6b6b6b6b6b6b6b
[   48.019470] ...                                              ^^^ a slub poison
[   48.023854] Call Trace:
[   48.024050]  __fput+0x21f/0x240
[   48.024288]  task_work_run+0x79/0x90
[   48.024555]  do_exit+0x2af/0xab0
[   48.024799]  ? vfs_write+0x16a/0x190
[   48.025082]  do_group_exit+0x35/0x90
[   48.025387]  __x64_sys_exit_group+0xf/0x10
[   48.025737]  do_syscall_64+0x3d/0x130
[   48.026056]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[   48.026479] RIP: 0033:0x7f53b12082f6
[   48.026792] ...
[   48.030945] Modules linked in: ptp i6300esb watchdog [last unloaded: ptp_kvm]
[   48.045001] Fixing recursive fault but reboot is needed!

This happens in:

static void __fput(struct file *file)
{   ...
    if (file->f_op->release)
        file->f_op->release(inode, file); <<< cdev is kfree'd here
    if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
             !(mode & FMODE_PATH))) {
        cdev_put(inode->i_cdev); <<< cdev fields are accessed here

Namely:

__fput()
  posix_clock_release()
    kref_put(&clk->kref, delete_clock) <<< the last reference
      delete_clock()
        delete_ptp_clock()
          kfree(ptp) <<< cdev is embedded in ptp
  cdev_put
    module_put(p->owner) <<< *p is kfree'd, bang!

Here cdev is embedded in posix_clock which is embedded in ptp_clock.
The race happens because ptp_clock's lifetime is controlled by two
refcounts: kref and cdev.kobj in posix_clock. This is wrong.

Make ptp_clock's sysfs device a parent of cdev with cdev_device_add()
created especially for such cases. This way the parent device with its
ptp_clock is not released until all references to the cdev are released.
This adds a requirement that an initialized but not exposed struct
device should be provided to posix_clock_register() by a caller instead
of a simple dev_t.

This approach was adopted from the commit 72139dfa24 ("watchdog: Fix
the race between the release of watchdog_core_data and cdev"). See
details of the implementation in the commit 233ed09d7f ("chardev: add
helper function to register char devs with a struct device").

Link: https://lore.kernel.org/linux-fsdevel/20191125125342.6189-1-vdronov@redhat.com/T/#u
Analyzed-by: Stephen Johnston <sjohnsto@redhat.com>
Analyzed-by: Vern Lovejoy <vlovejoy@redhat.com>
Signed-off-by: Vladis Dronov <vdronov@redhat.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-12-30 20:19:27 -08:00

411 lines
9.3 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* PTP 1588 clock support
*
* Copyright (C) 2010 OMICRON electronics GmbH
*/
#include <linux/idr.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/posix-clock.h>
#include <linux/pps_kernel.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <uapi/linux/sched/types.h>
#include "ptp_private.h"
#define PTP_MAX_ALARMS 4
#define PTP_PPS_DEFAULTS (PPS_CAPTUREASSERT | PPS_OFFSETASSERT)
#define PTP_PPS_EVENT PPS_CAPTUREASSERT
#define PTP_PPS_MODE (PTP_PPS_DEFAULTS | PPS_CANWAIT | PPS_TSFMT_TSPEC)
/* private globals */
static dev_t ptp_devt;
static struct class *ptp_class;
static DEFINE_IDA(ptp_clocks_map);
/* time stamp event queue operations */
static inline int queue_free(struct timestamp_event_queue *q)
{
return PTP_MAX_TIMESTAMPS - queue_cnt(q) - 1;
}
static void enqueue_external_timestamp(struct timestamp_event_queue *queue,
struct ptp_clock_event *src)
{
struct ptp_extts_event *dst;
unsigned long flags;
s64 seconds;
u32 remainder;
seconds = div_u64_rem(src->timestamp, 1000000000, &remainder);
spin_lock_irqsave(&queue->lock, flags);
dst = &queue->buf[queue->tail];
dst->index = src->index;
dst->t.sec = seconds;
dst->t.nsec = remainder;
if (!queue_free(queue))
queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS;
queue->tail = (queue->tail + 1) % PTP_MAX_TIMESTAMPS;
spin_unlock_irqrestore(&queue->lock, flags);
}
s32 scaled_ppm_to_ppb(long ppm)
{
/*
* The 'freq' field in the 'struct timex' is in parts per
* million, but with a 16 bit binary fractional field.
*
* We want to calculate
*
* ppb = scaled_ppm * 1000 / 2^16
*
* which simplifies to
*
* ppb = scaled_ppm * 125 / 2^13
*/
s64 ppb = 1 + ppm;
ppb *= 125;
ppb >>= 13;
return (s32) ppb;
}
EXPORT_SYMBOL(scaled_ppm_to_ppb);
/* posix clock implementation */
static int ptp_clock_getres(struct posix_clock *pc, struct timespec64 *tp)
{
tp->tv_sec = 0;
tp->tv_nsec = 1;
return 0;
}
static int ptp_clock_settime(struct posix_clock *pc, const struct timespec64 *tp)
{
struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
return ptp->info->settime64(ptp->info, tp);
}
static int ptp_clock_gettime(struct posix_clock *pc, struct timespec64 *tp)
{
struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
int err;
if (ptp->info->gettimex64)
err = ptp->info->gettimex64(ptp->info, tp, NULL);
else
err = ptp->info->gettime64(ptp->info, tp);
return err;
}
static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx)
{
struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
struct ptp_clock_info *ops;
int err = -EOPNOTSUPP;
ops = ptp->info;
if (tx->modes & ADJ_SETOFFSET) {
struct timespec64 ts;
ktime_t kt;
s64 delta;
ts.tv_sec = tx->time.tv_sec;
ts.tv_nsec = tx->time.tv_usec;
if (!(tx->modes & ADJ_NANO))
ts.tv_nsec *= 1000;
if ((unsigned long) ts.tv_nsec >= NSEC_PER_SEC)
return -EINVAL;
kt = timespec64_to_ktime(ts);
delta = ktime_to_ns(kt);
err = ops->adjtime(ops, delta);
} else if (tx->modes & ADJ_FREQUENCY) {
s32 ppb = scaled_ppm_to_ppb(tx->freq);
if (ppb > ops->max_adj || ppb < -ops->max_adj)
return -ERANGE;
if (ops->adjfine)
err = ops->adjfine(ops, tx->freq);
else
err = ops->adjfreq(ops, ppb);
ptp->dialed_frequency = tx->freq;
} else if (tx->modes == 0) {
tx->freq = ptp->dialed_frequency;
err = 0;
}
return err;
}
static struct posix_clock_operations ptp_clock_ops = {
.owner = THIS_MODULE,
.clock_adjtime = ptp_clock_adjtime,
.clock_gettime = ptp_clock_gettime,
.clock_getres = ptp_clock_getres,
.clock_settime = ptp_clock_settime,
.ioctl = ptp_ioctl,
.open = ptp_open,
.poll = ptp_poll,
.read = ptp_read,
};
static void ptp_clock_release(struct device *dev)
{
struct ptp_clock *ptp = container_of(dev, struct ptp_clock, dev);
mutex_destroy(&ptp->tsevq_mux);
mutex_destroy(&ptp->pincfg_mux);
ida_simple_remove(&ptp_clocks_map, ptp->index);
kfree(ptp);
}
static void ptp_aux_kworker(struct kthread_work *work)
{
struct ptp_clock *ptp = container_of(work, struct ptp_clock,
aux_work.work);
struct ptp_clock_info *info = ptp->info;
long delay;
delay = info->do_aux_work(info);
if (delay >= 0)
kthread_queue_delayed_work(ptp->kworker, &ptp->aux_work, delay);
}
/* public interface */
struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
struct device *parent)
{
struct ptp_clock *ptp;
int err = 0, index, major = MAJOR(ptp_devt);
if (info->n_alarm > PTP_MAX_ALARMS)
return ERR_PTR(-EINVAL);
/* Initialize a clock structure. */
err = -ENOMEM;
ptp = kzalloc(sizeof(struct ptp_clock), GFP_KERNEL);
if (ptp == NULL)
goto no_memory;
index = ida_simple_get(&ptp_clocks_map, 0, MINORMASK + 1, GFP_KERNEL);
if (index < 0) {
err = index;
goto no_slot;
}
ptp->clock.ops = ptp_clock_ops;
ptp->info = info;
ptp->devid = MKDEV(major, index);
ptp->index = index;
spin_lock_init(&ptp->tsevq.lock);
mutex_init(&ptp->tsevq_mux);
mutex_init(&ptp->pincfg_mux);
init_waitqueue_head(&ptp->tsev_wq);
if (ptp->info->do_aux_work) {
kthread_init_delayed_work(&ptp->aux_work, ptp_aux_kworker);
ptp->kworker = kthread_create_worker(0, "ptp%d", ptp->index);
if (IS_ERR(ptp->kworker)) {
err = PTR_ERR(ptp->kworker);
pr_err("failed to create ptp aux_worker %d\n", err);
goto kworker_err;
}
}
err = ptp_populate_pin_groups(ptp);
if (err)
goto no_pin_groups;
/* Register a new PPS source. */
if (info->pps) {
struct pps_source_info pps;
memset(&pps, 0, sizeof(pps));
snprintf(pps.name, PPS_MAX_NAME_LEN, "ptp%d", index);
pps.mode = PTP_PPS_MODE;
pps.owner = info->owner;
ptp->pps_source = pps_register_source(&pps, PTP_PPS_DEFAULTS);
if (IS_ERR(ptp->pps_source)) {
err = PTR_ERR(ptp->pps_source);
pr_err("failed to register pps source\n");
goto no_pps;
}
}
/* Initialize a new device of our class in our clock structure. */
device_initialize(&ptp->dev);
ptp->dev.devt = ptp->devid;
ptp->dev.class = ptp_class;
ptp->dev.parent = parent;
ptp->dev.groups = ptp->pin_attr_groups;
ptp->dev.release = ptp_clock_release;
dev_set_drvdata(&ptp->dev, ptp);
dev_set_name(&ptp->dev, "ptp%d", ptp->index);
/* Create a posix clock and link it to the device. */
err = posix_clock_register(&ptp->clock, &ptp->dev);
if (err) {
pr_err("failed to create posix clock\n");
goto no_clock;
}
return ptp;
no_clock:
if (ptp->pps_source)
pps_unregister_source(ptp->pps_source);
no_pps:
ptp_cleanup_pin_groups(ptp);
no_pin_groups:
if (ptp->kworker)
kthread_destroy_worker(ptp->kworker);
kworker_err:
mutex_destroy(&ptp->tsevq_mux);
mutex_destroy(&ptp->pincfg_mux);
ida_simple_remove(&ptp_clocks_map, index);
no_slot:
kfree(ptp);
no_memory:
return ERR_PTR(err);
}
EXPORT_SYMBOL(ptp_clock_register);
int ptp_clock_unregister(struct ptp_clock *ptp)
{
ptp->defunct = 1;
wake_up_interruptible(&ptp->tsev_wq);
if (ptp->kworker) {
kthread_cancel_delayed_work_sync(&ptp->aux_work);
kthread_destroy_worker(ptp->kworker);
}
/* Release the clock's resources. */
if (ptp->pps_source)
pps_unregister_source(ptp->pps_source);
ptp_cleanup_pin_groups(ptp);
posix_clock_unregister(&ptp->clock);
return 0;
}
EXPORT_SYMBOL(ptp_clock_unregister);
void ptp_clock_event(struct ptp_clock *ptp, struct ptp_clock_event *event)
{
struct pps_event_time evt;
switch (event->type) {
case PTP_CLOCK_ALARM:
break;
case PTP_CLOCK_EXTTS:
enqueue_external_timestamp(&ptp->tsevq, event);
wake_up_interruptible(&ptp->tsev_wq);
break;
case PTP_CLOCK_PPS:
pps_get_ts(&evt);
pps_event(ptp->pps_source, &evt, PTP_PPS_EVENT, NULL);
break;
case PTP_CLOCK_PPSUSR:
pps_event(ptp->pps_source, &event->pps_times,
PTP_PPS_EVENT, NULL);
break;
}
}
EXPORT_SYMBOL(ptp_clock_event);
int ptp_clock_index(struct ptp_clock *ptp)
{
return ptp->index;
}
EXPORT_SYMBOL(ptp_clock_index);
int ptp_find_pin(struct ptp_clock *ptp,
enum ptp_pin_function func, unsigned int chan)
{
struct ptp_pin_desc *pin = NULL;
int i;
mutex_lock(&ptp->pincfg_mux);
for (i = 0; i < ptp->info->n_pins; i++) {
if (ptp->info->pin_config[i].func == func &&
ptp->info->pin_config[i].chan == chan) {
pin = &ptp->info->pin_config[i];
break;
}
}
mutex_unlock(&ptp->pincfg_mux);
return pin ? i : -1;
}
EXPORT_SYMBOL(ptp_find_pin);
int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay)
{
return kthread_mod_delayed_work(ptp->kworker, &ptp->aux_work, delay);
}
EXPORT_SYMBOL(ptp_schedule_worker);
/* module operations */
static void __exit ptp_exit(void)
{
class_destroy(ptp_class);
unregister_chrdev_region(ptp_devt, MINORMASK + 1);
ida_destroy(&ptp_clocks_map);
}
static int __init ptp_init(void)
{
int err;
ptp_class = class_create(THIS_MODULE, "ptp");
if (IS_ERR(ptp_class)) {
pr_err("ptp: failed to allocate class\n");
return PTR_ERR(ptp_class);
}
err = alloc_chrdev_region(&ptp_devt, 0, MINORMASK + 1, "ptp");
if (err < 0) {
pr_err("ptp: failed to allocate device region\n");
goto no_region;
}
ptp_class->dev_groups = ptp_groups;
pr_info("PTP clock support registered\n");
return 0;
no_region:
class_destroy(ptp_class);
return err;
}
subsys_initcall(ptp_init);
module_exit(ptp_exit);
MODULE_AUTHOR("Richard Cochran <richardcochran@gmail.com>");
MODULE_DESCRIPTION("PTP clocks support");
MODULE_LICENSE("GPL");