signalfd: add support for SFD_TASK
From: Jens Axboe <axboe-AT-kernel.dk>
To: io-uring <io-uring-AT-vger.kernel.org>
Subject: [PATCH RFC] signalfd: add support for SFD_TASK
Date: Tue, 26 Nov 2019 22:11:38 -0700
Message-ID: <254505c9-2b76-ebeb-306c-02aaf1704b88@kernel.dk>
Cc: Jann Horn <jannh-AT-google.com>, "linux-kernel-AT-vger.kernel.org" <linux-kernel-AT-vger.kernel.org>, linux-fsdevel <linux-fsdevel-AT-vger.kernel.org>
I posted this a few weeks back, took another look at it, and refined it a bit. I'd like some input on the viability of this approach.

A new signalfd setup flag is added, SFD_TASK. It is only valid if used together with SFD_CLOEXEC. If set, the task setting up the signalfd descriptor is remembered in the signalfd context, and will be the one we use for checking signals in the poll/read handlers in signalfd. This is needed to make signalfd useful with io_uring and aio, of which the former in particular has my interest.

I _think_ this is sane. To prevent the case of a task clearing O_CLOEXEC on the signalfd descriptor, forking, and then exiting, we grab a reference to the task when we assign it. If that original task exits, we catch it in signalfd_flush() and ensure waiters are woken up. The waiters also hold a task reference, so we don't have to wait for them to go away.

We still need to double-check that we can't race between the original task exiting and a new task grabbing a reference. I don't think this is solid in the version below; we probably need to add a refcount for ctx->task (the pointer, not the task) for that.

Comments?

Attaching two test programs using io_uring, one using poll and the other using read. Remove SFD_TASK from either of them, and they will fail like this:

./signalfd-read
Timed out waiting for cqe

With SFD_TASK set, both exit silently with a value of 0. You need liburing installed, then compile them with:

gcc -Wall -O2 -o signalfd-read signalfd-read.c -luring

---

diff --git a/fs/signalfd.c b/fs/signalfd.c
index 44b6845b071c..4bbdab9438c1 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -50,28 +50,62 @@ void signalfd_cleanup(struct sighand_struct *sighand)

 struct signalfd_ctx {
         sigset_t sigmask;
+        struct task_struct *task;
 };

+static int signalfd_flush(struct file *file, void *data)
+{
+        struct signalfd_ctx *ctx = file->private_data;
+        struct task_struct *tsk = ctx->task;
+
+        if (tsk == current) {
+                ctx->task = NULL;
+                wake_up(&tsk->sighand->signalfd_wqh);
+                put_task_struct(tsk);
+        }
+
+        return 0;
+}
+
 static int signalfd_release(struct inode *inode, struct file *file)
 {
-        kfree(file->private_data);
+        struct signalfd_ctx *ctx = file->private_data;
+
+        if (ctx->task)
+                put_task_struct(ctx->task);
+        kfree(ctx);
         return 0;
 }

+static void signalfd_put_task(struct task_struct *tsk)
+{
+        put_task_struct(tsk);
+}
+
+static struct task_struct *signalfd_get_task(struct signalfd_ctx *ctx)
+{
+        struct task_struct *tsk = ctx->task ?: current;
+
+        get_task_struct(tsk);
+        return tsk;
+}
+
 static __poll_t signalfd_poll(struct file *file, poll_table *wait)
 {
         struct signalfd_ctx *ctx = file->private_data;
+        struct task_struct *tsk = signalfd_get_task(ctx);
         __poll_t events = 0;

-        poll_wait(file, &current->sighand->signalfd_wqh, wait);
+        poll_wait(file, &tsk->sighand->signalfd_wqh, wait);

-        spin_lock_irq(&current->sighand->siglock);
-        if (next_signal(&current->pending, &ctx->sigmask) ||
-            next_signal(&current->signal->shared_pending,
+        spin_lock_irq(&tsk->sighand->siglock);
+        if (next_signal(&tsk->pending, &ctx->sigmask) ||
+            next_signal(&tsk->signal->shared_pending,
                         &ctx->sigmask))
                 events |= EPOLLIN;
-        spin_unlock_irq(&current->sighand->siglock);
+        spin_unlock_irq(&tsk->sighand->siglock);
+        signalfd_put_task(tsk);

         return events;
 }
@@ -167,10 +201,11 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info
                                 int nonblock)
 {
         ssize_t ret;
+        struct task_struct *tsk = signalfd_get_task(ctx);
         DECLARE_WAITQUEUE(wait, current);

-        spin_lock_irq(&current->sighand->siglock);
-        ret = dequeue_signal(current, &ctx->sigmask, info);
+        spin_lock_irq(&tsk->sighand->siglock);
+        ret = dequeue_signal(tsk, &ctx->sigmask, info);
         switch (ret) {
         case 0:
                 if (!nonblock)
@@ -178,29 +213,35 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info
                 ret = -EAGAIN;
                 /* fall through */
         default:
-                spin_unlock_irq(&current->sighand->siglock);
+                spin_unlock_irq(&tsk->sighand->siglock);
+                signalfd_put_task(tsk);
                 return ret;
         }

-        add_wait_queue(&current->sighand->signalfd_wqh, &wait);
+        add_wait_queue(&tsk->sighand->signalfd_wqh, &wait);
         for (;;) {
                 set_current_state(TASK_INTERRUPTIBLE);
-                ret = dequeue_signal(current, &ctx->sigmask, info);
+                ret = dequeue_signal(tsk, &ctx->sigmask, info);
                 if (ret != 0)
                         break;
                 if (signal_pending(current)) {
                         ret = -ERESTARTSYS;
                         break;
                 }
-                spin_unlock_irq(&current->sighand->siglock);
+                spin_unlock_irq(&tsk->sighand->siglock);
                 schedule();
-                spin_lock_irq(&current->sighand->siglock);
+                spin_lock_irq(&tsk->sighand->siglock);
+                if (tsk != current && !ctx->task) {
+                        ret = -ESRCH;
+                        break;
+                }
         }
-        spin_unlock_irq(&current->sighand->siglock);
+        spin_unlock_irq(&tsk->sighand->siglock);

-        remove_wait_queue(&current->sighand->signalfd_wqh, &wait);
+        remove_wait_queue(&tsk->sighand->signalfd_wqh, &wait);
         __set_current_state(TASK_RUNNING);
+        signalfd_put_task(tsk);

         return ret;
 }
@@ -254,6 +295,7 @@ static const struct file_operations signalfd_fops = {
 #ifdef CONFIG_PROC_FS
         .show_fdinfo    = signalfd_show_fdinfo,
 #endif
+        .flush          = signalfd_flush,
         .release        = signalfd_release,
         .poll           = signalfd_poll,
         .read           = signalfd_read,
@@ -267,19 +309,26 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags)
         /* Check the SFD_* constants for consistency.  */
         BUILD_BUG_ON(SFD_CLOEXEC != O_CLOEXEC);
         BUILD_BUG_ON(SFD_NONBLOCK != O_NONBLOCK);
+        BUILD_BUG_ON(SFD_TASK & (SFD_CLOEXEC | SFD_NONBLOCK));

-        if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK))
+        if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK | SFD_TASK))
+                return -EINVAL;
+        if ((flags & (SFD_CLOEXEC | SFD_TASK)) == SFD_TASK)
                 return -EINVAL;

         sigdelsetmask(mask, sigmask(SIGKILL) | sigmask(SIGSTOP));
         signotset(mask);

         if (ufd == -1) {
-                ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+                ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
                 if (!ctx)
                         return -ENOMEM;

                 ctx->sigmask = *mask;
+                if (flags & SFD_TASK) {
+                        ctx->task = current;
+                        get_task_struct(ctx->task);
+                }

                 /*
                  * When we call this, the initialization must be complete, since
@@ -290,6 +339,7 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags)
                 if (ufd < 0)
                         kfree(ctx);
         } else {
+                struct task_struct *tsk;
                 struct fd f = fdget(ufd);
                 if (!f.file)
                         return -EBADF;
@@ -298,11 +348,13 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags)
                         fdput(f);
                         return -EINVAL;
                 }
-                spin_lock_irq(&current->sighand->siglock);
+                tsk = signalfd_get_task(ctx);
+                spin_lock_irq(&tsk->sighand->siglock);
                 ctx->sigmask = *mask;
-                spin_unlock_irq(&current->sighand->siglock);
+                spin_unlock_irq(&tsk->sighand->siglock);

-                wake_up(&current->sighand->signalfd_wqh);
+                wake_up(&tsk->sighand->signalfd_wqh);
+                signalfd_put_task(tsk);
                 fdput(f);
         }

diff --git a/include/uapi/linux/signalfd.h b/include/uapi/linux/signalfd.h
index 83429a05b698..064c5dc3eb99 100644
--- a/include/uapi/linux/signalfd.h
+++ b/include/uapi/linux/signalfd.h
@@ -16,6 +16,7 @@
 /* Flags for signalfd4.  */
 #define SFD_CLOEXEC O_CLOEXEC
 #define SFD_NONBLOCK O_NONBLOCK
+#define SFD_TASK 00000001

 struct signalfd_siginfo {
         __u32 ssi_signo;

-- 
Jens Axboe

/*
 * Poll test: block SIGALRM, arm a 100ms timer, then wait for POLLIN on
 * the signalfd through an io_uring poll request.
 */
#include <unistd.h>
#include <sys/signalfd.h>
#include <sys/poll.h>
#include <sys/time.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <liburing.h>

#define SFD_TASK        00000001

int main(int argc, char *argv[])
{
        struct __kernel_timespec ts;
        struct io_uring_sqe *sqe;
        struct io_uring_cqe *cqe;
        struct io_uring ring;
        struct itimerval itv;
        sigset_t mask;
        int sfd, ret;

        /* block SIGALRM so it is delivered through the signalfd */
        sigemptyset(&mask);
        sigaddset(&mask, SIGALRM);
        sigprocmask(SIG_BLOCK, &mask, NULL);

        sfd = signalfd(-1, &mask, SFD_CLOEXEC | SFD_TASK);
        if (sfd < 0) {
                if (errno == EINVAL) {
                        printf("Not supported\n");
                        return 0;
                }
                perror("signalfd");
                return 1;
        }

        /* fire SIGALRM once, 100ms from now */
        memset(&itv, 0, sizeof(itv));
        itv.it_value.tv_sec = 0;
        itv.it_value.tv_usec = 100000;
        setitimer(ITIMER_REAL, &itv, NULL);

        io_uring_queue_init(32, &ring, 0);

        sqe = io_uring_get_sqe(&ring);
        io_uring_prep_poll_add(sqe, sfd, POLLIN);
        io_uring_submit(&ring);

        /* give the poll request one second to complete */
        ts.tv_sec = 1;
        ts.tv_nsec = 0;
        ret = io_uring_wait_cqe_timeout(&ring, &cqe, &ts);
        if (ret < 0) {
                fprintf(stderr, "Timed out waiting for cqe\n");
                ret = 1;
        } else {
                if (cqe->res < 0) {
                        fprintf(stderr, "cqe failed with %d\n", cqe->res);
                        ret = 1;
                } else if (!(cqe->res & POLLIN)) {
                        fprintf(stderr, "POLLIN not set in result mask?\n");
                        ret = 1;
                } else {
                        ret = 0;
                }
        }

        io_uring_cqe_seen(&ring, cqe);
        io_uring_queue_exit(&ring);
        close(sfd);
        return ret;
}

/*
 * Read test: block SIGALRM, arm a 100ms timer, then read a struct
 * signalfd_siginfo from the signalfd through an io_uring readv request.
 */
#include <unistd.h>
#include <sys/signalfd.h>
#include <sys/poll.h>
#include <sys/time.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <liburing.h>

#define SFD_TASK        00000001

int main(int argc, char *argv[])
{
        struct __kernel_timespec ts;
        struct signalfd_siginfo si;
        struct iovec iov = {
                .iov_base = &si,
                .iov_len = sizeof(si),
        };
        struct io_uring_sqe *sqe;
        struct io_uring_cqe *cqe;
        struct io_uring ring;
        struct itimerval itv;
        sigset_t mask;
        int sfd, ret;

        /* block SIGALRM so it is delivered through the signalfd */
        sigemptyset(&mask);
        sigaddset(&mask, SIGALRM);
        sigprocmask(SIG_BLOCK, &mask, NULL);

        sfd = signalfd(-1, &mask, SFD_CLOEXEC | SFD_TASK);
        if (sfd < 0) {
                if (errno == EINVAL) {
                        printf("Not supported\n");
                        return 0;
                }
                perror("signalfd");
                return 1;
        }

        /* fire SIGALRM once, 100ms from now */
        memset(&itv, 0, sizeof(itv));
        itv.it_value.tv_sec = 0;
        itv.it_value.tv_usec = 100000;
        setitimer(ITIMER_REAL, &itv, NULL);

        io_uring_queue_init(32, &ring, 0);

        sqe = io_uring_get_sqe(&ring);
        io_uring_prep_readv(sqe, sfd, &iov, 1, 0);
        io_uring_submit(&ring);

        /* give the read request one second to complete */
        ts.tv_sec = 1;
        ts.tv_nsec = 0;
        ret = io_uring_wait_cqe_timeout(&ring, &cqe, &ts);
        if (ret < 0) {
                fprintf(stderr, "Timed out waiting for cqe\n");
                ret = 1;
        } else {
                ret = 0;
                if (cqe->res < 0) {
                        fprintf(stderr, "cqe failed with %d\n", cqe->res);
                        ret = 1;
                } else if (cqe->res != sizeof(si)) {
                        fprintf(stderr, "Read %d, wanted %d\n", cqe->res,
                                (int)sizeof(si));
                        ret = 1;
                }
        }

        io_uring_cqe_seen(&ring, cqe);
        io_uring_queue_exit(&ring);
        close(sfd);
        return ret;
}
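As an aside, the read test only checks that a full struct signalfd_siginfo was returned. An extra sanity check (not in the program above) could verify which signal was dequeued; something like this could go right after the size check, given that SIGALRM from setitimer() is the only signal in the mask:

        /* optional: verify the dequeued signal is the SIGALRM we armed */
        if (si.ssi_signo != SIGALRM) {
                fprintf(stderr, "Expected SIGALRM, got signal %u\n",
                        si.ssi_signo);
                ret = 1;
        }

Both programs build the same way; the source filename of the poll variant isn't given above, but assuming it is saved as signalfd-poll.c:

gcc -Wall -O2 -o signalfd-poll signalfd-poll.c -luring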