linux/drivers/dma-buf/udmabuf.c

510 lines
11 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
#include <linux/cred.h>
#include <linux/device.h>
#include <linux/dma-buf.h>
#include <linux/dma-resv.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/memfd.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/shmem_fs.h>
udmabuf: add back support for mapping hugetlb pages A user or admin can configure a VMM (Qemu) Guest's memory to be backed by hugetlb pages for various reasons. However, a Guest OS would still allocate (and pin) buffers that are backed by regular 4k sized pages. In order to map these buffers and create dma-bufs for them on the Host, we first need to find the hugetlb pages where the buffer allocations are located and then determine the offsets of individual chunks (within those pages) and use this information to eventually populate a scatterlist. Testcase: default_hugepagesz=2M hugepagesz=2M hugepages=2500 options were passed to the Host kernel and Qemu was launched with these relevant options: qemu-system-x86_64 -m 4096m.... -device virtio-gpu-pci,max_outputs=1,blob=true,xres=1920,yres=1080 -display gtk,gl=on -object memory-backend-memfd,hugetlb=on,id=mem1,size=4096M -machine memory-backend=mem1 Replacing -display gtk,gl=on with -display gtk,gl=off above would exercise the mmap handler. Link: https://lkml.kernel.org/r/20240624063952.1572359-7-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Mike Kravetz <mike.kravetz@oracle.com> (v2) Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:14 +00:00
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <linux/udmabuf.h>
#include <linux/vmalloc.h>
#include <linux/iosys-map.h>
static int list_limit = 1024;
module_param(list_limit, int, 0644);
MODULE_PARM_DESC(list_limit, "udmabuf_create_list->count limit. Default is 1024.");
static int size_limit_mb = 64;
module_param(size_limit_mb, int, 0644);
MODULE_PARM_DESC(size_limit_mb, "Max size of a dmabuf, in megabytes. Default is 64.");
struct udmabuf {
pgoff_t pagecount;
udmabuf: convert udmabuf driver to use folios This is mainly a preparatory patch to use memfd_pin_folios() API for pinning folios. Using folios instead of pages makes sense as the udmabuf driver needs to handle both shmem and hugetlb cases. And, using the memfd_pin_folios() API makes this easier as we no longer need to separately handle shmem vs hugetlb cases in the udmabuf driver. Note that, the function vmap_udmabuf() still needs a list of pages; so, we collect all the head pages into a local array in this case. Other changes in this patch include the addition of helpers for checking the memfd seals and exporting dmabuf. Moving code from udmabuf_create() into these helpers improves readability given that udmabuf_create() is a bit long. Link: https://lkml.kernel.org/r/20240624063952.1572359-8-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:15 +00:00
struct folio **folios;
struct sg_table *sg;
struct miscdevice *device;
udmabuf: add back support for mapping hugetlb pages A user or admin can configure a VMM (Qemu) Guest's memory to be backed by hugetlb pages for various reasons. However, a Guest OS would still allocate (and pin) buffers that are backed by regular 4k sized pages. In order to map these buffers and create dma-bufs for them on the Host, we first need to find the hugetlb pages where the buffer allocations are located and then determine the offsets of individual chunks (within those pages) and use this information to eventually populate a scatterlist. Testcase: default_hugepagesz=2M hugepagesz=2M hugepages=2500 options were passed to the Host kernel and Qemu was launched with these relevant options: qemu-system-x86_64 -m 4096m.... -device virtio-gpu-pci,max_outputs=1,blob=true,xres=1920,yres=1080 -display gtk,gl=on -object memory-backend-memfd,hugetlb=on,id=mem1,size=4096M -machine memory-backend=mem1 Replacing -display gtk,gl=on with -display gtk,gl=off above would exercise the mmap handler. Link: https://lkml.kernel.org/r/20240624063952.1572359-7-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Mike Kravetz <mike.kravetz@oracle.com> (v2) Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:14 +00:00
pgoff_t *offsets;
struct list_head unpin_list;
};
struct udmabuf_folio {
struct folio *folio;
struct list_head list;
};
static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct udmabuf *ubuf = vma->vm_private_data;
pgoff_t pgoff = vmf->pgoff;
unsigned long pfn;
if (pgoff >= ubuf->pagecount)
return VM_FAULT_SIGBUS;
udmabuf: convert udmabuf driver to use folios This is mainly a preparatory patch to use memfd_pin_folios() API for pinning folios. Using folios instead of pages makes sense as the udmabuf driver needs to handle both shmem and hugetlb cases. And, using the memfd_pin_folios() API makes this easier as we no longer need to separately handle shmem vs hugetlb cases in the udmabuf driver. Note that, the function vmap_udmabuf() still needs a list of pages; so, we collect all the head pages into a local array in this case. Other changes in this patch include the addition of helpers for checking the memfd seals and exporting dmabuf. Moving code from udmabuf_create() into these helpers improves readability given that udmabuf_create() is a bit long. Link: https://lkml.kernel.org/r/20240624063952.1572359-8-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:15 +00:00
pfn = folio_pfn(ubuf->folios[pgoff]);
udmabuf: add back support for mapping hugetlb pages A user or admin can configure a VMM (Qemu) Guest's memory to be backed by hugetlb pages for various reasons. However, a Guest OS would still allocate (and pin) buffers that are backed by regular 4k sized pages. In order to map these buffers and create dma-bufs for them on the Host, we first need to find the hugetlb pages where the buffer allocations are located and then determine the offsets of individual chunks (within those pages) and use this information to eventually populate a scatterlist. Testcase: default_hugepagesz=2M hugepagesz=2M hugepages=2500 options were passed to the Host kernel and Qemu was launched with these relevant options: qemu-system-x86_64 -m 4096m.... -device virtio-gpu-pci,max_outputs=1,blob=true,xres=1920,yres=1080 -display gtk,gl=on -object memory-backend-memfd,hugetlb=on,id=mem1,size=4096M -machine memory-backend=mem1 Replacing -display gtk,gl=on with -display gtk,gl=off above would exercise the mmap handler. Link: https://lkml.kernel.org/r/20240624063952.1572359-7-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Mike Kravetz <mike.kravetz@oracle.com> (v2) Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:14 +00:00
pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT;
return vmf_insert_pfn(vma, vmf->address, pfn);
}
static const struct vm_operations_struct udmabuf_vm_ops = {
.fault = udmabuf_vm_fault,
};
static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct *vma)
{
struct udmabuf *ubuf = buf->priv;
if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
return -EINVAL;
vma->vm_ops = &udmabuf_vm_ops;
vma->vm_private_data = ubuf;
vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
return 0;
}
static int vmap_udmabuf(struct dma_buf *buf, struct iosys_map *map)
{
struct udmabuf *ubuf = buf->priv;
udmabuf: convert udmabuf driver to use folios This is mainly a preparatory patch to use memfd_pin_folios() API for pinning folios. Using folios instead of pages makes sense as the udmabuf driver needs to handle both shmem and hugetlb cases. And, using the memfd_pin_folios() API makes this easier as we no longer need to separately handle shmem vs hugetlb cases in the udmabuf driver. Note that, the function vmap_udmabuf() still needs a list of pages; so, we collect all the head pages into a local array in this case. Other changes in this patch include the addition of helpers for checking the memfd seals and exporting dmabuf. Moving code from udmabuf_create() into these helpers improves readability given that udmabuf_create() is a bit long. Link: https://lkml.kernel.org/r/20240624063952.1572359-8-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:15 +00:00
struct page **pages;
void *vaddr;
udmabuf: convert udmabuf driver to use folios This is mainly a preparatory patch to use memfd_pin_folios() API for pinning folios. Using folios instead of pages makes sense as the udmabuf driver needs to handle both shmem and hugetlb cases. And, using the memfd_pin_folios() API makes this easier as we no longer need to separately handle shmem vs hugetlb cases in the udmabuf driver. Note that, the function vmap_udmabuf() still needs a list of pages; so, we collect all the head pages into a local array in this case. Other changes in this patch include the addition of helpers for checking the memfd seals and exporting dmabuf. Moving code from udmabuf_create() into these helpers improves readability given that udmabuf_create() is a bit long. Link: https://lkml.kernel.org/r/20240624063952.1572359-8-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:15 +00:00
pgoff_t pg;
dma_resv_assert_held(buf->resv);
udmabuf: convert udmabuf driver to use folios This is mainly a preparatory patch to use memfd_pin_folios() API for pinning folios. Using folios instead of pages makes sense as the udmabuf driver needs to handle both shmem and hugetlb cases. And, using the memfd_pin_folios() API makes this easier as we no longer need to separately handle shmem vs hugetlb cases in the udmabuf driver. Note that, the function vmap_udmabuf() still needs a list of pages; so, we collect all the head pages into a local array in this case. Other changes in this patch include the addition of helpers for checking the memfd seals and exporting dmabuf. Moving code from udmabuf_create() into these helpers improves readability given that udmabuf_create() is a bit long. Link: https://lkml.kernel.org/r/20240624063952.1572359-8-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:15 +00:00
pages = kmalloc_array(ubuf->pagecount, sizeof(*pages), GFP_KERNEL);
if (!pages)
return -ENOMEM;
for (pg = 0; pg < ubuf->pagecount; pg++)
pages[pg] = &ubuf->folios[pg]->page;
vaddr = vm_map_ram(pages, ubuf->pagecount, -1);
kfree(pages);
if (!vaddr)
return -EINVAL;
iosys_map_set_vaddr(map, vaddr);
return 0;
}
static void vunmap_udmabuf(struct dma_buf *buf, struct iosys_map *map)
{
struct udmabuf *ubuf = buf->priv;
dma_resv_assert_held(buf->resv);
vm_unmap_ram(map->vaddr, ubuf->pagecount);
}
static struct sg_table *get_sg_table(struct device *dev, struct dma_buf *buf,
enum dma_data_direction direction)
{
struct udmabuf *ubuf = buf->priv;
struct sg_table *sg;
udmabuf: add back support for mapping hugetlb pages A user or admin can configure a VMM (Qemu) Guest's memory to be backed by hugetlb pages for various reasons. However, a Guest OS would still allocate (and pin) buffers that are backed by regular 4k sized pages. In order to map these buffers and create dma-bufs for them on the Host, we first need to find the hugetlb pages where the buffer allocations are located and then determine the offsets of individual chunks (within those pages) and use this information to eventually populate a scatterlist. Testcase: default_hugepagesz=2M hugepagesz=2M hugepages=2500 options were passed to the Host kernel and Qemu was launched with these relevant options: qemu-system-x86_64 -m 4096m.... -device virtio-gpu-pci,max_outputs=1,blob=true,xres=1920,yres=1080 -display gtk,gl=on -object memory-backend-memfd,hugetlb=on,id=mem1,size=4096M -machine memory-backend=mem1 Replacing -display gtk,gl=on with -display gtk,gl=off above would exercise the mmap handler. Link: https://lkml.kernel.org/r/20240624063952.1572359-7-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Mike Kravetz <mike.kravetz@oracle.com> (v2) Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:14 +00:00
struct scatterlist *sgl;
unsigned int i = 0;
int ret;
sg = kzalloc(sizeof(*sg), GFP_KERNEL);
if (!sg)
return ERR_PTR(-ENOMEM);
udmabuf: add back support for mapping hugetlb pages A user or admin can configure a VMM (Qemu) Guest's memory to be backed by hugetlb pages for various reasons. However, a Guest OS would still allocate (and pin) buffers that are backed by regular 4k sized pages. In order to map these buffers and create dma-bufs for them on the Host, we first need to find the hugetlb pages where the buffer allocations are located and then determine the offsets of individual chunks (within those pages) and use this information to eventually populate a scatterlist. Testcase: default_hugepagesz=2M hugepagesz=2M hugepages=2500 options were passed to the Host kernel and Qemu was launched with these relevant options: qemu-system-x86_64 -m 4096m.... -device virtio-gpu-pci,max_outputs=1,blob=true,xres=1920,yres=1080 -display gtk,gl=on -object memory-backend-memfd,hugetlb=on,id=mem1,size=4096M -machine memory-backend=mem1 Replacing -display gtk,gl=on with -display gtk,gl=off above would exercise the mmap handler. Link: https://lkml.kernel.org/r/20240624063952.1572359-7-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Mike Kravetz <mike.kravetz@oracle.com> (v2) Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:14 +00:00
ret = sg_alloc_table(sg, ubuf->pagecount, GFP_KERNEL);
if (ret < 0)
udmabuf: add back support for mapping hugetlb pages A user or admin can configure a VMM (Qemu) Guest's memory to be backed by hugetlb pages for various reasons. However, a Guest OS would still allocate (and pin) buffers that are backed by regular 4k sized pages. In order to map these buffers and create dma-bufs for them on the Host, we first need to find the hugetlb pages where the buffer allocations are located and then determine the offsets of individual chunks (within those pages) and use this information to eventually populate a scatterlist. Testcase: default_hugepagesz=2M hugepagesz=2M hugepages=2500 options were passed to the Host kernel and Qemu was launched with these relevant options: qemu-system-x86_64 -m 4096m.... -device virtio-gpu-pci,max_outputs=1,blob=true,xres=1920,yres=1080 -display gtk,gl=on -object memory-backend-memfd,hugetlb=on,id=mem1,size=4096M -machine memory-backend=mem1 Replacing -display gtk,gl=on with -display gtk,gl=off above would exercise the mmap handler. Link: https://lkml.kernel.org/r/20240624063952.1572359-7-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Mike Kravetz <mike.kravetz@oracle.com> (v2) Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:14 +00:00
goto err_alloc;
for_each_sg(sg->sgl, sgl, ubuf->pagecount, i)
udmabuf: convert udmabuf driver to use folios This is mainly a preparatory patch to use memfd_pin_folios() API for pinning folios. Using folios instead of pages makes sense as the udmabuf driver needs to handle both shmem and hugetlb cases. And, using the memfd_pin_folios() API makes this easier as we no longer need to separately handle shmem vs hugetlb cases in the udmabuf driver. Note that, the function vmap_udmabuf() still needs a list of pages; so, we collect all the head pages into a local array in this case. Other changes in this patch include the addition of helpers for checking the memfd seals and exporting dmabuf. Moving code from udmabuf_create() into these helpers improves readability given that udmabuf_create() is a bit long. Link: https://lkml.kernel.org/r/20240624063952.1572359-8-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:15 +00:00
sg_set_folio(sgl, ubuf->folios[i], PAGE_SIZE,
ubuf->offsets[i]);
udmabuf: add back support for mapping hugetlb pages A user or admin can configure a VMM (Qemu) Guest's memory to be backed by hugetlb pages for various reasons. However, a Guest OS would still allocate (and pin) buffers that are backed by regular 4k sized pages. In order to map these buffers and create dma-bufs for them on the Host, we first need to find the hugetlb pages where the buffer allocations are located and then determine the offsets of individual chunks (within those pages) and use this information to eventually populate a scatterlist. Testcase: default_hugepagesz=2M hugepagesz=2M hugepages=2500 options were passed to the Host kernel and Qemu was launched with these relevant options: qemu-system-x86_64 -m 4096m.... -device virtio-gpu-pci,max_outputs=1,blob=true,xres=1920,yres=1080 -display gtk,gl=on -object memory-backend-memfd,hugetlb=on,id=mem1,size=4096M -machine memory-backend=mem1 Replacing -display gtk,gl=on with -display gtk,gl=off above would exercise the mmap handler. Link: https://lkml.kernel.org/r/20240624063952.1572359-7-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Mike Kravetz <mike.kravetz@oracle.com> (v2) Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:14 +00:00
ret = dma_map_sgtable(dev, sg, direction, 0);
if (ret < 0)
udmabuf: add back support for mapping hugetlb pages A user or admin can configure a VMM (Qemu) Guest's memory to be backed by hugetlb pages for various reasons. However, a Guest OS would still allocate (and pin) buffers that are backed by regular 4k sized pages. In order to map these buffers and create dma-bufs for them on the Host, we first need to find the hugetlb pages where the buffer allocations are located and then determine the offsets of individual chunks (within those pages) and use this information to eventually populate a scatterlist. Testcase: default_hugepagesz=2M hugepagesz=2M hugepages=2500 options were passed to the Host kernel and Qemu was launched with these relevant options: qemu-system-x86_64 -m 4096m.... -device virtio-gpu-pci,max_outputs=1,blob=true,xres=1920,yres=1080 -display gtk,gl=on -object memory-backend-memfd,hugetlb=on,id=mem1,size=4096M -machine memory-backend=mem1 Replacing -display gtk,gl=on with -display gtk,gl=off above would exercise the mmap handler. Link: https://lkml.kernel.org/r/20240624063952.1572359-7-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Mike Kravetz <mike.kravetz@oracle.com> (v2) Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:14 +00:00
goto err_map;
return sg;
udmabuf: add back support for mapping hugetlb pages A user or admin can configure a VMM (Qemu) Guest's memory to be backed by hugetlb pages for various reasons. However, a Guest OS would still allocate (and pin) buffers that are backed by regular 4k sized pages. In order to map these buffers and create dma-bufs for them on the Host, we first need to find the hugetlb pages where the buffer allocations are located and then determine the offsets of individual chunks (within those pages) and use this information to eventually populate a scatterlist. Testcase: default_hugepagesz=2M hugepagesz=2M hugepages=2500 options were passed to the Host kernel and Qemu was launched with these relevant options: qemu-system-x86_64 -m 4096m.... -device virtio-gpu-pci,max_outputs=1,blob=true,xres=1920,yres=1080 -display gtk,gl=on -object memory-backend-memfd,hugetlb=on,id=mem1,size=4096M -machine memory-backend=mem1 Replacing -display gtk,gl=on with -display gtk,gl=off above would exercise the mmap handler. Link: https://lkml.kernel.org/r/20240624063952.1572359-7-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Mike Kravetz <mike.kravetz@oracle.com> (v2) Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:14 +00:00
err_map:
sg_free_table(sg);
udmabuf: add back support for mapping hugetlb pages A user or admin can configure a VMM (Qemu) Guest's memory to be backed by hugetlb pages for various reasons. However, a Guest OS would still allocate (and pin) buffers that are backed by regular 4k sized pages. In order to map these buffers and create dma-bufs for them on the Host, we first need to find the hugetlb pages where the buffer allocations are located and then determine the offsets of individual chunks (within those pages) and use this information to eventually populate a scatterlist. Testcase: default_hugepagesz=2M hugepagesz=2M hugepages=2500 options were passed to the Host kernel and Qemu was launched with these relevant options: qemu-system-x86_64 -m 4096m.... -device virtio-gpu-pci,max_outputs=1,blob=true,xres=1920,yres=1080 -display gtk,gl=on -object memory-backend-memfd,hugetlb=on,id=mem1,size=4096M -machine memory-backend=mem1 Replacing -display gtk,gl=on with -display gtk,gl=off above would exercise the mmap handler. Link: https://lkml.kernel.org/r/20240624063952.1572359-7-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Mike Kravetz <mike.kravetz@oracle.com> (v2) Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:14 +00:00
err_alloc:
kfree(sg);
return ERR_PTR(ret);
}
static void put_sg_table(struct device *dev, struct sg_table *sg,
enum dma_data_direction direction)
{
dma_unmap_sgtable(dev, sg, direction, 0);
sg_free_table(sg);
kfree(sg);
}
static struct sg_table *map_udmabuf(struct dma_buf_attachment *at,
enum dma_data_direction direction)
{
return get_sg_table(at->dev, at->dmabuf, direction);
}
static void unmap_udmabuf(struct dma_buf_attachment *at,
struct sg_table *sg,
enum dma_data_direction direction)
{
return put_sg_table(at->dev, sg, direction);
}
static void unpin_all_folios(struct list_head *unpin_list)
{
struct udmabuf_folio *ubuf_folio;
while (!list_empty(unpin_list)) {
ubuf_folio = list_first_entry(unpin_list,
struct udmabuf_folio, list);
unpin_folio(ubuf_folio->folio);
list_del(&ubuf_folio->list);
kfree(ubuf_folio);
}
}
static int add_to_unpin_list(struct list_head *unpin_list,
struct folio *folio)
{
struct udmabuf_folio *ubuf_folio;
ubuf_folio = kzalloc(sizeof(*ubuf_folio), GFP_KERNEL);
if (!ubuf_folio)
return -ENOMEM;
ubuf_folio->folio = folio;
list_add_tail(&ubuf_folio->list, unpin_list);
return 0;
}
static void release_udmabuf(struct dma_buf *buf)
{
struct udmabuf *ubuf = buf->priv;
struct device *dev = ubuf->device->this_device;
if (ubuf->sg)
put_sg_table(dev, ubuf->sg, DMA_BIDIRECTIONAL);
unpin_all_folios(&ubuf->unpin_list);
udmabuf: add back support for mapping hugetlb pages A user or admin can configure a VMM (Qemu) Guest's memory to be backed by hugetlb pages for various reasons. However, a Guest OS would still allocate (and pin) buffers that are backed by regular 4k sized pages. In order to map these buffers and create dma-bufs for them on the Host, we first need to find the hugetlb pages where the buffer allocations are located and then determine the offsets of individual chunks (within those pages) and use this information to eventually populate a scatterlist. Testcase: default_hugepagesz=2M hugepagesz=2M hugepages=2500 options were passed to the Host kernel and Qemu was launched with these relevant options: qemu-system-x86_64 -m 4096m.... -device virtio-gpu-pci,max_outputs=1,blob=true,xres=1920,yres=1080 -display gtk,gl=on -object memory-backend-memfd,hugetlb=on,id=mem1,size=4096M -machine memory-backend=mem1 Replacing -display gtk,gl=on with -display gtk,gl=off above would exercise the mmap handler. Link: https://lkml.kernel.org/r/20240624063952.1572359-7-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Mike Kravetz <mike.kravetz@oracle.com> (v2) Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:14 +00:00
kfree(ubuf->offsets);
udmabuf: convert udmabuf driver to use folios This is mainly a preparatory patch to use memfd_pin_folios() API for pinning folios. Using folios instead of pages makes sense as the udmabuf driver needs to handle both shmem and hugetlb cases. And, using the memfd_pin_folios() API makes this easier as we no longer need to separately handle shmem vs hugetlb cases in the udmabuf driver. Note that, the function vmap_udmabuf() still needs a list of pages; so, we collect all the head pages into a local array in this case. Other changes in this patch include the addition of helpers for checking the memfd seals and exporting dmabuf. Moving code from udmabuf_create() into these helpers improves readability given that udmabuf_create() is a bit long. Link: https://lkml.kernel.org/r/20240624063952.1572359-8-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:15 +00:00
kfree(ubuf->folios);
kfree(ubuf);
}
static int begin_cpu_udmabuf(struct dma_buf *buf,
enum dma_data_direction direction)
{
struct udmabuf *ubuf = buf->priv;
struct device *dev = ubuf->device->this_device;
udmabuf: Set ubuf->sg = NULL if the creation of sg table fails When userspace tries to map the dmabuf and if for some reason (e.g. OOM) the creation of the sg table fails, ubuf->sg needs to be set to NULL. Otherwise, when the userspace subsequently closes the dmabuf fd, we'd try to erroneously free the invalid sg table from release_udmabuf resulting in the following crash reported by syzbot: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 0 PID: 3609 Comm: syz-executor487 Not tainted 5.19.0-syzkaller-13930-g7ebfc85e2cd7 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/22/2022 RIP: 0010:dma_unmap_sgtable include/linux/dma-mapping.h:378 [inline] RIP: 0010:put_sg_table drivers/dma-buf/udmabuf.c:89 [inline] RIP: 0010:release_udmabuf+0xcb/0x4f0 drivers/dma-buf/udmabuf.c:114 Code: 48 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 2b 04 00 00 48 8d 7d 0c 4c 8b 63 30 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 e2 RSP: 0018:ffffc900037efd30 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: ffffffff8cb67800 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff84ad27e0 RDI: 0000000000000000 RBP: fffffffffffffff4 R08: 0000000000000005 R09: 0000000000000000 R10: 0000000000000000 R11: 000000000008c07c R12: ffff88801fa05000 R13: ffff888073db07e8 R14: ffff888025c25440 R15: 0000000000000000 FS: 0000555555fc4300(0000) GS:ffff8880b9a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fc1c0ce06e4 CR3: 00000000715e6000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <TASK> dma_buf_release+0x157/0x2d0 drivers/dma-buf/dma-buf.c:78 __dentry_kill+0x42b/0x640 fs/dcache.c:612 dentry_kill fs/dcache.c:733 [inline] dput+0x806/0xdb0 fs/dcache.c:913 __fput+0x39c/0x9d0 fs/file_table.c:333 task_work_run+0xdd/0x1a0 kernel/task_work.c:177 ptrace_notify+0x114/0x140 kernel/signal.c:2353 ptrace_report_syscall include/linux/ptrace.h:420 [inline] ptrace_report_syscall_exit include/linux/ptrace.h:482 [inline] syscall_exit_work kernel/entry/common.c:249 [inline] syscall_exit_to_user_mode_prepare+0x129/0x280 kernel/entry/common.c:276 __syscall_exit_to_user_mode_work kernel/entry/common.c:281 [inline] syscall_exit_to_user_mode+0x9/0x50 kernel/entry/common.c:294 do_syscall_64+0x42/0xb0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fc1c0c35b6b Code: 0f 05 48 3d 00 f0 ff ff 77 45 c3 0f 1f 40 00 48 83 ec 18 89 7c 24 0c e8 63 fc ff ff 8b 7c 24 0c 41 89 c0 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 35 44 89 c7 89 44 24 0c e8 a1 fc ff ff 8b 44 RSP: 002b:00007ffd78a06090 EFLAGS: 00000293 ORIG_RAX: 0000000000000003 RAX: 0000000000000000 RBX: 0000000000000007 RCX: 00007fc1c0c35b6b RDX: 0000000020000280 RSI: 0000000040086200 RDI: 0000000000000006 RBP: 0000000000000007 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000293 R12: 000000000000000c R13: 0000000000000003 R14: 00007fc1c0cfe4a0 R15: 00007ffd78a06140 </TASK> Modules linked in: ---[ end trace 0000000000000000 ]--- RIP: 0010:dma_unmap_sgtable include/linux/dma-mapping.h:378 [inline] RIP: 0010:put_sg_table drivers/dma-buf/udmabuf.c:89 [inline] RIP: 0010:release_udmabuf+0xcb/0x4f0 drivers/dma-buf/udmabuf.c:114 Reported-by: syzbot+c80e9ef5d8bb45894db0@syzkaller.appspotmail.com Cc: Gerd Hoffmann <kraxel@redhat.com> Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20220825063522.801264-1-vivek.kasireddy@intel.com Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2022-08-25 06:35:22 +00:00
int ret = 0;
if (!ubuf->sg) {
ubuf->sg = get_sg_table(dev, buf, direction);
udmabuf: Set ubuf->sg = NULL if the creation of sg table fails When userspace tries to map the dmabuf and if for some reason (e.g. OOM) the creation of the sg table fails, ubuf->sg needs to be set to NULL. Otherwise, when the userspace subsequently closes the dmabuf fd, we'd try to erroneously free the invalid sg table from release_udmabuf resulting in the following crash reported by syzbot: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 0 PID: 3609 Comm: syz-executor487 Not tainted 5.19.0-syzkaller-13930-g7ebfc85e2cd7 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/22/2022 RIP: 0010:dma_unmap_sgtable include/linux/dma-mapping.h:378 [inline] RIP: 0010:put_sg_table drivers/dma-buf/udmabuf.c:89 [inline] RIP: 0010:release_udmabuf+0xcb/0x4f0 drivers/dma-buf/udmabuf.c:114 Code: 48 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 2b 04 00 00 48 8d 7d 0c 4c 8b 63 30 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 e2 RSP: 0018:ffffc900037efd30 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: ffffffff8cb67800 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff84ad27e0 RDI: 0000000000000000 RBP: fffffffffffffff4 R08: 0000000000000005 R09: 0000000000000000 R10: 0000000000000000 R11: 000000000008c07c R12: ffff88801fa05000 R13: ffff888073db07e8 R14: ffff888025c25440 R15: 0000000000000000 FS: 0000555555fc4300(0000) GS:ffff8880b9a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fc1c0ce06e4 CR3: 00000000715e6000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <TASK> dma_buf_release+0x157/0x2d0 drivers/dma-buf/dma-buf.c:78 __dentry_kill+0x42b/0x640 fs/dcache.c:612 dentry_kill fs/dcache.c:733 [inline] dput+0x806/0xdb0 fs/dcache.c:913 __fput+0x39c/0x9d0 fs/file_table.c:333 task_work_run+0xdd/0x1a0 kernel/task_work.c:177 ptrace_notify+0x114/0x140 kernel/signal.c:2353 ptrace_report_syscall include/linux/ptrace.h:420 [inline] ptrace_report_syscall_exit include/linux/ptrace.h:482 [inline] syscall_exit_work kernel/entry/common.c:249 [inline] syscall_exit_to_user_mode_prepare+0x129/0x280 kernel/entry/common.c:276 __syscall_exit_to_user_mode_work kernel/entry/common.c:281 [inline] syscall_exit_to_user_mode+0x9/0x50 kernel/entry/common.c:294 do_syscall_64+0x42/0xb0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fc1c0c35b6b Code: 0f 05 48 3d 00 f0 ff ff 77 45 c3 0f 1f 40 00 48 83 ec 18 89 7c 24 0c e8 63 fc ff ff 8b 7c 24 0c 41 89 c0 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 35 44 89 c7 89 44 24 0c e8 a1 fc ff ff 8b 44 RSP: 002b:00007ffd78a06090 EFLAGS: 00000293 ORIG_RAX: 0000000000000003 RAX: 0000000000000000 RBX: 0000000000000007 RCX: 00007fc1c0c35b6b RDX: 0000000020000280 RSI: 0000000040086200 RDI: 0000000000000006 RBP: 0000000000000007 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000293 R12: 000000000000000c R13: 0000000000000003 R14: 00007fc1c0cfe4a0 R15: 00007ffd78a06140 </TASK> Modules linked in: ---[ end trace 0000000000000000 ]--- RIP: 0010:dma_unmap_sgtable include/linux/dma-mapping.h:378 [inline] RIP: 0010:put_sg_table drivers/dma-buf/udmabuf.c:89 [inline] RIP: 0010:release_udmabuf+0xcb/0x4f0 drivers/dma-buf/udmabuf.c:114 Reported-by: syzbot+c80e9ef5d8bb45894db0@syzkaller.appspotmail.com Cc: Gerd Hoffmann <kraxel@redhat.com> Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20220825063522.801264-1-vivek.kasireddy@intel.com Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2022-08-25 06:35:22 +00:00
if (IS_ERR(ubuf->sg)) {
ret = PTR_ERR(ubuf->sg);
ubuf->sg = NULL;
}
} else {
dma_sync_sg_for_cpu(dev, ubuf->sg->sgl, ubuf->sg->nents,
direction);
}
udmabuf: Set ubuf->sg = NULL if the creation of sg table fails When userspace tries to map the dmabuf and if for some reason (e.g. OOM) the creation of the sg table fails, ubuf->sg needs to be set to NULL. Otherwise, when the userspace subsequently closes the dmabuf fd, we'd try to erroneously free the invalid sg table from release_udmabuf resulting in the following crash reported by syzbot: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 0 PID: 3609 Comm: syz-executor487 Not tainted 5.19.0-syzkaller-13930-g7ebfc85e2cd7 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/22/2022 RIP: 0010:dma_unmap_sgtable include/linux/dma-mapping.h:378 [inline] RIP: 0010:put_sg_table drivers/dma-buf/udmabuf.c:89 [inline] RIP: 0010:release_udmabuf+0xcb/0x4f0 drivers/dma-buf/udmabuf.c:114 Code: 48 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 2b 04 00 00 48 8d 7d 0c 4c 8b 63 30 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 e2 RSP: 0018:ffffc900037efd30 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: ffffffff8cb67800 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff84ad27e0 RDI: 0000000000000000 RBP: fffffffffffffff4 R08: 0000000000000005 R09: 0000000000000000 R10: 0000000000000000 R11: 000000000008c07c R12: ffff88801fa05000 R13: ffff888073db07e8 R14: ffff888025c25440 R15: 0000000000000000 FS: 0000555555fc4300(0000) GS:ffff8880b9a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fc1c0ce06e4 CR3: 00000000715e6000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <TASK> dma_buf_release+0x157/0x2d0 drivers/dma-buf/dma-buf.c:78 __dentry_kill+0x42b/0x640 fs/dcache.c:612 dentry_kill fs/dcache.c:733 [inline] dput+0x806/0xdb0 fs/dcache.c:913 __fput+0x39c/0x9d0 fs/file_table.c:333 task_work_run+0xdd/0x1a0 kernel/task_work.c:177 ptrace_notify+0x114/0x140 kernel/signal.c:2353 ptrace_report_syscall include/linux/ptrace.h:420 [inline] ptrace_report_syscall_exit include/linux/ptrace.h:482 [inline] syscall_exit_work kernel/entry/common.c:249 [inline] syscall_exit_to_user_mode_prepare+0x129/0x280 kernel/entry/common.c:276 __syscall_exit_to_user_mode_work kernel/entry/common.c:281 [inline] syscall_exit_to_user_mode+0x9/0x50 kernel/entry/common.c:294 do_syscall_64+0x42/0xb0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fc1c0c35b6b Code: 0f 05 48 3d 00 f0 ff ff 77 45 c3 0f 1f 40 00 48 83 ec 18 89 7c 24 0c e8 63 fc ff ff 8b 7c 24 0c 41 89 c0 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 35 44 89 c7 89 44 24 0c e8 a1 fc ff ff 8b 44 RSP: 002b:00007ffd78a06090 EFLAGS: 00000293 ORIG_RAX: 0000000000000003 RAX: 0000000000000000 RBX: 0000000000000007 RCX: 00007fc1c0c35b6b RDX: 0000000020000280 RSI: 0000000040086200 RDI: 0000000000000006 RBP: 0000000000000007 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000293 R12: 000000000000000c R13: 0000000000000003 R14: 00007fc1c0cfe4a0 R15: 00007ffd78a06140 </TASK> Modules linked in: ---[ end trace 0000000000000000 ]--- RIP: 0010:dma_unmap_sgtable include/linux/dma-mapping.h:378 [inline] RIP: 0010:put_sg_table drivers/dma-buf/udmabuf.c:89 [inline] RIP: 0010:release_udmabuf+0xcb/0x4f0 drivers/dma-buf/udmabuf.c:114 Reported-by: syzbot+c80e9ef5d8bb45894db0@syzkaller.appspotmail.com Cc: Gerd Hoffmann <kraxel@redhat.com> Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20220825063522.801264-1-vivek.kasireddy@intel.com Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2022-08-25 06:35:22 +00:00
return ret;
}
static int end_cpu_udmabuf(struct dma_buf *buf,
enum dma_data_direction direction)
{
struct udmabuf *ubuf = buf->priv;
struct device *dev = ubuf->device->this_device;
if (!ubuf->sg)
return -EINVAL;
dma_sync_sg_for_device(dev, ubuf->sg->sgl, ubuf->sg->nents, direction);
return 0;
}
static const struct dma_buf_ops udmabuf_ops = {
.cache_sgt_mapping = true,
.map_dma_buf = map_udmabuf,
.unmap_dma_buf = unmap_udmabuf,
.release = release_udmabuf,
.mmap = mmap_udmabuf,
.vmap = vmap_udmabuf,
.vunmap = vunmap_udmabuf,
.begin_cpu_access = begin_cpu_udmabuf,
.end_cpu_access = end_cpu_udmabuf,
};
#define SEALS_WANTED (F_SEAL_SHRINK)
#define SEALS_DENIED (F_SEAL_WRITE)
udmabuf: convert udmabuf driver to use folios This is mainly a preparatory patch to use memfd_pin_folios() API for pinning folios. Using folios instead of pages makes sense as the udmabuf driver needs to handle both shmem and hugetlb cases. And, using the memfd_pin_folios() API makes this easier as we no longer need to separately handle shmem vs hugetlb cases in the udmabuf driver. Note that, the function vmap_udmabuf() still needs a list of pages; so, we collect all the head pages into a local array in this case. Other changes in this patch include the addition of helpers for checking the memfd seals and exporting dmabuf. Moving code from udmabuf_create() into these helpers improves readability given that udmabuf_create() is a bit long. Link: https://lkml.kernel.org/r/20240624063952.1572359-8-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:15 +00:00
static int check_memfd_seals(struct file *memfd)
{
int seals;
if (!memfd)
return -EBADFD;
if (!shmem_file(memfd) && !is_file_hugepages(memfd))
return -EBADFD;
seals = memfd_fcntl(memfd, F_GET_SEALS, 0);
if (seals == -EINVAL)
return -EBADFD;
if ((seals & SEALS_WANTED) != SEALS_WANTED ||
(seals & SEALS_DENIED) != 0)
return -EINVAL;
return 0;
}
static int export_udmabuf(struct udmabuf *ubuf,
struct miscdevice *device,
u32 flags)
{
DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
struct dma_buf *buf;
ubuf->device = device;
exp_info.ops = &udmabuf_ops;
exp_info.size = ubuf->pagecount << PAGE_SHIFT;
exp_info.priv = ubuf;
exp_info.flags = O_RDWR;
buf = dma_buf_export(&exp_info);
if (IS_ERR(buf))
return PTR_ERR(buf);
return dma_buf_fd(buf, flags);
}
static long udmabuf_create(struct miscdevice *device,
struct udmabuf_create_list *head,
struct udmabuf_create_item *list)
{
pgoff_t pgoff, pgcnt, pglimit, pgbuf = 0;
long nr_folios, ret = -EINVAL;
struct file *memfd = NULL;
struct folio **folios;
struct udmabuf *ubuf;
u32 i, j, k, flags;
loff_t end;
ubuf = kzalloc(sizeof(*ubuf), GFP_KERNEL);
if (!ubuf)
return -ENOMEM;
INIT_LIST_HEAD(&ubuf->unpin_list);
pglimit = (size_limit_mb * 1024 * 1024) >> PAGE_SHIFT;
for (i = 0; i < head->count; i++) {
if (!IS_ALIGNED(list[i].offset, PAGE_SIZE))
goto err;
if (!IS_ALIGNED(list[i].size, PAGE_SIZE))
goto err;
ubuf->pagecount += list[i].size >> PAGE_SHIFT;
if (ubuf->pagecount > pglimit)
goto err;
}
if (!ubuf->pagecount)
goto err;
udmabuf: convert udmabuf driver to use folios This is mainly a preparatory patch to use memfd_pin_folios() API for pinning folios. Using folios instead of pages makes sense as the udmabuf driver needs to handle both shmem and hugetlb cases. And, using the memfd_pin_folios() API makes this easier as we no longer need to separately handle shmem vs hugetlb cases in the udmabuf driver. Note that, the function vmap_udmabuf() still needs a list of pages; so, we collect all the head pages into a local array in this case. Other changes in this patch include the addition of helpers for checking the memfd seals and exporting dmabuf. Moving code from udmabuf_create() into these helpers improves readability given that udmabuf_create() is a bit long. Link: https://lkml.kernel.org/r/20240624063952.1572359-8-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:15 +00:00
ubuf->folios = kmalloc_array(ubuf->pagecount, sizeof(*ubuf->folios),
GFP_KERNEL);
udmabuf: convert udmabuf driver to use folios This is mainly a preparatory patch to use memfd_pin_folios() API for pinning folios. Using folios instead of pages makes sense as the udmabuf driver needs to handle both shmem and hugetlb cases. And, using the memfd_pin_folios() API makes this easier as we no longer need to separately handle shmem vs hugetlb cases in the udmabuf driver. Note that, the function vmap_udmabuf() still needs a list of pages; so, we collect all the head pages into a local array in this case. Other changes in this patch include the addition of helpers for checking the memfd seals and exporting dmabuf. Moving code from udmabuf_create() into these helpers improves readability given that udmabuf_create() is a bit long. Link: https://lkml.kernel.org/r/20240624063952.1572359-8-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:15 +00:00
if (!ubuf->folios) {
ret = -ENOMEM;
goto err;
}
udmabuf: add back support for mapping hugetlb pages A user or admin can configure a VMM (Qemu) Guest's memory to be backed by hugetlb pages for various reasons. However, a Guest OS would still allocate (and pin) buffers that are backed by regular 4k sized pages. In order to map these buffers and create dma-bufs for them on the Host, we first need to find the hugetlb pages where the buffer allocations are located and then determine the offsets of individual chunks (within those pages) and use this information to eventually populate a scatterlist. Testcase: default_hugepagesz=2M hugepagesz=2M hugepages=2500 options were passed to the Host kernel and Qemu was launched with these relevant options: qemu-system-x86_64 -m 4096m.... -device virtio-gpu-pci,max_outputs=1,blob=true,xres=1920,yres=1080 -display gtk,gl=on -object memory-backend-memfd,hugetlb=on,id=mem1,size=4096M -machine memory-backend=mem1 Replacing -display gtk,gl=on with -display gtk,gl=off above would exercise the mmap handler. Link: https://lkml.kernel.org/r/20240624063952.1572359-7-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Mike Kravetz <mike.kravetz@oracle.com> (v2) Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:14 +00:00
ubuf->offsets = kcalloc(ubuf->pagecount, sizeof(*ubuf->offsets),
GFP_KERNEL);
if (!ubuf->offsets) {
ret = -ENOMEM;
goto err;
}
pgbuf = 0;
for (i = 0; i < head->count; i++) {
memfd = fget(list[i].memfd);
udmabuf: convert udmabuf driver to use folios This is mainly a preparatory patch to use memfd_pin_folios() API for pinning folios. Using folios instead of pages makes sense as the udmabuf driver needs to handle both shmem and hugetlb cases. And, using the memfd_pin_folios() API makes this easier as we no longer need to separately handle shmem vs hugetlb cases in the udmabuf driver. Note that, the function vmap_udmabuf() still needs a list of pages; so, we collect all the head pages into a local array in this case. Other changes in this patch include the addition of helpers for checking the memfd seals and exporting dmabuf. Moving code from udmabuf_create() into these helpers improves readability given that udmabuf_create() is a bit long. Link: https://lkml.kernel.org/r/20240624063952.1572359-8-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:15 +00:00
ret = check_memfd_seals(memfd);
if (ret < 0)
goto err;
udmabuf: add back support for mapping hugetlb pages A user or admin can configure a VMM (Qemu) Guest's memory to be backed by hugetlb pages for various reasons. However, a Guest OS would still allocate (and pin) buffers that are backed by regular 4k sized pages. In order to map these buffers and create dma-bufs for them on the Host, we first need to find the hugetlb pages where the buffer allocations are located and then determine the offsets of individual chunks (within those pages) and use this information to eventually populate a scatterlist. Testcase: default_hugepagesz=2M hugepagesz=2M hugepages=2500 options were passed to the Host kernel and Qemu was launched with these relevant options: qemu-system-x86_64 -m 4096m.... -device virtio-gpu-pci,max_outputs=1,blob=true,xres=1920,yres=1080 -display gtk,gl=on -object memory-backend-memfd,hugetlb=on,id=mem1,size=4096M -machine memory-backend=mem1 Replacing -display gtk,gl=on with -display gtk,gl=off above would exercise the mmap handler. Link: https://lkml.kernel.org/r/20240624063952.1572359-7-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Mike Kravetz <mike.kravetz@oracle.com> (v2) Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:14 +00:00
pgcnt = list[i].size >> PAGE_SHIFT;
folios = kmalloc_array(pgcnt, sizeof(*folios), GFP_KERNEL);
if (!folios) {
ret = -ENOMEM;
goto err;
}
end = list[i].offset + (pgcnt << PAGE_SHIFT) - 1;
ret = memfd_pin_folios(memfd, list[i].offset, end,
folios, pgcnt, &pgoff);
if (ret <= 0) {
kfree(folios);
if (!ret)
ret = -EINVAL;
udmabuf: add back support for mapping hugetlb pages A user or admin can configure a VMM (Qemu) Guest's memory to be backed by hugetlb pages for various reasons. However, a Guest OS would still allocate (and pin) buffers that are backed by regular 4k sized pages. In order to map these buffers and create dma-bufs for them on the Host, we first need to find the hugetlb pages where the buffer allocations are located and then determine the offsets of individual chunks (within those pages) and use this information to eventually populate a scatterlist. Testcase: default_hugepagesz=2M hugepagesz=2M hugepages=2500 options were passed to the Host kernel and Qemu was launched with these relevant options: qemu-system-x86_64 -m 4096m.... -device virtio-gpu-pci,max_outputs=1,blob=true,xres=1920,yres=1080 -display gtk,gl=on -object memory-backend-memfd,hugetlb=on,id=mem1,size=4096M -machine memory-backend=mem1 Replacing -display gtk,gl=on with -display gtk,gl=off above would exercise the mmap handler. Link: https://lkml.kernel.org/r/20240624063952.1572359-7-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Mike Kravetz <mike.kravetz@oracle.com> (v2) Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:14 +00:00
goto err;
}
nr_folios = ret;
pgoff >>= PAGE_SHIFT;
for (j = 0, k = 0; j < pgcnt; j++) {
ubuf->folios[pgbuf] = folios[k];
ubuf->offsets[pgbuf] = pgoff << PAGE_SHIFT;
if (j == 0 || ubuf->folios[pgbuf-1] != folios[k]) {
ret = add_to_unpin_list(&ubuf->unpin_list,
folios[k]);
if (ret < 0) {
kfree(folios);
goto err;
}
}
pgbuf++;
if (++pgoff == folio_nr_pages(folios[k])) {
pgoff = 0;
if (++k == nr_folios)
break;
}
}
udmabuf: add back support for mapping hugetlb pages A user or admin can configure a VMM (Qemu) Guest's memory to be backed by hugetlb pages for various reasons. However, a Guest OS would still allocate (and pin) buffers that are backed by regular 4k sized pages. In order to map these buffers and create dma-bufs for them on the Host, we first need to find the hugetlb pages where the buffer allocations are located and then determine the offsets of individual chunks (within those pages) and use this information to eventually populate a scatterlist. Testcase: default_hugepagesz=2M hugepagesz=2M hugepages=2500 options were passed to the Host kernel and Qemu was launched with these relevant options: qemu-system-x86_64 -m 4096m.... -device virtio-gpu-pci,max_outputs=1,blob=true,xres=1920,yres=1080 -display gtk,gl=on -object memory-backend-memfd,hugetlb=on,id=mem1,size=4096M -machine memory-backend=mem1 Replacing -display gtk,gl=on with -display gtk,gl=off above would exercise the mmap handler. Link: https://lkml.kernel.org/r/20240624063952.1572359-7-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Mike Kravetz <mike.kravetz@oracle.com> (v2) Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:14 +00:00
kfree(folios);
fput(memfd);
memfd = NULL;
}
udmabuf: convert udmabuf driver to use folios This is mainly a preparatory patch to use memfd_pin_folios() API for pinning folios. Using folios instead of pages makes sense as the udmabuf driver needs to handle both shmem and hugetlb cases. And, using the memfd_pin_folios() API makes this easier as we no longer need to separately handle shmem vs hugetlb cases in the udmabuf driver. Note that, the function vmap_udmabuf() still needs a list of pages; so, we collect all the head pages into a local array in this case. Other changes in this patch include the addition of helpers for checking the memfd seals and exporting dmabuf. Moving code from udmabuf_create() into these helpers improves readability given that udmabuf_create() is a bit long. Link: https://lkml.kernel.org/r/20240624063952.1572359-8-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:15 +00:00
flags = head->flags & UDMABUF_FLAGS_CLOEXEC ? O_CLOEXEC : 0;
ret = export_udmabuf(ubuf, device, flags);
if (ret < 0)
goto err;
udmabuf: convert udmabuf driver to use folios This is mainly a preparatory patch to use memfd_pin_folios() API for pinning folios. Using folios instead of pages makes sense as the udmabuf driver needs to handle both shmem and hugetlb cases. And, using the memfd_pin_folios() API makes this easier as we no longer need to separately handle shmem vs hugetlb cases in the udmabuf driver. Note that, the function vmap_udmabuf() still needs a list of pages; so, we collect all the head pages into a local array in this case. Other changes in this patch include the addition of helpers for checking the memfd seals and exporting dmabuf. Moving code from udmabuf_create() into these helpers improves readability given that udmabuf_create() is a bit long. Link: https://lkml.kernel.org/r/20240624063952.1572359-8-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:15 +00:00
return ret;
err:
if (memfd)
fput(memfd);
unpin_all_folios(&ubuf->unpin_list);
udmabuf: add back support for mapping hugetlb pages A user or admin can configure a VMM (Qemu) Guest's memory to be backed by hugetlb pages for various reasons. However, a Guest OS would still allocate (and pin) buffers that are backed by regular 4k sized pages. In order to map these buffers and create dma-bufs for them on the Host, we first need to find the hugetlb pages where the buffer allocations are located and then determine the offsets of individual chunks (within those pages) and use this information to eventually populate a scatterlist. Testcase: default_hugepagesz=2M hugepagesz=2M hugepages=2500 options were passed to the Host kernel and Qemu was launched with these relevant options: qemu-system-x86_64 -m 4096m.... -device virtio-gpu-pci,max_outputs=1,blob=true,xres=1920,yres=1080 -display gtk,gl=on -object memory-backend-memfd,hugetlb=on,id=mem1,size=4096M -machine memory-backend=mem1 Replacing -display gtk,gl=on with -display gtk,gl=off above would exercise the mmap handler. Link: https://lkml.kernel.org/r/20240624063952.1572359-7-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Mike Kravetz <mike.kravetz@oracle.com> (v2) Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:14 +00:00
kfree(ubuf->offsets);
udmabuf: convert udmabuf driver to use folios This is mainly a preparatory patch to use memfd_pin_folios() API for pinning folios. Using folios instead of pages makes sense as the udmabuf driver needs to handle both shmem and hugetlb cases. And, using the memfd_pin_folios() API makes this easier as we no longer need to separately handle shmem vs hugetlb cases in the udmabuf driver. Note that, the function vmap_udmabuf() still needs a list of pages; so, we collect all the head pages into a local array in this case. Other changes in this patch include the addition of helpers for checking the memfd seals and exporting dmabuf. Moving code from udmabuf_create() into these helpers improves readability given that udmabuf_create() is a bit long. Link: https://lkml.kernel.org/r/20240624063952.1572359-8-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Hugh Dickins <hughd@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: Shuah Khan <shuah@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:15 +00:00
kfree(ubuf->folios);
kfree(ubuf);
return ret;
}
static long udmabuf_ioctl_create(struct file *filp, unsigned long arg)
{
struct udmabuf_create create;
struct udmabuf_create_list head;
struct udmabuf_create_item list;
if (copy_from_user(&create, (void __user *)arg,
sizeof(create)))
return -EFAULT;
head.flags = create.flags;
head.count = 1;
list.memfd = create.memfd;
list.offset = create.offset;
list.size = create.size;
return udmabuf_create(filp->private_data, &head, &list);
}
static long udmabuf_ioctl_create_list(struct file *filp, unsigned long arg)
{
struct udmabuf_create_list head;
struct udmabuf_create_item *list;
int ret = -EINVAL;
u32 lsize;
if (copy_from_user(&head, (void __user *)arg, sizeof(head)))
return -EFAULT;
if (head.count > list_limit)
return -EINVAL;
lsize = sizeof(struct udmabuf_create_item) * head.count;
list = memdup_user((void __user *)(arg + sizeof(head)), lsize);
if (IS_ERR(list))
return PTR_ERR(list);
ret = udmabuf_create(filp->private_data, &head, list);
kfree(list);
return ret;
}
static long udmabuf_ioctl(struct file *filp, unsigned int ioctl,
unsigned long arg)
{
long ret;
switch (ioctl) {
case UDMABUF_CREATE:
ret = udmabuf_ioctl_create(filp, arg);
break;
case UDMABUF_CREATE_LIST:
ret = udmabuf_ioctl_create_list(filp, arg);
break;
default:
ret = -ENOTTY;
break;
}
return ret;
}
static const struct file_operations udmabuf_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = udmabuf_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = udmabuf_ioctl,
#endif
};
static struct miscdevice udmabuf_misc = {
.minor = MISC_DYNAMIC_MINOR,
.name = "udmabuf",
.fops = &udmabuf_fops,
};
static int __init udmabuf_dev_init(void)
{
udmabuf: Set the DMA mask for the udmabuf device (v2) If the DMA mask is not set explicitly, the following warning occurs when the userspace tries to access the dma-buf via the CPU as reported by syzbot here: WARNING: CPU: 1 PID: 3595 at kernel/dma/mapping.c:188 __dma_map_sg_attrs+0x181/0x1f0 kernel/dma/mapping.c:188 Modules linked in: CPU: 0 PID: 3595 Comm: syz-executor249 Not tainted 5.17.0-rc2-syzkaller-00316-g0457e5153e0e #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__dma_map_sg_attrs+0x181/0x1f0 kernel/dma/mapping.c:188 Code: 00 00 00 00 00 fc ff df 48 c1 e8 03 80 3c 10 00 75 71 4c 8b 3d c0 83 b5 0d e9 db fe ff ff e8 b6 0f 13 00 0f 0b e8 af 0f 13 00 <0f> 0b 45 31 e4 e9 54 ff ff ff e8 a0 0f 13 00 49 8d 7f 50 48 b8 00 RSP: 0018:ffffc90002a07d68 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff88807e25e2c0 RSI: ffffffff81649e91 RDI: ffff88801b848408 RBP: ffff88801b848000 R08: 0000000000000002 R09: ffff88801d86c74f R10: ffffffff81649d72 R11: 0000000000000001 R12: 0000000000000002 R13: ffff88801d86c680 R14: 0000000000000001 R15: 0000000000000000 FS: 0000555556e30300(0000) GS:ffff8880b9d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000200000cc CR3: 000000001d74a000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <TASK> dma_map_sgtable+0x70/0xf0 kernel/dma/mapping.c:264 get_sg_table.isra.0+0xe0/0x160 drivers/dma-buf/udmabuf.c:72 begin_cpu_udmabuf+0x130/0x1d0 drivers/dma-buf/udmabuf.c:126 dma_buf_begin_cpu_access+0xfd/0x1d0 drivers/dma-buf/dma-buf.c:1164 dma_buf_ioctl+0x259/0x2b0 drivers/dma-buf/dma-buf.c:363 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:874 [inline] __se_sys_ioctl fs/ioctl.c:860 [inline] __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f62fcf530f9 Code: 28 c3 e8 2a 14 00 00 66 2e 0f 1f 84 00 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffe3edab9b8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f62fcf530f9 RDX: 0000000020000200 RSI: 0000000040086200 RDI: 0000000000000006 RBP: 00007f62fcf170e0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f62fcf17170 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 </TASK> v2: Dont't forget to deregister if DMA mask setup fails. Reported-by: syzbot+10e27961f4da37c443b2@syzkaller.appspotmail.com Cc: Gerd Hoffmann <kraxel@redhat.com> Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20220520205235.3687336-1-vivek.kasireddy@intel.com Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2022-05-20 20:52:35 +00:00
int ret;
ret = misc_register(&udmabuf_misc);
if (ret < 0) {
pr_err("Could not initialize udmabuf device\n");
return ret;
}
ret = dma_coerce_mask_and_coherent(udmabuf_misc.this_device,
DMA_BIT_MASK(64));
if (ret < 0) {
pr_err("Could not setup DMA mask for udmabuf device\n");
misc_deregister(&udmabuf_misc);
return ret;
}
return 0;
}
static void __exit udmabuf_dev_exit(void)
{
misc_deregister(&udmabuf_misc);
}
module_init(udmabuf_dev_init)
module_exit(udmabuf_dev_exit)
MODULE_AUTHOR("Gerd Hoffmann <kraxel@redhat.com>");