2018-08-27 09:34:44 +00:00
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
2018-09-11 13:42:04 +00:00
|
|
|
#include <linux/cred.h>
|
2018-08-27 09:34:44 +00:00
|
|
|
#include <linux/device.h>
|
|
|
|
#include <linux/dma-buf.h>
|
2022-11-10 20:13:46 +00:00
|
|
|
#include <linux/dma-resv.h>
|
2018-08-27 09:34:44 +00:00
|
|
|
#include <linux/highmem.h>
|
2018-09-11 13:42:04 +00:00
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/kernel.h>
|
2018-08-27 09:34:44 +00:00
|
|
|
#include <linux/memfd.h>
|
2018-09-11 13:42:04 +00:00
|
|
|
#include <linux/miscdevice.h>
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/shmem_fs.h>
|
udmabuf: add back support for mapping hugetlb pages
A user or admin can configure a VMM (Qemu) Guest's memory to be backed by
hugetlb pages for various reasons. However, a Guest OS would still
allocate (and pin) buffers that are backed by regular 4k sized pages. In
order to map these buffers and create dma-bufs for them on the Host, we
first need to find the hugetlb pages where the buffer allocations are
located and then determine the offsets of individual chunks (within those
pages) and use this information to eventually populate a scatterlist.
Testcase: default_hugepagesz=2M hugepagesz=2M hugepages=2500 options
were passed to the Host kernel and Qemu was launched with these
relevant options: qemu-system-x86_64 -m 4096m....
-device virtio-gpu-pci,max_outputs=1,blob=true,xres=1920,yres=1080
-display gtk,gl=on
-object memory-backend-memfd,hugetlb=on,id=mem1,size=4096M
-machine memory-backend=mem1
Replacing -display gtk,gl=on with -display gtk,gl=off above would
exercise the mmap handler.
Link: https://lkml.kernel.org/r/20240624063952.1572359-7-vivek.kasireddy@intel.com
Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
Acked-by: Mike Kravetz <mike.kravetz@oracle.com> (v2)
Acked-by: Dave Airlie <airlied@redhat.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Hugh Dickins <hughd@google.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Dongwon Kim <dongwon.kim@intel.com>
Cc: Junxiao Chang <junxiao.chang@intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:14 +00:00
|
|
|
#include <linux/hugetlb.h>
|
2018-09-11 13:42:04 +00:00
|
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/udmabuf.h>
|
2022-11-17 17:18:09 +00:00
|
|
|
#include <linux/vmalloc.h>
|
|
|
|
#include <linux/iosys-map.h>
|
2018-08-27 09:34:44 +00:00
|
|
|
|
2021-06-11 21:21:07 +00:00
|
|
|
/* Upper bound on udmabuf_create_list->count for a single ioctl. */
static int list_limit = 1024;
module_param(list_limit, int, 0644);
MODULE_PARM_DESC(list_limit, "udmabuf_create_list->count limit. Default is 1024.");

/* Upper bound on the total size of one created dma-buf, in MiB. */
static int size_limit_mb = 64;
module_param(size_limit_mb, int, 0644);
MODULE_PARM_DESC(size_limit_mb, "Max size of a dmabuf, in megabytes. Default is 64.");
|
2018-09-11 13:42:10 +00:00
|
|
|
|
2018-08-27 09:34:44 +00:00
|
|
|
/*
 * Per-buffer state for a dma-buf created from userspace memfd memory.
 * The buffer is described as an array of PAGE_SIZE chunks; each chunk
 * references a backing folio at a byte offset within that folio (the
 * offset matters when the folio is larger than a page, e.g. hugetlb).
 */
struct udmabuf {
	pgoff_t pagecount;		/* buffer size in PAGE_SIZE units */
	struct folio **folios;		/* backing folio for each page chunk */
	struct sg_table *sg;		/* cached sg table from begin_cpu_udmabuf() */
	struct miscdevice *device;	/* misc device whose this_device is used for DMA */
	pgoff_t *offsets;		/* per-chunk byte offset into its folio */
	struct list_head unpin_list;	/* udmabuf_folio entries to unpin on release */
};
|
|
|
|
|
|
|
|
/*
 * Bookkeeping node: records one pinned folio on a udmabuf's unpin_list
 * so the pin can be dropped when the dma-buf is released.
 */
struct udmabuf_folio {
	struct folio *folio;		/* the pinned folio */
	struct list_head list;		/* link on udmabuf::unpin_list */
};
|
|
|
|
|
2019-01-03 23:26:34 +00:00
|
|
|
static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
|
2018-08-27 09:34:44 +00:00
|
|
|
{
|
|
|
|
struct vm_area_struct *vma = vmf->vma;
|
|
|
|
struct udmabuf *ubuf = vma->vm_private_data;
|
2022-06-20 07:15:47 +00:00
|
|
|
pgoff_t pgoff = vmf->pgoff;
|
2024-06-24 06:36:13 +00:00
|
|
|
unsigned long pfn;
|
2018-08-27 09:34:44 +00:00
|
|
|
|
2022-06-20 07:15:47 +00:00
|
|
|
if (pgoff >= ubuf->pagecount)
|
|
|
|
return VM_FAULT_SIGBUS;
|
2024-06-24 06:36:13 +00:00
|
|
|
|
2024-06-24 06:36:15 +00:00
|
|
|
pfn = folio_pfn(ubuf->folios[pgoff]);
|
udmabuf: add back support for mapping hugetlb pages
A user or admin can configure a VMM (Qemu) Guest's memory to be backed by
hugetlb pages for various reasons. However, a Guest OS would still
allocate (and pin) buffers that are backed by regular 4k sized pages. In
order to map these buffers and create dma-bufs for them on the Host, we
first need to find the hugetlb pages where the buffer allocations are
located and then determine the offsets of individual chunks (within those
pages) and use this information to eventually populate a scatterlist.
Testcase: default_hugepagesz=2M hugepagesz=2M hugepages=2500 options
were passed to the Host kernel and Qemu was launched with these
relevant options: qemu-system-x86_64 -m 4096m....
-device virtio-gpu-pci,max_outputs=1,blob=true,xres=1920,yres=1080
-display gtk,gl=on
-object memory-backend-memfd,hugetlb=on,id=mem1,size=4096M
-machine memory-backend=mem1
Replacing -display gtk,gl=on with -display gtk,gl=off above would
exercise the mmap handler.
Link: https://lkml.kernel.org/r/20240624063952.1572359-7-vivek.kasireddy@intel.com
Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
Acked-by: Mike Kravetz <mike.kravetz@oracle.com> (v2)
Acked-by: Dave Airlie <airlied@redhat.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Hugh Dickins <hughd@google.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Dongwon Kim <dongwon.kim@intel.com>
Cc: Junxiao Chang <junxiao.chang@intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-24 06:36:14 +00:00
|
|
|
pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT;
|
|
|
|
|
2024-06-24 06:36:13 +00:00
|
|
|
return vmf_insert_pfn(vma, vmf->address, pfn);
|
2018-08-27 09:34:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* VM operations for userspace mmaps of the dma-buf (PFN-map faults). */
static const struct vm_operations_struct udmabuf_vm_ops = {
	.fault = udmabuf_vm_fault,
};
|
|
|
|
|
|
|
|
static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct *vma)
|
|
|
|
{
|
|
|
|
struct udmabuf *ubuf = buf->priv;
|
|
|
|
|
|
|
|
if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
vma->vm_ops = &udmabuf_vm_ops;
|
|
|
|
vma->vm_private_data = ubuf;
|
2024-06-24 06:36:13 +00:00
|
|
|
vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
|
2018-08-27 09:34:44 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2022-11-17 17:18:09 +00:00
|
|
|
static int vmap_udmabuf(struct dma_buf *buf, struct iosys_map *map)
|
|
|
|
{
|
|
|
|
struct udmabuf *ubuf = buf->priv;
|
2024-06-24 06:36:15 +00:00
|
|
|
struct page **pages;
|
2022-11-17 17:18:09 +00:00
|
|
|
void *vaddr;
|
2024-06-24 06:36:15 +00:00
|
|
|
pgoff_t pg;
|
2022-11-17 17:18:09 +00:00
|
|
|
|
|
|
|
dma_resv_assert_held(buf->resv);
|
|
|
|
|
2024-06-24 06:36:15 +00:00
|
|
|
pages = kmalloc_array(ubuf->pagecount, sizeof(*pages), GFP_KERNEL);
|
|
|
|
if (!pages)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
for (pg = 0; pg < ubuf->pagecount; pg++)
|
|
|
|
pages[pg] = &ubuf->folios[pg]->page;
|
|
|
|
|
|
|
|
vaddr = vm_map_ram(pages, ubuf->pagecount, -1);
|
|
|
|
kfree(pages);
|
2022-11-17 17:18:09 +00:00
|
|
|
if (!vaddr)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
iosys_map_set_vaddr(map, vaddr);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* dma-buf vunmap hook: tear down the mapping created by vmap_udmabuf(). */
static void vunmap_udmabuf(struct dma_buf *buf, struct iosys_map *map)
{
	struct udmabuf *ubuf = buf->priv;

	dma_resv_assert_held(buf->resv);

	vm_unmap_ram(map->vaddr, ubuf->pagecount);
}
|
|
|
|
|
2019-12-03 01:36:26 +00:00
|
|
|
/*
 * Build and DMA-map a scatter/gather table covering the whole buffer
 * for @dev.  Each entry describes one PAGE_SIZE chunk of its backing
 * folio at the recorded per-chunk offset (the offset is non-zero when
 * several chunks live inside one large folio, e.g. hugetlb).
 *
 * Returns the mapped sg_table on success or an ERR_PTR on failure;
 * the caller owns the table and must release it via put_sg_table().
 */
static struct sg_table *get_sg_table(struct device *dev, struct dma_buf *buf,
				     enum dma_data_direction direction)
{
	struct udmabuf *ubuf = buf->priv;
	struct sg_table *sg;
	struct scatterlist *sgl;
	unsigned int i = 0;
	int ret;

	sg = kzalloc(sizeof(*sg), GFP_KERNEL);
	if (!sg)
		return ERR_PTR(-ENOMEM);

	ret = sg_alloc_table(sg, ubuf->pagecount, GFP_KERNEL);
	if (ret < 0)
		goto err_alloc;

	/* one sg entry per page-sized chunk, pointing into its folio */
	for_each_sg(sg->sgl, sgl, ubuf->pagecount, i)
		sg_set_folio(sgl, ubuf->folios[i], PAGE_SIZE,
			     ubuf->offsets[i]);

	ret = dma_map_sgtable(dev, sg, direction, 0);
	if (ret < 0)
		goto err_map;
	return sg;

err_map:
	sg_free_table(sg);
err_alloc:
	kfree(sg);
	return ERR_PTR(ret);
}
|
|
|
|
|
2019-12-03 01:36:26 +00:00
|
|
|
/*
 * Release a table produced by get_sg_table(): unmap it from the device,
 * then free the table and its container.
 */
static void put_sg_table(struct device *dev, struct sg_table *sg,
			 enum dma_data_direction direction)
{
	dma_unmap_sgtable(dev, sg, direction, 0);
	sg_free_table(sg);
	kfree(sg);
}
|
|
|
|
|
|
|
|
/* dma-buf map_dma_buf hook: build a device mapping for the attachment. */
static struct sg_table *map_udmabuf(struct dma_buf_attachment *at,
				    enum dma_data_direction direction)
{
	return get_sg_table(at->dev, at->dmabuf, direction);
}
|
|
|
|
|
2018-08-27 09:34:44 +00:00
|
|
|
static void unmap_udmabuf(struct dma_buf_attachment *at,
|
|
|
|
struct sg_table *sg,
|
|
|
|
enum dma_data_direction direction)
|
|
|
|
{
|
2019-12-03 01:36:26 +00:00
|
|
|
return put_sg_table(at->dev, sg, direction);
|
2018-08-27 09:34:44 +00:00
|
|
|
}
|
|
|
|
|
2024-06-24 06:36:16 +00:00
|
|
|
static void unpin_all_folios(struct list_head *unpin_list)
|
|
|
|
{
|
|
|
|
struct udmabuf_folio *ubuf_folio;
|
|
|
|
|
|
|
|
while (!list_empty(unpin_list)) {
|
|
|
|
ubuf_folio = list_first_entry(unpin_list,
|
|
|
|
struct udmabuf_folio, list);
|
|
|
|
unpin_folio(ubuf_folio->folio);
|
|
|
|
|
|
|
|
list_del(&ubuf_folio->list);
|
|
|
|
kfree(ubuf_folio);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int add_to_unpin_list(struct list_head *unpin_list,
|
|
|
|
struct folio *folio)
|
|
|
|
{
|
|
|
|
struct udmabuf_folio *ubuf_folio;
|
|
|
|
|
|
|
|
ubuf_folio = kzalloc(sizeof(*ubuf_folio), GFP_KERNEL);
|
|
|
|
if (!ubuf_folio)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
ubuf_folio->folio = folio;
|
|
|
|
list_add_tail(&ubuf_folio->list, unpin_list);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-08-27 09:34:44 +00:00
|
|
|
/*
 * dma-buf release hook: tear down the cached device mapping (if the
 * buffer was ever mapped via begin_cpu_udmabuf), unpin all backing
 * folios, then free every allocation owned by the udmabuf.
 */
static void release_udmabuf(struct dma_buf *buf)
{
	struct udmabuf *ubuf = buf->priv;
	struct device *dev = ubuf->device->this_device;

	if (ubuf->sg)
		put_sg_table(dev, ubuf->sg, DMA_BIDIRECTIONAL);

	unpin_all_folios(&ubuf->unpin_list);
	kfree(ubuf->offsets);
	kfree(ubuf->folios);
	kfree(ubuf);
}
|
|
|
|
|
2019-12-03 01:36:27 +00:00
|
|
|
/*
 * Move the buffer into the CPU domain so userspace/kernel CPU access is
 * coherent.  On first use the sg table is created and cached; on later
 * calls the cached mapping is synced for CPU access.
 */
static int begin_cpu_udmabuf(struct dma_buf *buf,
			     enum dma_data_direction direction)
{
	struct udmabuf *ubuf = buf->priv;
	struct device *dev = ubuf->device->this_device;
	int ret = 0;

	if (!ubuf->sg) {
		ubuf->sg = get_sg_table(dev, buf, direction);
		if (IS_ERR(ubuf->sg)) {
			ret = PTR_ERR(ubuf->sg);
			/* don't cache the error pointer as a valid table */
			ubuf->sg = NULL;
		}
	} else {
		dma_sync_sg_for_cpu(dev, ubuf->sg->sgl, ubuf->sg->nents,
				    direction);
	}

	return ret;
}
|
|
|
|
|
|
|
|
/*
 * Hand the buffer back to the device domain after CPU access.  Fails
 * with -EINVAL if no mapping exists (begin_cpu_udmabuf was never
 * called successfully).
 */
static int end_cpu_udmabuf(struct dma_buf *buf,
			   enum dma_data_direction direction)
{
	struct udmabuf *ubuf = buf->priv;
	struct device *dev = ubuf->device->this_device;

	if (!ubuf->sg)
		return -EINVAL;

	dma_sync_sg_for_device(dev, ubuf->sg->sgl, ubuf->sg->nents, direction);
	return 0;
}
|
|
|
|
|
2018-09-11 13:42:07 +00:00
|
|
|
/*
 * dma-buf exporter callbacks for udmabuf-backed buffers.
 * cache_sgt_mapping makes the core cache the sg-table across attach
 * cycles, which is what lets {begin,end}_cpu_access reuse ubuf->sg.
 */
static const struct dma_buf_ops udmabuf_ops = {
	.cache_sgt_mapping = true,
	.map_dma_buf = map_udmabuf,
	.unmap_dma_buf = unmap_udmabuf,
	.release = release_udmabuf,
	.mmap = mmap_udmabuf,
	.vmap = vmap_udmabuf,
	.vunmap = vunmap_udmabuf,
	.begin_cpu_access = begin_cpu_udmabuf,
	.end_cpu_access = end_cpu_udmabuf,
};
|
|
|
|
|
|
|
|
#define SEALS_WANTED (F_SEAL_SHRINK)
|
|
|
|
#define SEALS_DENIED (F_SEAL_WRITE)
|
|
|
|
|
2024-06-24 06:36:15 +00:00
|
|
|
static int check_memfd_seals(struct file *memfd)
|
|
|
|
{
|
|
|
|
int seals;
|
|
|
|
|
|
|
|
if (!memfd)
|
|
|
|
return -EBADFD;
|
|
|
|
|
|
|
|
if (!shmem_file(memfd) && !is_file_hugepages(memfd))
|
|
|
|
return -EBADFD;
|
|
|
|
|
|
|
|
seals = memfd_fcntl(memfd, F_GET_SEALS, 0);
|
|
|
|
if (seals == -EINVAL)
|
|
|
|
return -EBADFD;
|
|
|
|
|
|
|
|
if ((seals & SEALS_WANTED) != SEALS_WANTED ||
|
|
|
|
(seals & SEALS_DENIED) != 0)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Wrap a fully populated udmabuf in a dma-buf and install it as a file
 * descriptor in the calling process.
 *
 * @ubuf:   buffer whose folios/offsets arrays are already filled in
 * @device: the /dev/udmabuf miscdevice; stashed so the sync hooks can
 *          reach device->this_device
 * @flags:  fd flags (O_CLOEXEC or 0) forwarded to dma_buf_fd()
 *
 * Returns the new fd (>= 0) on success or a negative errno.
 *
 * NOTE(review): once dma_buf_export() succeeds the dma-buf's priv owns
 * @ubuf, yet on a dma_buf_fd() failure the caller's error path also
 * tears @ubuf down — confirm release ordering cannot double-free.
 */
static int export_udmabuf(struct udmabuf *ubuf,
			  struct miscdevice *device,
			  u32 flags)
{
	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
	struct dma_buf *buf;

	ubuf->device = device;
	exp_info.ops  = &udmabuf_ops;
	exp_info.size = ubuf->pagecount << PAGE_SHIFT;
	exp_info.priv = ubuf;
	exp_info.flags = O_RDWR;

	buf = dma_buf_export(&exp_info);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	return dma_buf_fd(buf, flags);
}
|
|
|
|
|
2019-12-03 01:36:25 +00:00
|
|
|
/*
 * Build a udmabuf from a list of (memfd, offset, size) ranges and
 * export it as a dma-buf fd.
 *
 * Every range must be PAGE_SIZE aligned and the total page count must
 * stay under the size_limit_mb module limit.  The backing folios are
 * pinned with memfd_pin_folios(); because a single (hugetlb) folio may
 * back many consecutive PAGE_SIZE chunks, ubuf->folios[]/ubuf->offsets[]
 * record one (folio, sub-offset) pair per page.
 *
 * Returns the new dma-buf fd (>= 0) on success or a negative errno.
 */
static long udmabuf_create(struct miscdevice *device,
			   struct udmabuf_create_list *head,
			   struct udmabuf_create_item *list)
{
	pgoff_t pgoff, pgcnt, pglimit, pgbuf = 0;
	long nr_folios, ret = -EINVAL;
	struct file *memfd = NULL;
	struct folio **folios;
	struct udmabuf *ubuf;
	u32 i, j, k, flags;
	loff_t end;

	ubuf = kzalloc(sizeof(*ubuf), GFP_KERNEL);
	if (!ubuf)
		return -ENOMEM;

	INIT_LIST_HEAD(&ubuf->unpin_list);
	/* First pass: validate alignment and accumulate the page count. */
	pglimit = (size_limit_mb * 1024 * 1024) >> PAGE_SHIFT;
	for (i = 0; i < head->count; i++) {
		if (!IS_ALIGNED(list[i].offset, PAGE_SIZE))
			goto err;
		if (!IS_ALIGNED(list[i].size, PAGE_SIZE))
			goto err;
		ubuf->pagecount += list[i].size >> PAGE_SHIFT;
		if (ubuf->pagecount > pglimit)
			goto err;
	}

	/* Zero-sized buffers are rejected outright. */
	if (!ubuf->pagecount)
		goto err;

	ubuf->folios = kmalloc_array(ubuf->pagecount, sizeof(*ubuf->folios),
				     GFP_KERNEL);
	if (!ubuf->folios) {
		ret = -ENOMEM;
		goto err;
	}
	ubuf->offsets = kcalloc(ubuf->pagecount, sizeof(*ubuf->offsets),
				GFP_KERNEL);
	if (!ubuf->offsets) {
		ret = -ENOMEM;
		goto err;
	}

	/* Second pass: pin each range and fan folios out per page. */
	pgbuf = 0;
	for (i = 0; i < head->count; i++) {
		memfd = fget(list[i].memfd);
		ret = check_memfd_seals(memfd);
		if (ret < 0)
			goto err;

		pgcnt = list[i].size >> PAGE_SHIFT;
		/* Scratch array; a folio may cover several pages. */
		folios = kmalloc_array(pgcnt, sizeof(*folios), GFP_KERNEL);
		if (!folios) {
			ret = -ENOMEM;
			goto err;
		}

		end = list[i].offset + (pgcnt << PAGE_SHIFT) - 1;
		ret = memfd_pin_folios(memfd, list[i].offset, end,
				       folios, pgcnt, &pgoff);
		if (ret <= 0) {
			kfree(folios);
			/* Zero folios pinned counts as failure too. */
			if (!ret)
				ret = -EINVAL;
			goto err;
		}

		nr_folios = ret;
		/* pgoff comes back in bytes; convert to a page index. */
		pgoff >>= PAGE_SHIFT;
		for (j = 0, k = 0; j < pgcnt; j++) {
			ubuf->folios[pgbuf] = folios[k];
			ubuf->offsets[pgbuf] = pgoff << PAGE_SHIFT;

			/*
			 * Record each distinct folio once on the unpin
			 * list; consecutive pages of the same folio are
			 * skipped by the pgbuf-1 comparison.
			 */
			if (j == 0 || ubuf->folios[pgbuf-1] != folios[k]) {
				ret = add_to_unpin_list(&ubuf->unpin_list,
							folios[k]);
				if (ret < 0) {
					kfree(folios);
					goto err;
				}
			}

			pgbuf++;
			/* Advance within the folio; step to the next one
			 * when its tail page has been consumed. */
			if (++pgoff == folio_nr_pages(folios[k])) {
				pgoff = 0;
				if (++k == nr_folios)
					break;
			}
		}

		kfree(folios);
		fput(memfd);
		memfd = NULL;
	}

	flags = head->flags & UDMABUF_FLAGS_CLOEXEC ? O_CLOEXEC : 0;
	ret = export_udmabuf(ubuf, device, flags);
	if (ret < 0)
		goto err;

	return ret;

err:
	/*
	 * Unified teardown: drop the in-flight memfd ref (if any), unpin
	 * everything recorded so far, then free the tracking arrays.
	 * NOTE(review): if export_udmabuf() failed after a successful
	 * dma_buf_export(), the dma-buf also references @ubuf — confirm
	 * this path cannot race/double-free with its release hook.
	 */
	if (memfd)
		fput(memfd);
	unpin_all_folios(&ubuf->unpin_list);
	kfree(ubuf->offsets);
	kfree(ubuf->folios);
	kfree(ubuf);
	return ret;
}
|
|
|
|
|
|
|
|
static long udmabuf_ioctl_create(struct file *filp, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct udmabuf_create create;
|
|
|
|
struct udmabuf_create_list head;
|
|
|
|
struct udmabuf_create_item list;
|
|
|
|
|
|
|
|
if (copy_from_user(&create, (void __user *)arg,
|
2018-09-11 13:42:15 +00:00
|
|
|
sizeof(create)))
|
2018-08-27 09:34:44 +00:00
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
head.flags = create.flags;
|
|
|
|
head.count = 1;
|
|
|
|
list.memfd = create.memfd;
|
|
|
|
list.offset = create.offset;
|
|
|
|
list.size = create.size;
|
|
|
|
|
2019-12-03 01:36:25 +00:00
|
|
|
return udmabuf_create(filp->private_data, &head, &list);
|
2018-08-27 09:34:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static long udmabuf_ioctl_create_list(struct file *filp, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct udmabuf_create_list head;
|
|
|
|
struct udmabuf_create_item *list;
|
|
|
|
int ret = -EINVAL;
|
|
|
|
u32 lsize;
|
|
|
|
|
|
|
|
if (copy_from_user(&head, (void __user *)arg, sizeof(head)))
|
|
|
|
return -EFAULT;
|
2018-09-11 13:42:10 +00:00
|
|
|
if (head.count > list_limit)
|
2018-08-27 09:34:44 +00:00
|
|
|
return -EINVAL;
|
|
|
|
lsize = sizeof(struct udmabuf_create_item) * head.count;
|
|
|
|
list = memdup_user((void __user *)(arg + sizeof(head)), lsize);
|
|
|
|
if (IS_ERR(list))
|
|
|
|
return PTR_ERR(list);
|
|
|
|
|
2019-12-03 01:36:25 +00:00
|
|
|
ret = udmabuf_create(filp->private_data, &head, list);
|
2018-08-27 09:34:44 +00:00
|
|
|
kfree(list);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static long udmabuf_ioctl(struct file *filp, unsigned int ioctl,
|
|
|
|
unsigned long arg)
|
|
|
|
{
|
|
|
|
long ret;
|
|
|
|
|
|
|
|
switch (ioctl) {
|
|
|
|
case UDMABUF_CREATE:
|
|
|
|
ret = udmabuf_ioctl_create(filp, arg);
|
|
|
|
break;
|
|
|
|
case UDMABUF_CREATE_LIST:
|
|
|
|
ret = udmabuf_ioctl_create_list(filp, arg);
|
|
|
|
break;
|
|
|
|
default:
|
2018-09-11 13:42:13 +00:00
|
|
|
ret = -ENOTTY;
|
2018-08-27 09:34:44 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Character-device entry points; all functionality is ioctl-based.
 * The native handler is ioctl-compatible for 32-bit userspace, so
 * compat_ioctl reuses it directly.
 */
static const struct file_operations udmabuf_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl = udmabuf_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl   = udmabuf_ioctl,
#endif
};
|
|
|
|
|
|
|
|
/* /dev/udmabuf misc device; minor number assigned dynamically. */
static struct miscdevice udmabuf_misc = {
	.minor          = MISC_DYNAMIC_MINOR,
	.name           = "udmabuf",
	.fops           = &udmabuf_fops,
};
|
|
|
|
|
|
|
|
/*
 * Module init: register /dev/udmabuf, then give its backing device a
 * 64-bit DMA mask (the dma_sync_sg_* hooks operate against it).
 * The device is unregistered again if the mask cannot be applied.
 */
static int __init udmabuf_dev_init(void)
{
	int ret;

	ret = misc_register(&udmabuf_misc);
	if (ret < 0) {
		pr_err("Could not initialize udmabuf device\n");
		goto out;
	}

	ret = dma_coerce_mask_and_coherent(udmabuf_misc.this_device,
					   DMA_BIT_MASK(64));
	if (ret < 0) {
		pr_err("Could not setup DMA mask for udmabuf device\n");
		misc_deregister(&udmabuf_misc);
	}

out:
	return ret;
}
|
|
|
|
|
|
|
|
/* Module exit: remove /dev/udmabuf. */
static void __exit udmabuf_dev_exit(void)
{
	misc_deregister(&udmabuf_misc);
}
|
|
|
|
|
|
|
|
module_init(udmabuf_dev_init)
|
|
|
|
module_exit(udmabuf_dev_exit)
|
|
|
|
|
|
|
|
MODULE_AUTHOR("Gerd Hoffmann <kraxel@redhat.com>");
|