2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
#include <linux/linkage.h>
|
|
|
|
#include <linux/errno.h>
|
|
|
|
|
|
|
|
#include <asm/unistd.h>
|
|
|
|
|
2007-10-17 06:29:25 +00:00
|
|
|
/* we can't #include <linux/syscalls.h> here,
|
|
|
|
so declare the prototype ourselves to keep gcc quiet under -Wmissing-prototypes */
|
|
|
|
asmlinkage long sys_ni_syscall(void);
|
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
/*
|
|
|
|
* Non-implemented system calls get redirected here.
|
|
|
|
*/
|
|
|
|
asmlinkage long sys_ni_syscall(void)
|
|
|
|
{
|
|
|
|
return -ENOSYS;
|
|
|
|
}
|
|
|
|
|
|
|
|
cond_syscall(sys_nfsservctl);
|
|
|
|
cond_syscall(sys_quotactl);
|
2007-07-16 06:41:12 +00:00
|
|
|
cond_syscall(sys32_quotactl);
|
2005-04-16 22:20:36 +00:00
|
|
|
cond_syscall(sys_acct);
|
|
|
|
cond_syscall(sys_lookup_dcookie);
|
|
|
|
cond_syscall(sys_swapon);
|
|
|
|
cond_syscall(sys_swapoff);
|
2005-06-25 21:57:52 +00:00
|
|
|
cond_syscall(sys_kexec_load);
|
|
|
|
cond_syscall(compat_sys_kexec_load);
|
2005-04-16 22:20:36 +00:00
|
|
|
cond_syscall(sys_init_module);
|
|
|
|
cond_syscall(sys_delete_module);
|
|
|
|
cond_syscall(sys_socketpair);
|
|
|
|
cond_syscall(sys_bind);
|
|
|
|
cond_syscall(sys_listen);
|
|
|
|
cond_syscall(sys_accept);
|
|
|
|
cond_syscall(sys_connect);
|
|
|
|
cond_syscall(sys_getsockname);
|
|
|
|
cond_syscall(sys_getpeername);
|
|
|
|
cond_syscall(sys_sendto);
|
|
|
|
cond_syscall(sys_send);
|
|
|
|
cond_syscall(sys_recvfrom);
|
|
|
|
cond_syscall(sys_recv);
|
|
|
|
cond_syscall(sys_socket);
|
|
|
|
cond_syscall(sys_setsockopt);
|
2007-10-29 07:54:39 +00:00
|
|
|
cond_syscall(compat_sys_setsockopt);
|
2005-04-16 22:20:36 +00:00
|
|
|
cond_syscall(sys_getsockopt);
|
2007-10-29 07:54:39 +00:00
|
|
|
cond_syscall(compat_sys_getsockopt);
|
2005-04-16 22:20:36 +00:00
|
|
|
cond_syscall(sys_shutdown);
|
|
|
|
cond_syscall(sys_sendmsg);
|
2007-10-29 07:54:39 +00:00
|
|
|
cond_syscall(compat_sys_sendmsg);
|
2005-04-16 22:20:36 +00:00
|
|
|
cond_syscall(sys_recvmsg);
|
2007-10-29 07:54:39 +00:00
|
|
|
cond_syscall(compat_sys_recvmsg);
|
2005-04-16 22:20:36 +00:00
|
|
|
cond_syscall(sys_socketcall);
|
|
|
|
cond_syscall(sys_futex);
|
|
|
|
cond_syscall(compat_sys_futex);
|
2006-03-27 09:16:22 +00:00
|
|
|
cond_syscall(sys_set_robust_list);
|
|
|
|
cond_syscall(compat_sys_set_robust_list);
|
|
|
|
cond_syscall(sys_get_robust_list);
|
|
|
|
cond_syscall(compat_sys_get_robust_list);
|
2005-04-16 22:20:36 +00:00
|
|
|
cond_syscall(sys_epoll_create);
|
|
|
|
cond_syscall(sys_epoll_ctl);
|
|
|
|
cond_syscall(sys_epoll_wait);
|
2006-10-16 16:01:46 +00:00
|
|
|
cond_syscall(sys_epoll_pwait);
|
2005-04-16 22:20:36 +00:00
|
|
|
cond_syscall(sys_semget);
|
|
|
|
cond_syscall(sys_semop);
|
|
|
|
cond_syscall(sys_semtimedop);
|
|
|
|
cond_syscall(sys_semctl);
|
|
|
|
cond_syscall(sys_msgget);
|
|
|
|
cond_syscall(sys_msgsnd);
|
|
|
|
cond_syscall(sys_msgrcv);
|
|
|
|
cond_syscall(sys_msgctl);
|
|
|
|
cond_syscall(sys_shmget);
|
2005-05-01 15:59:12 +00:00
|
|
|
cond_syscall(sys_shmat);
|
2005-04-16 22:20:36 +00:00
|
|
|
cond_syscall(sys_shmdt);
|
|
|
|
cond_syscall(sys_shmctl);
|
|
|
|
cond_syscall(sys_mq_open);
|
|
|
|
cond_syscall(sys_mq_unlink);
|
|
|
|
cond_syscall(sys_mq_timedsend);
|
|
|
|
cond_syscall(sys_mq_timedreceive);
|
|
|
|
cond_syscall(sys_mq_notify);
|
|
|
|
cond_syscall(sys_mq_getsetattr);
|
|
|
|
cond_syscall(compat_sys_mq_open);
|
|
|
|
cond_syscall(compat_sys_mq_timedsend);
|
|
|
|
cond_syscall(compat_sys_mq_timedreceive);
|
|
|
|
cond_syscall(compat_sys_mq_notify);
|
|
|
|
cond_syscall(compat_sys_mq_getsetattr);
|
|
|
|
cond_syscall(sys_mbind);
|
|
|
|
cond_syscall(sys_get_mempolicy);
|
|
|
|
cond_syscall(sys_set_mempolicy);
|
|
|
|
cond_syscall(compat_sys_mbind);
|
|
|
|
cond_syscall(compat_sys_get_mempolicy);
|
|
|
|
cond_syscall(compat_sys_set_mempolicy);
|
|
|
|
cond_syscall(sys_add_key);
|
|
|
|
cond_syscall(sys_request_key);
|
|
|
|
cond_syscall(sys_keyctl);
|
|
|
|
cond_syscall(compat_sys_keyctl);
|
|
|
|
cond_syscall(compat_sys_socketcall);
|
[PATCH] inotify
inotify is intended to correct the deficiencies of dnotify, particularly
its inability to scale and its terrible user interface:
* dnotify requires the opening of one fd per each directory
that you intend to watch. This quickly results in too many
open files and pins removable media, preventing unmount.
* dnotify is directory-based. You only learn about changes to
directories. Sure, a change to a file in a directory affects
the directory, but you are then forced to keep a cache of
stat structures.
* dnotify's interface to user-space is awful. Signals?
inotify provides a more usable, simple, powerful solution to file change
notification:
* inotify's interface is a system call that returns a fd, not SIGIO.
You get a single fd, which is select()-able.
* inotify has an event that says "the filesystem that the item
you were watching is on was unmounted."
* inotify can watch directories or files.
Inotify is currently used by Beagle (a desktop search infrastructure),
Gamin (a FAM replacement), and other projects.
See Documentation/filesystems/inotify.txt.
Signed-off-by: Robert Love <rml@novell.com>
Cc: John McCutchan <ttb@tentacle.dhs.org>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-07-12 21:06:03 +00:00
|
|
|
cond_syscall(sys_inotify_init);
|
|
|
|
cond_syscall(sys_inotify_add_watch);
|
|
|
|
cond_syscall(sys_inotify_rm_watch);
|
2006-01-08 09:00:51 +00:00
|
|
|
cond_syscall(sys_migrate_pages);
|
2006-06-23 09:03:55 +00:00
|
|
|
cond_syscall(sys_move_pages);
|
2006-01-08 09:05:24 +00:00
|
|
|
cond_syscall(sys_chown16);
|
|
|
|
cond_syscall(sys_fchown16);
|
|
|
|
cond_syscall(sys_getegid16);
|
|
|
|
cond_syscall(sys_geteuid16);
|
|
|
|
cond_syscall(sys_getgid16);
|
|
|
|
cond_syscall(sys_getgroups16);
|
|
|
|
cond_syscall(sys_getresgid16);
|
|
|
|
cond_syscall(sys_getresuid16);
|
|
|
|
cond_syscall(sys_getuid16);
|
|
|
|
cond_syscall(sys_lchown16);
|
|
|
|
cond_syscall(sys_setfsgid16);
|
|
|
|
cond_syscall(sys_setfsuid16);
|
|
|
|
cond_syscall(sys_setgid16);
|
|
|
|
cond_syscall(sys_setgroups16);
|
|
|
|
cond_syscall(sys_setregid16);
|
|
|
|
cond_syscall(sys_setresgid16);
|
|
|
|
cond_syscall(sys_setresuid16);
|
|
|
|
cond_syscall(sys_setreuid16);
|
|
|
|
cond_syscall(sys_setuid16);
|
2006-01-08 09:05:26 +00:00
|
|
|
cond_syscall(sys_vm86old);
|
|
|
|
cond_syscall(sys_vm86);
|
2006-02-21 02:28:08 +00:00
|
|
|
cond_syscall(compat_sys_ipc);
|
|
|
|
cond_syscall(compat_sys_sysctl);
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
/* arch-specific weak syscall entries */
|
|
|
|
cond_syscall(sys_pciconfig_read);
|
|
|
|
cond_syscall(sys_pciconfig_write);
|
|
|
|
cond_syscall(sys_pciconfig_iobase);
|
|
|
|
cond_syscall(sys32_ipc);
|
|
|
|
cond_syscall(sys32_sysctl);
|
|
|
|
cond_syscall(ppc_rtas);
|
2005-11-15 20:53:48 +00:00
|
|
|
cond_syscall(sys_spu_run);
|
|
|
|
cond_syscall(sys_spu_create);
|
[POWERPC] Provide a way to protect 4k subpages when using 64k pages
Using 64k pages on 64-bit PowerPC systems makes life difficult for
emulators that are trying to emulate an ISA, such as x86, which uses a
smaller page size, since the emulator can no longer use the MMU and
the normal system calls for controlling page protections. Of course,
the emulator can emulate the MMU by checking and possibly remapping
the address for each memory access in software, but that is pretty
slow.
This provides a facility for such programs to control the access
permissions on individual 4k sub-pages of 64k pages. The idea is
that the emulator supplies an array of protection masks to apply to a
specified range of virtual addresses. These masks are applied at the
level where hardware PTEs are inserted into the hardware page table
based on the Linux PTEs, so the Linux PTEs are not affected. Note
that this new mechanism does not allow any access that would otherwise
be prohibited; it can only prohibit accesses that would otherwise be
allowed. This new facility is only available on 64-bit PowerPC and
only when the kernel is configured for 64k pages.
The masks are supplied using a new subpage_prot system call, which
takes a starting virtual address and length, and a pointer to an array
of protection masks in memory. The array has a 32-bit word per 64k
page to be protected; each 32-bit word consists of 16 2-bit fields,
for which 0 allows any access (that is otherwise allowed), 1 prevents
write accesses, and 2 or 3 prevent any access.
Implicit in this is that the regions of the address space that are
protected are switched to use 4k hardware pages rather than 64k
hardware pages (on machines with hardware 64k page support). In fact
the whole process is switched to use 4k hardware pages when the
subpage_prot system call is used, but this could be improved in future
to switch only the affected segments.
The subpage protection bits are stored in a 3 level tree akin to the
page table tree. The top level of this tree is stored in a structure
that is appended to the top level of the page table tree, i.e., the
pgd array. Since it will often only be 32-bit addresses (below 4GB)
that are protected, the pointers to the first four bottom level pages
are also stored in this structure (each bottom level page contains the
protection bits for 1GB of address space), so the protection bits for
addresses below 4GB can be accessed with one fewer loads than those
for higher addresses.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2008-01-23 21:35:13 +00:00
|
|
|
cond_syscall(sys_subpage_prot);
|
2006-04-11 05:53:06 +00:00
|
|
|
|
|
|
|
/* MMU-dependent weak syscall entries */
|
|
|
|
cond_syscall(sys_mprotect);
|
|
|
|
cond_syscall(sys_msync);
|
|
|
|
cond_syscall(sys_mlock);
|
|
|
|
cond_syscall(sys_munlock);
|
|
|
|
cond_syscall(sys_mlockall);
|
|
|
|
cond_syscall(sys_munlockall);
|
|
|
|
cond_syscall(sys_mincore);
|
|
|
|
cond_syscall(sys_madvise);
|
|
|
|
cond_syscall(sys_mremap);
|
|
|
|
cond_syscall(sys_remap_file_pages);
|
2006-06-23 09:03:56 +00:00
|
|
|
cond_syscall(compat_sys_move_pages);
|
2006-11-03 06:07:24 +00:00
|
|
|
cond_syscall(compat_sys_migrate_pages);
|
[PATCH] BLOCK: Make it possible to disable the block layer [try #6]
Make it possible to disable the block layer. Not all embedded devices require
it, some can make do with just JFFS2, NFS, ramfs, etc - none of which require
the block layer to be present.
This patch does the following:
(*) Introduces CONFIG_BLOCK to disable the block layer, buffering and blockdev
support.
(*) Adds dependencies on CONFIG_BLOCK to any configuration item that controls
an item that uses the block layer. This includes:
(*) Block I/O tracing.
(*) Disk partition code.
(*) All filesystems that are block based, eg: Ext3, ReiserFS, ISOFS.
(*) The SCSI layer. As far as I can tell, even SCSI chardevs use the
block layer to do scheduling. Some drivers that use SCSI facilities -
such as USB storage - end up disabled indirectly from this.
(*) Various block-based device drivers, such as IDE and the old CDROM
drivers.
(*) MTD blockdev handling and FTL.
(*) JFFS - which uses set_bdev_super(), something it could avoid doing by
taking a leaf out of JFFS2's book.
(*) Makes most of the contents of linux/blkdev.h, linux/buffer_head.h and
linux/elevator.h contingent on CONFIG_BLOCK being set. sector_div() is,
however, still used in places, and so is still available.
(*) Also made contingent are the contents of linux/mpage.h, linux/genhd.h and
parts of linux/fs.h.
(*) Makes a number of files in fs/ contingent on CONFIG_BLOCK.
(*) Makes mm/bounce.c (bounce buffering) contingent on CONFIG_BLOCK.
(*) set_page_dirty() doesn't call __set_page_dirty_buffers() if CONFIG_BLOCK
is not enabled.
(*) fs/no-block.c is created to hold out-of-line stubs and things that are
required when CONFIG_BLOCK is not set:
(*) Default blockdev file operations (to give error ENODEV on opening).
(*) Makes some /proc changes:
(*) /proc/devices does not list any blockdevs.
(*) /proc/diskstats and /proc/partitions are contingent on CONFIG_BLOCK.
(*) Makes some compat ioctl handling contingent on CONFIG_BLOCK.
(*) If CONFIG_BLOCK is not defined, makes sys_quotactl() return -ENODEV if
given a command other than Q_SYNC or if a special device is specified.
(*) In init/do_mounts.c, no reference is made to the blockdev routines if
CONFIG_BLOCK is not defined. This does not prohibit NFS roots or JFFS2.
(*) The bdflush, ioprio_set and ioprio_get syscalls can now be absent (return
error ENOSYS by way of cond_syscall if so).
(*) The seclvl_bd_claim() and seclvl_bd_release() security calls do nothing if
CONFIG_BLOCK is not set, since they can't then happen.
Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2006-09-30 18:45:40 +00:00
|
|
|
|
|
|
|
/* block-layer dependent: absent (and so -ENOSYS) when CONFIG_BLOCK is not set */
|
|
|
|
cond_syscall(sys_bdflush);
|
|
|
|
cond_syscall(sys_ioprio_set);
|
|
|
|
cond_syscall(sys_ioprio_get);
|
signal/timer/event: signalfd core
This patch series implements the new signalfd() system call.
I took part of the original Linus code (and you know how badly it can be
broken :), and I added even more breakage ;) Signals are fetched from the same
signal queue used by the process, so signalfd will compete with standard
kernel delivery in dequeue_signal(). If you want to reliably fetch signals on
the signalfd file, you need to block them with sigprocmask(SIG_BLOCK). This
seems to be working fine on my Dual Opteron machine. I made a quick test
program for it:
http://www.xmailserver.org/signafd-test.c
The signalfd() system call implements signal delivery into a file descriptor
receiver. The signalfd file descriptor is created with the following API:
int signalfd(int ufd, const sigset_t *mask, size_t masksize);
The "ufd" parameter allows changing an existing signalfd sigmask without going
through a close/create cycle (Linus' idea). Use "ufd" == -1 if you want a brand new
signalfd file.
The "mask" parameter specifies the signal mask of the signals that we are interested
in. The "masksize" parameter is the size of "mask".
The signalfd fd supports the poll(2) and read(2) system calls. The poll(2)
will return POLLIN when signals are available to be dequeued. As a direct
consequence of supporting the Linux poll subsystem, the signalfd fd can be
used together with epoll(2) too.
The read(2) system call will return a "struct signalfd_siginfo" structure in
the userspace supplied buffer. The return value is the number of bytes copied
in the supplied buffer, or -1 in case of error. The read(2) call can also
return 0, in case the sighand structure to which the signalfd was attached,
has been orphaned. The O_NONBLOCK flag is also supported, and read(2) will
return -EAGAIN in case no signal is available.
If the size of the buffer passed to read(2) is lower than sizeof(struct
signalfd_siginfo), -EINVAL is returned. A read from the signalfd can also
return -ERESTARTSYS in case a signal hits the process. The format of
struct signalfd_siginfo is shown below; which fields are valid depends on the
(->code & __SI_MASK) value, in the same way as for a struct siginfo:
struct signalfd_siginfo {
__u32 signo; /* si_signo */
__s32 err; /* si_errno */
__s32 code; /* si_code */
__u32 pid; /* si_pid */
__u32 uid; /* si_uid */
__s32 fd; /* si_fd */
__u32 tid; /* si_tid */
__u32 band; /* si_band */
__u32 overrun; /* si_overrun */
__u32 trapno; /* si_trapno */
__s32 status; /* si_status */
__s32 svint; /* si_int */
__u64 svptr; /* si_ptr */
__u64 utime; /* si_utime */
__u64 stime; /* si_stime */
__u64 addr; /* si_addr */
};
[akpm@linux-foundation.org: fix signalfd_copyinfo() on i386]
Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-11 05:23:13 +00:00
|
|
|
|
|
|
|
/* New file descriptors */
|
|
|
|
cond_syscall(sys_signalfd);
|
signal/timer/event: timerfd core
This patch introduces a new system call for timer events delivered through
file descriptors. This allows timer events to be used with standard POSIX
poll(2), select(2) and read(2). As a consequence of supporting the Linux
f_op->poll subsystem, they can be used with epoll(2) too.
The system call is defined as:
int timerfd(int ufd, int clockid, int flags, const struct itimerspec *utmr);
The "ufd" parameter allows for re-use (re-programming) of an existing timerfd
w/out going through the close/open cycle (same as signalfd). If "ufd" is -1,
a new file descriptor will be created, otherwise the existing "ufd" will be
re-programmed.
The "clockid" parameter is either CLOCK_MONOTONIC or CLOCK_REALTIME. The time
specified in the "utmr->it_value" parameter is the expiry time for the timer.
If the TFD_TIMER_ABSTIME flag is set in "flags", this is an absolute time,
otherwise it's a relative time.
If the time specified in "utmr->it_interval" is not zero (zero being .tv_sec == 0,
.tv_nsec == 0), this is the period at which the following ticks should be
generated.
The "utmr->it_interval" should be set to zero if only one tick is requested.
Setting the "utmr->it_value" to zero will disable the timer, or will create a
timerfd without the timer enabled.
The function returns the new (or same, in case "ufd" is a valid timerfd
descriptor) file, or -1 in case of error.
As stated before, the timerfd file descriptor supports poll(2), select(2) and
epoll(2). When a timer event happened on the timerfd, a POLLIN mask will be
returned.
The read(2) call can be used, and it will return a u32 variable holding the
number of "ticks" that happened on the interface since the last call to
read(2). The read(2) call supports the O_NONBLOCK flag too, and EAGAIN will
be returned if no ticks happened.
A quick test program, shows timerfd working correctly on my amd64 box:
http://www.xmailserver.org/timerfd-test.c
[akpm@linux-foundation.org: add sys_timerfd to sys_ni.c]
Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-11 05:23:16 +00:00
|
|
|
cond_syscall(sys_timerfd);
|
2007-05-12 17:37:02 +00:00
|
|
|
cond_syscall(compat_sys_signalfd);
|
|
|
|
cond_syscall(compat_sys_timerfd);
|
signal/timer/event: eventfd core
This is a very simple and light file descriptor, that can be used as event
wait/dispatch by userspace (both wait and dispatch) and by the kernel
(dispatch only). It can be used instead of pipe(2) in all cases where those
would simply be used to signal events. Their kernel overhead is much lower
than pipes, and they do not consume two fds. When used in the kernel, it can
offer an fd-bridge to enable, for example, functionalities like KAIO or
syslets/threadlets to signal to an fd the completion of certain operations.
But more in general, an eventfd can be used by the kernel to signal readiness,
in a POSIX poll/select way, of interfaces that would otherwise be incompatible
with it. The API is:
int eventfd(unsigned int count);
The eventfd API accepts an initial "count" parameter, and returns an eventfd
fd. It supports poll(2) (POLLIN, POLLOUT, POLLERR), read(2) and write(2).
The POLLIN flag is raised when the internal counter is greater than zero.
The POLLOUT flag is raised when at least a value of "1" can be written to the
internal counter.
The POLLERR flag is raised when an overflow in the counter value is detected.
The write(2) operation can never overflow the counter, since it blocks (unless
O_NONBLOCK is set, in which case -EAGAIN is returned).
But the eventfd_signal() function can do it, since it's supposed to not sleep
during its operation.
The read(2) function reads the __u64 counter value, and resets the internal
value to zero. If the value read is equal to (__u64) -1, an overflow happened
on the internal counter (due to 2^64 eventfd_signal() posts that have never
been retired - unlikely, but possible).
The write(2) call writes an __u64 count value, and adds it to the current
counter. The eventfd fd supports O_NONBLOCK also.
On the kernel side, we have:
struct file *eventfd_fget(int fd);
int eventfd_signal(struct file *file, unsigned int n);
The eventfd_fget() should be called to get a struct file* from an eventfd fd
(this is an fget() + check of f_op being an eventfd fops pointer).
The kernel can then call eventfd_signal() every time it wants to post an event
to userspace. The eventfd_signal() function can be called from any context.
An eventfd() simple test and bench is available here:
http://www.xmailserver.org/eventfd-bench.c
This is the eventfd-based version of pipetest-4 (pipe(2) based):
http://www.xmailserver.org/pipetest-4.c
Not that performance matters much in the eventfd case, but eventfd-bench
shows almost double the performance of pipetest-4.
[akpm@linux-foundation.org: fix i386 build]
[akpm@linux-foundation.org: add sys_eventfd to sys_ni.c]
Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-11 05:23:19 +00:00
|
|
|
cond_syscall(sys_eventfd);
|