--- amd64/linux32/linux32_dummy.c.orig +++ amd64/linux32/linux32_dummy.c @@ -65,9 +65,6 @@ DUMMY(fadvise64); DUMMY(ptrace); DUMMY(lookup_dcookie); -DUMMY(epoll_create); -DUMMY(epoll_ctl); -DUMMY(epoll_wait); DUMMY(remap_file_pages); DUMMY(timer_create); DUMMY(timer_settime); --- amd64/linux32/linux32_proto.h.orig +++ amd64/linux32/linux32_proto.h @@ -752,13 +752,19 @@ register_t dummy; }; struct linux_epoll_create_args { - register_t dummy; + char size_l_[PADL_(l_int)]; l_int size; char size_r_[PADR_(l_int)]; }; struct linux_epoll_ctl_args { - register_t dummy; + char epfd_l_[PADL_(l_int)]; l_int epfd; char epfd_r_[PADR_(l_int)]; + char op_l_[PADL_(l_int)]; l_int op; char op_r_[PADR_(l_int)]; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char event_l_[PADL_(struct linux_epoll_event *)]; struct linux_epoll_event * event; char event_r_[PADR_(struct linux_epoll_event *)]; }; struct linux_epoll_wait_args { - register_t dummy; + char epfd_l_[PADL_(l_int)]; l_int epfd; char epfd_r_[PADR_(l_int)]; + char events_l_[PADL_(struct linux_epoll_event *)]; struct linux_epoll_event * events; char events_r_[PADR_(struct linux_epoll_event *)]; + char maxevents_l_[PADL_(l_int)]; l_int maxevents; char maxevents_r_[PADR_(l_int)]; + char timeout_l_[PADL_(l_int)]; l_int timeout; char timeout_r_[PADR_(l_int)]; }; struct linux_remap_file_pages_args { register_t dummy; --- amd64/linux32/linux32_sysent.c.orig +++ amd64/linux32/linux32_sysent.c @@ -274,9 +274,9 @@ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 }, /* 251 = */ { AS(linux_exit_group_args), (sy_call_t *)linux_exit_group, AUE_EXIT, NULL, 0, 0 }, /* 252 = linux_exit_group */ { 0, (sy_call_t *)linux_lookup_dcookie, AUE_NULL, NULL, 0, 0 }, /* 253 = linux_lookup_dcookie */ - { 0, (sy_call_t *)linux_epoll_create, AUE_NULL, NULL, 0, 0 }, /* 254 = linux_epoll_create */ - { 0, (sy_call_t *)linux_epoll_ctl, AUE_NULL, NULL, 0, 0 }, /* 255 = linux_epoll_ctl */ - { 0, (sy_call_t *)linux_epoll_wait, AUE_NULL, NULL, 0, 0 }, 
/* 256 = linux_epoll_wait */ + { AS(linux_epoll_create_args), (sy_call_t *)linux_epoll_create, AUE_NULL, NULL, 0, 0 }, /* 254 = linux_epoll_create */ + { AS(linux_epoll_ctl_args), (sy_call_t *)linux_epoll_ctl, AUE_NULL, NULL, 0, 0 }, /* 255 = linux_epoll_ctl */ + { AS(linux_epoll_wait_args), (sy_call_t *)linux_epoll_wait, AUE_NULL, NULL, 0, 0 }, /* 256 = linux_epoll_wait */ { 0, (sy_call_t *)linux_remap_file_pages, AUE_NULL, NULL, 0, 0 }, /* 257 = linux_remap_file_pages */ { AS(linux_set_tid_address_args), (sy_call_t *)linux_set_tid_address, AUE_NULL, NULL, 0, 0 }, /* 258 = linux_set_tid_address */ { 0, (sy_call_t *)linux_timer_create, AUE_NULL, NULL, 0, 0 }, /* 259 = linux_timer_create */ --- amd64/linux32/syscalls.master.orig +++ amd64/linux32/syscalls.master @@ -421,9 +421,11 @@ 251 AUE_NULL UNIMPL 252 AUE_EXIT STD { int linux_exit_group(int error_code); } 253 AUE_NULL STD { int linux_lookup_dcookie(void); } -254 AUE_NULL STD { int linux_epoll_create(void); } -255 AUE_NULL STD { int linux_epoll_ctl(void); } -256 AUE_NULL STD { int linux_epoll_wait(void); } +254 AUE_NULL STD { int linux_epoll_create(l_int size); } +255 AUE_NULL STD { int linux_epoll_ctl(l_int epfd, l_int op, l_int fd, \ + struct linux_epoll_event *event); } +256 AUE_NULL STD { int linux_epoll_wait(l_int epfd, struct linux_epoll_event *events, \ + l_int maxevents, l_int timeout); } 257 AUE_NULL STD { int linux_remap_file_pages(void); } 258 AUE_NULL STD { int linux_set_tid_address(int *tidptr); } 259 AUE_NULL STD { int linux_timer_create(void); } --- /dev/null 2008-03-05 19:22:00.000000000 +0100 +++ compat/linux/linux_epoll.c 2008-03-05 19:28:14.000000000 +0100 @@ -0,0 +1,258 @@ +/*- + * Copyright (c) 2007 Roman Divacky + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include
+__FBSDID("$FreeBSD$");
+
+#include "opt_compat.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef COMPAT_LINUX32
+#include
+#include
+#else
+#include
+#include
+#endif
+
+
+/*
+ * Create a new epoll file descriptor by creating a kqueue.
+ *
+ * Returns EINVAL when args->size is not positive, otherwise whatever
+ * kqueue() returns.
+ */
+int
+linux_epoll_create(struct thread *td, struct linux_epoll_create_args *args)
+{
+	struct kqueue_args kq_args;
+
+	/*
+	 * args->size is only validated, never used. Linux just tests it
+	 * and then forgets it as well.
+	 */
+	if (args->size > 0)
+		return (kqueue(td, &kq_args));
+
+	return (EINVAL);
+}
+
+/* Structure converting function from epoll to kevent. 
*/
+/*
+ * Fill *kevent for descriptor fd from the epoll interest mask in *event.
+ * The caller must have stored the EV_ADD/EV_DELETE action in kevent->flags
+ * beforehand; EV_CLEAR and EV_ONESHOT are added on top of it.
+ *
+ * kqueue filters are identifiers, not bit flags, so exactly one of them
+ * is assigned. A single kevent carries a single filter: when both read
+ * and write interest are requested only EVFILT_READ is registered. If
+ * none of EPOLLIN/EPOLLPRI/EPOLLOUT is set the filter is left at 0.
+ */
+static void
+linux_epoll_to_kevent(int fd, struct linux_epoll_event *event, struct kevent *kevent)
+{
+	int filter = 0;
+	int flags = kevent->flags;
+
+	/* Readable and priority data both map onto the read filter. */
+	if (event->events & (LINUX_EPOLLIN | LINUX_EPOLLPRI))
+		filter = EVFILT_READ;
+	else if (event->events & LINUX_EPOLLOUT)
+		filter = EVFILT_WRITE;
+	if (event->events & LINUX_EPOLLET)
+		flags |= EV_CLEAR;
+	if (event->events & LINUX_EPOLLONESHOT)
+		flags |= EV_ONESHOT;
+
+	EV_SET(kevent, fd, filter, flags, 0, 0, 0);
+}
+
+/*
+ * Structure converting function from kevent to epoll. In a case
+ * this is called on error in registration we store the error in
+ * event->data and pick it up later in linux_epoll_ctl().
+ *
+ * For EVFILT_READ/EVFILT_WRITE the kevent ident is stored in
+ * event->data, and event->events is set only when kevent->data is
+ * positive. Any other filter leaves *event untouched.
+ */
+static void
+linux_kevent_to_epoll(struct kevent *kevent, struct linux_epoll_event *event)
+{
+	if (kevent->flags & EV_ERROR) {
+		event->data = kevent->data;
+		return;
+	}
+	switch (kevent->filter) {
+	case EVFILT_READ:
+		if (kevent->data > 0)
+			event->events = LINUX_EPOLLIN;
+		event->data = kevent->ident;
+		break;
+	case EVFILT_WRITE:
+		if (kevent->data > 0)
+			event->events = LINUX_EPOLLOUT;
+		event->data = kevent->ident;
+		break;
+	}
+}
+
+/*
+ * Copyout callback used by kevent. This converts kevent
+ * events to epoll events and copies them back to the
+ * userspace. This is also called on error on registering
+ * of the filter.
+ *
+ * arg is the struct kevent_args whose eventlist field is really a
+ * user pointer to struct linux_epoll_event; it is advanced by count
+ * epoll events after a successful copy. Returns the copyout() result
+ * so that a bad user buffer is reported to the caller.
+ */
+static int
+linux_kev_copyout(void *arg, struct kevent *kevp, int count)
+{
+	struct kevent_args *uap;
+	struct linux_epoll_event *eep;
+	int error, i;
+
+	uap = (struct kevent_args*) arg;
+
+	/* Zeroed so that fields the converter does not set read as 0. */
+	eep = malloc(sizeof(*eep) * count, M_TEMP, M_WAITOK | M_ZERO);
+
+	for (i = 0; i < count; i++) {
+		linux_kevent_to_epoll(&kevp[i], &eep[i]);
+	}
+
+	error = copyout(eep, uap->eventlist, count * sizeof(*eep));
+	/* Move the user cursor only once the batch has been delivered. */
+	if (error == 0)
+		uap->eventlist = (struct kevent *)((char *)uap->eventlist + count * sizeof(*eep));
+
+	free(eep, M_TEMP);
+	return (error);
+}
+
+/*
+ * Copyin callback used by kevent. 
This copies already
+ * converted filters from kernel memory to the kevent
+ * internal kernel memory. Hence the memcpy instead of
+ * copyin.
+ */
+static int
+linux_kev_copyin(void *arg, struct kevent *kevp, int count)
+{
+	struct kevent_args *uap;
+
+	uap = (struct kevent_args*) arg;
+
+	memcpy(kevp, uap->changelist, count * sizeof(*kevp));
+
+	/* Step past the entries that were just handed over. */
+	uap->changelist += count;
+
+	return (0);
+}
+
+/*
+ * Load epoll filter, convert it to kevent filter
+ * and load it into kevent subsystem.
+ *
+ * Returns the copyin() error when args->event cannot be read, ENOSYS
+ * for LINUX_EPOLL_CTL_MOD, EINVAL for an unknown args->op, and
+ * otherwise the result of kern_kevent().
+ */
+int
+linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args)
+{
+	struct kevent_args k_args;
+	struct kevent kev;
+	struct kevent_copyops k_ops = { &k_args,
+					linux_kev_copyout,
+					linux_kev_copyin};
+	struct linux_epoll_event le;
+	int error;
+
+	error = copyin(args->event, &le, sizeof(le));
+	if (error)
+		return (error);
+#ifdef DEBUG
+	if (ldebug(epoll_ctl))
+		printf(ARGS(epoll_ctl,"%i, %i, %i, %u"), args->epfd, args->op,
+			args->fd, le.events);
+#endif
+	k_args.fd = args->epfd;
+	k_args.changelist = &kev;
+	/* The epoll can register only 1 filter at once. */
+	k_args.nchanges = 1;
+	k_args.eventlist = NULL;
+	k_args.nevents = 0;
+	k_args.timeout = NULL;
+
+	/*
+	 * kev.flags has to be set on every path that reaches
+	 * linux_epoll_to_kevent(), which reads it before filling kev.
+	 */
+	switch (args->op) {
+	case LINUX_EPOLL_CTL_ADD:
+		kev.flags = EV_ADD | EV_ENABLE;
+		break;
+	case LINUX_EPOLL_CTL_MOD:
+		/* TODO: DELETE && ADD maybe? */
+		printf("linux_epoll_ctl: CTL_MOD not yet implemented.\n");
+		return (ENOSYS);
+		break;
+	case LINUX_EPOLL_CTL_DEL:
+		kev.flags = EV_DELETE | EV_DISABLE;
+		break;
+	default:
+		/* Unknown operation: reject it instead of using garbage flags. */
+		return (EINVAL);
+	}
+	linux_epoll_to_kevent(args->fd, &le, &kev);
+
+	error = kern_kevent(td, args->epfd, 1, 0, &k_ops, NULL);
+	/* Check if there was an error during registration. */
+	if (error == 0 && td->td_retval[0] != 0) {
+		/* The copyout callback stored the error there. */
+		error = le.data;
+	}
+
+	return (error);
+}
+
+/*
+ * Wait for a filter to be triggered on the epoll file descriptor. 
*/
+/*
+ * args->timeout is in milliseconds. A negative value means "block until
+ * an event arrives" and is passed to kern_kevent() as a NULL timespec;
+ * zero polls without sleeping.
+ *
+ * Returns EINVAL when args->maxevents is not in 1..LINUX_MAX_EVENTS,
+ * otherwise the result of kern_kevent().
+ */
+int
+linux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args)
+{
+	struct timespec ts, *tsp;
+	struct kevent_args k_args;
+	struct kevent_copyops k_ops = { &k_args,
+					linux_kev_copyout,
+					linux_kev_copyin};
+	int error;
+
+	if (args->maxevents <= 0 || args->maxevents > LINUX_MAX_EVENTS)
+		return (EINVAL);
+
+	if (args->timeout < 0) {
+		/* No timeout at all: wait indefinitely. */
+		tsp = NULL;
+	} else {
+		/* Convert from milliseconds to timespec. */
+		ts.tv_sec = args->timeout / 1000;
+		ts.tv_nsec = (args->timeout % 1000) * 1000000;
+		tsp = &ts;
+	}
+
+	k_args.fd = args->epfd;
+	k_args.changelist = NULL;
+	k_args.nchanges = 0;
+	/*
+	 * We don't mind the bogus type-cast because
+	 * our copyout function knows about this and
+	 * handles it correctly.
+	 */
+	k_args.eventlist = (struct kevent *)args->events;
+	k_args.nevents = args->maxevents;
+	k_args.timeout = tsp;
+
+	error = kern_kevent(td, args->epfd, 0, args->maxevents, &k_ops, tsp);
+
+	/*
+	 * kern_kevent might return ENOMEM which is not expected from epoll_wait.
+	 * Maybe we should translate that but I don't think it matters at all.
+	 */
+	return (error);
+}
--- /dev/null 2008-03-05 19:22:00.000000000 +0100
+++ compat/linux/linux_epoll.h 2008-03-05 19:28:15.000000000 +0100
@@ -0,0 +1,55 @@
+/*-
+ * Copyright (c) 2007 Roman Divacky
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _LINUX_EPOLL_H_
+#define _LINUX_EPOLL_H_
+
+/*
+ * The event structure is packed on amd64 only.
+ * NOTE(review): presumably this keeps the layout identical to what
+ * Linux binaries use (no padding after events) - confirm.
+ */
+#ifdef __amd64__
+#define EPOLL_PACKED	__packed
+#else
+#define EPOLL_PACKED
+#endif
+
+/* Event record exchanged with userland by epoll_ctl and epoll_wait. */
+struct linux_epoll_event {
+	uint32_t	events;	/* mask of LINUX_EPOLL* bits */
+	uint64_t	data;	/* value reported back with the event */
+} EPOLL_PACKED;
+
+/* Bits of linux_epoll_event.events understood by the emulation. */
+#define LINUX_EPOLLIN		0x001
+#define LINUX_EPOLLPRI		0x002
+#define LINUX_EPOLLOUT		0x004
+/* Unsigned constants: shifting a signed 1 into bit 31 is undefined. */
+#define LINUX_EPOLLONESHOT	(1U << 30)
+#define LINUX_EPOLLET		(1U << 31)
+
+/* Operations accepted by epoll_ctl. */
+#define LINUX_EPOLL_CTL_ADD	1
+#define LINUX_EPOLL_CTL_DEL	2
+#define LINUX_EPOLL_CTL_MOD	3
+
+/* Upper bound for the maxevents argument of epoll_wait. */
+#define LINUX_MAX_EVENTS	(INT_MAX / sizeof(struct linux_epoll_event))
+
+#endif /* !_LINUX_EPOLL_H_ */
--- conf/files.amd64.orig
+++ conf/files.amd64
@@ -235,6 +235,7 @@
  dependency "linux32_assym.h"
 amd64/linux32/linux32_sysent.c optional compat_linux32
 amd64/linux32/linux32_sysvec.c optional compat_linux32
+compat/linux/linux_epoll.c optional compat_linux32
 compat/linux/linux_emul.c optional compat_linux32
 compat/linux/linux_file.c optional compat_linux32
 compat/linux/linux_futex.c optional compat_linux32
--- conf/files.i386.orig
+++ conf/files.i386
@@ -84,6 +84,7 @@
 #
 compat/linprocfs/linprocfs.c optional linprocfs
 compat/linsysfs/linsysfs.c optional linsysfs
+compat/linux/linux_epoll.c optional compat_linux
 compat/linux/linux_emul.c optional compat_linux
 compat/linux/linux_file.c optional compat_linux
 compat/linux/linux_futex.c optional compat_linux
--- conf/files.pc98.orig
+++ conf/files.pc98
@@ -55,6 +55,7 @@
 #
 compat/linprocfs/linprocfs.c 
optional linprocfs compat/linsysfs/linsysfs.c optional linsysfs +compat/linux/linux_epoll.c optional compat_linux compat/linux/linux_emul.c optional compat_linux compat/linux/linux_file.c optional compat_linux compat/linux/linux_futex.c optional compat_linux --- i386/linux/linux_dummy.c.orig +++ i386/linux/linux_dummy.c @@ -67,9 +67,6 @@ DUMMY(mincore); DUMMY(fadvise64); DUMMY(lookup_dcookie); -DUMMY(epoll_create); -DUMMY(epoll_ctl); -DUMMY(epoll_wait); DUMMY(remap_file_pages); DUMMY(fstatfs64); DUMMY(fadvise64_64); --- i386/linux/linux_proto.h.orig +++ i386/linux/linux_proto.h @@ -752,13 +752,19 @@ register_t dummy; }; struct linux_epoll_create_args { - register_t dummy; + char size_l_[PADL_(l_int)]; l_int size; char size_r_[PADR_(l_int)]; }; struct linux_epoll_ctl_args { - register_t dummy; + char epfd_l_[PADL_(l_int)]; l_int epfd; char epfd_r_[PADR_(l_int)]; + char op_l_[PADL_(l_int)]; l_int op; char op_r_[PADR_(l_int)]; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char event_l_[PADL_(struct linux_epoll_event *)]; struct linux_epoll_event * event; char event_r_[PADR_(struct linux_epoll_event *)]; }; struct linux_epoll_wait_args { - register_t dummy; + char epfd_l_[PADL_(l_int)]; l_int epfd; char epfd_r_[PADR_(l_int)]; + char events_l_[PADL_(struct linux_epoll_event *)]; struct linux_epoll_event * events; char events_r_[PADR_(struct linux_epoll_event *)]; + char maxevents_l_[PADL_(l_int)]; l_int maxevents; char maxevents_r_[PADR_(l_int)]; + char timeout_l_[PADL_(l_int)]; l_int timeout; char timeout_r_[PADR_(l_int)]; }; struct linux_remap_file_pages_args { register_t dummy; --- i386/linux/linux_sysent.c.orig +++ i386/linux/linux_sysent.c @@ -273,9 +273,9 @@ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 }, /* 251 = */ { AS(linux_exit_group_args), (sy_call_t *)linux_exit_group, AUE_EXIT, NULL, 0, 0 }, /* 252 = linux_exit_group */ { 0, (sy_call_t *)linux_lookup_dcookie, AUE_NULL, NULL, 0, 0 }, /* 253 = linux_lookup_dcookie */ - { 0, (sy_call_t 
*)linux_epoll_create, AUE_NULL, NULL, 0, 0 }, /* 254 = linux_epoll_create */ - { 0, (sy_call_t *)linux_epoll_ctl, AUE_NULL, NULL, 0, 0 }, /* 255 = linux_epoll_ctl */ - { 0, (sy_call_t *)linux_epoll_wait, AUE_NULL, NULL, 0, 0 }, /* 256 = linux_epoll_wait */ + { AS(linux_epoll_create_args), (sy_call_t *)linux_epoll_create, AUE_NULL, NULL, 0, 0 }, /* 254 = linux_epoll_create */ + { AS(linux_epoll_ctl_args), (sy_call_t *)linux_epoll_ctl, AUE_NULL, NULL, 0, 0 }, /* 255 = linux_epoll_ctl */ + { AS(linux_epoll_wait_args), (sy_call_t *)linux_epoll_wait, AUE_NULL, NULL, 0, 0 }, /* 256 = linux_epoll_wait */ { 0, (sy_call_t *)linux_remap_file_pages, AUE_NULL, NULL, 0, 0 }, /* 257 = linux_remap_file_pages */ { AS(linux_set_tid_address_args), (sy_call_t *)linux_set_tid_address, AUE_NULL, NULL, 0, 0 }, /* 258 = linux_set_tid_address */ { AS(linux_timer_create_args), (sy_call_t *)linux_timer_create, AUE_NULL, NULL, 0, 0 }, /* 259 = linux_timer_create */ --- i386/linux/syscalls.master.orig +++ i386/linux/syscalls.master @@ -423,9 +423,11 @@ 251 AUE_NULL UNIMPL 252 AUE_EXIT STD { int linux_exit_group(int error_code); } 253 AUE_NULL STD { int linux_lookup_dcookie(void); } -254 AUE_NULL STD { int linux_epoll_create(void); } -255 AUE_NULL STD { int linux_epoll_ctl(void); } -256 AUE_NULL STD { int linux_epoll_wait(void); } +254 AUE_NULL STD { int linux_epoll_create(l_int size); } +255 AUE_NULL STD { int linux_epoll_ctl(l_int epfd, l_int op, l_int fd, \ + struct linux_epoll_event *event); } +256 AUE_NULL STD { int linux_epoll_wait(l_int epfd, struct linux_epoll_event *events, \ + l_int maxevents, l_int timeout); } 257 AUE_NULL STD { int linux_remap_file_pages(void); } 258 AUE_NULL STD { int linux_set_tid_address(int *tidptr); } 259 AUE_NULL STD { int linux_timer_create(clockid_t clock_id, \ --- modules/linux/Makefile.orig +++ modules/linux/Makefile @@ -8,7 +8,7 @@ .PATH: ${.CURDIR}/../../compat/linux ${.CURDIR}/../../${MACHINE_ARCH}/linux${SFX} KMOD= linux -SRCS= linux${SFX}_dummy.c 
linux_emul.c linux_file.c \ +SRCS= linux${SFX}_dummy.c linux_epoll.c linux_emul.c linux_file.c \ linux_futex.c linux_getcwd.c linux_ioctl.c linux_ipc.c \ linux${SFX}_machdep.c linux_mib.c linux_misc.c linux_signal.c \ linux_socket.c linux_stats.c linux_sysctl.c linux${SFX}_sysent.c \