~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Wine Cross Reference
wine/server/fd.c

Version: ~ [ wine-1.0-rc1 ] ~ [ wine-0.9.61 ] ~ [ wine-0.9.60 ] ~ [ wine-0.9.59 ] ~ [ wine-0.9.58 ] ~ [ wine-0.9.57 ] ~ [ wine-0.9.56 ] ~ [ wine-0.9.55 ] ~ [ wine-0.9.54 ] ~ [ wine-0.9.53 ] ~ [ wine-0.9.52 ] ~ [ wine-0.9.51 ] ~ [ wine-0.9.50 ] ~ [ wine-0.9.49 ] ~ [ wine-0.9.48 ] ~ [ wine-0.9.47 ] ~ [ wine-0.9.46 ] ~ [ wine-0.9.45 ] ~ [ wine-0.9.44 ] ~ [ wine-0.9.43 ] ~ [ wine-0.9.42 ] ~ [ wine-0.9.41 ] ~ [ wine-0.9.40 ] ~ [ wine-0.9.39 ] ~ [ wine-0.9.38 ] ~ [ wine-0.9.37 ] ~ [ wine-0.9.36 ] ~ [ wine-0.9.35 ] ~ [ wine-0.9.34 ] ~ [ wine-0.9.33 ] ~ [ wine-0.9.32 ] ~ [ wine-0.9.31 ] ~ [ wine-0.9.30 ] ~ [ wine-0.9.29 ] ~ [ wine-0.9.28 ] ~ [ wine-0.9.27 ] ~ [ wine-0.9.26 ] ~ [ wine-0.9.25 ] ~ [ wine-0.9.24 ] ~ [ wine-0.9.23 ] ~ [ wine-0.9.22 ] ~ [ wine-0.9.21 ] ~ [ wine-0.9.20 ] ~ [ wine-0.9.19 ] ~ [ wine-0.9.18 ] ~ [ wine-0.9.17 ] ~ [ wine-0.9.16 ] ~ [ wine-0.9.15 ] ~ [ wine-0.9.14 ] ~ [ wine-0.9.13 ] ~ [ wine-0.9.12 ] ~ [ wine-0.9.11 ] ~ [ wine-0.9.10 ] ~ [ wine-0.9.9 ] ~ [ wine-0.9.8 ] ~ [ wine-0.9.7 ] ~ [ wine-0.9.6 ] ~ [ wine-0.9.5 ] ~ [ wine-0.9.4 ] ~ [ wine-0.9.3 ] ~ [ wine-0.9.2 ] ~ [ wine-0.9.1 ] ~ [ wine-0.9 ] ~ [ wine20050930 ] ~ [ wine20050830 ] ~ [ wine20050725 ] ~ [ wine20050628 ] ~ [ wine20050524 ] ~ [ wine20050419 ] ~ [ wine20050310 ] ~ [ wine20050211 ] ~ [ wine20050111 ] ~ [ wine20041201 ] ~ [ wine20041019 ] ~ [ wine20040914 ] ~ [ wine20040813 ] ~ [ wine20040716 ] ~ [ wine20040615 ] ~ [ wine20040505 ] ~ [ wine20040408 ] ~ [ wine20040309 ] ~ [ wine20040213 ] ~ [ wine20040121 ] ~ [ wine20031212 ] ~ [ wine20031118 ] ~ [ wine20031016 ] ~ [ wine20030911 ] ~ [ wine20030813 ] ~ [ wine20030709 ] ~ [ wine20030618 ] ~ [ wine20030508 ] ~ [ wine20030408 ] ~ [ wine20030318 ] ~ [ wine20030219 ] ~ [ wine20030115 ] ~ [ wine20021219 ] ~ [ wine20021125 ] ~ [ wine20021031 ] ~ [ wine20021007 ] ~ [ wine20020904 ] ~ [ wine20020804 ] ~ [ wine20020710 ] ~ [ wine20020605 ] ~ [ wine20020509 ] ~ [ wine20020411 ] ~ [ wine20020310 ] ~ [ wine20020228 ] ~ [ wine20011226 ] ~ [ 
wine20011108 ] ~ [ wine20011004 ] ~ [ wine20010824 ] ~ [ wine20010731 ] ~ [ wine20010629 ] ~ [ wine20010510 ] ~ [ wine20010418 ] ~ [ wine20010326 ] ~ [ wine20010305 ] ~ [ wine20010216 ] ~ [ wine20010112 ] ~ [ wine20001222 ] ~ [ wine20001202 ] ~ [ wine20001026 ] ~ [ wine20001002 ] ~ [ wine20000909 ] ~ [ wine20000821 ] ~ [ wine20000801 ] ~ [ wine20000716 ] ~ [ wine20000326 ] ~ [ wine20000227 ] ~ [ wine20000130 ] ~ [ wine20000109 ] ~

  1 /*
  2  * Server-side file descriptor management
  3  *
  4  * Copyright (C) 2000, 2003 Alexandre Julliard
  5  *
  6  * This library is free software; you can redistribute it and/or
  7  * modify it under the terms of the GNU Lesser General Public
  8  * License as published by the Free Software Foundation; either
  9  * version 2.1 of the License, or (at your option) any later version.
 10  *
 11  * This library is distributed in the hope that it will be useful,
 12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 14  * Lesser General Public License for more details.
 15  *
 16  * You should have received a copy of the GNU Lesser General Public
 17  * License along with this library; if not, write to the Free Software
 18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 19  */
 20 
 21 
 22 #include "config.h"
 23 #include "wine/port.h"
 24 
 25 #include <assert.h>
 26 #include <errno.h>
 27 #include <fcntl.h>
 28 #include <limits.h>
 29 #include <signal.h>
 30 #include <stdarg.h>
 31 #include <stdio.h>
 32 #include <string.h>
 33 #include <stdlib.h>
 34 #ifdef HAVE_POLL_H
 35 #include <poll.h>
 36 #endif
 37 #ifdef HAVE_SYS_POLL_H
 38 #include <sys/poll.h>
 39 #endif
 40 #ifdef HAVE_LINUX_MAJOR_H
 41 #include <linux/major.h>
 42 #endif
 43 #ifdef HAVE_SYS_STATVFS_H
 44 #include <sys/statvfs.h>
 45 #endif
 46 #ifdef HAVE_SYS_VFS_H
 47 /*
 48  * Solaris defines its own system list type in sys/list.h.
 49  * This needs to be worked around here.
 50  */
 51 #define list SYSLIST
 52 #define list_next SYSLIST_NEXT
 53 #define list_prev SYSLIST_PREV
 54 #define list_head SYSLIST_HEAD
 55 #define list_tail SYSLIST_TAIL
 56 #define list_move_tail SYSLIST_MOVE_TAIL
 57 #define list_remove SYSLIST_REMOVE
 58 #include <sys/vfs.h>
 59 #undef list
 60 #undef list_next
 61 #undef list_prev
 62 #undef list_head
 63 #undef list_tail
 64 #undef list_move_tail
 65 #undef list_remove
 66 #endif
 67 #ifdef HAVE_SYS_PARAM_H
 68 #include <sys/param.h>
 69 #endif
 70 #ifdef HAVE_SYS_MOUNT_H
 71 #include <sys/mount.h>
 72 #endif
 73 #ifdef HAVE_SYS_STATFS_H
 74 #include <sys/statfs.h>
 75 #endif
 76 #ifdef HAVE_SYS_SYSCTL_H
 77 #include <sys/sysctl.h>
 78 #endif
 79 #ifdef HAVE_SYS_EVENT_H
 80 #include <sys/event.h>
 81 #undef LIST_INIT
 82 #undef LIST_ENTRY
 83 #endif
 84 #ifdef HAVE_STDINT_H
 85 #include <stdint.h>
 86 #endif
 87 #include <sys/stat.h>
 88 #include <sys/time.h>
 89 #include <sys/types.h>
 90 #include <unistd.h>
 91 
 92 #include "ntstatus.h"
 93 #define WIN32_NO_STATUS
 94 #include "object.h"
 95 #include "file.h"
 96 #include "handle.h"
 97 #include "process.h"
 98 #include "request.h"
 99 
100 #include "winternl.h"
101 #include "winioctl.h"
102 
103 #if defined(HAVE_SYS_EPOLL_H) && defined(HAVE_EPOLL_CREATE)
104 # include <sys/epoll.h>
105 # define USE_EPOLL
106 #elif defined(linux) && defined(__i386__) && defined(HAVE_STDINT_H)
107 # define USE_EPOLL
108 # define EPOLLIN POLLIN
109 # define EPOLLOUT POLLOUT
110 # define EPOLLERR POLLERR
111 # define EPOLLHUP POLLHUP
112 # define EPOLL_CTL_ADD 1
113 # define EPOLL_CTL_DEL 2
114 # define EPOLL_CTL_MOD 3
115 
/* user data slot stored with an epoll registration and handed back with each event */
union epoll_data
{
  void *ptr;
  int fd;
  uint32_t u32;
  uint64_t u64;
};
typedef union epoll_data epoll_data_t;

/* event record passed to and from the epoll system call wrappers */
struct epoll_event
{
  uint32_t events;      /* mask of EPOLL* event bits */
  epoll_data_t data;    /* caller-chosen data attached to the registration */
};
129 
/* turn a raw syscall result (negative errno value on failure) into the
 * -1/errno convention, then return it from the enclosing function */
#define SYSCALL_RET(ret) \
    do \
    { \
        if ((ret) < 0) \
        { \
            errno = -(ret); \
            (ret) = -1; \
        } \
        return (ret); \
    } while (0)
134 
/* direct epoll_create system call for i386 Linux builds without <sys/epoll.h>;
 * returns the new epoll descriptor, or -1 with errno set on failure */
static inline int epoll_create( int size )
{
    int ret;
    /* the "0" matching constraint places the syscall number in the same
     * register as the result (%eax); %ebx is saved and restored by hand
     * around the call since it carries the first argument */
    __asm__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx"
             : "=a" (ret) : "0" (254 /*NR_epoll_create*/), "r" (size) );
    SYSCALL_RET(ret);
}
142 
/* direct epoll_ctl system call for i386 Linux builds without <sys/epoll.h>;
 * returns 0 on success, or -1 with errno set on failure */
static inline int epoll_ctl( int epfd, int op, int fd, const struct epoll_event *event )
{
    int ret;
    /* the "0" matching constraint places the syscall number in %eax;
     * the "m" (*event) input tells the compiler the event is read by the call */
    __asm__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx"
             : "=a" (ret)
             : "0" (255 /*NR_epoll_ctl*/), "r" (epfd), "c" (op), "d" (fd), "S" (event), "m" (*event) );
    SYSCALL_RET(ret);
}
151 
/* direct epoll_wait system call for i386 Linux builds without <sys/epoll.h>;
 * returns the number of events stored in 'events', or -1 with errno set */
static inline int epoll_wait( int epfd, struct epoll_event *events, int maxevents, int timeout )
{
    int ret;
    /* the "0" matching constraint places the syscall number in %eax;
     * the "memory" clobber covers the events array written by the kernel */
    __asm__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx"
             : "=a" (ret)
             : "0" (256 /*NR_epoll_wait*/), "r" (epfd), "c" (events), "d" (maxevents), "S" (timeout)
             : "memory" );
    SYSCALL_RET(ret);
}
161 #undef SYSCALL_RET
162 
163 #endif /* linux && __i386__ && HAVE_STDINT_H */
164 
165 
166 /* Because of the stupid Posix locking semantics, we need to keep
167  * track of all file descriptors referencing a given file, and not
168  * close a single one until all the locks are gone (sigh).
169  */
170 
171 /* file descriptor object */
172 
173 /* closed_fd is used to keep track of the unix fd belonging to a closed fd object */
174 struct closed_fd
175 {
176     struct list entry;       /* entry in inode closed list */
177     int         unix_fd;     /* the unix file descriptor */
178     char        unlink[1];   /* name to unlink on close (if any) */
179 };
180 
181 struct fd
182 {
183     struct object        obj;         /* object header */
184     const struct fd_ops *fd_ops;      /* file descriptor operations */
185     struct inode        *inode;       /* inode that this fd belongs to */
186     struct list          inode_entry; /* entry in inode fd list */
187     struct closed_fd    *closed;      /* structure to store the unix fd at destroy time */
188     struct object       *user;        /* object using this file descriptor */
189     struct list          locks;       /* list of locks on this fd */
190     unsigned int         access;      /* file access (FILE_READ_DATA etc.) */
191     unsigned int         options;     /* file options (FILE_DELETE_ON_CLOSE, FILE_SYNCHRONOUS...) */
192     unsigned int         sharing;     /* file sharing mode */
193     int                  unix_fd;     /* unix file descriptor */
194     unsigned int         no_fd_status;/* status to return when unix_fd is -1 */
195     int                  signaled :1; /* is the fd signaled? */
196     int                  fs_locks :1; /* can we use filesystem locks for this fd? */
197     int                  poll_index;  /* index of fd in poll array */
198     struct async_queue  *read_q;      /* async readers of this fd */
199     struct async_queue  *write_q;     /* async writers of this fd */
200     struct async_queue  *wait_q;      /* other async waiters of this fd */
201     struct completion   *completion;  /* completion object attached to this fd */
202     unsigned long        comp_key;    /* completion key to set in completion events */
203 };
204 
205 static void fd_dump( struct object *obj, int verbose );
206 static void fd_destroy( struct object *obj );
207 
208 static const struct object_ops fd_ops =
209 {
210     sizeof(struct fd),        /* size */
211     fd_dump,                  /* dump */
212     no_get_type,              /* get_type */
213     no_add_queue,             /* add_queue */
214     NULL,                     /* remove_queue */
215     NULL,                     /* signaled */
216     NULL,                     /* satisfied */
217     no_signal,                /* signal */
218     no_get_fd,                /* get_fd */
219     no_map_access,            /* map_access */
220     default_get_sd,           /* get_sd */
221     default_set_sd,           /* set_sd */
222     no_lookup_name,           /* lookup_name */
223     no_open_file,             /* open_file */
224     no_close_handle,          /* close_handle */
225     fd_destroy                /* destroy */
226 };
227 
228 /* device object */
229 
230 #define DEVICE_HASH_SIZE 7
231 #define INODE_HASH_SIZE 17
232 
233 struct device
234 {
235     struct object       obj;        /* object header */
236     struct list         entry;      /* entry in device hash list */
237     dev_t               dev;        /* device number */
238     int                 removable;  /* removable device? (or -1 if unknown) */
239     struct list         inode_hash[INODE_HASH_SIZE];  /* inodes hash table */
240 };
241 
242 static void device_dump( struct object *obj, int verbose );
243 static void device_destroy( struct object *obj );
244 
245 static const struct object_ops device_ops =
246 {
247     sizeof(struct device),    /* size */
248     device_dump,              /* dump */
249     no_get_type,              /* get_type */
250     no_add_queue,             /* add_queue */
251     NULL,                     /* remove_queue */
252     NULL,                     /* signaled */
253     NULL,                     /* satisfied */
254     no_signal,                /* signal */
255     no_get_fd,                /* get_fd */
256     no_map_access,            /* map_access */
257     default_get_sd,           /* get_sd */
258     default_set_sd,           /* set_sd */
259     no_lookup_name,           /* lookup_name */
260     no_open_file,             /* open_file */
261     no_close_handle,          /* close_handle */
262     device_destroy            /* destroy */
263 };
264 
265 /* inode object */
266 
267 struct inode
268 {
269     struct object       obj;        /* object header */
270     struct list         entry;      /* inode hash list entry */
271     struct device      *device;     /* device containing this inode */
272     ino_t               ino;        /* inode number */
273     struct list         open;       /* list of open file descriptors */
274     struct list         locks;      /* list of file locks */
275     struct list         closed;     /* list of file descriptors to close at destroy time */
276 };
277 
278 static void inode_dump( struct object *obj, int verbose );
279 static void inode_destroy( struct object *obj );
280 
281 static const struct object_ops inode_ops =
282 {
283     sizeof(struct inode),     /* size */
284     inode_dump,               /* dump */
285     no_get_type,              /* get_type */
286     no_add_queue,             /* add_queue */
287     NULL,                     /* remove_queue */
288     NULL,                     /* signaled */
289     NULL,                     /* satisfied */
290     no_signal,                /* signal */
291     no_get_fd,                /* get_fd */
292     no_map_access,            /* map_access */
293     default_get_sd,           /* get_sd */
294     default_set_sd,           /* set_sd */
295     no_lookup_name,           /* lookup_name */
296     no_open_file,             /* open_file */
297     no_close_handle,          /* close_handle */
298     inode_destroy             /* destroy */
299 };
300 
301 /* file lock object */
302 
303 struct file_lock
304 {
305     struct object       obj;         /* object header */
306     struct fd          *fd;          /* fd owning this lock */
307     struct list         fd_entry;    /* entry in list of locks on a given fd */
308     struct list         inode_entry; /* entry in inode list of locks */
309     int                 shared;      /* shared lock? */
310     file_pos_t          start;       /* locked region is interval [start;end) */
311     file_pos_t          end;
312     struct process     *process;     /* process owning this lock */
313     struct list         proc_entry;  /* entry in list of locks owned by the process */
314 };
315 
316 static void file_lock_dump( struct object *obj, int verbose );
317 static int file_lock_signaled( struct object *obj, struct thread *thread );
318 
319 static const struct object_ops file_lock_ops =
320 {
321     sizeof(struct file_lock),   /* size */
322     file_lock_dump,             /* dump */
323     no_get_type,                /* get_type */
324     add_queue,                  /* add_queue */
325     remove_queue,               /* remove_queue */
326     file_lock_signaled,         /* signaled */
327     no_satisfied,               /* satisfied */
328     no_signal,                  /* signal */
329     no_get_fd,                  /* get_fd */
330     no_map_access,              /* map_access */
331     default_get_sd,             /* get_sd */
332     default_set_sd,             /* set_sd */
333     no_lookup_name,             /* lookup_name */
334     no_open_file,               /* open_file */
335     no_close_handle,            /* close_handle */
336     no_destroy                  /* destroy */
337 };
338 
339 
340 #define OFF_T_MAX       (~((file_pos_t)1 << (8*sizeof(off_t)-1)))
341 #define FILE_POS_T_MAX  (~(file_pos_t)0)
342 
343 static file_pos_t max_unix_offset = OFF_T_MAX;
344 
/* print an integer in hex on stderr; a value too large for an unsigned long
 * is printed as two parts (high bits, then the low part zero-padded to 8 digits) */
#define DUMP_LONG_LONG(val) do { \
    if (sizeof(val) <= sizeof(unsigned long) || (val) <= ~0UL) \
        fprintf( stderr, "%lx", (unsigned long)(val) ); \
    else \
        fprintf( stderr, "%lx%08lx", (unsigned long)((unsigned long long)(val) >> 32), (unsigned long)(val) ); \
  } while (0)
351 
352 
353 
354 /****************************************************************/
355 /* timeouts support */
356 
357 struct timeout_user
358 {
359     struct list           entry;      /* entry in sorted timeout list */
360     timeout_t             when;       /* timeout expiry (absolute time) */
361     timeout_callback      callback;   /* callback function */
362     void                 *private;    /* callback private data */
363 };
364 
365 static struct list timeout_list = LIST_INIT(timeout_list);   /* sorted timeouts list */
366 timeout_t current_time;
367 
368 static inline void set_current_time(void)
369 {
370     static const timeout_t ticks_1601_to_1970 = (timeout_t)86400 * (369 * 365 + 89) * TICKS_PER_SEC;
371     struct timeval now;
372     gettimeofday( &now, NULL );
373     current_time = (timeout_t)now.tv_sec * TICKS_PER_SEC + now.tv_usec * 10 + ticks_1601_to_1970;
374 }
375 
376 /* add a timeout user */
377 struct timeout_user *add_timeout_user( timeout_t when, timeout_callback func, void *private )
378 {
379     struct timeout_user *user;
380     struct list *ptr;
381 
382     if (!(user = mem_alloc( sizeof(*user) ))) return NULL;
383     user->when     = (when > 0) ? when : current_time - when;
384     user->callback = func;
385     user->private  = private;
386 
387     /* Now insert it in the linked list */
388 
389     LIST_FOR_EACH( ptr, &timeout_list )
390     {
391         struct timeout_user *timeout = LIST_ENTRY( ptr, struct timeout_user, entry );
392         if (timeout->when >= user->when) break;
393     }
394     list_add_before( ptr, &user->entry );
395     return user;
396 }
397 
398 /* remove a timeout user */
399 void remove_timeout_user( struct timeout_user *user )
400 {
401     list_remove( &user->entry );
402     free( user );
403 }
404 
405 /* return a text description of a timeout for debugging purposes */
406 const char *get_timeout_str( timeout_t timeout )
407 {
408     static char buffer[64];
409     long secs, nsecs;
410 
411     if (!timeout) return "";
412     if (timeout == TIMEOUT_INFINITE) return "infinite";
413 
414     if (timeout < 0)  /* relative */
415     {
416         secs = -timeout / TICKS_PER_SEC;
417         nsecs = -timeout % TICKS_PER_SEC;
418         sprintf( buffer, "+%ld.%07ld", secs, nsecs );
419     }
420     else  /* absolute */
421     {
422         secs = (timeout - current_time) / TICKS_PER_SEC;
423         nsecs = (timeout - current_time) % TICKS_PER_SEC;
424         if (nsecs < 0)
425         {
426             nsecs += TICKS_PER_SEC;
427             secs--;
428         }
429         if (secs >= 0)
430             sprintf( buffer, "%x%08x (+%ld.%07ld)",
431                      (unsigned int)(timeout >> 32), (unsigned int)timeout, secs, nsecs );
432         else
433             sprintf( buffer, "%x%08x (-%ld.%07ld)",
434                      (unsigned int)(timeout >> 32), (unsigned int)timeout,
435                      -(secs + 1), TICKS_PER_SEC - nsecs );
436     }
437     return buffer;
438 }
439 
440 
441 /****************************************************************/
442 /* poll support */
443 
/* poll bookkeeping: poll_users[] and pollfd[] are parallel arrays indexed by
 * the same user index */
static struct fd **poll_users;              /* fd object for each array slot */
static struct pollfd *pollfd;               /* pollfd entry for each array slot */
static int nb_users;                        /* number of array slots handed out so far */
static int active_users;                    /* number of users currently active */
static int allocated_users;                 /* number of slots allocated in the arrays */
static struct fd **freelist;                /* head of the list of free array slots */
450 
451 static int get_next_timeout(void);
452 
453 static inline void fd_poll_event( struct fd *fd, int event )
454 {
455     fd->fd_ops->poll_event( fd, event );
456 }
457 
458 #ifdef USE_EPOLL
459 
/* epoll descriptor, or -1 when epoll is not in use */
static int epoll_fd = -1;

/* create the epoll descriptor; epoll_fd is left at -1 if this fails */
static inline void init_epoll(void)
{
    const int size_hint = 128;

    epoll_fd = epoll_create( size_hint );
}
466 
467 /* set the events that epoll waits for on this fd; helper for set_fd_events */
468 static inline void set_fd_epoll_events( struct fd *fd, int user, int events )
469 {
470     struct epoll_event ev;
471     int ctl;
472 
473     if (epoll_fd == -1) return;
474 
475     if (events == -1)  /* stop waiting on this fd completely */
476     {
477         if (pollfd[user].fd == -1) return;  /* already removed */
478         ctl = EPOLL_CTL_DEL;
479     }
480     else if (pollfd[user].fd == -1)
481     {
482         if (pollfd[user].events) return;  /* stopped waiting on it, don't restart */
483         ctl = EPOLL_CTL_ADD;
484     }
485     else
486     {
487         if (pollfd[user].events == events) return;  /* nothing to do */
488         ctl = EPOLL_CTL_MOD;
489     }
490 
491     ev.events = events;
492     memset(&ev.data, 0, sizeof(ev.data));
493     ev.data.u32 = user;
494 
495     if (epoll_ctl( epoll_fd, ctl, fd->unix_fd, &ev ) == -1)
496     {
497         if (errno == ENOMEM)  /* not enough memory, give up on epoll */
498         {
499             close( epoll_fd );
500             epoll_fd = -1;
501         }
502         else perror( "epoll_ctl" );  /* should not happen */
503     }
504 }
505 
506 static inline void remove_epoll_user( struct fd *fd, int user )
507 {
508     if (epoll_fd == -1) return;
509 
510     if (pollfd[user].fd != -1)
511     {
512         struct epoll_event dummy;
513         epoll_ctl( epoll_fd, EPOLL_CTL_DEL, fd->unix_fd, &dummy );
514     }
515 }
516 
517 static inline void main_loop_epoll(void)
518 {
519     int i, ret, timeout;
520     struct epoll_event events[128];
521 
522     assert( POLLIN == EPOLLIN );
523     assert( POLLOUT == EPOLLOUT );
524     assert( POLLERR == EPOLLERR );
525     assert( POLLHUP == EPOLLHUP );
526 
527     if (epoll_fd == -1) return;
528 
529     while (active_users)
530     {
531         timeout = get_next_timeout();
532 
533         if (!active_users) break;  /* last user removed by a timeout */
534         if (epoll_fd == -1) break;  /* an error occurred with epoll */
535 
536         ret = epoll_wait( epoll_fd, events, sizeof(events)/sizeof(events[0]), timeout );
537         set_current_time();
538 
539         /* put the events into the pollfd array first, like poll does */
540         for (i = 0; i < ret; i++)
541         {
542             int user = events[i].data.u32;
543             pollfd[user].revents = events[i].events;
544         }
545 
546         /* read events from the pollfd array, as set_fd_events may modify them */
547         for (i = 0; i < ret; i++)
548         {
549             int user = events[i].data.u32;
550             if (pollfd[user].revents) fd_poll_event( poll_users[user], pollfd[user].revents );
551         }
552     }
553 }
554 
555 #elif defined(HAVE_KQUEUE)
556 
/* kqueue descriptor, or -1 when kqueue is not in use */
static int kqueue_fd = -1;

/* create the kqueue descriptor; kqueue_fd is left at -1 if kqueue cannot be used */
static inline void init_epoll(void)
{
#ifdef __APPLE__ /* kqueue support is broken in Mac OS < 10.5 */
    int mib[2] = { CTL_KERN, KERN_OSRELEASE };
    char release[32];
    size_t len = sizeof(release);

    /* skip kqueue when the kernel release cannot be read or is older than 9 */
    if (sysctl( mib, 2, release, &len, NULL, 0 ) == -1) return;
    if (atoi(release) < 9) return;
#endif
    kqueue_fd = kqueue();
}
573 
574 static inline void set_fd_epoll_events( struct fd *fd, int user, int events )
575 {
576     struct kevent ev[2];
577 
578     if (kqueue_fd == -1) return;
579 
580     EV_SET( &ev[0], fd->unix_fd, EVFILT_READ, 0, NOTE_LOWAT, 1, (void *)user );
581     EV_SET( &ev[1], fd->unix_fd, EVFILT_WRITE, 0, NOTE_LOWAT, 1, (void *)user );
582 
583     if (events == -1)  /* stop waiting on this fd completely */
584     {
585         if (pollfd[user].fd == -1) return;  /* already removed */
586         ev[0].flags |= EV_DELETE;
587         ev[1].flags |= EV_DELETE;
588     }
589     else if (pollfd[user].fd == -1)
590     {
591         if (pollfd[user].events) return;  /* stopped waiting on it, don't restart */
592         ev[0].flags |= EV_ADD | ((events & POLLIN) ? EV_ENABLE : EV_DISABLE);
593         ev[1].flags |= EV_ADD | ((events & POLLOUT) ? EV_ENABLE : EV_DISABLE);
594     }
595     else
596     {
597         if (pollfd[user].events == events) return;  /* nothing to do */
598         ev[0].flags |= (events & POLLIN) ? EV_ENABLE : EV_DISABLE;
599         ev[1].flags |= (events & POLLOUT) ? EV_ENABLE : EV_DISABLE;
600     }
601 
602     if (kevent( kqueue_fd, ev, 2, NULL, 0, NULL ) == -1)
603     {
604         if (errno == ENOMEM)  /* not enough memory, give up on kqueue */
605         {
606             close( kqueue_fd );
607             kqueue_fd = -1;
608         }
609         else perror( "kevent" );  /* should not happen */
610     }
611 }
612 
613 static inline void remove_epoll_user( struct fd *fd, int user )
614 {
615     if (kqueue_fd == -1) return;
616 
617     if (pollfd[user].fd != -1)
618     {
619         struct kevent ev[2];
620 
621         EV_SET( &ev[0], fd->unix_fd, EVFILT_READ, EV_DELETE, 0, 0, 0 );
622         EV_SET( &ev[1], fd->unix_fd, EVFILT_WRITE, EV_DELETE, 0, 0, 0 );
623         kevent( kqueue_fd, ev, 2, NULL, 0, NULL );
624     }
625 }
626 
627 static inline void main_loop_epoll(void)
628 {
629     int i, ret, timeout;
630     struct kevent events[128];
631 
632     if (kqueue_fd == -1) return;
633 
634     while (active_users)
635     {
636         timeout = get_next_timeout();
637 
638         if (!active_users) break;  /* last user removed by a timeout */
639         if (kqueue_fd == -1) break;  /* an error occurred with kqueue */
640 
641         if (timeout != -1)
642         {
643             struct timespec ts;
644 
645             ts.tv_sec = timeout / 1000;
646             ts.tv_nsec = (timeout % 1000) * 1000000;
647             ret = kevent( kqueue_fd, NULL, 0, events, sizeof(events)/sizeof(events[0]), &ts );
648         }
649         else ret = kevent( kqueue_fd, NULL, 0, events, sizeof(events)/sizeof(events[0]), NULL );
650 
651         set_current_time();
652 
653         /* put the events into the pollfd array first, like poll does */
654         for (i = 0; i < ret; i++)
655         {
656             long user = (long)events[i].udata;
657             pollfd[user].revents = 0;
658         }
659         for (i = 0; i < ret; i++)
660         {
661             long user = (long)events[i].udata;
662             if (events[i].filter == EVFILT_READ) pollfd[user].revents |= POLLIN;
663             else if (events[i].filter == EVFILT_WRITE) pollfd[user].revents |= POLLOUT;
664             if (events[i].flags & EV_EOF) pollfd[user].revents |= POLLHUP;
665             if (events[i].flags & EV_ERROR) pollfd[user].revents |= POLLERR;
666         }
667 
668         /* read events from the pollfd array, as set_fd_events may modify them */
669         for (i = 0; i < ret; i++)
670         {
671             long user = (long)events[i].udata;
672             if (pollfd[user].revents) fd_poll_event( poll_users[user], pollfd[user].revents );
673             pollfd[user].revents = 0;
674         }
675     }
676 }
677 
678 #else /* HAVE_KQUEUE */
679 
/* no-op versions used when neither epoll nor kqueue is available */

static inline void init_epoll(void)
{
}

static inline void set_fd_epoll_events( struct fd *fd, int user, int events )
{
}

static inline void remove_epoll_user( struct fd *fd, int user )
{
}

static inline void main_loop_epoll(void)
{
}
684 
685 #endif /* USE_EPOLL */
686 
687 
688 /* add a user in the poll array and return its index, or -1 on failure */
689 static int add_poll_user( struct fd *fd )
690 {
691     int ret;
692     if (freelist)
693     {
694         ret = freelist - poll_users;
695         freelist = (struct fd **)poll_users[ret];
696     }
697     else
698     {
699         if (nb_users == allocated_users)
700         {
701             struct fd **newusers;
702             struct pollfd *newpoll;
703             int new_count = allocated_users ? (allocated_users + allocated_users / 2) : 16;
704             if (!(newusers = realloc( poll_users, new_count * sizeof(*poll_users) ))) return -1;
705             if (!(newpoll = realloc( pollfd, new_count * sizeof(*pollfd) )))
706             {
707                 if (allocated_users)
708                     poll_users = newusers;
709                 else
710                     free( newusers );
711                 return -1;
712             }
713             poll_users = newusers;
714             pollfd = newpoll;
715             if (!allocated_users) init_epoll();
716             allocated_users = new_count;
717         }
718         ret = nb_users++;
719     }
720     pollfd[ret].fd = -1;
721     pollfd[ret].events = 0;
722     pollfd[ret].revents = 0;
723     poll_users[ret] = fd;
724     active_users++;
725     return ret;
726 }
727 
728 /* remove a user from the poll list */
729 static void remove_poll_user( struct fd *fd, int user )
730 {
731     assert( user >= 0 );
732     assert( poll_users[user] == fd );
733 
734     remove_epoll_user( fd, user );
735     pollfd[user].fd = -1;
736     pollfd[user].events = 0;
737     pollfd[user].revents = 0;
738     poll_users[user] = (struct fd *)freelist;
739     freelist = &poll_users[user];
740     active_users--;
741 }
742 
743 /* process pending timeouts and return the time until the next timeout, in milliseconds */
744 static int get_next_timeout(void)
745 {
746     if (!list_empty( &timeout_list ))
747     {
748         struct list expired_list, *ptr;
749 
750         /* first remove all expired timers from the list */
751 
752         list_init( &expired_list );
753         while ((ptr = list_head( &timeout_list )) != NULL)
754         {
755             struct timeout_user *timeout = LIST_ENTRY( ptr, struct timeout_user, entry );
756 
757             if (timeout->when <= current_time)
758             {
759                 list_remove( &timeout->entry );
760                 list_add_tail( &expired_list, &timeout->entry );
761             }
762             else break;
763         }
764 
765         /* now call the callback for all the removed timers */
766 
767         while ((ptr = list_head( &expired_list )) != NULL)
768         {
769             struct timeout_user *timeout = LIST_ENTRY( ptr, struct timeout_user, entry );
770             list_remove( &timeout->entry );
771             timeout->callback( timeout->private );
772             free( timeout );
773         }
774 
775         if ((ptr = list_head( &timeout_list )) != NULL)
776         {
777             struct timeout_user *timeout = LIST_ENTRY( ptr, struct timeout_user, entry );
778             int diff = (timeout->when - current_time + 9999) / 10000;
779             if (diff < 0) diff = 0;
780             return diff;
781         }
782     }
783     return -1;  /* no pending timeouts */
784 }
785 
786 /* server main poll() loop */
787 void main_loop(void)
788 {
789     int i, ret, timeout;
790 
791     set_current_time();
792     server_start_time = current_time;
793 
794     main_loop_epoll();
795     /* fall through to normal poll loop */
796 
797     while (active_users)
798     {
799         timeout = get_next_timeout();
800 
801         if (!active_users) break;  /* last user removed by a timeout */
802 
803         ret = poll( pollfd, nb_users, timeout );
804         set_current_time();
805 
806         if (ret > 0)
807         {
808             for (i = 0; i < nb_users; i++)
809             {
810                 if (pollfd[i].revents)
811                 {
812                     fd_poll_event( poll_users[i], pollfd[i].revents );
813                     if (!--ret) break;
814                 }
815             }
816         }
817     }
818 }
819 
820 
821 /****************************************************************/
822 /* device functions */
823 
824 static struct list device_hash[DEVICE_HASH_SIZE];
825 
/* check whether the device behind unix_fd holds removable media;
 * returns non-zero if so, 0 if not or if it cannot be determined */
static int is_device_removable( dev_t dev, int unix_fd )
{
#if defined(linux) && defined(HAVE_FSTATFS)
    struct statfs fs_info;

    /* check for floppy disk */
    if (major(dev) == FLOPPY_MAJOR) return 1;

    if (fstatfs( unix_fd, &fs_info ) == -1) return 0;
    switch (fs_info.f_type)
    {
    case 0x9660:      /* iso9660 */
    case 0x9fa1:      /* supermount */
    case 0x15013346:  /* udf */
        return 1;
    default:
        return 0;
    }
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__APPLE__)
    struct statfs fs_info;

    if (fstatfs( unix_fd, &fs_info ) == -1) return 0;
    return (!strcmp("cd9660", fs_info.f_fstypename) || !strcmp("udf", fs_info.f_fstypename));
#elif defined(__NetBSD__)
    struct statvfs fs_info;

    if (fstatvfs( unix_fd, &fs_info ) == -1) return 0;
    return (!strcmp("cd9660", fs_info.f_fstypename) || !strcmp("udf", fs_info.f_fstypename));
#elif defined(sun)
# include <sys/dkio.h>
# include <sys/vtoc.h>
    struct dk_cinfo ctrl_info;

    if (ioctl( unix_fd, DKIOCINFO, &ctrl_info ) == -1) return 0;
    switch (ctrl_info.dki_ctype)
    {
    case DKC_CDROM:
    case DKC_NCRFLOPPY:
    case DKC_SMSFLOPPY:
    case DKC_INTEL82072:
    case DKC_INTEL82077:
        return 1;
    default:
        return 0;
    }
#else
    return 0;
#endif
}
862 
863 /* retrieve the device object for a given fd, creating it if needed */
864 static struct device *get_device( dev_t dev, int unix_fd )
865 {
866     struct device *device;
867     unsigned int i, hash = dev % DEVICE_HASH_SIZE;
868 
869     if (device_hash[hash].next)
870     {
871         LIST_FOR_EACH_ENTRY( device, &device_hash[hash], struct device, entry )
872             if (device->dev == dev) return (struct device *)grab_object( device );
873     }
874     else list_init( &device_hash[hash] );
875 
876     /* not found, create it */
877 
878     if (unix_fd == -1) return NULL;
879     if ((device = alloc_object( &device_ops )))
880     {
881         device->dev = dev;
882         device->removable = is_device_removable( dev, unix_fd );
883         for (i = 0; i < INODE_HASH_SIZE; i++) list_init( &device->inode_hash[i] );
884         list_add_head( &device_hash[hash], &device->entry );
885     }
886     return device;
887 }
888 
889 static void device_dump( struct object *obj, int verbose )
890 {
891     struct device *device = (struct device *)obj;
892     fprintf( stderr, "Device dev=" );
893     DUMP_LONG_LONG( device->dev );
894     fprintf( stderr, "\n" );
895 }
896 
897 static void device_destroy( struct object *obj )
898 {
899     struct device *device = (struct device *)obj;
900     unsigned int i;
901 
902     for (i = 0; i < INODE_HASH_SIZE; i++)
903         assert( list_empty(&device->inode_hash[i]) );
904 
905     list_remove( &device->entry );  /* remove it from the hash table */
906 }
907 
908 
909 /****************************************************************/
910 /* inode functions */
911 
912 /* close all pending file descriptors in the closed list */
913 static void inode_close_pending( struct inode *inode, int keep_unlinks )
914 {
915     struct list *ptr = list_head( &inode->closed );
916 
917     while (ptr)
918     {
919         struct closed_fd *fd = LIST_ENTRY( ptr, struct closed_fd, entry );
920         struct list *next = list_next( &inode->closed, ptr );
921 
922         if (fd->unix_fd != -1)
923         {
924             close( fd->unix_fd );
925             fd->unix_fd = -1;
926         }
927         if (!keep_unlinks || !fd->unlink[0])  /* get rid of it unless there's an unlink pending on that file */
928         {
929             list_remove( ptr );
930             free( fd );
931         }
932         ptr = next;
933     }
934 }
935 
936 static void inode_dump( struct object *obj, int verbose )
937 {
938     struct inode *inode = (struct inode *)obj;
939     fprintf( stderr, "Inode device=%p ino=", inode->device );
940     DUMP_LONG_LONG( inode->ino );
941     fprintf( stderr, "\n" );
942 }
943 
944 static void inode_destroy( struct object *obj )
945 {
946     struct inode *inode = (struct inode *)obj;
947     struct list *ptr;
948 
949     assert( list_empty(&inode->open) );
950     assert( list_empty(&inode->locks) );
951 
952     list_remove( &inode->entry );
953 
954     while ((ptr = list_head( &inode->closed )))
955     {
956         struct closed_fd *fd = LIST_ENTRY( ptr, struct closed_fd, entry );
957         list_remove( ptr );
958         if (fd->unix_fd != -1) close( fd->unix_fd );
959         if (fd->unlink[0])
960         {
961             /* make sure it is still the same file */
962             struct stat st;
963             if (!stat( fd->unlink, &st ) && st.st_dev == inode->device->dev && st.st_ino == inode->ino)
964             {
965                 if (S_ISDIR(st.st_mode)) rmdir( fd->unlink );
966                 else unlink( fd->unlink );
967             }
968         }
969         free( fd );
970     }
971     release_object( inode->device );
972 }
973 
974 /* retrieve the inode object for a given fd, creating it if needed */
975 static struct inode *get_inode( dev_t dev, ino_t ino, int unix_fd )
976 {
977     struct device *device;
978     struct inode *inode;
979     unsigned int hash = ino % INODE_HASH_SIZE;
980 
981     if (!(device = get_device( dev, unix_fd ))) return NULL;
982 
983     LIST_FOR_EACH_ENTRY( inode, &device->inode_hash[hash], struct inode, entry )
984     {
985         if (inode->ino == ino)
986         {
987             release_object( device );
988             return (struct inode *)grab_object( inode );
989         }
990     }
991 
992     /* not found, create it */
993     if ((inode = alloc_object( &inode_ops )))
994     {
995         inode->device = device;
996         inode->ino    = ino;
997         list_init( &inode->open );
998         list_init( &inode->locks );
999         list_init( &inode->closed );
1000         list_add_head( &device->inode_hash[hash], &inode->entry );
1001     }
1002     else release_object( device );
1003 
1004     return inode;
1005 }
1006 
1007 /* add fd to the inode list of file descriptors to close */
1008 static void inode_add_closed_fd( struct inode *inode, struct closed_fd *fd )
1009 {
1010     if (!list_empty( &inode->locks ))
1011     {
1012         list_add_head( &inode->closed, &fd->entry );
1013     }
1014     else if (fd->unlink[0])  /* close the fd but keep the structure around for unlink */
1015     {
1016         if (fd->unix_fd != -1) close( fd->unix_fd );
1017         fd->unix_fd = -1;
1018         list_add_head( &inode->closed, &fd->entry );
1019     }
1020     else  /* no locks on this inode and no unlink, get rid of the fd */
1021     {
1022         if (fd->unix_fd != -1) close( fd->unix_fd );
1023         free( fd );
1024     }
1025 }
1026 
1027 
1028 /****************************************************************/
1029 /* file lock functions */
1030 
1031 static void file_lock_dump( struct object *obj, int verbose )
1032 {
1033     struct file_lock *lock = (struct file_lock *)obj;
1034     fprintf( stderr, "Lock %s fd=%p proc=%p start=",
1035              lock->shared ? "shared" : "excl", lock->fd, lock->process );
1036     DUMP_LONG_LONG( lock->start );
1037     fprintf( stderr, " end=" );
1038     DUMP_LONG_LONG( lock->end );
1039     fprintf( stderr, "\n" );
1040 }
1041 
1042 static int file_lock_signaled( struct object *obj, struct thread *thread )
1043 {
1044     struct file_lock *lock = (struct file_lock *)obj;
1045     /* lock is signaled if it has lost its owner */
1046     return !lock->process;
1047 }
1048 
1049 /* set (or remove) a Unix lock if possible for the given range */
1050 static int set_unix_lock( struct fd *fd, file_pos_t start, file_pos_t end, int type )
1051 {
1052     struct flock fl;
1053