1 : /*
2 : * Copyright 2000-2003 Niels Provos <provos@citi.umich.edu>
3 : * All rights reserved.
4 : *
5 : * Redistribution and use in source and binary forms, with or without
6 : * modification, are permitted provided that the following conditions
7 : * are met:
8 : * 1. Redistributions of source code must retain the above copyright
9 : * notice, this list of conditions and the following disclaimer.
10 : * 2. Redistributions in binary form must reproduce the above copyright
11 : * notice, this list of conditions and the following disclaimer in the
12 : * documentation and/or other materials provided with the distribution.
13 : * 3. The name of the author may not be used to endorse or promote products
14 : * derived from this software without specific prior written permission.
15 : *
16 : * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 : * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 : * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 : * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 : * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 : * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 : * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 : * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 : * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 : * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 : */
27 : #ifdef HAVE_CONFIG_H
28 : #include "config.h"
29 : #endif
30 :
#include <stdint.h>
#include <sys/types.h>
#include <sys/resource.h>
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#else
#include <sys/_time.h>
#endif
#include <sys/queue.h>
#include <sys/epoll.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
50 :
51 : #include "event.h"
52 : #include "event-internal.h"
53 : #include "evsignal.h"
54 : #include "log.h"
55 :
/*
 * Due to limitations in the epoll interface, we need to keep track of
 * all file descriptors ourselves.  One of these records exists per
 * descriptor slot in the backend's table.
 */
struct evepoll {
	struct event *evread;	/* event registered for reading, or NULL */
	struct event *evwrite;	/* event registered for writing, or NULL */
};
63 :
/* Per-event-base state owned by the epoll backend. */
struct epollop {
	struct evepoll *fds;		/* table of per-descriptor records, indexed by fd */
	int nfds;			/* number of entries in fds */
	struct epoll_event *events;	/* result buffer handed to epoll_wait() */
	int nevents;			/* number of entries in events */
	int epfd;			/* descriptor returned by epoll_create() */
};
71 :
/* Backend entry points; they are wired into the epollops table. */
static void *epoll_init(struct event_base *base);
static int epoll_add(void *arg, struct event *ev);
static int epoll_del(void *arg, struct event *ev);
static int epoll_dispatch(struct event_base *base, void *arg,
    struct timeval *tv);
static void epoll_dealloc(struct event_base *base, void *arg);
77 :
78 : struct eventop epollops = {
79 : "epoll",
80 : epoll_init,
81 : epoll_add,
82 : epoll_del,
83 : epoll_dispatch,
84 : epoll_dealloc,
85 : 1 /* need reinit */
86 : };
87 :
/*
 * FD_CLOSEONEXEC(x): mark descriptor x close-on-exec, warning (but not
 * failing) if fcntl() reports an error.  The argument is evaluated exactly
 * once.  When HAVE_SETFD is not defined the macro only evaluates its
 * argument and does nothing else.
 */
#ifdef HAVE_SETFD
#define FD_CLOSEONEXEC(x) do {						\
	int fd_closeonexec_ = (x);					\
	if (fcntl(fd_closeonexec_, F_SETFD, FD_CLOEXEC) == -1)		\
		event_warn("fcntl(%d, F_SETFD)", fd_closeonexec_);	\
} while (0)
#else
#define FD_CLOSEONEXEC(x) do { (void)(x); } while (0)
#endif
96 :
/* Table size used when the descriptor limit cannot be determined. */
#define NEVENT 32000

/*
 * Linux kernels, at least up to 2.6.24.4, cannot handle epoll timeouts
 * larger than (LONG_MAX - 999ULL)/HZ.  HZ can be as large as 1000 and
 * LONG_MAX as small as (1<<31)-1, which makes 2147482 msec the largest
 * timeout that is safe everywhere.  We round that down by 47 seconds,
 * i.e. to 35 minutes.
 */
#define MAX_EPOLL_TIMEOUT_MSEC (35*60*1000)
106 :
107 : static void *
108 1420 : epoll_init(struct event_base *base)
109 : {
110 1420 : int epfd, nfiles = NEVENT;
111 : struct rlimit rl;
112 : struct epollop *epollop;
113 :
114 : /* Disable epollueue when this environment variable is set */
115 1420 : if (getenv("EVENT_NOEPOLL"))
116 0 : return (NULL);
117 :
118 2840 : if (getrlimit(RLIMIT_NOFILE, &rl) == 0 &&
119 1420 : rl.rlim_cur != RLIM_INFINITY) {
120 : /*
121 : * Solaris is somewhat retarded - it's important to drop
122 : * backwards compatibility when making changes. So, don't
123 : * dare to put rl.rlim_cur here.
124 : */
125 1420 : nfiles = rl.rlim_cur - 1;
126 : }
127 :
128 : /* Initalize the kernel queue */
129 :
130 1420 : if ((epfd = epoll_create(nfiles)) == -1) {
131 0 : if (errno != ENOSYS)
132 0 : event_warn("epoll_create");
133 0 : return (NULL);
134 : }
135 :
136 1420 : FD_CLOSEONEXEC(epfd);
137 :
138 1420 : if (!(epollop = calloc(1, sizeof(struct epollop))))
139 0 : return (NULL);
140 :
141 1420 : epollop->epfd = epfd;
142 :
143 : /* Initalize fields */
144 1420 : epollop->events = malloc(nfiles * sizeof(struct epoll_event));
145 1420 : if (epollop->events == NULL) {
146 0 : free(epollop);
147 0 : return (NULL);
148 : }
149 1420 : epollop->nevents = nfiles;
150 :
151 1420 : epollop->fds = calloc(nfiles, sizeof(struct evepoll));
152 1420 : if (epollop->fds == NULL) {
153 0 : free(epollop->events);
154 0 : free(epollop);
155 0 : return (NULL);
156 : }
157 1420 : epollop->nfds = nfiles;
158 :
159 1420 : evsignal_init(base);
160 :
161 1420 : return (epollop);
162 : }
163 :
164 : static int
165 0 : epoll_recalc(struct event_base *base, void *arg, int max)
166 : {
167 0 : struct epollop *epollop = arg;
168 :
169 0 : if (max > epollop->nfds) {
170 : struct evepoll *fds;
171 : int nfds;
172 :
173 0 : nfds = epollop->nfds;
174 0 : while (nfds < max)
175 0 : nfds <<= 1;
176 :
177 0 : fds = realloc(epollop->fds, nfds * sizeof(struct evepoll));
178 0 : if (fds == NULL) {
179 0 : event_warn("realloc");
180 0 : return (-1);
181 : }
182 0 : epollop->fds = fds;
183 0 : memset(fds + epollop->nfds, 0,
184 0 : (nfds - epollop->nfds) * sizeof(struct evepoll));
185 0 : epollop->nfds = nfds;
186 : }
187 :
188 0 : return (0);
189 : }
190 :
191 : static int
192 1472 : epoll_dispatch(struct event_base *base, void *arg, struct timeval *tv)
193 : {
194 1472 : struct epollop *epollop = arg;
195 1472 : struct epoll_event *events = epollop->events;
196 : struct evepoll *evep;
197 1472 : int i, res, timeout = -1;
198 :
199 1472 : if (tv != NULL)
200 0 : timeout = tv->tv_sec * 1000 + (tv->tv_usec + 999) / 1000;
201 :
202 1472 : if (timeout > MAX_EPOLL_TIMEOUT_MSEC) {
203 : /* Linux kernels can wait forever if the timeout is too big;
204 : * see comment on MAX_EPOLL_TIMEOUT_MSEC. */
205 0 : timeout = MAX_EPOLL_TIMEOUT_MSEC;
206 : }
207 :
208 1472 : res = epoll_wait(epollop->epfd, events, epollop->nevents, timeout);
209 :
210 1471 : if (res == -1) {
211 52 : if (errno != EINTR) {
212 0 : event_warn("epoll_wait");
213 0 : return (-1);
214 : }
215 :
216 52 : evsignal_process(base);
217 52 : return (0);
218 1419 : } else if (base->sig.evsignal_caught) {
219 0 : evsignal_process(base);
220 : }
221 :
222 : event_debug(("%s: epoll_wait reports %d", __func__, res));
223 :
224 2838 : for (i = 0; i < res; i++) {
225 1419 : int what = events[i].events;
226 1419 : struct event *evread = NULL, *evwrite = NULL;
227 :
228 1419 : evep = (struct evepoll *)events[i].data.ptr;
229 :
230 1419 : if (what & (EPOLLHUP|EPOLLERR)) {
231 0 : evread = evep->evread;
232 0 : evwrite = evep->evwrite;
233 : } else {
234 1419 : if (what & EPOLLIN) {
235 1419 : evread = evep->evread;
236 : }
237 :
238 1419 : if (what & EPOLLOUT) {
239 0 : evwrite = evep->evwrite;
240 : }
241 : }
242 :
243 1419 : if (!(evread||evwrite))
244 0 : continue;
245 :
246 1419 : if (evread != NULL)
247 1419 : event_active(evread, EV_READ, 1);
248 1419 : if (evwrite != NULL)
249 0 : event_active(evwrite, EV_WRITE, 1);
250 : }
251 :
252 1419 : return (0);
253 : }
254 :
255 :
256 : static int
257 1420 : epoll_add(void *arg, struct event *ev)
258 : {
259 1420 : struct epollop *epollop = arg;
260 1420 : struct epoll_event epev = {0, {0}};
261 : struct evepoll *evep;
262 : int fd, op, events;
263 :
264 1420 : if (ev->ev_events & EV_SIGNAL)
265 0 : return (evsignal_add(ev));
266 :
267 1420 : fd = ev->ev_fd;
268 1420 : if (fd >= epollop->nfds) {
269 : /* Extent the file descriptor array as necessary */
270 0 : if (epoll_recalc(ev->ev_base, epollop, fd) == -1)
271 0 : return (-1);
272 : }
273 1420 : evep = &epollop->fds[fd];
274 1420 : op = EPOLL_CTL_ADD;
275 1420 : events = 0;
276 1420 : if (evep->evread != NULL) {
277 0 : events |= EPOLLIN;
278 0 : op = EPOLL_CTL_MOD;
279 : }
280 1420 : if (evep->evwrite != NULL) {
281 0 : events |= EPOLLOUT;
282 0 : op = EPOLL_CTL_MOD;
283 : }
284 :
285 1420 : if (ev->ev_events & EV_READ)
286 1420 : events |= EPOLLIN;
287 1420 : if (ev->ev_events & EV_WRITE)
288 0 : events |= EPOLLOUT;
289 :
290 1420 : epev.data.ptr = evep;
291 1420 : epev.events = events;
292 1420 : if (epoll_ctl(epollop->epfd, op, ev->ev_fd, &epev) == -1)
293 0 : return (-1);
294 :
295 : /* Update events responsible */
296 1420 : if (ev->ev_events & EV_READ)
297 1420 : evep->evread = ev;
298 1420 : if (ev->ev_events & EV_WRITE)
299 0 : evep->evwrite = ev;
300 :
301 1420 : return (0);
302 : }
303 :
304 : static int
305 1419 : epoll_del(void *arg, struct event *ev)
306 : {
307 1419 : struct epollop *epollop = arg;
308 1419 : struct epoll_event epev = {0, {0}};
309 : struct evepoll *evep;
310 : int fd, events, op;
311 1419 : int needwritedelete = 1, needreaddelete = 1;
312 :
313 1419 : if (ev->ev_events & EV_SIGNAL)
314 0 : return (evsignal_del(ev));
315 :
316 1419 : fd = ev->ev_fd;
317 1419 : if (fd >= epollop->nfds)
318 0 : return (0);
319 1419 : evep = &epollop->fds[fd];
320 :
321 1419 : op = EPOLL_CTL_DEL;
322 1419 : events = 0;
323 :
324 1419 : if (ev->ev_events & EV_READ)
325 1419 : events |= EPOLLIN;
326 1419 : if (ev->ev_events & EV_WRITE)
327 0 : events |= EPOLLOUT;
328 :
329 1419 : if ((events & (EPOLLIN|EPOLLOUT)) != (EPOLLIN|EPOLLOUT)) {
330 1419 : if ((events & EPOLLIN) && evep->evwrite != NULL) {
331 0 : needwritedelete = 0;
332 0 : events = EPOLLOUT;
333 0 : op = EPOLL_CTL_MOD;
334 1419 : } else if ((events & EPOLLOUT) && evep->evread != NULL) {
335 0 : needreaddelete = 0;
336 0 : events = EPOLLIN;
337 0 : op = EPOLL_CTL_MOD;
338 : }
339 : }
340 :
341 1419 : epev.events = events;
342 1419 : epev.data.ptr = evep;
343 :
344 1419 : if (needreaddelete)
345 1419 : evep->evread = NULL;
346 1419 : if (needwritedelete)
347 1419 : evep->evwrite = NULL;
348 :
349 1419 : if (epoll_ctl(epollop->epfd, op, fd, &epev) == -1)
350 0 : return (-1);
351 :
352 1419 : return (0);
353 : }
354 :
355 : static void
356 1419 : epoll_dealloc(struct event_base *base, void *arg)
357 : {
358 1419 : struct epollop *epollop = arg;
359 :
360 1419 : evsignal_dealloc(base);
361 1419 : if (epollop->fds)
362 1419 : free(epollop->fds);
363 1419 : if (epollop->events)
364 1419 : free(epollop->events);
365 1419 : if (epollop->epfd >= 0)
366 1419 : close(epollop->epfd);
367 :
368 1419 : memset(epollop, 0, sizeof(struct epollop));
369 1419 : free(epollop);
370 1419 : }
|