blob: 78b6d7916a6f1f57c3e0cd6e356c07783116f27b [file] [log] [blame]
QUICHE team53f08a32019-04-15 14:47:31 -04001// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef QUICHE_EPOLL_SERVER_H_
6#define QUICHE_EPOLL_SERVER_H_
7
8#include <fcntl.h>
9#include <stddef.h>
10#include <stdint.h>
11#include <sys/queue.h>
12
13#include <map>
14#include <memory>
15#include <string>
16#include <unordered_map>
17#include <unordered_set>
18#include <vector>
19
20// #define EPOLL_SERVER_EVENT_TRACING 1
21//
22// Defining EPOLL_SERVER_EVENT_TRACING
23// causes code to exist which didn't before.
24// This code tracks each event generated by the epollserver,
25// as well as providing a per-fd-registered summary of
26// events. Note that enabling this code vastly slows
27// down operations, and uses substantially more
28// memory. For these reasons, it should only be enabled by developers doing
29// development at their workstations.
30//
31// A structure called 'EventRecorder' will exist when
32// the macro is defined. See the EventRecorder class interface
33// within the SimpleEpollServer class for more details.
34#ifdef EPOLL_SERVER_EVENT_TRACING
35#include <ostream>
36#endif
37
38#include <sys/epoll.h>
39
40#include "net/third_party/quiche/src/epoll_server/platform/api/epoll_logging.h"
41
42namespace epoll_server {
43
44class SimpleEpollServer;
45class EpollAlarmCallbackInterface;
46class ReadPipeCallback;
47
// Event descriptor handed to EpollCallbackInterface::OnEvent. It carries the
// incoming event bits and lets the callback report back a ready-list mask.
struct EpollEvent {
  // Incoming events.
  int in_events;

  // The new event mask for the ready list (0 means don't get on the ready
  // list). This field is always initialized to 0 when the event is passed to
  // OnEvent.
  int out_ready_mask;

  // Stores 'events' as the incoming mask and clears the outgoing mask.
  // NOTE(review): the constructor is not explicit, so an int converts
  // implicitly to an EpollEvent; left as-is to keep existing callers working.
  EpollEvent(int events) : in_events{events}, out_ready_mask{0} {}
};
57
58// Callbacks which go into SimpleEpollServers are expected to derive from this
59// class.
60class EpollCallbackInterface {
61 public:
62 // Summary:
63 // Called when the callback is registered into a SimpleEpollServer.
64 // Args:
65 // eps - the poll server into which this callback was registered
66 // fd - the file descriptor which was registered
67 // event_mask - the event mask (composed of EPOLLIN, EPOLLOUT, etc)
68 // which was registered (and will initially be used
69 // in the epoll() calls)
70 virtual void OnRegistration(SimpleEpollServer* eps, int fd,
71 int event_mask) = 0;
72
73 // Summary:
74 // Called when the event_mask is modified (for a file-descriptor)
75 // Args:
76 // fd - the file descriptor which was registered
77 // event_mask - the event mask (composed of EPOLLIN, EPOLLOUT, etc)
78 // which was is now curren (and will be used
79 // in subsequent epoll() calls)
80 virtual void OnModification(int fd, int event_mask) = 0;
81
82 // Summary:
83 // Called whenever an event occurs on the file-descriptor.
84 // This is where the bulk of processing is expected to occur.
85 // Args:
86 // fd - the file descriptor which was registered
87 // event - a struct that contains the event mask (composed of EPOLLIN,
88 // EPOLLOUT, etc), a flag that indicates whether this is a true
89 // epoll_wait event vs one from the ready list, and an output
90 // parameter for OnEvent to inform the SimpleEpollServer whether to
91 // put this fd on the ready list.
92 virtual void OnEvent(int fd, EpollEvent* event) = 0;
93
94 // Summary:
95 // Called when the file-descriptor is unregistered from the poll-server.
96 // Args:
97 // fd - the file descriptor which was registered, and of this call, is now
98 // unregistered.
99 // replaced - If true, this callback is being replaced by another, otherwise
100 // it is simply being removed.
101 virtual void OnUnregistration(int fd, bool replaced) = 0;
102
103 // Summary:
104 // Called when the epoll server is shutting down. This is different from
105 // OnUnregistration because the subclass may want to clean up memory.
106 // This is called in leiu of OnUnregistration.
107 // Args:
108 // fd - the file descriptor which was registered.
109 virtual void OnShutdown(SimpleEpollServer* eps, int fd) = 0;
110
111 // Summary:
112 // Returns a name describing the class for use in debug/error reporting.
113 virtual std::string Name() const = 0;
114
115 virtual ~EpollCallbackInterface() {}
116
117 protected:
118 EpollCallbackInterface() {}
119};
120
121////////////////////////////////////////////////////////////////////////////////
122////////////////////////////////////////////////////////////////////////////////
123
124class SimpleEpollServer {
125 public:
126 typedef EpollAlarmCallbackInterface AlarmCB;
127 typedef EpollCallbackInterface CB;
128
129 typedef std::multimap<int64_t, AlarmCB*> TimeToAlarmCBMap;
130 typedef TimeToAlarmCBMap::iterator AlarmRegToken;
131
132 // Summary:
133 // Constructor:
134 // By default, we don't wait any amount of time for events, and
135 // we suggest to the epoll-system that we're going to use on-the-order
136 // of 1024 FDs.
137 SimpleEpollServer();
138
139 SimpleEpollServer(const SimpleEpollServer&) = delete;
140 SimpleEpollServer operator=(const SimpleEpollServer&) = delete;
141
142 ////////////////////////////////////////
143
144 // Destructor
145 virtual ~SimpleEpollServer();
146
147 ////////////////////////////////////////
148
149 // Summary
150 // Register a callback to be called whenever an event contained
151 // in the set of events included in event_mask occurs on the
152 // file-descriptor 'fd'
153 //
154 // Note that only one callback is allowed to be registered for
// any specific file-descriptor.
156 //
157 // If a callback is registered for a file-descriptor which has already
158 // been registered, then the previous callback is unregistered with
159 // the 'replaced' flag set to true. I.e. the previous callback's
160 // OnUnregistration() function is called like so:
161 // OnUnregistration(fd, true);
162 //
163 // The epoll server does NOT take on ownership of the callback: the callback
164 // creator is responsible for managing that memory.
165 //
166 // Args:
167 // fd - a valid file-descriptor
168 // cb - an instance of a subclass of EpollCallbackInterface
169 // event_mask - a combination of (EPOLLOUT, EPOLLIN.. etc) indicating
170 // the events for which the callback would like to be
171 // called.
172 virtual void RegisterFD(int fd, CB* cb, int event_mask);
173
174 ////////////////////////////////////////
175
176 // Summary:
177 // A shortcut for RegisterFD which sets things up such that the
178 // callback is called when 'fd' is available for writing.
179 // Args:
180 // fd - a valid file-descriptor
181 // cb - an instance of a subclass of EpollCallbackInterface
182 virtual void RegisterFDForWrite(int fd, CB* cb);
183
184 ////////////////////////////////////////
185
186 // Summary:
187 // A shortcut for RegisterFD which sets things up such that the
188 // callback is called when 'fd' is available for reading or writing.
189 // Args:
190 // fd - a valid file-descriptor
191 // cb - an instance of a subclass of EpollCallbackInterface
192 virtual void RegisterFDForReadWrite(int fd, CB* cb);
193
194 ////////////////////////////////////////
195
196 // Summary:
197 // A shortcut for RegisterFD which sets things up such that the
198 // callback is called when 'fd' is available for reading.
199 // Args:
200 // fd - a valid file-descriptor
201 // cb - an instance of a subclass of EpollCallbackInterface
202 virtual void RegisterFDForRead(int fd, CB* cb);
203
204 ////////////////////////////////////////
205
206 // Summary:
207 // Removes the FD and the associated callback from the pollserver.
208 // If the callback is registered with other FDs, they will continue
209 // to be processed using the callback without modification.
210 // If the file-descriptor specified is not registered in the
211 // epoll_server, then nothing happens as a result of this call.
212 // Args:
213 // fd - the file-descriptor which should no-longer be monitored.
214 virtual void UnregisterFD(int fd);
215
216 ////////////////////////////////////////
217
218 // Summary:
219 // Modifies the event mask for the file-descriptor, replacing
220 // the old event_mask with the new one specified here.
221 // If the file-descriptor specified is not registered in the
222 // epoll_server, then nothing happens as a result of this call.
223 // Args:
224 // fd - the fd whose event mask should be modified.
225 // event_mask - the new event mask.
226 virtual void ModifyCallback(int fd, int event_mask);
227
228 ////////////////////////////////////////
229
230 // Summary:
231 // Modifies the event mask for the file-descriptor such that we
232 // no longer request events when 'fd' is readable.
233 // If the file-descriptor specified is not registered in the
234 // epoll_server, then nothing happens as a result of this call.
235 // Args:
236 // fd - the fd whose event mask should be modified.
237 virtual void StopRead(int fd);
238
239 ////////////////////////////////////////
240
241 // Summary:
242 // Modifies the event mask for the file-descriptor such that we
243 // request events when 'fd' is readable.
244 // If the file-descriptor specified is not registered in the
245 // epoll_server, then nothing happens as a result of this call.
246 // Args:
247 // fd - the fd whose event mask should be modified.
248 virtual void StartRead(int fd);
249
250 ////////////////////////////////////////
251
252 // Summary:
253 // Modifies the event mask for the file-descriptor such that we
254 // no longer request events when 'fd' is writable.
255 // If the file-descriptor specified is not registered in the
256 // epoll_server, then nothing happens as a result of this call.
257 // Args:
258 // fd - the fd whose event mask should be modified.
259 virtual void StopWrite(int fd);
260
261 ////////////////////////////////////////
262
263 // Summary:
264 // Modifies the event mask for the file-descriptor such that we
265 // request events when 'fd' is writable.
266 // If the file-descriptor specified is not registered in the
267 // epoll_server, then nothing happens as a result of this call.
268 // Args:
269 // fd - the fd whose event mask should be modified.
270 virtual void StartWrite(int fd);
271
272 ////////////////////////////////////////
273
274 // Summary:
275 // Looks up the callback associated with the file-descriptor 'fd'.
276 // If a callback is associated with this file-descriptor, then
// its OnEvent() method is called with the file-descriptor 'fd',
278 // and event_mask 'event_mask'
279 //
280 // If no callback is registered for this file-descriptor, nothing
281 // will happen as a result of this call.
282 //
283 // This function is used internally by the SimpleEpollServer, but is
284 // available publicly so that events might be 'faked'. Calling
285 // this function with an fd and event_mask is equivalent (as far
286 // as the callback is concerned) to having a real event generated
287 // by epoll (except, of course, that read(), etc won't necessarily
288 // be able to read anything)
289 // Args:
290 // fd - the file-descriptor on which an event has occurred.
291 // event_mask - a bitmask representing the events which have occurred
292 // on/for this fd. This bitmask is composed of
293 // POLLIN, POLLOUT, etc.
294 //
295 void HandleEvent(int fd, int event_mask);
296
297 // Summary:
298 // Call this when you want the pollserver to
299 // wait for events and execute the callbacks associated with
300 // the file-descriptors on which those events have occurred.
301 // Depending on the value of timeout_in_us_, this may or may
302 // not return immediately. Please reference the set_timeout()
303 // function for the specific behaviour.
304 virtual void WaitForEventsAndExecuteCallbacks();
305
306 // Summary:
307 // When an fd is registered to use edge trigger notification, the ready
308 // list can be used to simulate level trigger semantics. Edge trigger
309 // registration doesn't send an initial event, and only rising edge (going
310 // from blocked to unblocked) events are sent. A callback can put itself on
311 // the ready list by calling SetFDReady() after calling RegisterFD(). The
312 // OnEvent method of all callbacks associated with the fds on the ready
313 // list will be called immediately after processing the events returned by
314 // epoll_wait(). The fd is removed from the ready list before the
315 // callback's OnEvent() method is invoked. To stay on the ready list, the
316 // OnEvent() (or some function in that call chain) must call SetFDReady
317 // again. When a fd is unregistered using UnregisterFD(), the fd is
318 // automatically removed from the ready list.
319 //
320 // When the callback for a edge triggered fd hits the falling edge (about
321 // to block, either because of it got an EAGAIN, or had a short read/write
322 // operation), it should remove itself from the ready list using
323 // SetFDNotReady() (since OnEvent cannot distinguish between invocation
324 // from the ready list vs from a normal epoll event). All four ready list
325 // methods are safe to be called within the context of the callbacks.
326 //
327 // Since the ready list invokes EpollCallbackInterface::OnEvent, only fds
328 // that are registered with the SimpleEpollServer will be put on the ready
329 // list. SetFDReady() and SetFDNotReady() will do nothing if the
330 // SimpleEpollServer doesn't know about the fd passed in.
331 //
332 // Since the ready list cannot reliably determine proper set of events
333 // which should be sent to the callback, SetFDReady() requests the caller
334 // to provide the ready list with the event mask, which will be used later
335 // when OnEvent() is invoked by the ready list. Hence, the event_mask
// passed to SetFDReady() does not affect the actual epoll registration of
337 // the fd with the kernel. If a fd is already put on the ready list, and
338 // SetFDReady() is called again for that fd with a different event_mask,
339 // the event_mask will be updated.
340 virtual void SetFDReady(int fd, int events_to_fake);
341
342 virtual void SetFDNotReady(int fd);
343
344 // Summary:
345 // IsFDReady(), ReadyListSize(), and VerifyReadyList are intended as
346 // debugging tools and for writing unit tests.
// IsFDReady() returns whether a fd is in the ready list.
348 // ReadyListSize() returns the number of fds on the ready list.
349 // VerifyReadyList() checks the consistency of internal data structure. It
350 // will CHECK if it finds an error.
351 virtual bool IsFDReady(int fd) const;
352
353 size_t ReadyListSize() const { return ready_list_size_; }
354
355 void VerifyReadyList() const;
356
357 ////////////////////////////////////////
358
359 // Summary:
360 // Registers an alarm 'ac' to go off at time 'timeout_time_in_us'.
361 // If the callback returns a positive number from its OnAlarm() function,
362 // then the callback will be re-registered at that time, else the alarm
363 // owner is responsible for freeing up memory.
364 //
// Important: A given AlarmCB* cannot be registered again if it is already
366 // registered. If a user wants to register a callback again it should first
367 // unregister the previous callback before calling RegisterAlarm again.
368 // Args:
369 // timeout_time_in_us - the absolute time at which the alarm should go off
370 // ac - the alarm which will be called.
371 virtual void RegisterAlarm(int64_t timeout_time_in_us, AlarmCB* ac);
372
373 // Summary:
374 // Registers an alarm 'ac' to go off at time: (ApproximateNowInUs() +
375 // delta_in_us). While this is somewhat less accurate (see the description
376 // for ApproximateNowInUs() to see how 'approximate'), the error is never
377 // worse than the amount of time it takes to process all events in one
378 // WaitForEvents. As with 'RegisterAlarm()', if the callback returns a
379 // positive number from its OnAlarm() function, then the callback will be
380 // re-registered at that time, else the alarm owner is responsible for
381 // freeing up memory.
382 // Note that this function is purely a convienence. The
383 // same thing may be accomplished by using RegisterAlarm with
384 // ApproximateNowInUs() directly.
385 //
386 // Important: A give AlarmCB* can not be registered again if it is already
387 // registered. If a user wants to register a callback again it should first
388 // unregister the previous callback before calling RegisterAlarm again.
389 // Args:
390 // delta_in_us - the delta in microseconds from the ApproximateTimeInUs() at
391 // which point the alarm should go off.
392 // ac - the alarm which will be called.
393 void RegisterAlarmApproximateDelta(int64_t delta_in_us, AlarmCB* ac) {
394 RegisterAlarm(ApproximateNowInUsec() + delta_in_us, ac);
395 }
396
397 ////////////////////////////////////////
398
399 // Summary:
400 // Unregister the alarm referred to by iterator_token; Callers should
401 // be warned that a token may have become already invalid when OnAlarm()
402 // is called, was unregistered, or OnShutdown was called on that alarm.
403 // Args:
404 // iterator_token - iterator to the alarm callback to unregister.
405 virtual void UnregisterAlarm(
406 const SimpleEpollServer::AlarmRegToken& iterator_token);
407
408 virtual SimpleEpollServer::AlarmRegToken ReregisterAlarm(
409 SimpleEpollServer::AlarmRegToken iterator_token,
410 int64_t timeout_time_in_us);
411
412 ////////////////////////////////////////
413
414 // Summary:
415 // returns the number of file-descriptors registered in this
416 // SimpleEpollServer.
417 // Returns:
418 // number of FDs registered (discounting the internal pipe used for Wake)
419 virtual int NumFDsRegistered() const;
420
421 // Summary:
422 // Force the epoll server to wake up (by writing to an internal pipe).
423 virtual void Wake();
424
425 // Summary:
426 // Wrapper around WallTimer's NowInUsec. We do this so that we can test
427 // SimpleEpollServer without using the system clock (and can avoid the
428 // flakiness that would ensue)
429 // Returns:
430 // the current time as number of microseconds since the Unix epoch.
431 virtual int64_t NowInUsec() const;
432
433 // Summary:
434 // Since calling NowInUsec() many thousands of times per
435 // WaitForEventsAndExecuteCallbacks function call is, to say the least,
436 // inefficient, we allow users to use an approximate time instead. The
437 // time returned from this function is as accurate as NowInUsec() when
438 // WaitForEventsAndExecuteCallbacks is not an ancestor of the caller's
439 // callstack.
440 // However, when WaitForEventsAndExecuteCallbacks -is- an ancestor, then
441 // this function returns the time at which the
442 // WaitForEventsAndExecuteCallbacks function started to process events or
443 // alarms.
444 //
445 // Essentially, this function makes available a fast and mostly accurate
446 // mechanism for getting the time for any function handling an event or
447 // alarm. When functions which are not handling callbacks or alarms call
448 // this function, they get the slow and "absolutely" accurate time.
449 //
450 // Users should be encouraged to use this function.
451 // Returns:
452 // the "approximate" current time as number of microseconds since the Unix
453 // epoch.
454 virtual int64_t ApproximateNowInUsec() const;
455
456 static std::string EventMaskToString(int event_mask);
457
458 // Summary:
459 // Logs the state of the epoll server with EPOLL_LOG(ERROR).
460 void LogStateOnCrash();
461
462 // Summary:
463 // Set the timeout to the value specified.
464 // If the timeout is set to a negative number,
465 // WaitForEventsAndExecuteCallbacks() will only return when an event has
466 // occurred
467 // If the timeout is set to zero,
468 // WaitForEventsAndExecuteCallbacks() will return immediately
469 // If the timeout is set to a positive number,
470 // WaitForEventsAndExecuteCallbacks() will return when an event has
471 // occurred, or when timeout_in_us microseconds has elapsed, whichever
472 // is first.
473 // Args:
474 // timeout_in_us - value specified depending on behaviour desired.
475 // See above.
476 void set_timeout_in_us(int64_t timeout_in_us) {
477 timeout_in_us_ = timeout_in_us;
478 }
479
480 ////////////////////////////////////////
481
482 // Summary:
483 // Accessor for the current value of timeout_in_us.
484 int timeout_in_us_for_test() const { return timeout_in_us_; }
485
486 // Summary:
487 // Returns true when the SimpleEpollServer() is being destroyed.
488 bool in_shutdown() const { return in_shutdown_; }
489
// Compatibility stub; the body is empty and the call has no effect.
void Shutdown() {
}
492
493 // Summary:
494 // A function for implementing the ready list. It invokes OnEvent for each
495 // of the fd in the ready list, and takes care of adding them back to the
496 // ready list if the callback requests it (by checking that out_ready_mask
497 // is non-zero).
498 void CallReadyListCallbacks();
499
500 int64_t LastDelayInUsec() const { return last_delay_in_usec_; }
501
502 protected:
503 virtual void SetNonblocking(int fd);
504
505 // This exists here so that we can override this function in unittests
506 // in order to make effective mock SimpleEpollServer objects.
507 virtual int epoll_wait_impl(int epfd, struct epoll_event* events,
508 int max_events, int timeout_in_ms);
509
510 // this struct is used internally, and is never used by anything external
511 // to this class. Some of its members are declared mutable to get around the
512 // restriction imposed by hash_set. Since hash_set knows nothing about the
513 // objects it stores, it has to assume that every bit of the object is used
514 // in the hash function and equal_to comparison. Thus hash_set::iterator is a
515 // const iterator. In this case, the only thing that must stay constant is
516 // fd. Everything else are just along for the ride and changing them doesn't
517 // compromise the hash_set integrity.
518 struct CBAndEventMask {
519 CBAndEventMask()
520 : cb(NULL),
521 fd(-1),
522 event_mask(0),
523 events_asserted(0),
524 events_to_fake(0),
525 in_use(false) {
526 entry.le_next = NULL;
527 entry.le_prev = NULL;
528 }
529
530 CBAndEventMask(EpollCallbackInterface* cb, int event_mask, int fd)
531 : cb(cb),
532 fd(fd),
533 event_mask(event_mask),
534 events_asserted(0),
535 events_to_fake(0),
536 in_use(false) {
537 entry.le_next = NULL;
538 entry.le_prev = NULL;
539 }
540
541 // Required operator for hash_set. Normally operator== should be a free
542 // standing function. However, since CBAndEventMask is a protected type and
543 // it will never be a base class, it makes no difference.
544 bool operator==(const CBAndEventMask& cb_and_mask) const {
545 return fd == cb_and_mask.fd;
546 }
547 // A callback. If the fd is unregistered inside the callchain of OnEvent,
548 // the cb will be set to NULL.
549 mutable EpollCallbackInterface* cb;
550
551 mutable LIST_ENTRY(CBAndEventMask) entry;
552 // file descriptor registered with the epoll server.
553 int fd;
554 // the current event_mask registered for this callback.
555 mutable int event_mask;
556 // the event_mask that was returned by epoll
557 mutable int events_asserted;
558 // the event_mask for the ready list to use to call OnEvent.
559 mutable int events_to_fake;
560 // toggle around calls to OnEvent to tell UnregisterFD to not erase the
561 // iterator because HandleEvent is using it.
562 mutable bool in_use;
563 };
564
565 // Custom hash function to be used by hash_set.
566 struct CBAndEventMaskHash {
567 size_t operator()(const CBAndEventMask& cb_and_eventmask) const {
568 return static_cast<size_t>(cb_and_eventmask.fd);
569 }
570 };
571
572 using FDToCBMap = std::unordered_set<CBAndEventMask, CBAndEventMaskHash>;
573
574 // the following four functions are OS-specific, and are likely
575 // to be changed in a subclass if the poll/select method is changed
576 // from epoll.
577
578 // Summary:
579 // Deletes a file-descriptor from the set of FDs that should be
580 // monitored with epoll.
581 // Note that this only deals with modifying data relating -directly-
582 // with the epoll call-- it does not modify any data within the
583 // epoll_server.
584 // Args:
585 // fd - the file descriptor to-be-removed from the monitoring set
586 virtual void DelFD(int fd) const;
587
588 ////////////////////////////////////////
589
590 // Summary:
591 // Adds a file-descriptor to the set of FDs that should be
592 // monitored with epoll.
593 // Note that this only deals with modifying data relating -directly-
594 // with the epoll call.
595 // Args:
596 // fd - the file descriptor to-be-added to the monitoring set
597 // event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc
598 // OR'd together) which will be associated with this
599 // FD initially.
600 virtual void AddFD(int fd, int event_mask) const;
601
602 ////////////////////////////////////////
603
604 // Summary:
605 // Modifies a file-descriptor in the set of FDs that should be
606 // monitored with epoll.
607 // Note that this only deals with modifying data relating -directly-
608 // with the epoll call.
609 // Args:
610 // fd - the file descriptor to-be-added to the monitoring set
611 // event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc
612 // OR'd together) which will be associated with this
613 // FD after this call.
614 virtual void ModFD(int fd, int event_mask) const;
615
616 ////////////////////////////////////////
617
618 // Summary:
// Modifies the event mask associated with an FD in the set of
620 // data needed by epoll.
621 // Events are removed before they are added, thus, if ~0 is put
622 // in 'remove_event', whatever is put in 'add_event' will be
623 // the new event mask.
624 // If the file-descriptor specified is not registered in the
625 // epoll_server, then nothing happens as a result of this call.
626 // Args:
627 // fd - the file descriptor whose event mask is to be modified
628 // remove_event - the events which are to be removed from the current
629 // event_mask
630 // add_event - the events which are to be added to the current event_mask
631 //
632 //
633 virtual void ModifyFD(int fd, int remove_event, int add_event);
634
635 ////////////////////////////////////////
636
637 // Summary:
// Waits for events, and calls HandleEvent() for each
639 // fd, event pair discovered to possibly have an event.
640 // Note that a callback (B) may get a spurious event if
641 // another callback (A) has closed a file-descriptor N, and
642 // the callback (B) has a newly opened file-descriptor, which
643 // also happens to be N.
644 virtual void WaitForEventsAndCallHandleEvents(int64_t timeout_in_us,
645 struct epoll_event events[],
646 int events_size);
647
648 // Summary:
649 // An internal function for implementing the ready list. It adds a fd's
650 // CBAndEventMask to the ready list. If the fd is already on the ready
651 // list, it is a no-op.
652 void AddToReadyList(CBAndEventMask* cb_and_mask);
653
654 // Summary:
// An internal function for implementing the ready list. It removes an fd's
656 // CBAndEventMask from the ready list. If the fd is not on the ready list,
657 // it is a no-op.
658 void RemoveFromReadyList(const CBAndEventMask& cb_and_mask);
659
660 // Summary:
661 // Calls any pending alarms that should go off and reregisters them if they
662 // were recurring.
663 virtual void CallAndReregisterAlarmEvents();
664
665 // The file-descriptor created for epolling
666 int epoll_fd_;
667
668 // The mapping of file-descriptor to CBAndEventMasks
669 FDToCBMap cb_map_;
670
671 // Custom hash function to be used by hash_set.
672 struct AlarmCBHash {
673 size_t operator()(AlarmCB* const& p) const {
674 return reinterpret_cast<size_t>(p);
675 }
676 };
677
678 // TODO(sushantj): Having this hash_set is avoidable. We currently have it
679 // only so that we can enforce stringent checks that a caller can not register
680 // the same alarm twice. One option is to have an implementation in which
681 // this hash_set is used only in the debug mode.
682 using AlarmCBMap = std::unordered_set<AlarmCB*, AlarmCBHash>;
683 AlarmCBMap all_alarms_;
684
685 TimeToAlarmCBMap alarm_map_;
686
687 // The amount of time in microseconds that we'll wait before returning
688 // from the WaitForEventsAndExecuteCallbacks() function.
689 // If this is positive, wait that many microseconds.
690 // If this is negative, wait forever, or for the first event that occurs
691 // If this is zero, never wait for an event.
692 int64_t timeout_in_us_;
693
694 // This is nonzero only after the invocation of epoll_wait_impl within
695 // WaitForEventsAndCallHandleEvents and before the function
696 // WaitForEventsAndExecuteCallbacks returns. At all other times, this is
697 // zero. This enables us to have relatively accurate time returned from the
// ApproximateNowInUsec() function. See that function for more details.
699 int64_t recorded_now_in_us_;
700
701 // This is used to implement CallAndReregisterAlarmEvents. This stores
702 // all alarms that were reregistered because OnAlarm() returned a
// value > 0 and the time at which they should be executed is less than
704 // the current time. By storing such alarms in this map we ensure
705 // that while calling CallAndReregisterAlarmEvents we do not call
706 // OnAlarm on any alarm in this set. This ensures that we do not
707 // go in an infinite loop.
708 AlarmCBMap alarms_reregistered_and_should_be_skipped_;
709
710 LIST_HEAD(ReadyList, CBAndEventMask) ready_list_;
711 LIST_HEAD(TmpList, CBAndEventMask) tmp_list_;
712 int ready_list_size_;
713 // TODO(alyssar): make this into something that scales up.
714 static const int events_size_ = 256;
715 struct epoll_event events_[256];
716
717#ifdef EPOLL_SERVER_EVENT_TRACING
718 struct EventRecorder {
719 public:
720 EventRecorder() : num_records_(0), record_threshold_(10000) {}
721
722 ~EventRecorder() { Clear(); }
723
724 // When a number of events equals the record threshold,
725 // the collected data summary for all FDs will be written
726 // to EPOLL_LOG(INFO). Note that this does not include the
727 // individual events (if you'reinterested in those, you'll
728 // have to get at them programmatically).
729 // After any such flushing to EPOLL_LOG(INFO) all events will
730 // be cleared.
731 // Note that the definition of an 'event' is a bit 'hazy',
732 // as it includes the 'Unregistration' event, and perhaps
733 // others.
734 void set_record_threshold(int64_t new_threshold) {
735 record_threshold_ = new_threshold;
736 }
737
738 void Clear() {
739 for (int i = 0; i < debug_events_.size(); ++i) {
740 delete debug_events_[i];
741 }
742 debug_events_.clear();
743 unregistered_fds_.clear();
744 event_counts_.clear();
745 }
746
747 void MaybeRecordAndClear() {
748 ++num_records_;
749 if ((num_records_ > record_threshold_) && (record_threshold_ > 0)) {
750 EPOLL_LOG(INFO) << "\n" << *this;
751 num_records_ = 0;
752 Clear();
753 }
754 }
755
756 void RecordFDMaskEvent(int fd, int mask, const char* function) {
757 FDMaskOutput* fdmo = new FDMaskOutput(fd, mask, function);
758 debug_events_.push_back(fdmo);
759 MaybeRecordAndClear();
760 }
761
762 void RecordEpollWaitEvent(int timeout_in_ms, int num_events_generated) {
763 EpollWaitOutput* ewo =
764 new EpollWaitOutput(timeout_in_ms, num_events_generated);
765 debug_events_.push_back(ewo);
766 MaybeRecordAndClear();
767 }
768
769 void RecordEpollEvent(int fd, int event_mask) {
770 Events& events_for_fd = event_counts_[fd];
771 events_for_fd.AssignFromMask(event_mask);
772 MaybeRecordAndClear();
773 }
774
775 friend ostream& operator<<(ostream& os, const EventRecorder& er) {
776 for (int i = 0; i < er.unregistered_fds_.size(); ++i) {
777 os << "fd: " << er.unregistered_fds_[i] << "\n";
778 os << er.unregistered_fds_[i];
779 }
780 for (EventCountsMap::const_iterator i = er.event_counts_.begin();
781 i != er.event_counts_.end(); ++i) {
782 os << "fd: " << i->first << "\n";
783 os << i->second;
784 }
785 for (int i = 0; i < er.debug_events_.size(); ++i) {
786 os << *(er.debug_events_[i]) << "\n";
787 }
788 return os;
789 }
790
791 void RecordUnregistration(int fd) {
792 EventCountsMap::iterator i = event_counts_.find(fd);
793 if (i != event_counts_.end()) {
794 unregistered_fds_.push_back(i->second);
795 event_counts_.erase(i);
796 }
797 MaybeRecordAndClear();
798 }
799
800 protected:
// Abstract base for one recorded trace entry. Subclasses describe
// themselves in OutputToStream(); operator<< forwards to it.
class DebugOutput {
 public:
  // Streams `debug_output` by delegating to its OutputToStream() and
  // returns `os`.
  friend std::ostream& operator<<(std::ostream& os,
                                  const DebugOutput& debug_output) {
    // OutputToStream() takes a pointer, so hand it the stream's address.
    debug_output.OutputToStream(&os);
    return os;
  }
  // Writes a description of this entry to `*os`.
  virtual void OutputToStream(std::ostream* os) const = 0;
  // Virtual so that entries can be deleted through a DebugOutput*.
  virtual ~DebugOutput() = default;
};
810
811 class FDMaskOutput : public DebugOutput {
812 public:
813 FDMaskOutput(int fd, int mask, const char* function)
814 : fd_(fd), mask_(mask), function_(function) {}
815 virtual void OutputToStream(ostream* os) const {
816 (*os) << "func: " << function_ << "\tfd: " << fd_;
817 if (mask_ != 0) {
818 (*os) << "\tmask: " << EventMaskToString(mask_);
819 }
820 }
821 int fd_;
822 int mask_;
823 const char* function_;
824 };
825
826 class EpollWaitOutput : public DebugOutput {
827 public:
828 EpollWaitOutput(int timeout_in_ms, int num_events_generated)
829 : timeout_in_ms_(timeout_in_ms),
830 num_events_generated_(num_events_generated) {}
831 virtual void OutputToStream(ostream* os) const {
832 (*os) << "timeout_in_ms: " << timeout_in_ms_
833 << "\tnum_events_generated: " << num_events_generated_;
834 }
835
836 protected:
837 int timeout_in_ms_;
838 int num_events_generated_;
839 };
840
// Tally of how many times each epoll event flag has been observed.
// Every counter starts at zero.
struct Events {
  Events() = default;

  // Despite the name, this accumulates: each flag that is set in
  // `event_mask` increments its counter by one; nothing is reset.
  void AssignFromMask(int event_mask) {
    if (event_mask & EPOLLIN) ++epoll_in;
    if (event_mask & EPOLLPRI) ++epoll_pri;
    if (event_mask & EPOLLOUT) ++epoll_out;
    if (event_mask & EPOLLRDNORM) ++epoll_rdnorm;
    if (event_mask & EPOLLRDBAND) ++epoll_rdband;
    if (event_mask & EPOLLWRNORM) ++epoll_wrnorm;
    if (event_mask & EPOLLWRBAND) ++epoll_wrband;
    if (event_mask & EPOLLMSG) ++epoll_msg;
    if (event_mask & EPOLLERR) ++epoll_err;
    if (event_mask & EPOLLHUP) ++epoll_hup;
    if (event_mask & EPOLLONESHOT) ++epoll_oneshot;
    if (event_mask & EPOLLET) ++epoll_et;
  }

  // Writes one "\t <FLAG>: <count>\n" line per flag whose counter is
  // non-zero, in declaration order. Writes nothing for an all-zero tally.
  friend std::ostream& operator<<(std::ostream& os, const Events& ev) {
    // Shared by every flag so the skip-if-zero rule lives in one place.
    const auto emit = [&os](const char* label, unsigned int count) {
      if (count != 0) {
        os << label << count << "\n";
      }
    };
    emit("\t EPOLLIN: ", ev.epoll_in);
    emit("\t EPOLLPRI: ", ev.epoll_pri);
    emit("\t EPOLLOUT: ", ev.epoll_out);
    emit("\t EPOLLRDNORM: ", ev.epoll_rdnorm);
    emit("\t EPOLLRDBAND: ", ev.epoll_rdband);
    emit("\t EPOLLWRNORM: ", ev.epoll_wrnorm);
    emit("\t EPOLLWRBAND: ", ev.epoll_wrband);
    emit("\t EPOLLMSG: ", ev.epoll_msg);
    emit("\t EPOLLERR: ", ev.epoll_err);
    emit("\t EPOLLHUP: ", ev.epoll_hup);
    emit("\t EPOLLONESHOT: ", ev.epoll_oneshot);
    emit("\t EPOLLET: ", ev.epoll_et);
    return os;
  }

  unsigned int epoll_in = 0;
  unsigned int epoll_pri = 0;
  unsigned int epoll_out = 0;
  unsigned int epoll_rdnorm = 0;
  unsigned int epoll_rdband = 0;
  unsigned int epoll_wrnorm = 0;
  unsigned int epoll_wrband = 0;
  unsigned int epoll_msg = 0;
  unsigned int epoll_err = 0;
  unsigned int epoll_hup = 0;
  unsigned int epoll_oneshot = 0;
  unsigned int epoll_et = 0;
};
924
// Trace entries appended by RecordFDMaskEvent() and RecordEpollWaitEvent().
// The pointed-to objects are allocated with new and deleted in Clear().
925 std::vector<DebugOutput*> debug_events_;
// Counters that RecordUnregistration() copied out of event_counts_ when
// the corresponding fd was unregistered. The fd itself is not stored.
926 std::vector<Events> unregistered_fds_;
// Per-fd event counters, keyed by fd and updated by RecordEpollEvent().
927 using EventCountsMap = std::unordered_map<int, Events>;
928 EventCountsMap event_counts_;
// Incremented on every MaybeRecordAndClear() call and reset to zero when
// that function flushes.
929 int64_t num_records_;
// Compared against num_records_ in MaybeRecordAndClear(); a value that is
// not greater than zero disables flushing. Set by set_record_threshold().
930 int64_t record_threshold_;
931 };
932
933 void ClearEventRecords() { event_recorder_.Clear(); }
934 void WriteEventRecords(ostream* os) const { (*os) << event_recorder_; }
935
// Recorder behind ClearEventRecords() and WriteEventRecords(). Declared
// mutable, so it may be modified from const member functions.
936 mutable EventRecorder event_recorder_;
937
938#endif
939
940 private:
941 // Helper functions used in the destructor.
942 void CleanupFDToCBMap();
943 void CleanupTimeToAlarmCBMap();
944
945 // The callback registered to the fds below. As the purpose of their
946 // registration is to wake the epoll server, it just clears the pipe and
947 // returns.
948 std::unique_ptr<ReadPipeCallback> wake_cb_;
949
950 // A pipe owned by the epoll server. The server will be registered to listen
951 // on read_fd_ and can be woken by Wake(), which writes to write_fd_.
952 int read_fd_;
953 int write_fd_;
954
955 // This boolean is checked to see if it is false at the top of the
956 // WaitForEventsAndExecuteCallbacks function. If not, then it either returns
957 // without doing work, and logs to ERROR, or aborts the program (in
958 // DEBUG mode). If so, then it sets the bool to true, does work, and
959 // sets it back to false when done. This catches unwanted recursion.
960 bool in_wait_for_events_and_execute_callbacks_;
961
962 // True while the SimpleEpollServer is being destroyed.
963 bool in_shutdown_;
// NOTE(review): presumably the most recent delay, in microseconds, judging
// by the name; nothing in this header shows how it is used -- confirm
// against the implementation file.
964 int64_t last_delay_in_usec_;
965};
966
967class EpollAlarmCallbackInterface {
968 public:
969 // Summary:
970 // Called when an alarm times out. Invalidates an AlarmRegToken.
971 // WARNING: If a token was saved to refer to an alarm callback, OnAlarm must
972 // delete it, as the reference is no longer valid.
973 // Returns:
974 // the unix time (in microseconds) at which this alarm should be signaled
975 // again, or 0 if the alarm should be removed.
976 virtual int64_t OnAlarm() = 0;
977
978 // Summary:
979 // Called when the an alarm is registered. Invalidates an AlarmRegToken.
980 // Args:
981 // token: the iterator to the alarm registered in the alarm map.
982 // WARNING: this token becomes invalid when the alarm fires, is
983 // unregistered, or OnShutdown is called on that alarm.
984 // eps: the epoll server the alarm is registered with.
985 virtual void OnRegistration(const SimpleEpollServer::AlarmRegToken& token,
986 SimpleEpollServer* eps) = 0;
987
988 // Summary:
989 // Called when the an alarm is unregistered.
990 // WARNING: It is not valid to unregister a callback and then use the token
991 // that was saved to refer to the callback.
992 virtual void OnUnregistration() = 0;
993
994 // Summary:
995 // Called when the epoll server is shutting down.
996 // Invalidates the AlarmRegToken that was given when this alarm was
997 // registered.
998 virtual void OnShutdown(SimpleEpollServer* eps) = 0;
999
1000 virtual ~EpollAlarmCallbackInterface() {}
1001
1002 protected:
1003 EpollAlarmCallbackInterface() {}
1004};
1005
1006// A simple alarm which unregisters itself on destruction.
1007//
1008// PLEASE NOTE:
1009// Any classes overriding these functions must either call the implementation
1010// of the parent class, or is must otherwise make sure that the 'registered_'
1011// boolean and the token, 'token_', are updated appropriately.
1012class EpollAlarm : public EpollAlarmCallbackInterface {
1013 public:
1014 EpollAlarm();
1015
1016 ~EpollAlarm() override;
1017
1018 // Marks the alarm as unregistered and returns 0. The return value may be
1019 // safely ignored by subclasses.
1020 int64_t OnAlarm() override;
1021
1022 // Marks the alarm as registered, and stores the token.
1023 void OnRegistration(const SimpleEpollServer::AlarmRegToken& token,
1024 SimpleEpollServer* eps) override;
1025
1026 // Marks the alarm as unregistered.
1027 void OnUnregistration() override;
1028
1029 // Marks the alarm as unregistered.
1030 void OnShutdown(SimpleEpollServer* eps) override;
1031
1032 // If the alarm was registered, unregister it.
1033 void UnregisterIfRegistered();
1034
1035 // Reregisters the alarm at specified time.
1036 void ReregisterAlarm(int64_t timeout_time_in_us);
1037
1038 bool registered() const { return registered_; }
1039
1040 const SimpleEpollServer* eps() const { return eps_; }
1041
1042 private:
1043 SimpleEpollServer::AlarmRegToken token_;
1044 SimpleEpollServer* eps_;
1045 bool registered_;
1046};
1047
1048} // namespace epoll_server
1049
1050#endif // QUICHE_EPOLL_SERVER_H_