blob: 85b3a08f1bce43c55584e1ef8b9461e7070b99ba [file] [log] [blame]
QUICHE team53f08a32019-04-15 14:47:31 -04001// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef QUICHE_EPOLL_SERVER_H_
6#define QUICHE_EPOLL_SERVER_H_
7
8#include <fcntl.h>
9#include <stddef.h>
10#include <stdint.h>
11#include <sys/queue.h>
12
13#include <map>
14#include <memory>
15#include <string>
16#include <unordered_map>
17#include <unordered_set>
18#include <vector>
19
20// #define EPOLL_SERVER_EVENT_TRACING 1
21//
22// Defining EPOLL_SERVER_EVENT_TRACING
23// causes code to exist which didn't before.
24// This code tracks each event generated by the epollserver,
25// as well as providing a per-fd-registered summary of
26// events. Note that enabling this code vastly slows
27// down operations, and uses substantially more
28// memory. For these reasons, it should only be enabled by developers doing
29// development at their workstations.
30//
31// A structure called 'EventRecorder' will exist when
32// the macro is defined. See the EventRecorder class interface
33// within the SimpleEpollServer class for more details.
34#ifdef EPOLL_SERVER_EVENT_TRACING
35#include <ostream>
36#endif
37
38#include <sys/epoll.h>
39
QUICHE team5be974e2020-12-29 18:35:24 -050040#include "epoll_server/platform/api/epoll_export.h"
41#include "epoll_server/platform/api/epoll_logging.h"
QUICHE team53f08a32019-04-15 14:47:31 -040042
43namespace epoll_server {
44
45class SimpleEpollServer;
46class EpollAlarmCallbackInterface;
47class ReadPipeCallback;
48
// The event record handed to EpollCallbackInterface::OnEvent().
struct EpollEvent {
  // Not marked explicit: an int event mask converts implicitly to an
  // EpollEvent, and existing callers may depend on that.
  EpollEvent(int events) : in_events(events) {}

  // The incoming events being reported.
  int in_events;

  // Output: the new event mask to use for the ready list. Zero means "do not
  // put the fd on the ready list". Always starts out as 0 when the event is
  // passed to OnEvent().
  int out_ready_mask = 0;
};
58
59// Callbacks which go into SimpleEpollServers are expected to derive from this
60// class.
61class EpollCallbackInterface {
62 public:
63 // Summary:
64 // Called when the callback is registered into a SimpleEpollServer.
65 // Args:
66 // eps - the poll server into which this callback was registered
67 // fd - the file descriptor which was registered
68 // event_mask - the event mask (composed of EPOLLIN, EPOLLOUT, etc)
69 // which was registered (and will initially be used
70 // in the epoll() calls)
71 virtual void OnRegistration(SimpleEpollServer* eps, int fd,
72 int event_mask) = 0;
73
74 // Summary:
75 // Called when the event_mask is modified (for a file-descriptor)
76 // Args:
77 // fd - the file descriptor which was registered
78 // event_mask - the event mask (composed of EPOLLIN, EPOLLOUT, etc)
79 // which was is now curren (and will be used
80 // in subsequent epoll() calls)
81 virtual void OnModification(int fd, int event_mask) = 0;
82
83 // Summary:
84 // Called whenever an event occurs on the file-descriptor.
85 // This is where the bulk of processing is expected to occur.
86 // Args:
87 // fd - the file descriptor which was registered
88 // event - a struct that contains the event mask (composed of EPOLLIN,
89 // EPOLLOUT, etc), a flag that indicates whether this is a true
90 // epoll_wait event vs one from the ready list, and an output
91 // parameter for OnEvent to inform the SimpleEpollServer whether to
92 // put this fd on the ready list.
93 virtual void OnEvent(int fd, EpollEvent* event) = 0;
94
95 // Summary:
96 // Called when the file-descriptor is unregistered from the poll-server.
97 // Args:
98 // fd - the file descriptor which was registered, and of this call, is now
99 // unregistered.
100 // replaced - If true, this callback is being replaced by another, otherwise
101 // it is simply being removed.
102 virtual void OnUnregistration(int fd, bool replaced) = 0;
103
104 // Summary:
105 // Called when the epoll server is shutting down. This is different from
106 // OnUnregistration because the subclass may want to clean up memory.
107 // This is called in leiu of OnUnregistration.
108 // Args:
109 // fd - the file descriptor which was registered.
110 virtual void OnShutdown(SimpleEpollServer* eps, int fd) = 0;
111
112 // Summary:
113 // Returns a name describing the class for use in debug/error reporting.
114 virtual std::string Name() const = 0;
115
116 virtual ~EpollCallbackInterface() {}
117
118 protected:
119 EpollCallbackInterface() {}
120};
121
122////////////////////////////////////////////////////////////////////////////////
123////////////////////////////////////////////////////////////////////////////////
124
danzh02640922019-04-16 14:49:47 -0700125class EPOLL_EXPORT_PRIVATE SimpleEpollServer {
QUICHE team53f08a32019-04-15 14:47:31 -0400126 public:
127 typedef EpollAlarmCallbackInterface AlarmCB;
128 typedef EpollCallbackInterface CB;
129
130 typedef std::multimap<int64_t, AlarmCB*> TimeToAlarmCBMap;
131 typedef TimeToAlarmCBMap::iterator AlarmRegToken;
132
133 // Summary:
134 // Constructor:
135 // By default, we don't wait any amount of time for events, and
136 // we suggest to the epoll-system that we're going to use on-the-order
137 // of 1024 FDs.
138 SimpleEpollServer();
139
140 SimpleEpollServer(const SimpleEpollServer&) = delete;
141 SimpleEpollServer operator=(const SimpleEpollServer&) = delete;
142
143 ////////////////////////////////////////
144
145 // Destructor
146 virtual ~SimpleEpollServer();
147
148 ////////////////////////////////////////
149
150 // Summary
151 // Register a callback to be called whenever an event contained
152 // in the set of events included in event_mask occurs on the
153 // file-descriptor 'fd'
154 //
155 // Note that only one callback is allowed to be registered for
156 // any specific file-descriptor.
157 //
158 // If a callback is registered for a file-descriptor which has already
159 // been registered, then the previous callback is unregistered with
160 // the 'replaced' flag set to true. I.e. the previous callback's
161 // OnUnregistration() function is called like so:
162 // OnUnregistration(fd, true);
163 //
164 // The epoll server does NOT take on ownership of the callback: the callback
165 // creator is responsible for managing that memory.
166 //
167 // Args:
168 // fd - a valid file-descriptor
169 // cb - an instance of a subclass of EpollCallbackInterface
170 // event_mask - a combination of (EPOLLOUT, EPOLLIN.. etc) indicating
171 // the events for which the callback would like to be
172 // called.
173 virtual void RegisterFD(int fd, CB* cb, int event_mask);
174
175 ////////////////////////////////////////
176
177 // Summary:
178 // A shortcut for RegisterFD which sets things up such that the
179 // callback is called when 'fd' is available for writing.
180 // Args:
181 // fd - a valid file-descriptor
182 // cb - an instance of a subclass of EpollCallbackInterface
183 virtual void RegisterFDForWrite(int fd, CB* cb);
184
185 ////////////////////////////////////////
186
187 // Summary:
188 // A shortcut for RegisterFD which sets things up such that the
189 // callback is called when 'fd' is available for reading or writing.
190 // Args:
191 // fd - a valid file-descriptor
192 // cb - an instance of a subclass of EpollCallbackInterface
193 virtual void RegisterFDForReadWrite(int fd, CB* cb);
194
195 ////////////////////////////////////////
196
197 // Summary:
198 // A shortcut for RegisterFD which sets things up such that the
199 // callback is called when 'fd' is available for reading.
200 // Args:
201 // fd - a valid file-descriptor
202 // cb - an instance of a subclass of EpollCallbackInterface
203 virtual void RegisterFDForRead(int fd, CB* cb);
204
205 ////////////////////////////////////////
206
207 // Summary:
208 // Removes the FD and the associated callback from the pollserver.
209 // If the callback is registered with other FDs, they will continue
210 // to be processed using the callback without modification.
211 // If the file-descriptor specified is not registered in the
212 // epoll_server, then nothing happens as a result of this call.
213 // Args:
214 // fd - the file-descriptor which should no-longer be monitored.
215 virtual void UnregisterFD(int fd);
216
217 ////////////////////////////////////////
218
219 // Summary:
220 // Modifies the event mask for the file-descriptor, replacing
221 // the old event_mask with the new one specified here.
222 // If the file-descriptor specified is not registered in the
223 // epoll_server, then nothing happens as a result of this call.
224 // Args:
225 // fd - the fd whose event mask should be modified.
226 // event_mask - the new event mask.
227 virtual void ModifyCallback(int fd, int event_mask);
228
229 ////////////////////////////////////////
230
231 // Summary:
232 // Modifies the event mask for the file-descriptor such that we
233 // no longer request events when 'fd' is readable.
234 // If the file-descriptor specified is not registered in the
235 // epoll_server, then nothing happens as a result of this call.
236 // Args:
237 // fd - the fd whose event mask should be modified.
238 virtual void StopRead(int fd);
239
240 ////////////////////////////////////////
241
242 // Summary:
243 // Modifies the event mask for the file-descriptor such that we
244 // request events when 'fd' is readable.
245 // If the file-descriptor specified is not registered in the
246 // epoll_server, then nothing happens as a result of this call.
247 // Args:
248 // fd - the fd whose event mask should be modified.
249 virtual void StartRead(int fd);
250
251 ////////////////////////////////////////
252
253 // Summary:
254 // Modifies the event mask for the file-descriptor such that we
255 // no longer request events when 'fd' is writable.
256 // If the file-descriptor specified is not registered in the
257 // epoll_server, then nothing happens as a result of this call.
258 // Args:
259 // fd - the fd whose event mask should be modified.
260 virtual void StopWrite(int fd);
261
262 ////////////////////////////////////////
263
264 // Summary:
265 // Modifies the event mask for the file-descriptor such that we
266 // request events when 'fd' is writable.
267 // If the file-descriptor specified is not registered in the
268 // epoll_server, then nothing happens as a result of this call.
269 // Args:
270 // fd - the fd whose event mask should be modified.
271 virtual void StartWrite(int fd);
272
273 ////////////////////////////////////////
274
275 // Summary:
276 // Looks up the callback associated with the file-descriptor 'fd'.
277 // If a callback is associated with this file-descriptor, then
278 // its OnEvent() method is called with the file-descriptor 'fd',
279 // and event_mask 'event_mask'
280 //
281 // If no callback is registered for this file-descriptor, nothing
282 // will happen as a result of this call.
283 //
284 // This function is used internally by the SimpleEpollServer, but is
285 // available publicly so that events might be 'faked'. Calling
286 // this function with an fd and event_mask is equivalent (as far
287 // as the callback is concerned) to having a real event generated
288 // by epoll (except, of course, that read(), etc won't necessarily
289 // be able to read anything)
290 // Args:
291 // fd - the file-descriptor on which an event has occurred.
292 // event_mask - a bitmask representing the events which have occurred
293 // on/for this fd. This bitmask is composed of
294 // POLLIN, POLLOUT, etc.
295 //
296 void HandleEvent(int fd, int event_mask);
297
298 // Summary:
299 // Call this when you want the pollserver to
300 // wait for events and execute the callbacks associated with
301 // the file-descriptors on which those events have occurred.
302 // Depending on the value of timeout_in_us_, this may or may
303 // not return immediately. Please reference the set_timeout()
304 // function for the specific behaviour.
305 virtual void WaitForEventsAndExecuteCallbacks();
306
307 // Summary:
308 // When an fd is registered to use edge trigger notification, the ready
309 // list can be used to simulate level trigger semantics. Edge trigger
310 // registration doesn't send an initial event, and only rising edge (going
311 // from blocked to unblocked) events are sent. A callback can put itself on
312 // the ready list by calling SetFDReady() after calling RegisterFD(). The
313 // OnEvent method of all callbacks associated with the fds on the ready
314 // list will be called immediately after processing the events returned by
315 // epoll_wait(). The fd is removed from the ready list before the
316 // callback's OnEvent() method is invoked. To stay on the ready list, the
317 // OnEvent() (or some function in that call chain) must call SetFDReady
318 // again. When a fd is unregistered using UnregisterFD(), the fd is
319 // automatically removed from the ready list.
320 //
321 // When the callback for a edge triggered fd hits the falling edge (about
322 // to block, either because of it got an EAGAIN, or had a short read/write
323 // operation), it should remove itself from the ready list using
324 // SetFDNotReady() (since OnEvent cannot distinguish between invocation
325 // from the ready list vs from a normal epoll event). All four ready list
326 // methods are safe to be called within the context of the callbacks.
327 //
328 // Since the ready list invokes EpollCallbackInterface::OnEvent, only fds
329 // that are registered with the SimpleEpollServer will be put on the ready
330 // list. SetFDReady() and SetFDNotReady() will do nothing if the
331 // SimpleEpollServer doesn't know about the fd passed in.
332 //
333 // Since the ready list cannot reliably determine proper set of events
334 // which should be sent to the callback, SetFDReady() requests the caller
335 // to provide the ready list with the event mask, which will be used later
336 // when OnEvent() is invoked by the ready list. Hence, the event_mask
337 // passed to SetFDReady() does not affect the actual epoll registration of
338 // the fd with the kernel. If a fd is already put on the ready list, and
339 // SetFDReady() is called again for that fd with a different event_mask,
340 // the event_mask will be updated.
341 virtual void SetFDReady(int fd, int events_to_fake);
342
343 virtual void SetFDNotReady(int fd);
344
345 // Summary:
346 // IsFDReady(), ReadyListSize(), and VerifyReadyList are intended as
347 // debugging tools and for writing unit tests.
348 // IsFDReady() returns whether a fd is in the ready list.
349 // ReadyListSize() returns the number of fds on the ready list.
350 // VerifyReadyList() checks the consistency of internal data structure. It
351 // will CHECK if it finds an error.
352 virtual bool IsFDReady(int fd) const;
353
354 size_t ReadyListSize() const { return ready_list_size_; }
355
356 void VerifyReadyList() const;
357
358 ////////////////////////////////////////
359
360 // Summary:
361 // Registers an alarm 'ac' to go off at time 'timeout_time_in_us'.
362 // If the callback returns a positive number from its OnAlarm() function,
363 // then the callback will be re-registered at that time, else the alarm
364 // owner is responsible for freeing up memory.
365 //
366 // Important: A given AlarmCB* can not be registered again if it is already
367 // registered. If a user wants to register a callback again it should first
368 // unregister the previous callback before calling RegisterAlarm again.
369 // Args:
370 // timeout_time_in_us - the absolute time at which the alarm should go off
371 // ac - the alarm which will be called.
372 virtual void RegisterAlarm(int64_t timeout_time_in_us, AlarmCB* ac);
373
374 // Summary:
375 // Registers an alarm 'ac' to go off at time: (ApproximateNowInUsec() +
376 // delta_in_us). While this is somewhat less accurate (see the description
377 // for ApproximateNowInUsec() to see how 'approximate'), the error is never
378 // worse than the amount of time it takes to process all events in one
379 // WaitForEvents. As with 'RegisterAlarm()', if the callback returns a
380 // positive number from its OnAlarm() function, then the callback will be
381 // re-registered at that time, else the alarm owner is responsible for
382 // freeing up memory.
383 // Note that this function is purely a convenience. The
384 // same thing may be accomplished by using RegisterAlarm with
385 // ApproximateNowInUsec() directly.
386 //
387 // Important: A given AlarmCB* can not be registered again if it is already
388 // registered. If a user wants to register a callback again it should first
389 // unregister the previous callback before calling RegisterAlarm again.
390 // Args:
391 // delta_in_us - the delta in microseconds from ApproximateNowInUsec() at
392 // which point the alarm should go off.
393 // ac - the alarm which will be called.
394 void RegisterAlarmApproximateDelta(int64_t delta_in_us, AlarmCB* ac) {
395 RegisterAlarm(ApproximateNowInUsec() + delta_in_us, ac);
396 }
397
398 ////////////////////////////////////////
399
400 // Summary:
401 // Unregister the alarm referred to by iterator_token; Callers should
402 // be warned that a token may have become already invalid when OnAlarm()
403 // is called, was unregistered, or OnShutdown was called on that alarm.
404 // Args:
405 // iterator_token - iterator to the alarm callback to unregister.
406 virtual void UnregisterAlarm(
407 const SimpleEpollServer::AlarmRegToken& iterator_token);
408
409 virtual SimpleEpollServer::AlarmRegToken ReregisterAlarm(
410 SimpleEpollServer::AlarmRegToken iterator_token,
411 int64_t timeout_time_in_us);
412
413 ////////////////////////////////////////
414
415 // Summary:
416 // returns the number of file-descriptors registered in this
417 // SimpleEpollServer.
418 // Returns:
419 // number of FDs registered (discounting the internal pipe used for Wake)
420 virtual int NumFDsRegistered() const;
421
422 // Summary:
423 // Force the epoll server to wake up (by writing to an internal pipe).
424 virtual void Wake();
425
426 // Summary:
427 // Wrapper around WallTimer's NowInUsec. We do this so that we can test
428 // SimpleEpollServer without using the system clock (and can avoid the
429 // flakiness that would ensue)
430 // Returns:
431 // the current time as number of microseconds since the Unix epoch.
432 virtual int64_t NowInUsec() const;
433
434 // Summary:
435 // Since calling NowInUsec() many thousands of times per
436 // WaitForEventsAndExecuteCallbacks function call is, to say the least,
437 // inefficient, we allow users to use an approximate time instead. The
438 // time returned from this function is as accurate as NowInUsec() when
439 // WaitForEventsAndExecuteCallbacks is not an ancestor of the caller's
440 // callstack.
441 // However, when WaitForEventsAndExecuteCallbacks -is- an ancestor, then
442 // this function returns the time at which the
443 // WaitForEventsAndExecuteCallbacks function started to process events or
444 // alarms.
445 //
446 // Essentially, this function makes available a fast and mostly accurate
447 // mechanism for getting the time for any function handling an event or
448 // alarm. When functions which are not handling callbacks or alarms call
449 // this function, they get the slow and "absolutely" accurate time.
450 //
451 // Users should be encouraged to use this function.
452 // Returns:
453 // the "approximate" current time as number of microseconds since the Unix
454 // epoch.
455 virtual int64_t ApproximateNowInUsec() const;
456
457 static std::string EventMaskToString(int event_mask);
458
459 // Summary:
460 // Logs the state of the epoll server with EPOLL_LOG(ERROR).
461 void LogStateOnCrash();
462
463 // Summary:
464 // Set the timeout to the value specified.
465 // If the timeout is set to a negative number,
466 // WaitForEventsAndExecuteCallbacks() will only return when an event has
467 // occurred
468 // If the timeout is set to zero,
469 // WaitForEventsAndExecuteCallbacks() will return immediately
470 // If the timeout is set to a positive number,
471 // WaitForEventsAndExecuteCallbacks() will return when an event has
472 // occurred, or when timeout_in_us microseconds has elapsed, whichever
473 // is first.
474 // Args:
475 // timeout_in_us - value specified depending on behaviour desired.
476 // See above.
477 void set_timeout_in_us(int64_t timeout_in_us) {
478 timeout_in_us_ = timeout_in_us;
479 }
480
481 ////////////////////////////////////////
482
483 // Summary:
484 // Accessor for the current value of timeout_in_us.
485 int timeout_in_us_for_test() const { return timeout_in_us_; }
486
487 // Summary:
488 // Returns true when the SimpleEpollServer() is being destroyed.
489 bool in_shutdown() const { return in_shutdown_; }
QUICHE team5159c752021-03-31 10:51:05 -0700490 bool ShutdownCalled() const { return in_shutdown(); }
QUICHE team53f08a32019-04-15 14:47:31 -0400491
492 // Compatibility stub.
493 void Shutdown() {}
494
495 // Summary:
496 // A function for implementing the ready list. It invokes OnEvent for each
497 // of the fd in the ready list, and takes care of adding them back to the
498 // ready list if the callback requests it (by checking that out_ready_mask
499 // is non-zero).
500 void CallReadyListCallbacks();
501
502 int64_t LastDelayInUsec() const { return last_delay_in_usec_; }
503
504 protected:
505 virtual void SetNonblocking(int fd);
506
507 // This exists here so that we can override this function in unittests
508 // in order to make effective mock SimpleEpollServer objects.
509 virtual int epoll_wait_impl(int epfd, struct epoll_event* events,
510 int max_events, int timeout_in_ms);
511
512 // this struct is used internally, and is never used by anything external
513 // to this class. Some of its members are declared mutable to get around the
514 // restriction imposed by hash_set. Since hash_set knows nothing about the
515 // objects it stores, it has to assume that every bit of the object is used
516 // in the hash function and equal_to comparison. Thus hash_set::iterator is a
517 // const iterator. In this case, the only thing that must stay constant is
518 // fd. Everything else are just along for the ride and changing them doesn't
519 // compromise the hash_set integrity.
520 struct CBAndEventMask {
521 CBAndEventMask()
522 : cb(NULL),
523 fd(-1),
524 event_mask(0),
525 events_asserted(0),
526 events_to_fake(0),
527 in_use(false) {
528 entry.le_next = NULL;
529 entry.le_prev = NULL;
530 }
531
532 CBAndEventMask(EpollCallbackInterface* cb, int event_mask, int fd)
533 : cb(cb),
534 fd(fd),
535 event_mask(event_mask),
536 events_asserted(0),
537 events_to_fake(0),
538 in_use(false) {
539 entry.le_next = NULL;
540 entry.le_prev = NULL;
541 }
542
543 // Required operator for hash_set. Normally operator== should be a free
544 // standing function. However, since CBAndEventMask is a protected type and
545 // it will never be a base class, it makes no difference.
546 bool operator==(const CBAndEventMask& cb_and_mask) const {
547 return fd == cb_and_mask.fd;
548 }
549 // A callback. If the fd is unregistered inside the callchain of OnEvent,
550 // the cb will be set to NULL.
551 mutable EpollCallbackInterface* cb;
552
553 mutable LIST_ENTRY(CBAndEventMask) entry;
554 // file descriptor registered with the epoll server.
555 int fd;
556 // the current event_mask registered for this callback.
557 mutable int event_mask;
558 // the event_mask that was returned by epoll
559 mutable int events_asserted;
560 // the event_mask for the ready list to use to call OnEvent.
561 mutable int events_to_fake;
562 // toggle around calls to OnEvent to tell UnregisterFD to not erase the
563 // iterator because HandleEvent is using it.
564 mutable bool in_use;
565 };
566
567 // Custom hash function to be used by hash_set.
568 struct CBAndEventMaskHash {
569 size_t operator()(const CBAndEventMask& cb_and_eventmask) const {
570 return static_cast<size_t>(cb_and_eventmask.fd);
571 }
572 };
573
574 using FDToCBMap = std::unordered_set<CBAndEventMask, CBAndEventMaskHash>;
575
576 // the following four functions are OS-specific, and are likely
577 // to be changed in a subclass if the poll/select method is changed
578 // from epoll.
579
580 // Summary:
581 // Deletes a file-descriptor from the set of FDs that should be
582 // monitored with epoll.
583 // Note that this only deals with modifying data relating -directly-
584 // with the epoll call-- it does not modify any data within the
585 // epoll_server.
586 // Args:
587 // fd - the file descriptor to-be-removed from the monitoring set
588 virtual void DelFD(int fd) const;
589
590 ////////////////////////////////////////
591
592 // Summary:
593 // Adds a file-descriptor to the set of FDs that should be
594 // monitored with epoll.
595 // Note that this only deals with modifying data relating -directly-
596 // with the epoll call.
597 // Args:
598 // fd - the file descriptor to-be-added to the monitoring set
599 // event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc
600 // OR'd together) which will be associated with this
601 // FD initially.
602 virtual void AddFD(int fd, int event_mask) const;
603
604 ////////////////////////////////////////
605
606 // Summary:
607 // Modifies a file-descriptor in the set of FDs that should be
608 // monitored with epoll.
609 // Note that this only deals with modifying data relating -directly-
610 // with the epoll call.
611 // Args:
612 // fd - the file descriptor to-be-added to the monitoring set
613 // event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc
614 // OR'd together) which will be associated with this
615 // FD after this call.
616 virtual void ModFD(int fd, int event_mask) const;
617
618 ////////////////////////////////////////
619
620 // Summary:
621 // Modifies the event mask associated with an FD in the set of
622 // data needed by epoll.
623 // Events are removed before they are added, thus, if ~0 is put
624 // in 'remove_event', whatever is put in 'add_event' will be
625 // the new event mask.
626 // If the file-descriptor specified is not registered in the
627 // epoll_server, then nothing happens as a result of this call.
628 // Args:
629 // fd - the file descriptor whose event mask is to be modified
630 // remove_event - the events which are to be removed from the current
631 // event_mask
632 // add_event - the events which are to be added to the current event_mask
633 //
634 //
635 virtual void ModifyFD(int fd, int remove_event, int add_event);
636
637 ////////////////////////////////////////
638
639 // Summary:
640 // Waits for events, and calls HandleEvents() for each
641 // fd, event pair discovered to possibly have an event.
642 // Note that a callback (B) may get a spurious event if
643 // another callback (A) has closed a file-descriptor N, and
644 // the callback (B) has a newly opened file-descriptor, which
645 // also happens to be N.
646 virtual void WaitForEventsAndCallHandleEvents(int64_t timeout_in_us,
647 struct epoll_event events[],
648 int events_size);
649
650 // Summary:
651 // An internal function for implementing the ready list. It adds a fd's
652 // CBAndEventMask to the ready list. If the fd is already on the ready
653 // list, it is a no-op.
654 void AddToReadyList(CBAndEventMask* cb_and_mask);
655
656 // Summary:
657 // An internal function for implementing the ready list. It remove a fd's
658 // CBAndEventMask from the ready list. If the fd is not on the ready list,
659 // it is a no-op.
660 void RemoveFromReadyList(const CBAndEventMask& cb_and_mask);
661
662 // Summary:
663 // Calls any pending alarms that should go off and reregisters them if they
664 // were recurring.
665 virtual void CallAndReregisterAlarmEvents();
666
667 // The file-descriptor created for epolling
668 int epoll_fd_;
669
670 // The mapping of file-descriptor to CBAndEventMasks
671 FDToCBMap cb_map_;
672
673 // Custom hash function to be used by hash_set.
674 struct AlarmCBHash {
675 size_t operator()(AlarmCB* const& p) const {
676 return reinterpret_cast<size_t>(p);
677 }
678 };
679
680 // TODO(sushantj): Having this hash_set is avoidable. We currently have it
681 // only so that we can enforce stringent checks that a caller can not register
682 // the same alarm twice. One option is to have an implementation in which
683 // this hash_set is used only in the debug mode.
684 using AlarmCBMap = std::unordered_set<AlarmCB*, AlarmCBHash>;
685 AlarmCBMap all_alarms_;
686
687 TimeToAlarmCBMap alarm_map_;
688
689 // The amount of time in microseconds that we'll wait before returning
690 // from the WaitForEventsAndExecuteCallbacks() function.
691 // If this is positive, wait that many microseconds.
692 // If this is negative, wait forever, or for the first event that occurs
693 // If this is zero, never wait for an event.
694 int64_t timeout_in_us_;
695
696 // This is nonzero only after the invocation of epoll_wait_impl within
697 // WaitForEventsAndCallHandleEvents and before the function
698 // WaitForEventsAndExecuteCallbacks returns. At all other times, this is
699 // zero. This enables us to have relatively accurate time returned from the
700 // ApproximateNowInUs() function. See that function for more details.
701 int64_t recorded_now_in_us_;
702
703 // This is used to implement CallAndReregisterAlarmEvents. This stores
704 // all alarms that were reregistered because OnAlarm() returned a
705 // value > 0 and the time at which they should be executed is less than
706 // the current time. By storing such alarms in this map we ensure
707 // that while calling CallAndReregisterAlarmEvents we do not call
708 // OnAlarm on any alarm in this set. This ensures that we do not
709 // go in an infinite loop.
710 AlarmCBMap alarms_reregistered_and_should_be_skipped_;
711
712 LIST_HEAD(ReadyList, CBAndEventMask) ready_list_;
713 LIST_HEAD(TmpList, CBAndEventMask) tmp_list_;
714 int ready_list_size_;
715 // TODO(alyssar): make this into something that scales up.
716 static const int events_size_ = 256;
717 struct epoll_event events_[256];
718
719#ifdef EPOLL_SERVER_EVENT_TRACING
720 struct EventRecorder {
721 public:
722 EventRecorder() : num_records_(0), record_threshold_(10000) {}
723
724 ~EventRecorder() { Clear(); }
725
726 // When a number of events equals the record threshold,
727 // the collected data summary for all FDs will be written
728 // to EPOLL_LOG(INFO). Note that this does not include the
729 // individual events (if you're interested in those, you'll
730 // have to get at them programmatically).
731 // After any such flushing to EPOLL_LOG(INFO) all events will
732 // be cleared.
733 // Note that the definition of an 'event' is a bit 'hazy',
734 // as it includes the 'Unregistration' event, and perhaps
735 // others.
736 void set_record_threshold(int64_t new_threshold) {
737 record_threshold_ = new_threshold;
738 }
739
740 void Clear() {
741 for (int i = 0; i < debug_events_.size(); ++i) {
742 delete debug_events_[i];
743 }
744 debug_events_.clear();
745 unregistered_fds_.clear();
746 event_counts_.clear();
747 }
748
749 void MaybeRecordAndClear() {
750 ++num_records_;
751 if ((num_records_ > record_threshold_) && (record_threshold_ > 0)) {
752 EPOLL_LOG(INFO) << "\n" << *this;
753 num_records_ = 0;
754 Clear();
755 }
756 }
757
758 void RecordFDMaskEvent(int fd, int mask, const char* function) {
759 FDMaskOutput* fdmo = new FDMaskOutput(fd, mask, function);
760 debug_events_.push_back(fdmo);
761 MaybeRecordAndClear();
762 }
763
764 void RecordEpollWaitEvent(int timeout_in_ms, int num_events_generated) {
765 EpollWaitOutput* ewo =
766 new EpollWaitOutput(timeout_in_ms, num_events_generated);
767 debug_events_.push_back(ewo);
768 MaybeRecordAndClear();
769 }
770
771 void RecordEpollEvent(int fd, int event_mask) {
772 Events& events_for_fd = event_counts_[fd];
773 events_for_fd.AssignFromMask(event_mask);
774 MaybeRecordAndClear();
775 }
776
777 friend ostream& operator<<(ostream& os, const EventRecorder& er) {
778 for (int i = 0; i < er.unregistered_fds_.size(); ++i) {
779 os << "fd: " << er.unregistered_fds_[i] << "\n";
780 os << er.unregistered_fds_[i];
781 }
782 for (EventCountsMap::const_iterator i = er.event_counts_.begin();
783 i != er.event_counts_.end(); ++i) {
784 os << "fd: " << i->first << "\n";
785 os << i->second;
786 }
787 for (int i = 0; i < er.debug_events_.size(); ++i) {
788 os << *(er.debug_events_[i]) << "\n";
789 }
790 return os;
791 }
792
793 void RecordUnregistration(int fd) {
794 EventCountsMap::iterator i = event_counts_.find(fd);
795 if (i != event_counts_.end()) {
796 unregistered_fds_.push_back(i->second);
797 event_counts_.erase(i);
798 }
799 MaybeRecordAndClear();
800 }
801
802 protected:
803 class DebugOutput {
804 public:
805 friend ostream& operator<<(ostream& os, const DebugOutput& debug_output) {
806 debug_output.OutputToStream(os);
807 return os;
808 }
809 virtual void OutputToStream(ostream* os) const = 0;
810 virtual ~DebugOutput() {}
811 };
812
813 class FDMaskOutput : public DebugOutput {
814 public:
815 FDMaskOutput(int fd, int mask, const char* function)
816 : fd_(fd), mask_(mask), function_(function) {}
817 virtual void OutputToStream(ostream* os) const {
818 (*os) << "func: " << function_ << "\tfd: " << fd_;
819 if (mask_ != 0) {
820 (*os) << "\tmask: " << EventMaskToString(mask_);
821 }
822 }
823 int fd_;
824 int mask_;
825 const char* function_;
826 };
827
828 class EpollWaitOutput : public DebugOutput {
829 public:
830 EpollWaitOutput(int timeout_in_ms, int num_events_generated)
831 : timeout_in_ms_(timeout_in_ms),
832 num_events_generated_(num_events_generated) {}
833 virtual void OutputToStream(ostream* os) const {
834 (*os) << "timeout_in_ms: " << timeout_in_ms_
835 << "\tnum_events_generated: " << num_events_generated_;
836 }
837
838 protected:
839 int timeout_in_ms_;
840 int num_events_generated_;
841 };
842
843 struct Events {
844 Events()
845 : epoll_in(0),
846 epoll_pri(0),
847 epoll_out(0),
848 epoll_rdnorm(0),
849 epoll_rdband(0),
850 epoll_wrnorm(0),
851 epoll_wrband(0),
852 epoll_msg(0),
853 epoll_err(0),
854 epoll_hup(0),
855 epoll_oneshot(0),
856 epoll_et(0) {}
857
858 void AssignFromMask(int event_mask) {
859 if (event_mask & EPOLLIN) ++epoll_in;
860 if (event_mask & EPOLLPRI) ++epoll_pri;
861 if (event_mask & EPOLLOUT) ++epoll_out;
862 if (event_mask & EPOLLRDNORM) ++epoll_rdnorm;
863 if (event_mask & EPOLLRDBAND) ++epoll_rdband;
864 if (event_mask & EPOLLWRNORM) ++epoll_wrnorm;
865 if (event_mask & EPOLLWRBAND) ++epoll_wrband;
866 if (event_mask & EPOLLMSG) ++epoll_msg;
867 if (event_mask & EPOLLERR) ++epoll_err;
868 if (event_mask & EPOLLHUP) ++epoll_hup;
869 if (event_mask & EPOLLONESHOT) ++epoll_oneshot;
870 if (event_mask & EPOLLET) ++epoll_et;
871 }
872
873 friend ostream& operator<<(ostream& os, const Events& ev) {
874 if (ev.epoll_in) {
875 os << "\t EPOLLIN: " << ev.epoll_in << "\n";
876 }
877 if (ev.epoll_pri) {
878 os << "\t EPOLLPRI: " << ev.epoll_pri << "\n";
879 }
880 if (ev.epoll_out) {
881 os << "\t EPOLLOUT: " << ev.epoll_out << "\n";
882 }
883 if (ev.epoll_rdnorm) {
884 os << "\t EPOLLRDNORM: " << ev.epoll_rdnorm << "\n";
885 }
886 if (ev.epoll_rdband) {
887 os << "\t EPOLLRDBAND: " << ev.epoll_rdband << "\n";
888 }
889 if (ev.epoll_wrnorm) {
890 os << "\t EPOLLWRNORM: " << ev.epoll_wrnorm << "\n";
891 }
892 if (ev.epoll_wrband) {
893 os << "\t EPOLLWRBAND: " << ev.epoll_wrband << "\n";
894 }
895 if (ev.epoll_msg) {
896 os << "\t EPOLLMSG: " << ev.epoll_msg << "\n";
897 }
898 if (ev.epoll_err) {
899 os << "\t EPOLLERR: " << ev.epoll_err << "\n";
900 }
901 if (ev.epoll_hup) {
902 os << "\t EPOLLHUP: " << ev.epoll_hup << "\n";
903 }
904 if (ev.epoll_oneshot) {
905 os << "\t EPOLLONESHOT: " << ev.epoll_oneshot << "\n";
906 }
907 if (ev.epoll_et) {
908 os << "\t EPOLLET: " << ev.epoll_et << "\n";
909 }
910 return os;
911 }
912
913 unsigned int epoll_in;
914 unsigned int epoll_pri;
915 unsigned int epoll_out;
916 unsigned int epoll_rdnorm;
917 unsigned int epoll_rdband;
918 unsigned int epoll_wrnorm;
919 unsigned int epoll_wrband;
920 unsigned int epoll_msg;
921 unsigned int epoll_err;
922 unsigned int epoll_hup;
923 unsigned int epoll_oneshot;
924 unsigned int epoll_et;
925 };
926
927 std::vector<DebugOutput*> debug_events_;
928 std::vector<Events> unregistered_fds_;
929 using EventCountsMap = std::unordered_map<int, Events>;
930 EventCountsMap event_counts_;
931 int64_t num_records_;
932 int64_t record_threshold_;
933 };
934
935 void ClearEventRecords() { event_recorder_.Clear(); }
936 void WriteEventRecords(ostream* os) const { (*os) << event_recorder_; }
937
938 mutable EventRecorder event_recorder_;
939
940#endif
941
942 private:
943 // Helper functions used in the destructor.
944 void CleanupFDToCBMap();
945 void CleanupTimeToAlarmCBMap();
946
947 // The callback registered to the fds below. As the purpose of their
948 // registration is to wake the epoll server it just clears the pipe and
949 // returns.
950 std::unique_ptr<ReadPipeCallback> wake_cb_;
951
952 // A pipe owned by the epoll server. The server will be registered to listen
953 // on read_fd_ and can be woken by Wake() which writes to write_fd_.
954 int read_fd_;
955 int write_fd_;
956
957 // This boolean is checked to see if it is false at the top of the
958 // WaitForEventsAndExecuteCallbacks function. If not, then it either returns
959 // without doing work, and logs to ERROR, or aborts the program (in
960 // DEBUG mode). If so, then it sets the bool to true, does work, and
961 // sets it back to false when done. This catches unwanted recursion.
962 bool in_wait_for_events_and_execute_callbacks_;
963
964 // True while the SimpleEpollServer is being destroyed.
965 bool in_shutdown_;
966 int64_t last_delay_in_usec_;
967};
968
969class EpollAlarmCallbackInterface {
970 public:
971 // Summary:
972 // Called when an alarm times out. Invalidates an AlarmRegToken.
973 // WARNING: If a token was saved to refer to an alarm callback, OnAlarm must
974 // delete it, as the reference is no longer valid.
975 // Returns:
976 // the unix time (in microseconds) at which this alarm should be signaled
977 // again, or 0 if the alarm should be removed.
978 virtual int64_t OnAlarm() = 0;
979
980 // Summary:
981 // Called when the an alarm is registered. Invalidates an AlarmRegToken.
982 // Args:
983 // token: the iterator to the alarm registered in the alarm map.
984 // WARNING: this token becomes invalid when the alarm fires, is
985 // unregistered, or OnShutdown is called on that alarm.
986 // eps: the epoll server the alarm is registered with.
987 virtual void OnRegistration(const SimpleEpollServer::AlarmRegToken& token,
988 SimpleEpollServer* eps) = 0;
989
990 // Summary:
991 // Called when the an alarm is unregistered.
992 // WARNING: It is not valid to unregister a callback and then use the token
993 // that was saved to refer to the callback.
994 virtual void OnUnregistration() = 0;
995
996 // Summary:
997 // Called when the epoll server is shutting down.
998 // Invalidates the AlarmRegToken that was given when this alarm was
999 // registered.
1000 virtual void OnShutdown(SimpleEpollServer* eps) = 0;
1001
1002 virtual ~EpollAlarmCallbackInterface() {}
1003
1004 protected:
1005 EpollAlarmCallbackInterface() {}
1006};
1007
1008// A simple alarm which unregisters itself on destruction.
1009//
1010// PLEASE NOTE:
1011// Any classes overriding these functions must either call the implementation
1012// of the parent class, or is must otherwise make sure that the 'registered_'
1013// boolean and the token, 'token_', are updated appropriately.
danzh02640922019-04-16 14:49:47 -07001014class EPOLL_EXPORT_PRIVATE EpollAlarm : public EpollAlarmCallbackInterface {
QUICHE team53f08a32019-04-15 14:47:31 -04001015 public:
1016 EpollAlarm();
1017
1018 ~EpollAlarm() override;
1019
1020 // Marks the alarm as unregistered and returns 0. The return value may be
1021 // safely ignored by subclasses.
1022 int64_t OnAlarm() override;
1023
1024 // Marks the alarm as registered, and stores the token.
1025 void OnRegistration(const SimpleEpollServer::AlarmRegToken& token,
1026 SimpleEpollServer* eps) override;
1027
1028 // Marks the alarm as unregistered.
1029 void OnUnregistration() override;
1030
1031 // Marks the alarm as unregistered.
1032 void OnShutdown(SimpleEpollServer* eps) override;
1033
1034 // If the alarm was registered, unregister it.
1035 void UnregisterIfRegistered();
1036
1037 // Reregisters the alarm at specified time.
1038 void ReregisterAlarm(int64_t timeout_time_in_us);
1039
1040 bool registered() const { return registered_; }
1041
1042 const SimpleEpollServer* eps() const { return eps_; }
1043
1044 private:
1045 SimpleEpollServer::AlarmRegToken token_;
1046 SimpleEpollServer* eps_;
1047 bool registered_;
1048};
1049
1050} // namespace epoll_server
1051
1052#endif // QUICHE_EPOLL_SERVER_H_