Project import generated by Copybara.

PiperOrigin-RevId: 243658601
Change-Id: I2522d468d82c86f8f222cf0e70114d6a9ca2b78c
diff --git a/epoll_server/simple_epoll_server.h b/epoll_server/simple_epoll_server.h
new file mode 100644
index 0000000..78b6d79
--- /dev/null
+++ b/epoll_server/simple_epoll_server.h
@@ -0,0 +1,1050 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef QUICHE_EPOLL_SERVER_H_
+#define QUICHE_EPOLL_SERVER_H_
+
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/queue.h>
+
+#include <map>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+// #define EPOLL_SERVER_EVENT_TRACING 1
+//
+// Defining EPOLL_SERVER_EVENT_TRACING
+// causes code to exist which didn't before.
+// This code tracks each event generated by the epollserver,
+// as well as providing a per-fd-registered summary of
+// events. Note that enabling this code vastly slows
+// down operations, and uses substantially more
+// memory. For these reasons, it should only be enabled by developers doing
+// development at their workstations.
+//
+// A structure called 'EventRecorder' will exist when
+// the macro is defined. See the EventRecorder class interface
+// within the SimpleEpollServer class for more details.
+#ifdef EPOLL_SERVER_EVENT_TRACING
+#include <ostream>
+#endif
+
+#include <sys/epoll.h>
+
+#include "net/third_party/quiche/src/epoll_server/platform/api/epoll_logging.h"
+
+namespace epoll_server {
+
+class SimpleEpollServer;
+class EpollAlarmCallbackInterface;
+class ReadPipeCallback;
+
+struct EpollEvent {
+  // Event mask received for the fd (e.g. EPOLLIN, EPOLLOUT).
+  int in_events;
+  // Mask the fd should be re-queued on the ready list with; 0 (the value it
+  // always has when the event is handed to OnEvent) means "do not re-queue".
+  int out_ready_mask = 0;
+
+  EpollEvent(int events) : in_events(events) {}
+};
+
+// Callbacks which go into SimpleEpollServers are expected to derive from this
+// class.
+class EpollCallbackInterface {
+ public:
+  // Summary:
+  //   Invoked once this callback has been registered with a
+  //   SimpleEpollServer.
+  // Args:
+  //   eps - the poll server that now holds this callback
+  //   fd - the file descriptor the callback was registered for
+  //   event_mask - the initial event mask (EPOLLIN, EPOLLOUT, etc. OR'd
+  //                together) that will be used in the epoll() calls
+  virtual void OnRegistration(SimpleEpollServer* eps, int fd,
+                              int event_mask) = 0;
+
+  // Summary:
+  //   Invoked when the event mask of a registered file descriptor changes.
+  // Args:
+  //   fd - the file descriptor whose registration was modified
+  //   event_mask - the event mask (EPOLLIN, EPOLLOUT, etc. OR'd together)
+  //                that is now current and will be used in subsequent
+  //                epoll() calls
+  virtual void OnModification(int fd, int event_mask) = 0;
+
+  // Summary:
+  //   Invoked whenever an event occurs on the file descriptor. The bulk of
+  //   the processing is expected to happen here.
+  // Args:
+  //   fd - the file descriptor the event occurred on
+  //   event - carries the incoming event mask (EPOLLIN, EPOLLOUT, etc.) in
+  //           in_events, plus the output field out_ready_mask through
+  //           which OnEvent tells the SimpleEpollServer whether to put
+  //           this fd on the ready list (non-zero) and with which event
+  //           mask.
+  virtual void OnEvent(int fd, EpollEvent* event) = 0;
+
+  // Summary:
+  //   Invoked when the file descriptor is unregistered from the poll
+  //   server.
+  // Args:
+  //   fd - the file descriptor that has just been unregistered
+  //   replaced - true if another callback is taking this one's place,
+  //              false if the callback is simply being removed
+  virtual void OnUnregistration(int fd, bool replaced) = 0;
+
+  // Summary:
+  //   Invoked when the epoll server is shutting down, in lieu of
+  //   OnUnregistration; unlike that call, the subclass may want to clean up
+  //   memory here.
+  // Args:
+  //   eps - the server shutting down; fd - the fd that was registered.
+  virtual void OnShutdown(SimpleEpollServer* eps, int fd) = 0;
+
+  // Summary:
+  //   Returns a name for this class, for use in debug/error reporting.
+  virtual std::string Name() const = 0;
+
+  virtual ~EpollCallbackInterface() = default;
+
+ protected:
+  EpollCallbackInterface() = default;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+
+class SimpleEpollServer {
+ public:
+  using AlarmCB = EpollAlarmCallbackInterface;
+  using CB = EpollCallbackInterface;
+  // Alarm callbacks keyed by an int64_t time; tokens are iterators into it.
+  using TimeToAlarmCBMap = std::multimap<int64_t, AlarmCB*>;
+  using AlarmRegToken = TimeToAlarmCBMap::iterator;
+
+  // Summary:
+  //   Constructor:
+  //    By default, we don't wait any amount of time for events, and
+  //    we suggest to the epoll-system that we're going to use on-the-order
+  //    of 1024 FDs.
+  SimpleEpollServer();
+
+  // Neither copyable nor assignable.
+  SimpleEpollServer(const SimpleEpollServer&) = delete;
+  SimpleEpollServer& operator=(const SimpleEpollServer&) = delete;
+  ////////////////////////////////////////
+
+  // Destructor
+  virtual ~SimpleEpollServer();
+
+  ////////////////////////////////////////
+
+  // Summary
+  //   Register a callback to be called whenever an event contained
+  //   in the set of events included in event_mask occurs on the
+  //   file-descriptor 'fd'
+  //
+  //   Note that only one callback is allowed to be registered for
+  //   any specific file-descriptor.
+  //
+  //   If a callback is registered for a file-descriptor which has already
+  //   been registered, then the previous callback is unregistered with
+  //   the 'replaced' flag set to true. I.e. the previous callback's
+  //   OnUnregistration() function is called like so:
+  //      OnUnregistration(fd, true);
+  //
+  //  The epoll server does NOT take on ownership of the callback: the callback
+  //  creator is responsible for managing that memory.
+  //
+  // Args:
+  //   fd - a valid file-descriptor
+  //   cb - an instance of a subclass of EpollCallbackInterface
+  //   event_mask - a combination of (EPOLLOUT, EPOLLIN.. etc) indicating
+  //                the events for which the callback would like to be
+  //                called.
+  virtual void RegisterFD(int fd, CB* cb, int event_mask);
+
+  ////////////////////////////////////////
+
+  // Summary:
+  //   A shortcut for RegisterFD which sets things up such that the
+  //   callback is called when 'fd' is available for writing.
+  // Args:
+  //   fd - a valid file-descriptor
+  //   cb - an instance of a subclass of EpollCallbackInterface
+  virtual void RegisterFDForWrite(int fd, CB* cb);
+
+  ////////////////////////////////////////
+
+  // Summary:
+  //   A shortcut for RegisterFD which sets things up such that the
+  //   callback is called when 'fd' is available for reading or writing.
+  // Args:
+  //   fd - a valid file-descriptor
+  //   cb - an instance of a subclass of EpollCallbackInterface
+  virtual void RegisterFDForReadWrite(int fd, CB* cb);
+
+  ////////////////////////////////////////
+
+  // Summary:
+  //   A shortcut for RegisterFD which sets things up such that the
+  //   callback is called when 'fd' is available for reading.
+  // Args:
+  //   fd - a valid file-descriptor
+  //   cb - an instance of a subclass of EpollCallbackInterface
+  virtual void RegisterFDForRead(int fd, CB* cb);
+
+  ////////////////////////////////////////
+
+  // Summary:
+  //   Removes the FD and the associated callback from the pollserver.
+  //   If the callback is registered with other FDs, they will continue
+  //   to be processed using the callback without modification.
+  //   If the file-descriptor specified is not registered in the
+  //   epoll_server, then nothing happens as a result of this call.
+  // Args:
+  //   fd - the file-descriptor which should no-longer be monitored.
+  virtual void UnregisterFD(int fd);
+
+  ////////////////////////////////////////
+
+  // Summary:
+  //   Modifies the event mask for the file-descriptor, replacing
+  //   the old event_mask with the new one specified here.
+  //   If the file-descriptor specified is not registered in the
+  //   epoll_server, then nothing happens as a result of this call.
+  // Args:
+  //   fd - the fd whose event mask should be modified.
+  //   event_mask - the new event mask.
+  virtual void ModifyCallback(int fd, int event_mask);
+
+  ////////////////////////////////////////
+
+  // Summary:
+  //   Modifies the event mask for the file-descriptor such that we
+  //   no longer request events when 'fd' is readable.
+  //   If the file-descriptor specified is not registered in the
+  //   epoll_server, then nothing happens as a result of this call.
+  // Args:
+  //   fd - the fd whose event mask should be modified.
+  virtual void StopRead(int fd);
+
+  ////////////////////////////////////////
+
+  // Summary:
+  //   Modifies the event mask for the file-descriptor such that we
+  //   request events when 'fd' is readable.
+  //   If the file-descriptor specified is not registered in the
+  //   epoll_server, then nothing happens as a result of this call.
+  // Args:
+  //   fd - the fd whose event mask should be modified.
+  virtual void StartRead(int fd);
+
+  ////////////////////////////////////////
+
+  // Summary:
+  //   Modifies the event mask for the file-descriptor such that we
+  //   no longer request events when 'fd' is writable.
+  //   If the file-descriptor specified is not registered in the
+  //   epoll_server, then nothing happens as a result of this call.
+  // Args:
+  //   fd - the fd whose event mask should be modified.
+  virtual void StopWrite(int fd);
+
+  ////////////////////////////////////////
+
+  // Summary:
+  //   Modifies the event mask for the file-descriptor such that we
+  //   request events when 'fd' is writable.
+  //   If the file-descriptor specified is not registered in the
+  //   epoll_server, then nothing happens as a result of this call.
+  // Args:
+  //   fd - the fd whose event mask should be modified.
+  virtual void StartWrite(int fd);
+
+  ////////////////////////////////////////
+
+  // Summary:
+  //   Looks up the callback associated with the file-descriptor 'fd'.
+  //   If a callback is associated with this file-descriptor, then
+  //   its OnEvent() method is called with the file-descriptor 'fd',
+  //   and event_mask 'event_mask'
+  //
+  //   If no callback is registered for this file-descriptor, nothing
+  //   will happen as a result of this call.
+  //
+  //   This function is used internally by the SimpleEpollServer, but is
+  //   available publicly so that events might be 'faked'. Calling
+  //   this function with an fd and event_mask is equivalent (as far
+  //   as the callback is concerned) to having a real event generated
+  //   by epoll (except, of course, that read(), etc won't necessarily
+  //   be able to read anything)
+  // Args:
+  //   fd - the file-descriptor on which an event has occurred.
+  //   event_mask - a bitmask representing the events which have occurred
+  //                on/for this fd. This bitmask is composed of
+  //                POLLIN, POLLOUT, etc.
+  //
+  void HandleEvent(int fd, int event_mask);
+
+  // Summary:
+  //   Call this when you want the pollserver to
+  //   wait for events and execute the callbacks associated with
+  //   the file-descriptors on which those events have occurred.
+  //   Depending on the value of timeout_in_us_, this may or may
+  //   not return immediately. Please reference the set_timeout_in_us()
+  //   function for the specific behaviour.
+  virtual void WaitForEventsAndExecuteCallbacks();
+
+  // Summary:
+  //   When an fd is registered to use edge trigger notification, the ready
+  //   list can be used to simulate level trigger semantics. Edge trigger
+  //   registration doesn't send an initial event, and only rising edge (going
+  //   from blocked to unblocked) events are sent. A callback can put itself on
+  //   the ready list by calling SetFDReady() after calling RegisterFD(). The
+  //   OnEvent method of all callbacks associated with the fds on the ready
+  //   list will be called immediately after processing the events returned by
+  //   epoll_wait(). The fd is removed from the ready list before the
+  //   callback's OnEvent() method is invoked. To stay on the ready list, the
+  //   OnEvent() (or some function in that call chain) must call SetFDReady
+  //   again. When a fd is unregistered using UnregisterFD(), the fd is
+  //   automatically removed from the ready list.
+  //
+  //   When the callback for a edge triggered fd hits the falling edge (about
+  //   to block, either because of it got an EAGAIN, or had a short read/write
+  //   operation), it should remove itself from the ready list using
+  //   SetFDNotReady() (since OnEvent cannot distinguish between invocation
+  //   from the ready list vs from a normal epoll event). All four ready list
+  //   methods are safe to be called  within the context of the callbacks.
+  //
+  //   Since the ready list invokes EpollCallbackInterface::OnEvent, only fds
+  //   that are registered with the SimpleEpollServer will be put on the ready
+  //   list. SetFDReady() and SetFDNotReady() will do nothing if the
+  //   SimpleEpollServer doesn't know about the fd passed in.
+  //
+  //   Since the ready list cannot reliably determine proper set of events
+  //   which should be sent to the callback, SetFDReady() requests the caller
+  //   to provide the ready list with the event mask, which will be used later
+  //   when OnEvent() is invoked by the ready list. Hence, the event_mask
+  //   passed to SetFDReady() does not affect the actual epoll registration of
+  //   the fd with the kernel. If a fd is already put on the ready list, and
+  //   SetFDReady() is called again for that fd with a different event_mask,
+  //   the event_mask will be updated.
+  virtual void SetFDReady(int fd, int events_to_fake);
+
+  virtual void SetFDNotReady(int fd);
+
+  // Summary:
+  //   IsFDReady(), ReadyListSize(), and VerifyReadyList() are intended as
+  //   debugging tools and for writing unit tests.
+  //   IsFDReady() returns whether an fd is on the ready list.
+  //   ReadyListSize() returns ready_list_size_, the number of fds on the
+  //   ready list. VerifyReadyList() checks the consistency of the internal
+  //   data structures and will CHECK if it finds an error.
+  virtual bool IsFDReady(int fd) const;
+
+  size_t ReadyListSize() const { return ready_list_size_; }
+
+  void VerifyReadyList() const;
+
+  ////////////////////////////////////////
+
+  // Summary:
+  //   Registers an alarm 'ac' to go off at time 'timeout_time_in_us'.
+  //   If the callback returns a positive number from its OnAlarm() function,
+  //   then the callback will be re-registered at that time, else the alarm
+  //   owner is responsible for freeing up memory.
+  //
+  //   Important: A given AlarmCB* can not be registered again if it is already
+  //    registered. If a user wants to register a callback again it should first
+  //    unregister the previous callback before calling RegisterAlarm again.
+  // Args:
+  //   timeout_time_in_us - the absolute time at which the alarm should go off
+  //   ac - the alarm which will be called.
+  virtual void RegisterAlarm(int64_t timeout_time_in_us, AlarmCB* ac);
+
+  // Summary:
+  //   Convenience wrapper that registers alarm 'ac' to go off at
+  //   (ApproximateNowInUsec() + delta_in_us); calling RegisterAlarm() with
+  //   that sum directly accomplishes the same thing. This is somewhat less
+  //   accurate than using an exact time (see the description of
+  //   ApproximateNowInUsec() for how 'approximate' it is), but the error is
+  //   never worse than the time it takes to process all events in one
+  //   WaitForEvents. As with RegisterAlarm(), if the callback returns a
+  //   positive number from its OnAlarm() function, then the callback will be
+  //   re-registered at that time, else the alarm owner is responsible for
+  //   freeing up memory.
+  //
+  //   Important: A given AlarmCB* can not be registered again if it is
+  //    already registered. To register a callback again, first unregister
+  //    the previous registration, then call RegisterAlarm again.
+  // Args:
+  //   delta_in_us - the delta in microseconds from ApproximateNowInUsec() at
+  //                 which point the alarm should go off.
+  //   ac - the alarm which will be called.
+  void RegisterAlarmApproximateDelta(int64_t delta_in_us, AlarmCB* ac) {
+    const int64_t alarm_time_in_us = ApproximateNowInUsec() + delta_in_us;
+    RegisterAlarm(alarm_time_in_us, ac);
+  }
+
+  ////////////////////////////////////////
+
+  // Summary:
+  //   Unregister  the alarm referred to by iterator_token; Callers should
+  //   be warned that a token may have become already invalid when OnAlarm()
+  //   is called, was unregistered, or OnShutdown was called on that alarm.
+  // Args:
+  //    iterator_token - iterator to the alarm callback to unregister.
+  virtual void UnregisterAlarm(
+      const SimpleEpollServer::AlarmRegToken& iterator_token);
+
+  virtual SimpleEpollServer::AlarmRegToken ReregisterAlarm(
+      SimpleEpollServer::AlarmRegToken iterator_token,
+      int64_t timeout_time_in_us);
+
+  ////////////////////////////////////////
+
+  // Summary:
+  //   returns the number of file-descriptors registered in this
+  //   SimpleEpollServer.
+  // Returns:
+  //   number of FDs registered (discounting the internal pipe used for Wake)
+  virtual int NumFDsRegistered() const;
+
+  // Summary:
+  //   Force the epoll server to wake up (by writing to an internal pipe).
+  virtual void Wake();
+
+  // Summary:
+  //   Wrapper around WallTimer's NowInUsec.  We do this so that we can test
+  //   SimpleEpollServer without using the system clock (and can avoid the
+  //   flakiness that would ensue)
+  // Returns:
+  //   the current time as number of microseconds since the Unix epoch.
+  virtual int64_t NowInUsec() const;
+
+  // Summary:
+  //   Since calling NowInUsec() many thousands of times per
+  //   WaitForEventsAndExecuteCallbacks function call is, to say the least,
+  //   inefficient, we allow users to use an approximate time instead. The
+  //   time returned from this function is as accurate as NowInUsec() when
+  //   WaitForEventsAndExecuteCallbacks is not an ancestor of the caller's
+  //   callstack.
+  //   However, when WaitForEventsAndExecuteCallbacks -is- an ancestor, then
+  //   this function returns the time at which the
+  //   WaitForEventsAndExecuteCallbacks function started to process events or
+  //   alarms.
+  //
+  //   Essentially, this function makes available a fast and mostly accurate
+  //   mechanism for getting the time for any function handling an event or
+  //   alarm. When functions which are not handling callbacks or alarms call
+  //   this function, they get the slow and "absolutely" accurate time.
+  //
+  //   Users should be encouraged to use this function.
+  // Returns:
+  //   the "approximate" current time as number of microseconds since the Unix
+  //   epoch.
+  virtual int64_t ApproximateNowInUsec() const;
+
+  static std::string EventMaskToString(int event_mask);
+
+  // Summary:
+  //   Logs the state of the epoll server with EPOLL_LOG(ERROR).
+  void LogStateOnCrash();
+
+  // Summary:
+  //   Sets the timeout to the value specified.
+  //   If the timeout is set to a negative number,
+  //      WaitForEventsAndExecuteCallbacks() will only return when an event has
+  //      occurred.
+  //   If the timeout is set to zero,
+  //      WaitForEventsAndExecuteCallbacks() will return immediately.
+  //   If the timeout is set to a positive number,
+  //      WaitForEventsAndExecuteCallbacks() will return when an event has
+  //      occurred, or when timeout_in_us microseconds have elapsed, whichever
+  //      is first.
+  // Args:
+  //   timeout_in_us - the new value of timeout_in_us_, chosen according to
+  //                   the behaviour desired (see above).
+  void set_timeout_in_us(int64_t timeout_in_us) {
+    timeout_in_us_ = timeout_in_us;
+  }
+
+  ////////////////////////////////////////
+
+  // Summary:
+  //   Accessor (for tests) for the current value of timeout_in_us_.
+  int64_t timeout_in_us_for_test() const { return timeout_in_us_; }
+
+  // Summary:
+  //   Returns true when this SimpleEpollServer is being destroyed.
+  bool in_shutdown() const { return in_shutdown_; }
+
+  // Compatibility stub: intentionally does nothing.
+  void Shutdown() {}
+
+  // Summary:
+  //   A function for implementing the ready list. It invokes OnEvent for each
+  //   of the fd in the ready list, and takes care of adding them back to the
+  //   ready list if the callback requests it (by checking that out_ready_mask
+  //   is non-zero).
+  void CallReadyListCallbacks();
+
+  int64_t LastDelayInUsec() const { return last_delay_in_usec_; }
+
+ protected:
+  virtual void SetNonblocking(int fd);
+
+  // This exists here so that we can override this function in unittests
+  // in order to make effective mock SimpleEpollServer objects.
+  virtual int epoll_wait_impl(int epfd, struct epoll_event* events,
+                              int max_events, int timeout_in_ms);
+
+  // Per-fd registration record. It is used only inside this class and never
+  // by anything external to it. Records are stored in an unordered set
+  // (FDToCBMap) which, knowing nothing about the objects it holds, must
+  // assume that every bit of an element feeds the hash function and the
+  // equality comparison, and therefore only hands out const access to its
+  // elements. Here only fd has to stay constant; every other member is
+  // declared mutable so it can be updated in place without compromising the
+  // integrity of the set.
+  struct CBAndEventMask {
+    // Builds a record that is not associated with any callback or fd.
+    CBAndEventMask() : CBAndEventMask(nullptr, 0, -1) {}
+
+    // Builds the record for 'fd', registered with 'cb' and 'event_mask'.
+    // No events are asserted or faked yet, the record is not in use, and
+    // its list links are cleared.
+    CBAndEventMask(EpollCallbackInterface* cb, int event_mask, int fd)
+        : cb(cb),
+          fd(fd),
+          event_mask(event_mask),
+          events_asserted(0),
+          events_to_fake(0),
+          in_use(false) {
+      entry.le_next = nullptr;
+      entry.le_prev = nullptr;
+    }
+
+    // Equality required by the unordered set: two records are equal exactly
+    // when they describe the same fd. Normally operator== should be a free
+    // standing function, but since CBAndEventMask is a protected type that
+    // will never be a base class, it makes no difference.
+    bool operator==(const CBAndEventMask& other) const {
+      return other.fd == fd;
+    }
+
+    // A callback. If the fd is unregistered inside the callchain of OnEvent,
+    // the cb will be set to NULL.
+    mutable EpollCallbackInterface* cb;
+
+    // Linkage for the lists declared with LIST_HEAD(..., CBAndEventMask)
+    // (ready_list_ and tmp_list_).
+    mutable LIST_ENTRY(CBAndEventMask) entry;
+
+    // The file descriptor registered with the epoll server.
+    int fd;
+    // The event_mask currently registered for this callback.
+    mutable int event_mask;
+    // The event_mask that was returned by epoll.
+    mutable int events_asserted;
+    // The event_mask the ready list uses when it calls OnEvent.
+    mutable int events_to_fake;
+    // Toggled around calls to OnEvent to tell UnregisterFD not to erase the
+    // iterator, because HandleEvent is still using it.
+    mutable bool in_use;
+  };
+
+  // Hash functor for FDToCBMap: a record hashes to its fd.
+  struct CBAndEventMaskHash {
+    size_t operator()(const CBAndEventMask& record) const {
+      return static_cast<size_t>(record.fd);
+    }
+  };
+
+  using FDToCBMap = std::unordered_set<CBAndEventMask, CBAndEventMaskHash>;
+
+  // the following four functions are OS-specific, and are likely
+  // to be changed in a subclass if the poll/select method is changed
+  // from epoll.
+
+  // Summary:
+  //   Deletes a file-descriptor from the set of FDs that should be
+  //   monitored with epoll.
+  //   Note that this only deals with modifying data relating -directly-
+  //   with the epoll call-- it does not modify any data within the
+  //   epoll_server.
+  // Args:
+  //   fd - the file descriptor to-be-removed from the monitoring set
+  virtual void DelFD(int fd) const;
+
+  ////////////////////////////////////////
+
+  // Summary:
+  //   Adds a file-descriptor to the set of FDs that should be
+  //   monitored with epoll.
+  //   Note that this only deals with modifying data relating -directly-
+  //   with the epoll call.
+  // Args:
+  //   fd - the file descriptor to-be-added to the monitoring set
+  //   event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc
+  //                 OR'd together) which will be associated with this
+  //                 FD initially.
+  virtual void AddFD(int fd, int event_mask) const;
+
+  ////////////////////////////////////////
+
+  // Summary:
+  //   Modifies a file-descriptor in the set of FDs that should be
+  //   monitored with epoll.
+  //   Note that this only deals with modifying data relating -directly-
+  //   with the epoll call.
+  // Args:
+  //   fd - the file descriptor to-be-added to the monitoring set
+  //   event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc
+  //                 OR'd together) which will be associated with this
+  //                 FD after this call.
+  virtual void ModFD(int fd, int event_mask) const;
+
+  ////////////////////////////////////////
+
+  // Summary:
+  //   Modifies the event mask associated with an FD in the set of
+  //   data needed by epoll.
+  //   Events are removed before they are added, thus, if ~0 is put
+  //   in 'remove_event', whatever is put in 'add_event' will be
+  //   the new event mask.
+  //   If the file-descriptor specified is not registered in the
+  //   epoll_server, then nothing happens as a result of this call.
+  // Args:
+  //   fd - the file descriptor whose event mask is to be modified
+  //   remove_event - the events which are to be removed from the current
+  //                  event_mask
+  //   add_event - the events which are to be added to the current event_mask
+  //
+  //
+  virtual void ModifyFD(int fd, int remove_event, int add_event);
+
+  ////////////////////////////////////////
+
+  // Summary:
+  //   Waits for events, and calls HandleEvent() for each
+  //   fd, event pair discovered to possibly have an event.
+  //   Note that a callback (B) may get a spurious event if
+  //   another callback (A) has closed a file-descriptor N, and
+  //   the callback (B) has a newly opened file-descriptor, which
+  //   also happens to be N.
+  virtual void WaitForEventsAndCallHandleEvents(int64_t timeout_in_us,
+                                                struct epoll_event events[],
+                                                int events_size);
+
+  // Summary:
+  //   An internal function for implementing the ready list. It adds a fd's
+  //   CBAndEventMask to the ready list. If the fd is already on the ready
+  //   list, it is a no-op.
+  void AddToReadyList(CBAndEventMask* cb_and_mask);
+
+  // Summary:
+  //   An internal function for implementing the ready list. It removes a fd's
+  //   CBAndEventMask from the ready list. If the fd is not on the ready list,
+  //   it is a no-op.
+  void RemoveFromReadyList(const CBAndEventMask& cb_and_mask);
+
+  // Summary:
+  // Calls any pending alarms that should go off and reregisters them if they
+  // were recurring.
+  virtual void CallAndReregisterAlarmEvents();
+
+  // The file-descriptor created for epolling
+  int epoll_fd_;
+
+  // The mapping of file-descriptor to CBAndEventMasks
+  FDToCBMap cb_map_;
+
+  // Hash functor for AlarmCBMap: an alarm hashes to its address.
+  struct AlarmCBHash {
+    size_t operator()(AlarmCB* const& alarm) const {
+      return reinterpret_cast<size_t>(alarm);
+    }
+  };
+
+  // TODO(sushantj): Having this hash_set is avoidable. We currently have it
+  // only so that we can enforce stringent checks that a caller can not register
+  // the same alarm twice. One option is to have an implementation in which
+  // this hash_set is used only in the debug mode.
+  using AlarmCBMap = std::unordered_set<AlarmCB*, AlarmCBHash>;
+  AlarmCBMap all_alarms_;
+
+  TimeToAlarmCBMap alarm_map_;
+
+  // The amount of time in microseconds that we'll wait before returning
+  // from the WaitForEventsAndExecuteCallbacks() function.
+  // If this is positive, wait that many microseconds.
+  // If this is negative, wait forever, or for the first event that occurs
+  // If this is zero, never wait for an event.
+  int64_t timeout_in_us_;
+
+  // This is nonzero only after the invocation of epoll_wait_impl within
+  // WaitForEventsAndCallHandleEvents and before the function
+  // WaitForEventsAndExecuteCallbacks returns.  At all other times, this is
+  // zero. This enables us to have relatively accurate time returned from the
+  // ApproximateNowInUsec() function. See that function for more details.
+  int64_t recorded_now_in_us_;
+
+  // This is used to implement CallAndReregisterAlarmEvents. This stores
+  // all alarms that were reregistered because OnAlarm() returned a
+  // value > 0 and the time at which they should be executed is less than
+  // the current time.  By storing such alarms in this map we ensure
+  // that while calling CallAndReregisterAlarmEvents we do not call
+  // OnAlarm on any alarm in this set. This ensures that we do not
+  // go in an infinite loop.
+  AlarmCBMap alarms_reregistered_and_should_be_skipped_;
+
+  LIST_HEAD(ReadyList, CBAndEventMask) ready_list_;
+  LIST_HEAD(TmpList, CBAndEventMask) tmp_list_;
+  int ready_list_size_;
+  // TODO(alyssar): make this into something that scales up.
+  static const int events_size_ = 256;
+  struct epoll_event events_[256];
+
+#ifdef EPOLL_SERVER_EVENT_TRACING
+  struct EventRecorder {
+   public:
+    EventRecorder() : num_records_(0), record_threshold_(10000) {}
+
+    ~EventRecorder() { Clear(); }
+
+    // When a number of events equals the record threshold,
+    // the collected data summary for all FDs will be written
+    // to EPOLL_LOG(INFO). Note that this does not include the
+    // individual events (if you're interested in those, you'll
+    // have to get at them programmatically).
+    // After any such flushing to EPOLL_LOG(INFO) all events will
+    // be cleared.
+    // Note that the definition of an 'event' is a bit 'hazy',
+    // as it includes the 'Unregistration' event, and perhaps
+    // others.
+    void set_record_threshold(int64_t new_threshold) {
+      record_threshold_ = new_threshold;
+    }
+
+    void Clear() {  // Frees every recorded event and empties all summaries.
+      for (size_t i = 0; i < debug_events_.size(); ++i) {
+        delete debug_events_[i];
+      }
+      debug_events_.clear();
+      unregistered_fds_.clear();
+      event_counts_.clear();
+    }
+
+    void MaybeRecordAndClear() {
+      ++num_records_;
+      // Only a positive threshold that has been exceeded triggers a flush.
+      if (record_threshold_ <= 0 || num_records_ <= record_threshold_) return;
+      EPOLL_LOG(INFO) << "\n" << *this;
+      num_records_ = 0;
+      Clear();
+    }
+
+    void RecordFDMaskEvent(int fd, int mask, const char* function) {
+      // The heap-allocated record is released later by Clear().
+      debug_events_.push_back(new FDMaskOutput(fd, mask, function));
+      MaybeRecordAndClear();
+    }
+
+    void RecordEpollWaitEvent(int timeout_in_ms, int num_events_generated) {
+      // The heap-allocated record is released later by Clear().
+      debug_events_.push_back(
+          new EpollWaitOutput(timeout_in_ms, num_events_generated));
+      MaybeRecordAndClear();
+    }
+
+    void RecordEpollEvent(int fd, int event_mask) {
+      Events& events_for_fd = event_counts_[fd];
+      events_for_fd.AssignFromMask(event_mask);
+      MaybeRecordAndClear();
+    }
+
+    friend ostream& operator<<(ostream& os, const EventRecorder& er) {
+      for (int i = 0; i < er.unregistered_fds_.size(); ++i) {
+        os << "fd: " << er.unregistered_fds_[i] << "\n";
+        os << er.unregistered_fds_[i];
+      }
+      for (EventCountsMap::const_iterator i = er.event_counts_.begin();
+           i != er.event_counts_.end(); ++i) {
+        os << "fd: " << i->first << "\n";
+        os << i->second;
+      }
+      for (int i = 0; i < er.debug_events_.size(); ++i) {
+        os << *(er.debug_events_[i]) << "\n";
+      }
+      return os;
+    }
+
+    void RecordUnregistration(int fd) {
+      EventCountsMap::iterator i = event_counts_.find(fd);
+      if (i != event_counts_.end()) {
+        unregistered_fds_.push_back(i->second);
+        event_counts_.erase(i);
+      }
+      MaybeRecordAndClear();
+    }
+
+   protected:
+    class DebugOutput {
+     public:
+      friend ostream& operator<<(ostream& os, const DebugOutput& debug_output) {
+        debug_output.OutputToStream(os);
+        return os;
+      }
+      virtual void OutputToStream(ostream* os) const = 0;
+      virtual ~DebugOutput() {}
+    };
+
+    class FDMaskOutput : public DebugOutput {
+     public:
+      FDMaskOutput(int fd, int mask, const char* function)
+          : fd_(fd), mask_(mask), function_(function) {}
+      virtual void OutputToStream(ostream* os) const {
+        (*os) << "func: " << function_ << "\tfd: " << fd_;
+        if (mask_ != 0) {
+          (*os) << "\tmask: " << EventMaskToString(mask_);
+        }
+      }
+      int fd_;
+      int mask_;
+      const char* function_;
+    };
+
+    class EpollWaitOutput : public DebugOutput {
+     public:
+      EpollWaitOutput(int timeout_in_ms, int num_events_generated)
+          : timeout_in_ms_(timeout_in_ms),
+            num_events_generated_(num_events_generated) {}
+      virtual void OutputToStream(ostream* os) const {
+        (*os) << "timeout_in_ms: " << timeout_in_ms_
+              << "\tnum_events_generated: " << num_events_generated_;
+      }
+
+     protected:
+      int timeout_in_ms_;
+      int num_events_generated_;
+    };
+
+    struct Events {
+      Events()
+          : epoll_in(0),
+            epoll_pri(0),
+            epoll_out(0),
+            epoll_rdnorm(0),
+            epoll_rdband(0),
+            epoll_wrnorm(0),
+            epoll_wrband(0),
+            epoll_msg(0),
+            epoll_err(0),
+            epoll_hup(0),
+            epoll_oneshot(0),
+            epoll_et(0) {}
+
+      void AssignFromMask(int event_mask) {
+        if (event_mask & EPOLLIN) ++epoll_in;
+        if (event_mask & EPOLLPRI) ++epoll_pri;
+        if (event_mask & EPOLLOUT) ++epoll_out;
+        if (event_mask & EPOLLRDNORM) ++epoll_rdnorm;
+        if (event_mask & EPOLLRDBAND) ++epoll_rdband;
+        if (event_mask & EPOLLWRNORM) ++epoll_wrnorm;
+        if (event_mask & EPOLLWRBAND) ++epoll_wrband;
+        if (event_mask & EPOLLMSG) ++epoll_msg;
+        if (event_mask & EPOLLERR) ++epoll_err;
+        if (event_mask & EPOLLHUP) ++epoll_hup;
+        if (event_mask & EPOLLONESHOT) ++epoll_oneshot;
+        if (event_mask & EPOLLET) ++epoll_et;
+      }
+
+      friend ostream& operator<<(ostream& os, const Events& ev) {
+        if (ev.epoll_in) {
+          os << "\t      EPOLLIN: " << ev.epoll_in << "\n";
+        }
+        if (ev.epoll_pri) {
+          os << "\t     EPOLLPRI: " << ev.epoll_pri << "\n";
+        }
+        if (ev.epoll_out) {
+          os << "\t     EPOLLOUT: " << ev.epoll_out << "\n";
+        }
+        if (ev.epoll_rdnorm) {
+          os << "\t  EPOLLRDNORM: " << ev.epoll_rdnorm << "\n";
+        }
+        if (ev.epoll_rdband) {
+          os << "\t  EPOLLRDBAND: " << ev.epoll_rdband << "\n";
+        }
+        if (ev.epoll_wrnorm) {
+          os << "\t  EPOLLWRNORM: " << ev.epoll_wrnorm << "\n";
+        }
+        if (ev.epoll_wrband) {
+          os << "\t  EPOLLWRBAND: " << ev.epoll_wrband << "\n";
+        }
+        if (ev.epoll_msg) {
+          os << "\t     EPOLLMSG: " << ev.epoll_msg << "\n";
+        }
+        if (ev.epoll_err) {
+          os << "\t     EPOLLERR: " << ev.epoll_err << "\n";
+        }
+        if (ev.epoll_hup) {
+          os << "\t     EPOLLHUP: " << ev.epoll_hup << "\n";
+        }
+        if (ev.epoll_oneshot) {
+          os << "\t EPOLLONESHOT: " << ev.epoll_oneshot << "\n";
+        }
+        if (ev.epoll_et) {
+          os << "\t      EPOLLET: " << ev.epoll_et << "\n";
+        }
+        return os;
+      }
+
+      unsigned int epoll_in;
+      unsigned int epoll_pri;
+      unsigned int epoll_out;
+      unsigned int epoll_rdnorm;
+      unsigned int epoll_rdband;
+      unsigned int epoll_wrnorm;
+      unsigned int epoll_wrband;
+      unsigned int epoll_msg;
+      unsigned int epoll_err;
+      unsigned int epoll_hup;
+      unsigned int epoll_oneshot;
+      unsigned int epoll_et;
+    };
+
+    std::vector<DebugOutput*> debug_events_;
+    std::vector<Events> unregistered_fds_;
+    using EventCountsMap = std::unordered_map<int, Events>;
+    EventCountsMap event_counts_;
+    int64_t num_records_;
+    int64_t record_threshold_;
+  };
+
+  // Tracing helpers: drop all recorded data / stream it to |os|.
+  void ClearEventRecords() { event_recorder_.Clear(); }
+  void WriteEventRecords(std::ostream* os) const { (*os) << event_recorder_; }
+  mutable EventRecorder event_recorder_;
+
+#endif
+
+ private:
+  // Helper functions used in the destructor.
+  void CleanupFDToCBMap();
+  void CleanupTimeToAlarmCBMap();
+
+  // The callback registered to the fds below.  As the purpose of their
+  // registration is to wake the epoll server it just clears the pipe and
+  // returns.
+  std::unique_ptr<ReadPipeCallback> wake_cb_;
+
+  // A pipe owned by the epoll server.  The server will be registered to listen
+  // on read_fd_ and can be woken by Wake() which writes to write_fd_.
+  int read_fd_;
+  int write_fd_;
+
+  // Re-entrancy guard for WaitForEventsAndExecuteCallbacks(). That function
+  // checks this flag on entry: if it is already true, the call logs to ERROR
+  // and returns without doing work (or aborts the program in DEBUG mode).
+  // Otherwise it sets the flag, does its work, and clears the flag again
+  // when done. This catches unwanted recursion.
+  bool in_wait_for_events_and_execute_callbacks_;
+
+  // True while the SimpleEpollServer is being destroyed.
+  bool in_shutdown_;
+  int64_t last_delay_in_usec_;  // usec per its name; TODO confirm use.
+};
+
+class EpollAlarmCallbackInterface {
+ public:
+  // Summary:
+  //   Invoked when the alarm times out. Invalidates an AlarmRegToken.
+  //   WARNING: an implementation that saved a token referring to this alarm
+  //   callback must delete it in OnAlarm, because it is no longer valid.
+  // Returns:
+  //   the unix time (in microseconds) at which the alarm should be signaled
+  //   again, or 0 if the alarm should be removed.
+  virtual int64_t OnAlarm() = 0;
+
+  // Summary:
+  //   Invoked when the alarm is registered. Invalidates an AlarmRegToken.
+  // Args:
+  //   token: the iterator to the alarm registered in the alarm map.
+  //   WARNING: this token stops being valid once the alarm fires, is
+  //   unregistered, or has OnShutdown called on it.
+  //   eps: the epoll server the alarm is registered with.
+  virtual void OnRegistration(const SimpleEpollServer::AlarmRegToken& token,
+                              SimpleEpollServer* eps) = 0;
+
+  // Summary:
+  //   Invoked when the alarm is unregistered.
+  //   WARNING: once a callback has been unregistered, the token that was
+  //   saved to refer to it must not be used any more.
+  virtual void OnUnregistration() = 0;
+
+  // Summary:
+  //   Invoked when the epoll server is shutting down.
+  //   Invalidates the AlarmRegToken that was handed out when this alarm was
+  //   registered.
+  virtual void OnShutdown(SimpleEpollServer* eps) = 0;
+
+  virtual ~EpollAlarmCallbackInterface() = default;
+
+ protected:
+  EpollAlarmCallbackInterface() = default;
+};
+
+// A simple alarm which unregisters itself on destruction.
+//
+// PLEASE NOTE:
+// Any classes overriding these functions must either call the implementation
+// of the parent class, or must otherwise make sure that the 'registered_'
+// boolean and the token, 'token_', are updated appropriately.
+class EpollAlarm : public EpollAlarmCallbackInterface {
+ public:
+  EpollAlarm();
+
+  ~EpollAlarm() override;  // Unregisters the alarm (see class comment).
+
+  // Marks the alarm as unregistered and returns 0.  The return value may be
+  // safely ignored by subclasses.
+  int64_t OnAlarm() override;
+
+  // Marks the alarm as registered, and stores the token.
+  void OnRegistration(const SimpleEpollServer::AlarmRegToken& token,
+                      SimpleEpollServer* eps) override;
+
+  // Marks the alarm as unregistered.
+  void OnUnregistration() override;
+
+  // Marks the alarm as unregistered.
+  void OnShutdown(SimpleEpollServer* eps) override;
+
+  // If the alarm was registered, unregister it.
+  void UnregisterIfRegistered();
+
+  // Reregisters the alarm at the specified time, given in microseconds.
+  void ReregisterAlarm(int64_t timeout_time_in_us);
+
+  bool registered() const { return registered_; }  // Reads registered_.
+
+  const SimpleEpollServer* eps() const { return eps_; }  // Reads eps_.
+
+ private:
+  SimpleEpollServer::AlarmRegToken token_;  // Stored by OnRegistration().
+  SimpleEpollServer* eps_;  // Presumably the server from OnRegistration().
+  bool registered_;         // Exposed through registered().
+};
+
+}  // namespace epoll_server
+
+#endif  // QUICHE_EPOLL_SERVER_H_