QUICHE team | 53f08a3 | 2019-04-15 14:47:31 -0400 | [diff] [blame] | 1 | // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #include "net/third_party/quiche/src/epoll_server/simple_epoll_server.h" |
| 6 | |
| 7 | #include <errno.h> // for errno and strerror_r |
| 8 | #include <stdlib.h> // for abort |
| 9 | #include <unistd.h> // For read, pipe, close and write. |
| 10 | |
| 11 | #include <algorithm> |
| 12 | #include <utility> |
| 13 | |
| 14 | #include "net/third_party/quiche/src/epoll_server/platform/api/epoll_bug.h" |
| 15 | #include "net/third_party/quiche/src/epoll_server/platform/api/epoll_time.h" |
| 16 | |
| 17 | // Design notes: An efficient implementation of ready list has the following |
| 18 | // desirable properties: |
| 19 | // |
| 20 | // A. O(1) insertion into/removal from the list in any location. |
| 21 | // B. Once the callback is found by hash lookup using the fd, the lookup of |
| 22 | // corresponding entry in the list is O(1). |
| 23 | // C. Safe insertion into/removal from the list during list iteration. (The |
| 24 | // ready list's purpose is to enable completely event driven I/O model. |
| 25 | // Thus, all the interesting bits happen in the callback. It is critical |
| 26 | // to not place any restriction on the API during list iteration.) |
| 27 | // |
| 28 | // The current implementation achieves these goals with the following design: |
| 29 | // |
| 30 | // - The ready list is constructed as a doubly linked list to enable O(1) |
| 31 | // insertion/removal (see man 3 queue). |
| 32 | // - The forward and backward links are directly embedded inside the |
| 33 | // CBAndEventMask struct. This enables O(1) lookup in the list for a given |
| 34 | // callback. (Technically, we could've used std::list of hash_set::iterator, |
| 35 | // and keep a list::iterator in CBAndEventMask to achieve the same effect. |
| 36 | // However, iterators have two problems: no way to portably invalidate them, |
| 37 | // and no way to tell whether an iterator is singular or not. The only way to |
| 38 | // overcome these issues is to keep bools in both places, but that throws off |
| 39 | // memory alignment (up to 7 wasted bytes for each bool). The extra level of |
| 40 | // indirection will also likely be less cache friendly. Direct manipulation |
| 41 | // of link pointers makes it easier to retrieve the CBAndEventMask from the |
| 42 | // list, easier to check whether a CBAndEventMask is in the list, uses less |
| 43 | // memory (save 32 bytes/fd), and does not affect cache usage (we need to |
| 44 | // read in the struct to use the callback anyway).) |
| 45 | // - Embed the fd directly into CBAndEventMask and switch to using hash_set. |
| 46 | // This removes the need to store hash_map::iterator in the list just so that |
| 47 | // we can get both the fd and the callback. |
| 48 | // - The ready list is "one shot": each entry is removed before OnEvent is |
| 49 | // called. This removes the mutation-while-iterating problem. |
| 50 | // - Use two lists to keep track of callbacks. The ready_list_ is the one used |
| 51 | // for registration. Before iteration, the ready_list_ is swapped into the |
| 52 | // tmp_list_. Once iteration is done, tmp_list_ will be empty, and |
| 53 | // ready_list_ will have all the new ready fds. |
| 54 | |
// Capacity, in bytes, of the stack buffers handed to strerror_r() when
// formatting errno values for log messages.
static constexpr int kErrorBufferSize = 256;
| 57 | |
| 58 | namespace epoll_server { |
| 59 | |
// Scoped override of a variable: the constructor stores |new_value| into
// |*scoped_variable| and the destructor writes the previous value back.
// Non-copyable. The destructor dereferences |scoped_variable|, so the pointee
// has to outlive this object.
template <typename T>
class AutoReset {
 public:
  AutoReset(T* scoped_variable, T new_value)
      : scoped_variable_(scoped_variable),
        // std::exchange moves the old value out and assigns the new one in a
        // single step.
        original_value_(
            std::exchange(*scoped_variable, std::move(new_value))) {}
  AutoReset(const AutoReset&) = delete;
  AutoReset& operator=(const AutoReset&) = delete;

  ~AutoReset() { *scoped_variable_ = std::move(original_value_); }

 private:
  T* scoped_variable_;  // Variable being overridden (not owned).
  T original_value_;    // Value restored on destruction.
};
| 77 | |
| 78 | // Clears the pipe and returns. Used for waking the epoll server up. |
| 79 | class ReadPipeCallback : public EpollCallbackInterface { |
| 80 | public: |
| 81 | void OnEvent(int fd, EpollEvent* event) override { |
| 82 | DCHECK(event->in_events == EPOLLIN); |
| 83 | int data; |
| 84 | int data_read = 1; |
| 85 | // Read until the pipe is empty. |
| 86 | while (data_read > 0) { |
| 87 | data_read = read(fd, &data, sizeof(data)); |
| 88 | } |
| 89 | } |
| 90 | void OnShutdown(SimpleEpollServer* eps, int fd) override {} |
| 91 | void OnRegistration(SimpleEpollServer*, int, int) override {} |
| 92 | void OnModification(int, int) override {} // COV_NF_LINE |
| 93 | void OnUnregistration(int, bool) override {} // COV_NF_LINE |
| 94 | std::string Name() const override { return "ReadPipeCallback"; } |
| 95 | }; |
| 96 | |
| 97 | //////////////////////////////////////////////////////////////////////////////// |
| 98 | //////////////////////////////////////////////////////////////////////////////// |
| 99 | |
| 100 | SimpleEpollServer::SimpleEpollServer() |
| 101 | : epoll_fd_(epoll_create(1024)), |
| 102 | timeout_in_us_(0), |
| 103 | recorded_now_in_us_(0), |
| 104 | ready_list_size_(0), |
| 105 | wake_cb_(new ReadPipeCallback), |
| 106 | read_fd_(-1), |
| 107 | write_fd_(-1), |
| 108 | in_wait_for_events_and_execute_callbacks_(false), |
| 109 | in_shutdown_(false), |
| 110 | last_delay_in_usec_(0) { |
| 111 | // ensure that the epoll_fd_ is valid. |
| 112 | CHECK_NE(epoll_fd_, -1); |
| 113 | LIST_INIT(&ready_list_); |
| 114 | LIST_INIT(&tmp_list_); |
| 115 | |
| 116 | int pipe_fds[2]; |
| 117 | if (pipe(pipe_fds) < 0) { |
| 118 | // Unfortunately, it is impossible to test any such initialization in |
| 119 | // a constructor (as virtual methods do not yet work). |
| 120 | // This -could- be solved by moving initialization to an outside |
| 121 | // call... |
| 122 | int saved_errno = errno; |
| 123 | char buf[kErrorBufferSize]; |
| 124 | EPOLL_LOG(FATAL) << "Error " << saved_errno << " in pipe(): " |
| 125 | << strerror_r(saved_errno, buf, sizeof(buf)); |
| 126 | } |
| 127 | read_fd_ = pipe_fds[0]; |
| 128 | write_fd_ = pipe_fds[1]; |
| 129 | RegisterFD(read_fd_, wake_cb_.get(), EPOLLIN); |
| 130 | } |
| 131 | |
| 132 | void SimpleEpollServer::CleanupFDToCBMap() { |
| 133 | auto cb_iter = cb_map_.begin(); |
| 134 | while (cb_iter != cb_map_.end()) { |
| 135 | int fd = cb_iter->fd; |
| 136 | CB* cb = cb_iter->cb; |
| 137 | |
| 138 | cb_iter->in_use = true; |
| 139 | if (cb) { |
| 140 | cb->OnShutdown(this, fd); |
| 141 | } |
| 142 | |
| 143 | cb_map_.erase(cb_iter); |
| 144 | cb_iter = cb_map_.begin(); |
| 145 | } |
| 146 | } |
| 147 | |
| 148 | void SimpleEpollServer::CleanupTimeToAlarmCBMap() { |
| 149 | TimeToAlarmCBMap::iterator erase_it; |
| 150 | |
| 151 | // Call OnShutdown() on alarms. Note that the structure of the loop |
| 152 | // is similar to the structure of loop in the function HandleAlarms() |
| 153 | for (auto i = alarm_map_.begin(); i != alarm_map_.end();) { |
| 154 | // Note that OnShutdown() can call UnregisterAlarm() on |
| 155 | // other iterators. OnShutdown() should not call UnregisterAlarm() |
| 156 | // on self because by definition the iterator is not valid any more. |
| 157 | i->second->OnShutdown(this); |
| 158 | erase_it = i; |
| 159 | ++i; |
| 160 | alarm_map_.erase(erase_it); |
| 161 | } |
| 162 | } |
| 163 | |
| 164 | SimpleEpollServer::~SimpleEpollServer() { |
| 165 | DCHECK_EQ(in_shutdown_, false); |
| 166 | in_shutdown_ = true; |
| 167 | #ifdef EPOLL_SERVER_EVENT_TRACING |
| 168 | EPOLL_LOG(INFO) << "\n" << event_recorder_; |
| 169 | #endif |
| 170 | EPOLL_VLOG(2) << "Shutting down epoll server "; |
| 171 | CleanupFDToCBMap(); |
| 172 | |
| 173 | LIST_INIT(&ready_list_); |
| 174 | LIST_INIT(&tmp_list_); |
| 175 | |
| 176 | CleanupTimeToAlarmCBMap(); |
| 177 | |
| 178 | close(read_fd_); |
| 179 | close(write_fd_); |
| 180 | close(epoll_fd_); |
| 181 | } |
| 182 | |
| 183 | // Whether a CBAandEventMask is on the ready list is determined by a non-NULL |
| 184 | // le_prev pointer (le_next being NULL indicates end of list). |
| 185 | inline void SimpleEpollServer::AddToReadyList(CBAndEventMask* cb_and_mask) { |
| 186 | if (cb_and_mask->entry.le_prev == NULL) { |
| 187 | LIST_INSERT_HEAD(&ready_list_, cb_and_mask, entry); |
| 188 | ++ready_list_size_; |
| 189 | } |
| 190 | } |
| 191 | |
| 192 | inline void SimpleEpollServer::RemoveFromReadyList( |
| 193 | const CBAndEventMask& cb_and_mask) { |
| 194 | if (cb_and_mask.entry.le_prev != NULL) { |
| 195 | LIST_REMOVE(&cb_and_mask, entry); |
| 196 | // Clean up all the ready list states. Don't bother with the other fields |
| 197 | // as they are initialized when the CBAandEventMask is added to the ready |
| 198 | // list. This saves a few cycles in the inner loop. |
| 199 | cb_and_mask.entry.le_prev = NULL; |
| 200 | --ready_list_size_; |
| 201 | if (ready_list_size_ == 0) { |
| 202 | DCHECK(ready_list_.lh_first == NULL); |
| 203 | DCHECK(tmp_list_.lh_first == NULL); |
| 204 | } |
| 205 | } |
| 206 | } |
| 207 | |
| 208 | void SimpleEpollServer::RegisterFD(int fd, CB* cb, int event_mask) { |
| 209 | CHECK(cb); |
| 210 | EPOLL_VLOG(3) << "RegisterFD fd=" << fd << " event_mask=" << event_mask; |
| 211 | auto fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); |
| 212 | if (cb_map_.end() != fd_i) { |
| 213 | // do we just abort, or do we just unregister the other callback? |
| 214 | // for now, lets just unregister the other callback. |
| 215 | |
| 216 | // unregister any callback that may already be registered for this FD. |
| 217 | CB* other_cb = fd_i->cb; |
| 218 | if (other_cb) { |
| 219 | // Must remove from the ready list before erasing. |
| 220 | RemoveFromReadyList(*fd_i); |
| 221 | other_cb->OnUnregistration(fd, true); |
| 222 | ModFD(fd, event_mask); |
| 223 | } else { |
| 224 | // already unregistered, so just recycle the node. |
| 225 | AddFD(fd, event_mask); |
| 226 | } |
| 227 | fd_i->cb = cb; |
| 228 | fd_i->event_mask = event_mask; |
| 229 | fd_i->events_to_fake = 0; |
| 230 | } else { |
| 231 | AddFD(fd, event_mask); |
| 232 | cb_map_.insert(CBAndEventMask(cb, event_mask, fd)); |
| 233 | } |
| 234 | |
| 235 | // set the FD to be non-blocking. |
| 236 | SetNonblocking(fd); |
| 237 | |
| 238 | cb->OnRegistration(this, fd, event_mask); |
| 239 | } |
| 240 | |
| 241 | void SimpleEpollServer::SetNonblocking(int fd) { |
| 242 | int flags = fcntl(fd, F_GETFL, 0); |
| 243 | if (flags == -1) { |
| 244 | int saved_errno = errno; |
| 245 | char buf[kErrorBufferSize]; |
| 246 | EPOLL_LOG(FATAL) << "Error " << saved_errno << " doing fcntl(" << fd |
| 247 | << ", F_GETFL, 0): " |
| 248 | << strerror_r(saved_errno, buf, sizeof(buf)); |
| 249 | } |
| 250 | if (!(flags & O_NONBLOCK)) { |
| 251 | int saved_flags = flags; |
| 252 | flags = fcntl(fd, F_SETFL, flags | O_NONBLOCK); |
| 253 | if (flags == -1) { |
| 254 | // bad. |
| 255 | int saved_errno = errno; |
| 256 | char buf[kErrorBufferSize]; |
| 257 | EPOLL_LOG(FATAL) << "Error " << saved_errno << " doing fcntl(" << fd |
| 258 | << ", F_SETFL, " << saved_flags |
| 259 | << "): " << strerror_r(saved_errno, buf, sizeof(buf)); |
| 260 | } |
| 261 | } |
| 262 | } |
| 263 | |
| 264 | int SimpleEpollServer::epoll_wait_impl(int epfd, struct epoll_event* events, |
| 265 | int max_events, int timeout_in_ms) { |
| 266 | return epoll_wait(epfd, events, max_events, timeout_in_ms); |
| 267 | } |
| 268 | |
| 269 | void SimpleEpollServer::RegisterFDForWrite(int fd, CB* cb) { |
| 270 | RegisterFD(fd, cb, EPOLLOUT); |
| 271 | } |
| 272 | |
| 273 | void SimpleEpollServer::RegisterFDForReadWrite(int fd, CB* cb) { |
| 274 | RegisterFD(fd, cb, EPOLLIN | EPOLLOUT); |
| 275 | } |
| 276 | |
| 277 | void SimpleEpollServer::RegisterFDForRead(int fd, CB* cb) { |
| 278 | RegisterFD(fd, cb, EPOLLIN); |
| 279 | } |
| 280 | |
| 281 | void SimpleEpollServer::UnregisterFD(int fd) { |
| 282 | auto fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); |
| 283 | if (cb_map_.end() == fd_i || fd_i->cb == NULL) { |
| 284 | // Doesn't exist in server, or has gone through UnregisterFD once and still |
| 285 | // inside the callchain of OnEvent. |
| 286 | return; |
| 287 | } |
| 288 | #ifdef EPOLL_SERVER_EVENT_TRACING |
| 289 | event_recorder_.RecordUnregistration(fd); |
| 290 | #endif |
| 291 | CB* cb = fd_i->cb; |
| 292 | // Since the links are embedded within the struct, we must remove it from the |
| 293 | // list before erasing it from the hash_set. |
| 294 | RemoveFromReadyList(*fd_i); |
| 295 | DelFD(fd); |
| 296 | cb->OnUnregistration(fd, false); |
| 297 | // fd_i->cb is NULL if that fd is unregistered inside the callchain of |
| 298 | // OnEvent. Since the SimpleEpollServer needs a valid CBAndEventMask after |
| 299 | // OnEvent returns in order to add it to the ready list, we cannot have |
| 300 | // UnregisterFD erase the entry if it is in use. Thus, a NULL fd_i->cb is used |
| 301 | // as a condition that tells the SimpleEpollServer that this entry is unused |
| 302 | // at a later point. |
| 303 | if (!fd_i->in_use) { |
| 304 | cb_map_.erase(fd_i); |
| 305 | } else { |
| 306 | // Remove all trace of the registration, and just keep the node alive long |
| 307 | // enough so the code that calls OnEvent doesn't have to worry about |
| 308 | // figuring out whether the CBAndEventMask is valid or not. |
| 309 | fd_i->cb = NULL; |
| 310 | fd_i->event_mask = 0; |
| 311 | fd_i->events_to_fake = 0; |
| 312 | } |
| 313 | } |
| 314 | |
| 315 | void SimpleEpollServer::ModifyCallback(int fd, int event_mask) { |
| 316 | ModifyFD(fd, ~0, event_mask); |
| 317 | } |
| 318 | |
| 319 | void SimpleEpollServer::StopRead(int fd) { ModifyFD(fd, EPOLLIN, 0); } |
| 320 | |
| 321 | void SimpleEpollServer::StartRead(int fd) { ModifyFD(fd, 0, EPOLLIN); } |
| 322 | |
| 323 | void SimpleEpollServer::StopWrite(int fd) { ModifyFD(fd, EPOLLOUT, 0); } |
| 324 | |
| 325 | void SimpleEpollServer::StartWrite(int fd) { ModifyFD(fd, 0, EPOLLOUT); } |
| 326 | |
| 327 | void SimpleEpollServer::HandleEvent(int fd, int event_mask) { |
| 328 | #ifdef EPOLL_SERVER_EVENT_TRACING |
| 329 | event_recorder_.RecordEpollEvent(fd, event_mask); |
| 330 | #endif |
| 331 | auto fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); |
| 332 | if (fd_i == cb_map_.end() || fd_i->cb == NULL) { |
| 333 | // Ignore the event. |
| 334 | // This could occur if epoll() returns a set of events, and |
| 335 | // while processing event A (earlier) we removed the callback |
| 336 | // for event B (and are now processing event B). |
| 337 | return; |
| 338 | } |
| 339 | fd_i->events_asserted = event_mask; |
| 340 | CBAndEventMask* cb_and_mask = const_cast<CBAndEventMask*>(&*fd_i); |
| 341 | AddToReadyList(cb_and_mask); |
| 342 | } |
| 343 | |
| 344 | void SimpleEpollServer::WaitForEventsAndExecuteCallbacks() { |
| 345 | if (in_wait_for_events_and_execute_callbacks_) { |
| 346 | EPOLL_LOG(DFATAL) << "Attempting to call WaitForEventsAndExecuteCallbacks" |
| 347 | " when an ancestor to the current function is already" |
| 348 | " WaitForEventsAndExecuteCallbacks!"; |
| 349 | // The line below is actually tested, but in coverage mode, |
| 350 | // we never see it. |
| 351 | return; // COV_NF_LINE |
| 352 | } |
| 353 | AutoReset<bool> recursion_guard(&in_wait_for_events_and_execute_callbacks_, |
| 354 | true); |
| 355 | if (alarm_map_.empty()) { |
| 356 | // no alarms, this is business as usual. |
| 357 | WaitForEventsAndCallHandleEvents(timeout_in_us_, events_, events_size_); |
| 358 | recorded_now_in_us_ = 0; |
| 359 | return; |
| 360 | } |
| 361 | |
| 362 | // store the 'now'. If we recomputed 'now' every iteration |
| 363 | // down below, then we might never exit that loop-- any |
| 364 | // long-running alarms might install other long-running |
| 365 | // alarms, etc. By storing it here now, we ensure that |
| 366 | // a more reasonable amount of work is done here. |
| 367 | int64_t now_in_us = NowInUsec(); |
| 368 | |
| 369 | // Get the first timeout from the alarm_map where it is |
| 370 | // stored in absolute time. |
| 371 | int64_t next_alarm_time_in_us = alarm_map_.begin()->first; |
| 372 | EPOLL_VLOG(4) << "next_alarm_time = " << next_alarm_time_in_us |
| 373 | << " now = " << now_in_us |
| 374 | << " timeout_in_us = " << timeout_in_us_; |
| 375 | |
| 376 | int64_t wait_time_in_us; |
| 377 | int64_t alarm_timeout_in_us = next_alarm_time_in_us - now_in_us; |
| 378 | |
| 379 | // If the next alarm is sooner than the default timeout, or if there is no |
| 380 | // timeout (timeout_in_us_ == -1), wake up when the alarm should fire. |
| 381 | // Otherwise use the default timeout. |
| 382 | if (alarm_timeout_in_us < timeout_in_us_ || timeout_in_us_ < 0) { |
| 383 | wait_time_in_us = std::max(alarm_timeout_in_us, static_cast<int64_t>(0)); |
| 384 | } else { |
| 385 | wait_time_in_us = timeout_in_us_; |
| 386 | } |
| 387 | |
| 388 | EPOLL_VLOG(4) << "wait_time_in_us = " << wait_time_in_us; |
| 389 | |
| 390 | // wait for events. |
| 391 | |
| 392 | WaitForEventsAndCallHandleEvents(wait_time_in_us, events_, events_size_); |
| 393 | CallAndReregisterAlarmEvents(); |
| 394 | recorded_now_in_us_ = 0; |
| 395 | } |
| 396 | |
| 397 | void SimpleEpollServer::SetFDReady(int fd, int events_to_fake) { |
| 398 | auto fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); |
| 399 | if (cb_map_.end() != fd_i && fd_i->cb != NULL) { |
| 400 | // This const_cast is necessary for LIST_HEAD_INSERT to work. Declaring |
| 401 | // entry mutable is insufficient because LIST_HEAD_INSERT assigns the |
| 402 | // forward pointer of the list head to the current cb_and_mask, and the |
| 403 | // compiler complains that it can't assign a const T* to a T*. |
| 404 | CBAndEventMask* cb_and_mask = const_cast<CBAndEventMask*>(&*fd_i); |
| 405 | // Note that there is no clearly correct behavior here when |
| 406 | // cb_and_mask->events_to_fake != 0 and this function is called. |
| 407 | // Of the two operations: |
| 408 | // cb_and_mask->events_to_fake = events_to_fake |
| 409 | // cb_and_mask->events_to_fake |= events_to_fake |
| 410 | // the first was picked because it discourages users from calling |
| 411 | // SetFDReady repeatedly to build up the correct event set as it is more |
| 412 | // efficient to call SetFDReady once with the correct, final mask. |
| 413 | cb_and_mask->events_to_fake = events_to_fake; |
| 414 | AddToReadyList(cb_and_mask); |
| 415 | } |
| 416 | } |
| 417 | |
| 418 | void SimpleEpollServer::SetFDNotReady(int fd) { |
| 419 | auto fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); |
| 420 | if (cb_map_.end() != fd_i) { |
| 421 | RemoveFromReadyList(*fd_i); |
| 422 | } |
| 423 | } |
| 424 | |
| 425 | bool SimpleEpollServer::IsFDReady(int fd) const { |
| 426 | auto fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); |
| 427 | return (cb_map_.end() != fd_i && fd_i->cb != NULL && |
| 428 | fd_i->entry.le_prev != NULL); |
| 429 | } |
| 430 | |
| 431 | void SimpleEpollServer::VerifyReadyList() const { |
| 432 | int count = 0; |
| 433 | CBAndEventMask* cur = ready_list_.lh_first; |
| 434 | for (; cur; cur = cur->entry.le_next) { |
| 435 | ++count; |
| 436 | } |
| 437 | for (cur = tmp_list_.lh_first; cur; cur = cur->entry.le_next) { |
| 438 | ++count; |
| 439 | } |
| 440 | CHECK_EQ(ready_list_size_, count) << "Ready list size does not match count"; |
| 441 | } |
| 442 | |
| 443 | void SimpleEpollServer::RegisterAlarm(int64_t timeout_time_in_us, AlarmCB* ac) { |
| 444 | EPOLL_VLOG(4) << "RegisteringAlarm " << ac << " at : " << timeout_time_in_us; |
| 445 | CHECK(ac); |
| 446 | if (all_alarms_.find(ac) != all_alarms_.end()) { |
| 447 | EPOLL_BUG << "Alarm already exists"; |
| 448 | } |
| 449 | |
| 450 | auto alarm_iter = alarm_map_.insert(std::make_pair(timeout_time_in_us, ac)); |
| 451 | |
| 452 | all_alarms_.insert(ac); |
| 453 | // Pass the iterator to the EpollAlarmCallbackInterface. |
| 454 | ac->OnRegistration(alarm_iter, this); |
| 455 | } |
| 456 | |
| 457 | // Unregister a specific alarm callback: iterator_token must be a |
| 458 | // valid iterator. The caller must ensure the validity of the iterator. |
| 459 | void SimpleEpollServer::UnregisterAlarm(const AlarmRegToken& iterator_token) { |
| 460 | AlarmCB* cb = iterator_token->second; |
| 461 | EPOLL_VLOG(4) << "UnregisteringAlarm " << cb; |
| 462 | alarm_map_.erase(iterator_token); |
| 463 | all_alarms_.erase(cb); |
| 464 | cb->OnUnregistration(); |
| 465 | } |
| 466 | |
| 467 | SimpleEpollServer::AlarmRegToken SimpleEpollServer::ReregisterAlarm( |
| 468 | SimpleEpollServer::AlarmRegToken iterator_token, |
| 469 | int64_t timeout_time_in_us) { |
| 470 | AlarmCB* cb = iterator_token->second; |
| 471 | alarm_map_.erase(iterator_token); |
| 472 | return alarm_map_.emplace(timeout_time_in_us, cb); |
| 473 | } |
| 474 | |
| 475 | int SimpleEpollServer::NumFDsRegistered() const { |
| 476 | DCHECK_GE(cb_map_.size(), 1u); |
| 477 | // Omit the internal FD (read_fd_) |
| 478 | return cb_map_.size() - 1; |
| 479 | } |
| 480 | |
| 481 | void SimpleEpollServer::Wake() { |
| 482 | char data = 'd'; // 'd' is for data. It's good enough for me. |
| 483 | int rv = write(write_fd_, &data, 1); |
| 484 | DCHECK_EQ(rv, 1); |
| 485 | } |
| 486 | |
| 487 | int64_t SimpleEpollServer::NowInUsec() const { return WallTimeNowInUsec(); } |
| 488 | |
| 489 | int64_t SimpleEpollServer::ApproximateNowInUsec() const { |
| 490 | if (recorded_now_in_us_ != 0) { |
| 491 | return recorded_now_in_us_; |
| 492 | } |
| 493 | return this->NowInUsec(); |
| 494 | } |
| 495 | |
| 496 | std::string SimpleEpollServer::EventMaskToString(int event_mask) { |
| 497 | std::string s; |
| 498 | if (event_mask & EPOLLIN) s += "EPOLLIN "; |
| 499 | if (event_mask & EPOLLPRI) s += "EPOLLPRI "; |
| 500 | if (event_mask & EPOLLOUT) s += "EPOLLOUT "; |
| 501 | if (event_mask & EPOLLRDNORM) s += "EPOLLRDNORM "; |
| 502 | if (event_mask & EPOLLRDBAND) s += "EPOLLRDBAND "; |
| 503 | if (event_mask & EPOLLWRNORM) s += "EPOLLWRNORM "; |
| 504 | if (event_mask & EPOLLWRBAND) s += "EPOLLWRBAND "; |
| 505 | if (event_mask & EPOLLMSG) s += "EPOLLMSG "; |
| 506 | if (event_mask & EPOLLERR) s += "EPOLLERR "; |
| 507 | if (event_mask & EPOLLHUP) s += "EPOLLHUP "; |
| 508 | if (event_mask & EPOLLONESHOT) s += "EPOLLONESHOT "; |
| 509 | if (event_mask & EPOLLET) s += "EPOLLET "; |
| 510 | return s; |
| 511 | } |
| 512 | |
| 513 | void SimpleEpollServer::LogStateOnCrash() { |
| 514 | EPOLL_LOG(ERROR) |
| 515 | << "-------------------Epoll Server-------------------------"; |
| 516 | EPOLL_LOG(ERROR) << "Epoll server " << this << " polling on fd " << epoll_fd_; |
| 517 | EPOLL_LOG(ERROR) << "timeout_in_us_: " << timeout_in_us_; |
| 518 | |
| 519 | // Log sessions with alarms. |
| 520 | EPOLL_LOG(ERROR) << alarm_map_.size() << " alarms registered."; |
| 521 | for (auto it = alarm_map_.begin(); it != alarm_map_.end(); ++it) { |
| 522 | const bool skipped = |
| 523 | alarms_reregistered_and_should_be_skipped_.find(it->second) != |
| 524 | alarms_reregistered_and_should_be_skipped_.end(); |
| 525 | EPOLL_LOG(ERROR) << "Alarm " << it->second << " registered at time " |
| 526 | << it->first << " and should be skipped = " << skipped; |
| 527 | } |
| 528 | |
| 529 | EPOLL_LOG(ERROR) << cb_map_.size() << " fd callbacks registered."; |
| 530 | for (auto it = cb_map_.begin(); it != cb_map_.end(); ++it) { |
| 531 | EPOLL_LOG(ERROR) << "fd: " << it->fd << " with mask " << it->event_mask |
| 532 | << " registered with cb: " << it->cb; |
| 533 | } |
| 534 | EPOLL_LOG(ERROR) |
| 535 | << "-------------------/Epoll Server------------------------"; |
| 536 | } |
| 537 | |
| 538 | //////////////////////////////////////////////////////////////////////////////// |
| 539 | //////////////////////////////////////////////////////////////////////////////// |
| 540 | |
| 541 | void SimpleEpollServer::DelFD(int fd) const { |
| 542 | struct epoll_event ee; |
| 543 | memset(&ee, 0, sizeof(ee)); |
| 544 | #ifdef EPOLL_SERVER_EVENT_TRACING |
| 545 | event_recorder_.RecordFDMaskEvent(fd, 0, "DelFD"); |
| 546 | #endif |
| 547 | if (epoll_ctl(epoll_fd_, EPOLL_CTL_DEL, fd, &ee)) { |
| 548 | int saved_errno = errno; |
| 549 | char buf[kErrorBufferSize]; |
| 550 | EPOLL_LOG(FATAL) << "Epoll set removal error for fd " << fd << ": " |
| 551 | << strerror_r(saved_errno, buf, sizeof(buf)); |
| 552 | } |
| 553 | } |
| 554 | |
| 555 | //////////////////////////////////////// |
| 556 | |
| 557 | void SimpleEpollServer::AddFD(int fd, int event_mask) const { |
| 558 | struct epoll_event ee; |
| 559 | memset(&ee, 0, sizeof(ee)); |
| 560 | ee.events = event_mask | EPOLLERR | EPOLLHUP; |
| 561 | ee.data.fd = fd; |
| 562 | #ifdef EPOLL_SERVER_EVENT_TRACING |
| 563 | event_recorder_.RecordFDMaskEvent(fd, ee.events, "AddFD"); |
| 564 | #endif |
| 565 | if (epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, fd, &ee)) { |
| 566 | int saved_errno = errno; |
| 567 | char buf[kErrorBufferSize]; |
| 568 | EPOLL_LOG(FATAL) << "Epoll set insertion error for fd " << fd << ": " |
| 569 | << strerror_r(saved_errno, buf, sizeof(buf)); |
| 570 | } |
| 571 | } |
| 572 | |
| 573 | //////////////////////////////////////// |
| 574 | |
| 575 | void SimpleEpollServer::ModFD(int fd, int event_mask) const { |
| 576 | struct epoll_event ee; |
| 577 | memset(&ee, 0, sizeof(ee)); |
| 578 | ee.events = event_mask | EPOLLERR | EPOLLHUP; |
| 579 | ee.data.fd = fd; |
| 580 | #ifdef EPOLL_SERVER_EVENT_TRACING |
| 581 | event_recorder_.RecordFDMaskEvent(fd, ee.events, "ModFD"); |
| 582 | #endif |
| 583 | EPOLL_VLOG(3) << "modifying fd= " << fd << " " |
| 584 | << EventMaskToString(ee.events); |
| 585 | if (epoll_ctl(epoll_fd_, EPOLL_CTL_MOD, fd, &ee)) { |
| 586 | int saved_errno = errno; |
| 587 | char buf[kErrorBufferSize]; |
| 588 | EPOLL_LOG(FATAL) << "Epoll set modification error for fd " << fd << ": " |
| 589 | << strerror_r(saved_errno, buf, sizeof(buf)); |
| 590 | } |
| 591 | } |
| 592 | |
| 593 | //////////////////////////////////////// |
| 594 | |
| 595 | void SimpleEpollServer::ModifyFD(int fd, int remove_event, int add_event) { |
| 596 | auto fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); |
| 597 | if (cb_map_.end() == fd_i) { |
| 598 | EPOLL_VLOG(2) << "Didn't find the fd " << fd << "in internal structures"; |
| 599 | return; |
| 600 | } |
| 601 | |
| 602 | if (fd_i->cb != NULL) { |
| 603 | int& event_mask = fd_i->event_mask; |
| 604 | EPOLL_VLOG(3) << "fd= " << fd |
| 605 | << " event_mask before: " << EventMaskToString(event_mask); |
| 606 | event_mask &= ~remove_event; |
| 607 | event_mask |= add_event; |
| 608 | |
| 609 | EPOLL_VLOG(3) << " event_mask after: " << EventMaskToString(event_mask); |
| 610 | |
| 611 | ModFD(fd, event_mask); |
| 612 | |
| 613 | fd_i->cb->OnModification(fd, event_mask); |
| 614 | } |
| 615 | } |
| 616 | |
| 617 | void SimpleEpollServer::WaitForEventsAndCallHandleEvents( |
| 618 | int64_t timeout_in_us, struct epoll_event events[], int events_size) { |
| 619 | if (timeout_in_us == 0 || ready_list_.lh_first != NULL) { |
| 620 | // If ready list is not empty, then don't sleep at all. |
| 621 | timeout_in_us = 0; |
| 622 | } else if (timeout_in_us < 0) { |
| 623 | EPOLL_LOG(INFO) << "Negative epoll timeout: " << timeout_in_us |
| 624 | << "us; epoll will wait forever for events."; |
| 625 | // If timeout_in_us is < 0 we are supposed to Wait forever. This means we |
| 626 | // should set timeout_in_us to -1000 so we will |
| 627 | // Wait(-1000/1000) == Wait(-1) == Wait forever. |
| 628 | timeout_in_us = -1000; |
| 629 | } else { |
| 630 | // If timeout is specified, and the ready list is empty. |
| 631 | if (timeout_in_us < 1000) { |
| 632 | timeout_in_us = 1000; |
| 633 | } |
| 634 | } |
| 635 | const int timeout_in_ms = timeout_in_us / 1000; |
| 636 | int64_t expected_wakeup_us = NowInUsec() + timeout_in_us; |
| 637 | |
| 638 | int nfds = epoll_wait_impl(epoll_fd_, events, events_size, timeout_in_ms); |
| 639 | EPOLL_VLOG(3) << "nfds=" << nfds; |
| 640 | |
| 641 | #ifdef EPOLL_SERVER_EVENT_TRACING |
| 642 | event_recorder_.RecordEpollWaitEvent(timeout_in_ms, nfds); |
| 643 | #endif |
| 644 | |
| 645 | // If you're wondering why the NowInUsec() is recorded here, the answer is |
| 646 | // simple: If we did it before the epoll_wait_impl, then the max error for |
| 647 | // the ApproximateNowInUs() call would be as large as the maximum length of |
| 648 | // epoll_wait, which can be arbitrarily long. Since this would make |
| 649 | // ApproximateNowInUs() worthless, we instead record the time -after- we've |
| 650 | // done epoll_wait, which guarantees that the maximum error is the amount of |
| 651 | // time it takes to process all the events generated by epoll_wait. |
| 652 | recorded_now_in_us_ = NowInUsec(); |
| 653 | |
| 654 | if (timeout_in_us > 0) { |
| 655 | int64_t delta = NowInUsec() - expected_wakeup_us; |
| 656 | last_delay_in_usec_ = delta > 0 ? delta : 0; |
| 657 | } else { |
| 658 | // timeout_in_us < 0 means we waited forever until an event; |
| 659 | // timeout_in_us == 0 means there was no kernel delay to track. |
| 660 | last_delay_in_usec_ = 0; |
| 661 | } |
| 662 | |
| 663 | if (nfds > 0) { |
| 664 | for (int i = 0; i < nfds; ++i) { |
| 665 | int event_mask = events[i].events; |
| 666 | int fd = events[i].data.fd; |
| 667 | HandleEvent(fd, event_mask); |
| 668 | } |
| 669 | } else if (nfds < 0) { |
| 670 | // Catch interrupted syscall and just ignore it and move on. |
| 671 | if (errno != EINTR && errno != 0) { |
| 672 | int saved_errno = errno; |
| 673 | char buf[kErrorBufferSize]; |
| 674 | EPOLL_LOG(FATAL) << "Error " << saved_errno << " in epoll_wait: " |
| 675 | << strerror_r(saved_errno, buf, sizeof(buf)); |
| 676 | } |
| 677 | } |
| 678 | |
| 679 | // Now run through the ready list. |
| 680 | if (ready_list_.lh_first) { |
| 681 | CallReadyListCallbacks(); |
| 682 | } |
| 683 | } |
| 684 | |
| 685 | void SimpleEpollServer::CallReadyListCallbacks() { |
| 686 | // Check pre-conditions. |
| 687 | DCHECK(tmp_list_.lh_first == NULL); |
| 688 | // Swap out the ready_list_ into the tmp_list_ before traversing the list to |
| 689 | // enable SetFDReady() to just push new items into the ready_list_. |
| 690 | std::swap(ready_list_.lh_first, tmp_list_.lh_first); |
| 691 | if (tmp_list_.lh_first) { |
| 692 | tmp_list_.lh_first->entry.le_prev = &tmp_list_.lh_first; |
| 693 | EpollEvent event(0); |
| 694 | while (tmp_list_.lh_first != NULL) { |
| 695 | DCHECK_GT(ready_list_size_, 0); |
| 696 | CBAndEventMask* cb_and_mask = tmp_list_.lh_first; |
| 697 | RemoveFromReadyList(*cb_and_mask); |
| 698 | |
| 699 | event.out_ready_mask = 0; |
| 700 | event.in_events = |
| 701 | cb_and_mask->events_asserted | cb_and_mask->events_to_fake; |
| 702 | // TODO(fenix): get rid of the two separate fields in cb_and_mask. |
| 703 | cb_and_mask->events_asserted = 0; |
| 704 | cb_and_mask->events_to_fake = 0; |
| 705 | { |
| 706 | // OnEvent() may call UnRegister, so we set in_use, here. Any |
| 707 | // UnRegister call will now simply set the cb to NULL instead of |
| 708 | // invalidating the cb_and_mask object (by deleting the object in the |
| 709 | // map to which cb_and_mask refers) |
| 710 | AutoReset<bool> in_use_guard(&(cb_and_mask->in_use), true); |
| 711 | cb_and_mask->cb->OnEvent(cb_and_mask->fd, &event); |
| 712 | } |
| 713 | |
| 714 | // Since OnEvent may have called UnregisterFD, we must check here that |
| 715 | // the callback is still valid. If it isn't, then UnregisterFD *was* |
| 716 | // called, and we should now get rid of the object. |
| 717 | if (cb_and_mask->cb == NULL) { |
| 718 | cb_map_.erase(*cb_and_mask); |
| 719 | } else if (event.out_ready_mask != 0) { |
| 720 | cb_and_mask->events_to_fake = event.out_ready_mask; |
| 721 | AddToReadyList(cb_and_mask); |
| 722 | } |
| 723 | } |
| 724 | } |
| 725 | DCHECK(tmp_list_.lh_first == NULL); |
| 726 | } |
| 727 | |
| 728 | void SimpleEpollServer::CallAndReregisterAlarmEvents() { |
| 729 | int64_t now_in_us = recorded_now_in_us_; |
| 730 | DCHECK_NE(0, recorded_now_in_us_); |
| 731 | |
| 732 | TimeToAlarmCBMap::iterator erase_it; |
| 733 | |
| 734 | // execute alarms. |
| 735 | for (auto i = alarm_map_.begin(); i != alarm_map_.end();) { |
| 736 | if (i->first > now_in_us) { |
| 737 | break; |
| 738 | } |
| 739 | AlarmCB* cb = i->second; |
| 740 | // Execute the OnAlarm() only if we did not register |
| 741 | // it in this loop itself. |
| 742 | const bool added_in_this_round = |
| 743 | alarms_reregistered_and_should_be_skipped_.find(cb) != |
| 744 | alarms_reregistered_and_should_be_skipped_.end(); |
| 745 | if (added_in_this_round) { |
| 746 | ++i; |
| 747 | continue; |
| 748 | } |
| 749 | all_alarms_.erase(cb); |
| 750 | const int64_t new_timeout_time_in_us = cb->OnAlarm(); |
| 751 | |
| 752 | erase_it = i; |
| 753 | ++i; |
| 754 | alarm_map_.erase(erase_it); |
| 755 | |
| 756 | if (new_timeout_time_in_us > 0) { |
| 757 | // We add to hash_set only if the new timeout is <= now_in_us. |
| 758 | // if timeout is > now_in_us then we have no fear that this alarm |
| 759 | // can be reexecuted in this loop, and hence we do not need to |
| 760 | // worry about a recursive loop. |
| 761 | EPOLL_DVLOG(3) << "Reregistering alarm " |
| 762 | << " " << cb << " " << new_timeout_time_in_us << " " |
| 763 | << now_in_us; |
| 764 | if (new_timeout_time_in_us <= now_in_us) { |
| 765 | alarms_reregistered_and_should_be_skipped_.insert(cb); |
| 766 | } |
| 767 | RegisterAlarm(new_timeout_time_in_us, cb); |
| 768 | } |
| 769 | } |
| 770 | alarms_reregistered_and_should_be_skipped_.clear(); |
| 771 | } |
| 772 | |
| 773 | EpollAlarm::EpollAlarm() : eps_(NULL), registered_(false) {} |
| 774 | |
| 775 | EpollAlarm::~EpollAlarm() { UnregisterIfRegistered(); } |
| 776 | |
| 777 | int64_t EpollAlarm::OnAlarm() { |
| 778 | registered_ = false; |
| 779 | return 0; |
| 780 | } |
| 781 | |
| 782 | void EpollAlarm::OnRegistration(const SimpleEpollServer::AlarmRegToken& token, |
| 783 | SimpleEpollServer* eps) { |
| 784 | DCHECK_EQ(false, registered_); |
| 785 | |
| 786 | token_ = token; |
| 787 | eps_ = eps; |
| 788 | registered_ = true; |
| 789 | } |
| 790 | |
| 791 | void EpollAlarm::OnUnregistration() { registered_ = false; } |
| 792 | |
| 793 | void EpollAlarm::OnShutdown(SimpleEpollServer* eps) { |
| 794 | registered_ = false; |
| 795 | eps_ = NULL; |
| 796 | } |
| 797 | |
| 798 | // If the alarm was registered, unregister it. |
| 799 | void EpollAlarm::UnregisterIfRegistered() { |
| 800 | if (!registered_) { |
| 801 | return; |
| 802 | } |
| 803 | |
| 804 | eps_->UnregisterAlarm(token_); |
| 805 | } |
| 806 | |
| 807 | void EpollAlarm::ReregisterAlarm(int64_t timeout_time_in_us) { |
| 808 | DCHECK(registered_); |
| 809 | token_ = eps_->ReregisterAlarm(token_, timeout_time_in_us); |
| 810 | } |
| 811 | |
| 812 | } // namespace epoll_server |