event.c 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413
  1. /*
  2. * The contents of this file are subject to the Mozilla Public
  3. * License Version 1.1 (the "License"); you may not use this file
  4. * except in compliance with the License. You may obtain a copy of
  5. * the License at http://www.mozilla.org/MPL/
  6. *
  7. * Software distributed under the License is distributed on an "AS
  8. * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
  9. * implied. See the License for the specific language governing
  10. * rights and limitations under the License.
  11. *
  12. * The Original Code is the Netscape Portable Runtime library.
  13. *
  14. * The Initial Developer of the Original Code is Netscape
  15. * Communications Corporation. Portions created by Netscape are
  16. * Copyright (C) 1994-2000 Netscape Communications Corporation. All
  17. * Rights Reserved.
  18. *
  19. * Contributor(s): Silicon Graphics, Inc.
  20. * Yahoo! Inc.
  21. *
  22. * Alternatively, the contents of this file may be used under the
  23. * terms of the GNU General Public License Version 2 or later (the
  24. * "GPL"), in which case the provisions of the GPL are applicable
  25. * instead of those above. If you wish to allow use of your
  26. * version of this file only under the terms of the GPL and not to
  27. * allow others to use your version of this file under the MPL,
  28. * indicate your decision by deleting the provisions above and
  29. * replace them with the notice and other provisions required by
  30. * the GPL. If you do not delete the provisions above, a recipient
  31. * may use your version of this file under either the MPL or the
  32. * GPL.
  33. */
  34. #include <stdlib.h>
  35. #include <unistd.h>
  36. #include <fcntl.h>
  37. #include <string.h>
  38. #include <time.h>
  39. #include <errno.h>
  40. #include "common.h"
  41. #ifdef MD_HAVE_KQUEUE
  42. #include <sys/event.h>
  43. #endif
  44. #ifdef MD_HAVE_EPOLL
  45. #include <sys/epoll.h>
  46. #endif
  47. #if defined(USE_POLL) && !defined(MD_HAVE_POLL)
  48. /* Force poll usage if explicitly asked for it */
  49. #define MD_HAVE_POLL
  50. #endif
/*
 * Per-process select() bookkeeping: the three master fd_sets passed to
 * select(), plus a per-fd reference count for each event type so a
 * descriptor is cleared from a set only when the last interested
 * pollset releases it.
 */
static struct _st_seldata {
    fd_set fd_read_set, fd_write_set, fd_exception_set;
    int fd_ref_cnts[FD_SETSIZE][3];  /* [fd][0]=read, [1]=write, [2]=exception */
    int maxfd;                       /* highest registered fd; -1 when none */
} *_st_select_data;

#define _ST_SELECT_MAX_OSFD      (_st_select_data->maxfd)
#define _ST_SELECT_READ_SET      (_st_select_data->fd_read_set)
#define _ST_SELECT_WRITE_SET     (_st_select_data->fd_write_set)
#define _ST_SELECT_EXCEP_SET     (_st_select_data->fd_exception_set)
#define _ST_SELECT_READ_CNT(fd)  (_st_select_data->fd_ref_cnts[fd][0])
#define _ST_SELECT_WRITE_CNT(fd) (_st_select_data->fd_ref_cnts[fd][1])
#define _ST_SELECT_EXCEP_CNT(fd) (_st_select_data->fd_ref_cnts[fd][2])
#ifdef MD_HAVE_POLL
/*
 * Per-process poll() bookkeeping: a reusable scratch array into which all
 * queued pollsets are gathered before each poll() call, plus the total
 * descriptor count across all pollsets on the I/O queue.
 */
static struct _st_polldata {
    struct pollfd *pollfds;  /* scratch array passed to poll() */
    int pollfds_size;        /* capacity of pollfds, in entries */
    int fdcnt;               /* total descriptors across all queued pollsets */
} *_st_poll_data;

#define _ST_POLL_OSFD_CNT (_st_poll_data->fdcnt)
#define _ST_POLLFDS       (_st_poll_data->pollfds)
#define _ST_POLLFDS_SIZE  (_st_poll_data->pollfds_size)
#endif /* MD_HAVE_POLL */
#ifdef MD_HAVE_KQUEUE
/*
 * Per-descriptor kqueue state: interest reference counts plus the
 * accumulated poll-style revents from the last kevent() batch.
 */
typedef struct _kq_fd_data {
    int rd_ref_cnt;  /* EVFILT_READ interest count */
    int wr_ref_cnt;  /* EVFILT_WRITE interest count */
    int revents;     /* fired events, as poll() bits */
} _kq_fd_data_t;

/* Per-process kqueue bookkeeping */
static struct _st_kqdata {
    _kq_fd_data_t *fd_data;  /* indexed by os fd, grown on demand */
    struct kevent *evtlist;  /* result buffer for kevent() */
    struct kevent *addlist;  /* changes queued for the next kevent() call */
    struct kevent *dellist;  /* EV_DELETE changes, flushed synchronously */
    int fd_data_size;
    int evtlist_size;
    int addlist_size;
    int addlist_cnt;
    int dellist_size;
    int dellist_cnt;
    int kq;                  /* the kqueue descriptor */
    pid_t pid;               /* creator pid: kqueue descriptors don't survive fork() */
} *_st_kq_data;

#ifndef ST_KQ_MIN_EVTLIST_SIZE
#define ST_KQ_MIN_EVTLIST_SIZE 64
#endif

#define _ST_KQ_READ_CNT(fd)  (_st_kq_data->fd_data[fd].rd_ref_cnt)
#define _ST_KQ_WRITE_CNT(fd) (_st_kq_data->fd_data[fd].wr_ref_cnt)
#define _ST_KQ_REVENTS(fd)   (_st_kq_data->fd_data[fd].revents)
#endif /* MD_HAVE_KQUEUE */
#ifdef MD_HAVE_EPOLL
/*
 * Per-descriptor epoll state: one interest reference count per event
 * type plus accumulated poll-style revents.
 */
typedef struct _epoll_fd_data {
    int rd_ref_cnt;  /* EPOLLIN interest count */
    int wr_ref_cnt;  /* EPOLLOUT interest count */
    int ex_ref_cnt;  /* EPOLLPRI interest count */
    int revents;     /* fired events, as poll() bits */
} _epoll_fd_data_t;

/* Per-process epoll bookkeeping */
static struct _st_epolldata {
    _epoll_fd_data_t *fd_data;    /* indexed by os fd, grown on demand */
    struct epoll_event *evtlist;  /* result buffer for epoll_wait() */
    int fd_data_size;
    int evtlist_size;
    int evtlist_cnt;
    int fd_hint;                  /* sizing hint for tables */
    int epfd;                     /* the epoll descriptor */
    pid_t pid;                    /* creator pid — presumably for fork detection; confirm in dispatch */
} *_st_epoll_data;

#ifndef ST_EPOLL_EVTLIST_SIZE
/* Not a limit, just a hint */
#define ST_EPOLL_EVTLIST_SIZE 4096
#endif

#define _ST_EPOLL_READ_CNT(fd)  (_st_epoll_data->fd_data[fd].rd_ref_cnt)
#define _ST_EPOLL_WRITE_CNT(fd) (_st_epoll_data->fd_data[fd].wr_ref_cnt)
#define _ST_EPOLL_EXCEP_CNT(fd) (_st_epoll_data->fd_data[fd].ex_ref_cnt)
#define _ST_EPOLL_REVENTS(fd)   (_st_epoll_data->fd_data[fd].revents)
#define _ST_EPOLL_READ_BIT(fd)  (_ST_EPOLL_READ_CNT(fd) ? EPOLLIN : 0)
#define _ST_EPOLL_WRITE_BIT(fd) (_ST_EPOLL_WRITE_CNT(fd) ? EPOLLOUT : 0)
#define _ST_EPOLL_EXCEP_BIT(fd) (_ST_EPOLL_EXCEP_CNT(fd) ? EPOLLPRI : 0)
#define _ST_EPOLL_EVENTS(fd) \
    (_ST_EPOLL_READ_BIT(fd)|_ST_EPOLL_WRITE_BIT(fd)|_ST_EPOLL_EXCEP_BIT(fd))
#endif /* MD_HAVE_EPOLL */
/* Active event system operations table; NULL until one is selected */
_st_eventsys_t *_st_eventsys = NULL;

/*****************************************
 * select event system
 */
  135. ST_HIDDEN int _st_select_init(void)
  136. {
  137. _st_select_data = (struct _st_seldata *) malloc(sizeof(*_st_select_data));
  138. if (!_st_select_data)
  139. return -1;
  140. memset(_st_select_data, 0, sizeof(*_st_select_data));
  141. _st_select_data->maxfd = -1;
  142. return 0;
  143. }
  144. ST_HIDDEN int _st_select_pollset_add(struct pollfd *pds, int npds)
  145. {
  146. struct pollfd *pd;
  147. struct pollfd *epd = pds + npds;
  148. /* Do checks up front */
  149. for (pd = pds; pd < epd; pd++) {
  150. if (pd->fd < 0 || pd->fd >= FD_SETSIZE || !pd->events ||
  151. (pd->events & ~(POLLIN | POLLOUT | POLLPRI))) {
  152. errno = EINVAL;
  153. return -1;
  154. }
  155. }
  156. for (pd = pds; pd < epd; pd++) {
  157. if (pd->events & POLLIN) {
  158. FD_SET(pd->fd, &_ST_SELECT_READ_SET);
  159. _ST_SELECT_READ_CNT(pd->fd)++;
  160. }
  161. if (pd->events & POLLOUT) {
  162. FD_SET(pd->fd, &_ST_SELECT_WRITE_SET);
  163. _ST_SELECT_WRITE_CNT(pd->fd)++;
  164. }
  165. if (pd->events & POLLPRI) {
  166. FD_SET(pd->fd, &_ST_SELECT_EXCEP_SET);
  167. _ST_SELECT_EXCEP_CNT(pd->fd)++;
  168. }
  169. if (_ST_SELECT_MAX_OSFD < pd->fd)
  170. _ST_SELECT_MAX_OSFD = pd->fd;
  171. }
  172. return 0;
  173. }
  174. ST_HIDDEN void _st_select_pollset_del(struct pollfd *pds, int npds)
  175. {
  176. struct pollfd *pd;
  177. struct pollfd *epd = pds + npds;
  178. for (pd = pds; pd < epd; pd++) {
  179. if (pd->events & POLLIN) {
  180. if (--_ST_SELECT_READ_CNT(pd->fd) == 0)
  181. FD_CLR(pd->fd, &_ST_SELECT_READ_SET);
  182. }
  183. if (pd->events & POLLOUT) {
  184. if (--_ST_SELECT_WRITE_CNT(pd->fd) == 0)
  185. FD_CLR(pd->fd, &_ST_SELECT_WRITE_SET);
  186. }
  187. if (pd->events & POLLPRI) {
  188. if (--_ST_SELECT_EXCEP_CNT(pd->fd) == 0)
  189. FD_CLR(pd->fd, &_ST_SELECT_EXCEP_SET);
  190. }
  191. }
  192. }
/*
 * Recovery path used when select() fails with EBADF: scan every pollfd
 * on the I/O queue, mark descriptors that fail fcntl(F_GETFL) with
 * POLLNVAL, and wake the owning threads so they observe the error.
 * Also rebuilds _ST_SELECT_MAX_OSFD from the surviving pollsets.
 */
ST_HIDDEN void _st_select_find_bad_fd(void)
{
    _st_clist_t *q;
    _st_pollq_t *pq;
    int notify;
    struct pollfd *pds, *epds;
    int pq_max_osfd, osfd;
    short events;

    _ST_SELECT_MAX_OSFD = -1;

    for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
        pq = _ST_POLLQUEUE_PTR(q);
        notify = 0;
        epds = pq->pds + pq->npds;
        pq_max_osfd = -1;

        for (pds = pq->pds; pds < epds; pds++) {
            osfd = pds->fd;
            pds->revents = 0;
            if (pds->events == 0)
                continue;
            /* F_GETFL fails iff the descriptor is no longer valid */
            if (fcntl(osfd, F_GETFL, 0) < 0) {
                pds->revents = POLLNVAL;
                notify = 1;
            }
            if (osfd > pq_max_osfd) {
                pq_max_osfd = osfd;
            }
        }

        if (notify) {
            /* Dequeue this pollset and make its thread runnable */
            ST_REMOVE_LINK(&pq->links);
            pq->on_ioq = 0;
            /*
             * Decrement the count of descriptors for each descriptor/event
             * because this I/O request is being removed from the ioq
             */
            for (pds = pq->pds; pds < epds; pds++) {
                osfd = pds->fd;
                events = pds->events;
                if (events & POLLIN) {
                    if (--_ST_SELECT_READ_CNT(osfd) == 0) {
                        FD_CLR(osfd, &_ST_SELECT_READ_SET);
                    }
                }
                if (events & POLLOUT) {
                    if (--_ST_SELECT_WRITE_CNT(osfd) == 0) {
                        FD_CLR(osfd, &_ST_SELECT_WRITE_SET);
                    }
                }
                if (events & POLLPRI) {
                    if (--_ST_SELECT_EXCEP_CNT(osfd) == 0) {
                        FD_CLR(osfd, &_ST_SELECT_EXCEP_SET);
                    }
                }
            }

            if (pq->thread->flags & _ST_FL_ON_SLEEPQ)
                _ST_DEL_SLEEPQ(pq->thread);
            pq->thread->state = _ST_ST_RUNNABLE;
            _ST_ADD_RUNQ(pq->thread);
        } else {
            /* Pollset intact: it still contributes to the max fd */
            if (_ST_SELECT_MAX_OSFD < pq_max_osfd)
                _ST_SELECT_MAX_OSFD = pq_max_osfd;
        }
    }
}
/*
 * One iteration of the select-based scheduler: wait for I/O (bounded by
 * the nearest sleep-queue deadline), translate ready fd_sets back into
 * poll-style revents, and make the owning threads runnable.
 */
ST_HIDDEN void _st_select_dispatch(void)
{
    struct timeval timeout, *tvp;
    fd_set r, w, e;
    fd_set *rp, *wp, *ep;
    int nfd, pq_max_osfd, osfd;
    _st_clist_t *q;
    st_utime_t min_timeout;
    _st_pollq_t *pq;
    int notify;
    struct pollfd *pds, *epds;
    short events, revents;

    /*
     * Assignment of fd_sets: select() mutates its arguments, so operate
     * on copies of the master sets.
     */
    r = _ST_SELECT_READ_SET;
    w = _ST_SELECT_WRITE_SET;
    e = _ST_SELECT_EXCEP_SET;
    rp = &r;
    wp = &w;
    ep = &e;

    if (_ST_SLEEPQ == NULL) {
        tvp = NULL;  /* no sleeping threads: block indefinitely */
    } else {
        /* Wake in time for the earliest sleep-queue deadline */
        min_timeout = (_ST_SLEEPQ->due <= _ST_LAST_CLOCK) ? 0 :
            (_ST_SLEEPQ->due - _ST_LAST_CLOCK);
        timeout.tv_sec = (int) (min_timeout / 1000000);
        timeout.tv_usec = (int) (min_timeout % 1000000);
        tvp = &timeout;
    }

    /* Check for I/O operations */
    nfd = select(_ST_SELECT_MAX_OSFD + 1, rp, wp, ep, tvp);

    /* Notify threads that are associated with the selected descriptors */
    if (nfd > 0) {
        _ST_SELECT_MAX_OSFD = -1;  /* rebuilt below from surviving pollsets */
        for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
            pq = _ST_POLLQUEUE_PTR(q);
            notify = 0;
            epds = pq->pds + pq->npds;
            pq_max_osfd = -1;

            /* Translate fd_set membership back into poll-style revents */
            for (pds = pq->pds; pds < epds; pds++) {
                osfd = pds->fd;
                events = pds->events;
                revents = 0;
                if ((events & POLLIN) && FD_ISSET(osfd, rp)) {
                    revents |= POLLIN;
                }
                if ((events & POLLOUT) && FD_ISSET(osfd, wp)) {
                    revents |= POLLOUT;
                }
                if ((events & POLLPRI) && FD_ISSET(osfd, ep)) {
                    revents |= POLLPRI;
                }
                pds->revents = revents;
                if (revents) {
                    notify = 1;
                }
                if (osfd > pq_max_osfd) {
                    pq_max_osfd = osfd;
                }
            }
            if (notify) {
                ST_REMOVE_LINK(&pq->links);
                pq->on_ioq = 0;
                /*
                 * Decrement the count of descriptors for each descriptor/event
                 * because this I/O request is being removed from the ioq
                 */
                for (pds = pq->pds; pds < epds; pds++) {
                    osfd = pds->fd;
                    events = pds->events;
                    if (events & POLLIN) {
                        if (--_ST_SELECT_READ_CNT(osfd) == 0) {
                            FD_CLR(osfd, &_ST_SELECT_READ_SET);
                        }
                    }
                    if (events & POLLOUT) {
                        if (--_ST_SELECT_WRITE_CNT(osfd) == 0) {
                            FD_CLR(osfd, &_ST_SELECT_WRITE_SET);
                        }
                    }
                    if (events & POLLPRI) {
                        if (--_ST_SELECT_EXCEP_CNT(osfd) == 0) {
                            FD_CLR(osfd, &_ST_SELECT_EXCEP_SET);
                        }
                    }
                }

                /* Wake the owning thread */
                if (pq->thread->flags & _ST_FL_ON_SLEEPQ)
                    _ST_DEL_SLEEPQ(pq->thread);
                pq->thread->state = _ST_ST_RUNNABLE;
                _ST_ADD_RUNQ(pq->thread);
            } else {
                if (_ST_SELECT_MAX_OSFD < pq_max_osfd)
                    _ST_SELECT_MAX_OSFD = pq_max_osfd;
            }
        }
    } else if (nfd < 0) {
        /*
         * It can happen when a thread closes file descriptor
         * that is being used by some other thread -- BAD!
         */
        if (errno == EBADF)
            _st_select_find_bad_fd();
    }
}
  361. ST_HIDDEN int _st_select_fd_new(int osfd)
  362. {
  363. if (osfd >= FD_SETSIZE) {
  364. errno = EMFILE;
  365. return -1;
  366. }
  367. return 0;
  368. }
  369. ST_HIDDEN int _st_select_fd_close(int osfd)
  370. {
  371. if (_ST_SELECT_READ_CNT(osfd) || _ST_SELECT_WRITE_CNT(osfd) ||
  372. _ST_SELECT_EXCEP_CNT(osfd)) {
  373. errno = EBUSY;
  374. return -1;
  375. }
  376. return 0;
  377. }
  378. ST_HIDDEN int _st_select_fd_getlimit(void)
  379. {
  380. return FD_SETSIZE;
  381. }
/* Operations vector for the select event system */
static _st_eventsys_t _st_select_eventsys = {
    "select",
    ST_EVENTSYS_SELECT,
    _st_select_init,
    _st_select_dispatch,
    _st_select_pollset_add,
    _st_select_pollset_del,
    _st_select_fd_new,
    _st_select_fd_close,
    _st_select_fd_getlimit
};
  393. #ifdef MD_HAVE_POLL
  394. /*****************************************
  395. * poll event system
  396. */
  397. ST_HIDDEN int _st_poll_init(void)
  398. {
  399. _st_poll_data = (struct _st_polldata *) malloc(sizeof(*_st_poll_data));
  400. if (!_st_poll_data)
  401. return -1;
  402. _ST_POLLFDS = (struct pollfd *) malloc(ST_MIN_POLLFDS_SIZE *
  403. sizeof(struct pollfd));
  404. if (!_ST_POLLFDS) {
  405. free(_st_poll_data);
  406. _st_poll_data = NULL;
  407. return -1;
  408. }
  409. _ST_POLLFDS_SIZE = ST_MIN_POLLFDS_SIZE;
  410. _ST_POLL_OSFD_CNT = 0;
  411. return 0;
  412. }
  413. ST_HIDDEN int _st_poll_pollset_add(struct pollfd *pds, int npds)
  414. {
  415. struct pollfd *pd;
  416. struct pollfd *epd = pds + npds;
  417. for (pd = pds; pd < epd; pd++) {
  418. if (pd->fd < 0 || !pd->events) {
  419. errno = EINVAL;
  420. return -1;
  421. }
  422. }
  423. _ST_POLL_OSFD_CNT += npds;
  424. return 0;
  425. }
/* ARGSUSED */
/*
 * Unregister a pollset: poll() keeps no per-fd state, so only the total
 * descriptor count needs adjusting.
 */
ST_HIDDEN void _st_poll_pollset_del(struct pollfd *pds, int npds)
{
    _ST_POLL_OSFD_CNT -= npds;
    ST_ASSERT(_ST_POLL_OSFD_CNT >= 0);
}
/*
 * One iteration of the poll-based scheduler: gather all queued pollsets
 * into one contiguous pollfd array, poll() with a timeout bounded by the
 * nearest sleep-queue deadline, then copy results back and wake threads
 * whose pollsets have at least one fired entry.
 */
ST_HIDDEN void _st_poll_dispatch(void)
{
    int timeout, nfd;
    _st_clist_t *q;
    st_utime_t min_timeout;
    _st_pollq_t *pq;
    struct pollfd *pds, *epds, *pollfds;

    /*
     * Build up the array of struct pollfd to wait on.
     * If existing array is not big enough, release it and allocate a new one.
     */
    ST_ASSERT(_ST_POLL_OSFD_CNT >= 0);
    if (_ST_POLL_OSFD_CNT > _ST_POLLFDS_SIZE) {
        free(_ST_POLLFDS);
        _ST_POLLFDS = (struct pollfd *) malloc((_ST_POLL_OSFD_CNT + 10) *
                                               sizeof(struct pollfd));  /* +10 slack avoids thrashing */
        ST_ASSERT(_ST_POLLFDS != NULL);
        _ST_POLLFDS_SIZE = _ST_POLL_OSFD_CNT + 10;
    }
    pollfds = _ST_POLLFDS;

    /* Gather all descriptors into one array */
    for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
        pq = _ST_POLLQUEUE_PTR(q);
        memcpy(pollfds, pq->pds, sizeof(struct pollfd) * pq->npds);
        pollfds += pq->npds;
    }
    ST_ASSERT(pollfds <= _ST_POLLFDS + _ST_POLLFDS_SIZE);

    if (_ST_SLEEPQ == NULL) {
        timeout = -1;  /* no sleeping threads: block indefinitely */
    } else {
        /* Wake in time for the earliest sleep-queue deadline (usec -> msec) */
        min_timeout = (_ST_SLEEPQ->due <= _ST_LAST_CLOCK) ? 0 :
            (_ST_SLEEPQ->due - _ST_LAST_CLOCK);
        timeout = (int) (min_timeout / 1000);
    }

    /* Check for I/O operations */
    nfd = poll(_ST_POLLFDS, _ST_POLL_OSFD_CNT, timeout);

    /* Notify threads that are associated with the selected descriptors */
    if (nfd > 0) {
        pollfds = _ST_POLLFDS;
        for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
            pq = _ST_POLLQUEUE_PTR(q);
            epds = pollfds + pq->npds;
            /* Did any entry of this pollset fire? */
            for (pds = pollfds; pds < epds; pds++) {
                if (pds->revents)
                    break;
            }
            if (pds < epds) {
                /* Copy revents back into the caller's pollset and wake it */
                memcpy(pq->pds, pollfds, sizeof(struct pollfd) * pq->npds);
                ST_REMOVE_LINK(&pq->links);
                pq->on_ioq = 0;
                if (pq->thread->flags & _ST_FL_ON_SLEEPQ)
                    _ST_DEL_SLEEPQ(pq->thread);
                pq->thread->state = _ST_ST_RUNNABLE;
                _ST_ADD_RUNQ(pq->thread);
                _ST_POLL_OSFD_CNT -= pq->npds;
                ST_ASSERT(_ST_POLL_OSFD_CNT >= 0);
            }
            pollfds = epds;
        }
    }
}
/* ARGSUSED */
/* No per-descriptor setup is needed for the poll event system */
ST_HIDDEN int _st_poll_fd_new(int osfd)
{
    return 0;
}
/* ARGSUSED */
/* Closing a descriptor is always allowed under the poll event system */
ST_HIDDEN int _st_poll_fd_close(int osfd)
{
    /*
     * We don't maintain I/O counts for poll event system
     * so nothing to check here.
     */
    return 0;
}
/* Descriptor limit imposed by the poll event system */
ST_HIDDEN int _st_poll_fd_getlimit(void)
{
    /* zero means no specific limit */
    return 0;
}
/* Operations vector for the poll event system */
static _st_eventsys_t _st_poll_eventsys = {
    "poll",
    ST_EVENTSYS_POLL,
    _st_poll_init,
    _st_poll_dispatch,
    _st_poll_pollset_add,
    _st_poll_pollset_del,
    _st_poll_fd_new,
    _st_poll_fd_close,
    _st_poll_fd_getlimit
};
  523. #endif /* MD_HAVE_POLL */
  524. #ifdef MD_HAVE_KQUEUE
/*****************************************
 * kqueue event system
 */

/*
 * Initialize the kqueue event system: create the kqueue and allocate the
 * per-fd table plus the add/delete/result kevent lists.
 * Returns 0 on success, -1 on failure with errno set; all partially
 * acquired resources are released on the error path.
 */
ST_HIDDEN int _st_kq_init(void)
{
    int err = 0;
    int rv = 0;

    _st_kq_data = (struct _st_kqdata *) calloc(1, sizeof(*_st_kq_data));
    if (!_st_kq_data)
        return -1;

    if ((_st_kq_data->kq = kqueue()) < 0) {
        err = errno;
        rv = -1;
        goto cleanup_kq;
    }
    /* Don't leak the kqueue descriptor across exec() */
    fcntl(_st_kq_data->kq, F_SETFD, FD_CLOEXEC);
    /* Remember the creating process so dispatch can detect a fork() */
    _st_kq_data->pid = getpid();

    /*
     * Allocate file descriptor data array.
     * FD_SETSIZE looks like good initial size.
     */
    _st_kq_data->fd_data_size = FD_SETSIZE;
    _st_kq_data->fd_data = (_kq_fd_data_t *)calloc(_st_kq_data->fd_data_size, sizeof(_kq_fd_data_t));
    if (!_st_kq_data->fd_data) {
        err = errno;
        rv = -1;
        goto cleanup_kq;
    }

    /* Allocate event lists (failure of any is checked once below) */
    _st_kq_data->evtlist_size = ST_KQ_MIN_EVTLIST_SIZE;
    _st_kq_data->evtlist = (struct kevent *)malloc(_st_kq_data->evtlist_size * sizeof(struct kevent));
    _st_kq_data->addlist_size = ST_KQ_MIN_EVTLIST_SIZE;
    _st_kq_data->addlist = (struct kevent *)malloc(_st_kq_data->addlist_size * sizeof(struct kevent));
    _st_kq_data->dellist_size = ST_KQ_MIN_EVTLIST_SIZE;
    _st_kq_data->dellist = (struct kevent *)malloc(_st_kq_data->dellist_size * sizeof(struct kevent));
    if (!_st_kq_data->evtlist || !_st_kq_data->addlist ||
        !_st_kq_data->dellist) {
        err = ENOMEM;
        rv = -1;
    }

 cleanup_kq:
    /* Reached on success too; only tear down when rv < 0 (free(NULL) is a no-op) */
    if (rv < 0) {
        if (_st_kq_data->kq >= 0)
            close(_st_kq_data->kq);
        free(_st_kq_data->fd_data);
        free(_st_kq_data->evtlist);
        free(_st_kq_data->addlist);
        free(_st_kq_data->dellist);
        free(_st_kq_data);
        _st_kq_data = NULL;
        errno = err;
    }
    return rv;
}
  579. ST_HIDDEN int _st_kq_fd_data_expand(int maxfd)
  580. {
  581. _kq_fd_data_t *ptr;
  582. int n = _st_kq_data->fd_data_size;
  583. while (maxfd >= n)
  584. n <<= 1;
  585. ptr = (_kq_fd_data_t *)realloc(_st_kq_data->fd_data, n * sizeof(_kq_fd_data_t));
  586. if (!ptr)
  587. return -1;
  588. memset(ptr + _st_kq_data->fd_data_size, 0, (n - _st_kq_data->fd_data_size) * sizeof(_kq_fd_data_t));
  589. _st_kq_data->fd_data = ptr;
  590. _st_kq_data->fd_data_size = n;
  591. return 0;
  592. }
  593. ST_HIDDEN int _st_kq_addlist_expand(int avail)
  594. {
  595. struct kevent *ptr;
  596. int n = _st_kq_data->addlist_size;
  597. while (avail > n - _st_kq_data->addlist_cnt)
  598. n <<= 1;
  599. ptr = (struct kevent *)realloc(_st_kq_data->addlist, n * sizeof(struct kevent));
  600. if (!ptr)
  601. return -1;
  602. _st_kq_data->addlist = ptr;
  603. _st_kq_data->addlist_size = n;
  604. /*
  605. * Try to expand the result event list too
  606. * (although we don't have to do it).
  607. */
  608. ptr = (struct kevent *)realloc(_st_kq_data->evtlist, n * sizeof(struct kevent));
  609. if (ptr) {
  610. _st_kq_data->evtlist = ptr;
  611. _st_kq_data->evtlist_size = n;
  612. }
  613. return 0;
  614. }
  615. ST_HIDDEN void _st_kq_addlist_add(const struct kevent *kev)
  616. {
  617. ST_ASSERT(_st_kq_data->addlist_cnt < _st_kq_data->addlist_size);
  618. memcpy(_st_kq_data->addlist + _st_kq_data->addlist_cnt, kev, sizeof(struct kevent));
  619. _st_kq_data->addlist_cnt++;
  620. }
  621. ST_HIDDEN void _st_kq_dellist_add(const struct kevent *kev)
  622. {
  623. int n = _st_kq_data->dellist_size;
  624. if (_st_kq_data->dellist_cnt >= n) {
  625. struct kevent *ptr;
  626. n <<= 1;
  627. ptr = (struct kevent *)realloc(_st_kq_data->dellist, n * sizeof(struct kevent));
  628. if (!ptr) {
  629. /* See comment in _st_kq_pollset_del() */
  630. return;
  631. }
  632. _st_kq_data->dellist = ptr;
  633. _st_kq_data->dellist_size = n;
  634. }
  635. memcpy(_st_kq_data->dellist + _st_kq_data->dellist_cnt, kev, sizeof(struct kevent));
  636. _st_kq_data->dellist_cnt++;
  637. }
/*
 * Register a pollset with kqueue.  Interest is reference-counted per
 * descriptor: a kevent is queued on the addlist (flushed by the next
 * kevent() call in _st_kq_dispatch()) only on a 0 -> 1 transition of a
 * descriptor's read or write count.  EV_ONESHOT is used so fired events
 * need no explicit deletion.
 * Returns 0 on success, -1 with errno set on failure.
 */
ST_HIDDEN int _st_kq_pollset_add(struct pollfd *pds, int npds)
{
    struct kevent kev;
    struct pollfd *pd;
    struct pollfd *epd = pds + npds;

    /*
     * Pollset adding is "atomic". That is, either it succeeded for
     * all descriptors in the set or it failed. It means that we
     * need to do all the checks up front so we don't have to
     * "unwind" if adding of one of the descriptors failed.
     */
    for (pd = pds; pd < epd; pd++) {
        /* POLLIN and/or POLLOUT must be set, but nothing else */
        if (pd->fd < 0 || !pd->events || (pd->events & ~(POLLIN | POLLOUT))) {
            errno = EINVAL;
            return -1;
        }
        if (pd->fd >= _st_kq_data->fd_data_size &&
            _st_kq_fd_data_expand(pd->fd) < 0)
            return -1;
    }

    /*
     * Make sure we have enough room in the addlist for twice as many
     * descriptors as in the pollset (for both READ and WRITE filters).
     */
    npds <<= 1;
    if (npds > _st_kq_data->addlist_size - _st_kq_data->addlist_cnt && _st_kq_addlist_expand(npds) < 0)
        return -1;

    for (pd = pds; pd < epd; pd++) {
        /* Only the first reference on an fd/filter queues a kernel event */
        if ((pd->events & POLLIN) && (_ST_KQ_READ_CNT(pd->fd)++ == 0)) {
            memset(&kev, 0, sizeof(kev));
            kev.ident = pd->fd;
            kev.filter = EVFILT_READ;
#ifdef NOTE_EOF
            /* Make it behave like select() and poll() */
            kev.fflags = NOTE_EOF;
#endif
            kev.flags = (EV_ADD | EV_ONESHOT);
            _st_kq_addlist_add(&kev);
        }
        if ((pd->events & POLLOUT) && (_ST_KQ_WRITE_CNT(pd->fd)++ == 0)) {
            memset(&kev, 0, sizeof(kev));
            kev.ident = pd->fd;
            kev.filter = EVFILT_WRITE;
            kev.flags = (EV_ADD | EV_ONESHOT);
            _st_kq_addlist_add(&kev);
        }
    }
    return 0;
}
/*
 * Drop a pollset's kqueue interest.  Reference counts are decremented
 * and an EV_DELETE kevent is queued for each fd/filter whose count
 * reaches zero; the delete list is then flushed synchronously.
 */
ST_HIDDEN void _st_kq_pollset_del(struct pollfd *pds, int npds)
{
    struct kevent kev;
    struct pollfd *pd;
    struct pollfd *epd = pds + npds;

    /*
     * It's OK if deleting fails because a descriptor will either be
     * closed or fire only once (we set EV_ONESHOT flag).
     */
    _st_kq_data->dellist_cnt = 0;
    for (pd = pds; pd < epd; pd++) {
        if ((pd->events & POLLIN) && (--_ST_KQ_READ_CNT(pd->fd) == 0)) {
            memset(&kev, 0, sizeof(kev));
            kev.ident = pd->fd;
            kev.filter = EVFILT_READ;
            kev.flags = EV_DELETE;
            _st_kq_dellist_add(&kev);
        }
        if ((pd->events & POLLOUT) && (--_ST_KQ_WRITE_CNT(pd->fd) == 0)) {
            memset(&kev, 0, sizeof(kev));
            kev.ident = pd->fd;
            kev.filter = EVFILT_WRITE;
            kev.flags = EV_DELETE;
            _st_kq_dellist_add(&kev);
        }
    }

    if (_st_kq_data->dellist_cnt > 0) {
        /*
         * We do "synchronous" kqueue deletes to avoid deleting
         * closed descriptors and other possible problems.
         */
        int rv;
        do {
            /* This kevent() won't block since result list size is 0 */
            rv = kevent(_st_kq_data->kq, _st_kq_data->dellist, _st_kq_data->dellist_cnt, NULL, 0, NULL);
        } while (rv < 0 && errno == EINTR);
    }
}
/*
 * One iteration of the kqueue-based scheduler: submit pending additions
 * and collect fired events in a single kevent() call (bounded by the
 * nearest sleep-queue deadline), translate them into poll-style revents,
 * wake owning threads, and delete still-registered filters of pollsets
 * being dequeued.  Also recovers from fork() by rebuilding the kqueue.
 */
ST_HIDDEN void _st_kq_dispatch(void)
{
    struct timespec timeout, *tsp;
    struct kevent kev;
    st_utime_t min_timeout;
    _st_clist_t *q;
    _st_pollq_t *pq;
    struct pollfd *pds, *epds;
    int nfd, i, osfd, notify, filter;
    short events, revents;

    if (_ST_SLEEPQ == NULL) {
        tsp = NULL;  /* no sleeping threads: block indefinitely */
    } else {
        /* Wake in time for the earliest sleep-queue deadline */
        min_timeout = (_ST_SLEEPQ->due <= _ST_LAST_CLOCK) ? 0 : (_ST_SLEEPQ->due - _ST_LAST_CLOCK);
        timeout.tv_sec = (time_t) (min_timeout / 1000000);
        timeout.tv_nsec = (long) ((min_timeout % 1000000) * 1000);
        tsp = &timeout;
    }

 retry_kevent:
    /* Check for I/O operations */
    nfd = kevent(_st_kq_data->kq,
                 _st_kq_data->addlist, _st_kq_data->addlist_cnt,
                 _st_kq_data->evtlist, _st_kq_data->evtlist_size, tsp);

    _st_kq_data->addlist_cnt = 0;  /* additions were submitted above */

    if (nfd > 0) {
        /* Pass 1: accumulate fired filters per fd as poll-style bits */
        for (i = 0; i < nfd; i++) {
            osfd = _st_kq_data->evtlist[i].ident;
            filter = _st_kq_data->evtlist[i].filter;

            if (filter == EVFILT_READ) {
                _ST_KQ_REVENTS(osfd) |= POLLIN;
            } else if (filter == EVFILT_WRITE) {
                _ST_KQ_REVENTS(osfd) |= POLLOUT;
            }
            if (_st_kq_data->evtlist[i].flags & EV_ERROR) {
                if (_st_kq_data->evtlist[i].data == EBADF) {
                    _ST_KQ_REVENTS(osfd) |= POLLNVAL;
                } else {
                    _ST_KQ_REVENTS(osfd) |= POLLERR;
                }
            }
        }

        _st_kq_data->dellist_cnt = 0;

        /* Pass 2: wake every pollset with at least one fired entry */
        for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
            pq = _ST_POLLQUEUE_PTR(q);
            notify = 0;
            epds = pq->pds + pq->npds;

            for (pds = pq->pds; pds < epds; pds++) {
                osfd = pds->fd;
                events = pds->events;
                /* Error bits (POLLERR/POLLNVAL) are reported unconditionally */
                revents = (short)(_ST_KQ_REVENTS(osfd) & ~(POLLIN | POLLOUT));
                if ((events & POLLIN) && (_ST_KQ_REVENTS(osfd) & POLLIN)) {
                    revents |= POLLIN;
                }
                if ((events & POLLOUT) && (_ST_KQ_REVENTS(osfd) & POLLOUT)) {
                    revents |= POLLOUT;
                }
                pds->revents = revents;
                if (revents) {
                    notify = 1;
                }
            }
            if (notify) {
                ST_REMOVE_LINK(&pq->links);
                pq->on_ioq = 0;
                for (pds = pq->pds; pds < epds; pds++) {
                    osfd = pds->fd;
                    events = pds->events;
                    /*
                     * We set EV_ONESHOT flag so we only need to delete
                     * descriptor if it didn't fire.
                     */
                    if ((events & POLLIN) && (--_ST_KQ_READ_CNT(osfd) == 0) && ((_ST_KQ_REVENTS(osfd) & POLLIN) == 0)) {
                        memset(&kev, 0, sizeof(kev));
                        kev.ident = osfd;
                        kev.filter = EVFILT_READ;
                        kev.flags = EV_DELETE;
                        _st_kq_dellist_add(&kev);
                    }
                    if ((events & POLLOUT) && (--_ST_KQ_WRITE_CNT(osfd) == 0) && ((_ST_KQ_REVENTS(osfd) & POLLOUT) == 0)) {
                        memset(&kev, 0, sizeof(kev));
                        kev.ident = osfd;
                        kev.filter = EVFILT_WRITE;
                        kev.flags = EV_DELETE;
                        _st_kq_dellist_add(&kev);
                    }
                }

                /* Wake the owning thread */
                if (pq->thread->flags & _ST_FL_ON_SLEEPQ)
                    _ST_DEL_SLEEPQ(pq->thread);
                pq->thread->state = _ST_ST_RUNNABLE;
                _ST_ADD_RUNQ(pq->thread);
            }
        }

        if (_st_kq_data->dellist_cnt > 0) {
            int rv;
            do {
                /* This kevent() won't block since result list size is 0 */
                rv = kevent(_st_kq_data->kq, _st_kq_data->dellist, _st_kq_data->dellist_cnt, NULL, 0, NULL);
            } while (rv < 0 && errno == EINTR);
        }

        /* Reset accumulated revents for the next dispatch iteration */
        for (i = 0; i < nfd; i++) {
            osfd = _st_kq_data->evtlist[i].ident;
            _ST_KQ_REVENTS(osfd) = 0;
        }
    } else if (nfd < 0) {
        if (errno == EBADF && _st_kq_data->pid != getpid()) {
            /* We probably forked, reinitialize kqueue */
            if ((_st_kq_data->kq = kqueue()) < 0) {
                /* There is nothing we can do here, will retry later */
                return;
            }
            fcntl(_st_kq_data->kq, F_SETFD, FD_CLOEXEC);
            _st_kq_data->pid = getpid();
            /* Re-register all descriptors on ioq with new kqueue */
            memset(_st_kq_data->fd_data, 0, _st_kq_data->fd_data_size * sizeof(_kq_fd_data_t));
            for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
                pq = _ST_POLLQUEUE_PTR(q);
                _st_kq_pollset_add(pq->pds, pq->npds);
            }
            goto retry_kevent;
        }
    }
}
  848. ST_HIDDEN int _st_kq_fd_new(int osfd)
  849. {
  850. if (osfd >= _st_kq_data->fd_data_size && _st_kq_fd_data_expand(osfd) < 0)
  851. return -1;
  852. return 0;
  853. }
  854. ST_HIDDEN int _st_kq_fd_close(int osfd)
  855. {
  856. if (_ST_KQ_READ_CNT(osfd) || _ST_KQ_WRITE_CNT(osfd)) {
  857. errno = EBUSY;
  858. return -1;
  859. }
  860. return 0;
  861. }
/*
 * Descriptor limit imposed by the kqueue event system itself.
 * Zero means kqueue adds no limit beyond the process fd limit.
 */
ST_HIDDEN int _st_kq_fd_getlimit(void)
{
    /* zero means no specific limit */
    return 0;
}
/* Operations table for the kqueue event system (selected via ST_EVENTSYS_ALT). */
static _st_eventsys_t _st_kq_eventsys = {
    "kqueue",            /* name */
    ST_EVENTSYS_ALT,     /* val */
    _st_kq_init,         /* init */
    _st_kq_dispatch,     /* dispatch */
    _st_kq_pollset_add,  /* pollset_add */
    _st_kq_pollset_del,  /* pollset_del */
    _st_kq_fd_new,       /* fd_new */
    _st_kq_fd_close,     /* fd_close */
    _st_kq_fd_getlimit   /* fd_getlimit */
};
  878. #endif /* MD_HAVE_KQUEUE */
  879. #ifdef MD_HAVE_EPOLL
  880. /*****************************************
  881. * epoll event system
  882. */
  883. ST_HIDDEN int _st_epoll_init(void)
  884. {
  885. int fdlim;
  886. int err = 0;
  887. int rv = 0;
  888. _st_epoll_data = (struct _st_epolldata *) calloc(1, sizeof(*_st_epoll_data));
  889. if (!_st_epoll_data)
  890. return -1;
  891. fdlim = st_getfdlimit();
  892. _st_epoll_data->fd_hint = (fdlim > 0 && fdlim < ST_EPOLL_EVTLIST_SIZE) ? fdlim : ST_EPOLL_EVTLIST_SIZE;
  893. if ((_st_epoll_data->epfd = epoll_create(_st_epoll_data->fd_hint)) < 0) {
  894. err = errno;
  895. rv = -1;
  896. goto cleanup_epoll;
  897. }
  898. fcntl(_st_epoll_data->epfd, F_SETFD, FD_CLOEXEC);
  899. _st_epoll_data->pid = getpid();
  900. /* Allocate file descriptor data array */
  901. _st_epoll_data->fd_data_size = _st_epoll_data->fd_hint;
  902. _st_epoll_data->fd_data = (_epoll_fd_data_t *)calloc(_st_epoll_data->fd_data_size, sizeof(_epoll_fd_data_t));
  903. if (!_st_epoll_data->fd_data) {
  904. err = errno;
  905. rv = -1;
  906. goto cleanup_epoll;
  907. }
  908. /* Allocate event lists */
  909. _st_epoll_data->evtlist_size = _st_epoll_data->fd_hint;
  910. _st_epoll_data->evtlist = (struct epoll_event *)malloc(_st_epoll_data->evtlist_size * sizeof(struct epoll_event));
  911. if (!_st_epoll_data->evtlist) {
  912. err = errno;
  913. rv = -1;
  914. }
  915. cleanup_epoll:
  916. if (rv < 0) {
  917. if (_st_epoll_data->epfd >= 0)
  918. close(_st_epoll_data->epfd);
  919. free(_st_epoll_data->fd_data);
  920. free(_st_epoll_data->evtlist);
  921. free(_st_epoll_data);
  922. _st_epoll_data = NULL;
  923. errno = err;
  924. }
  925. return rv;
  926. }
  927. ST_HIDDEN int _st_epoll_fd_data_expand(int maxfd)
  928. {
  929. _epoll_fd_data_t *ptr;
  930. int n = _st_epoll_data->fd_data_size;
  931. while (maxfd >= n)
  932. n <<= 1;
  933. ptr = (_epoll_fd_data_t *)realloc(_st_epoll_data->fd_data, n * sizeof(_epoll_fd_data_t));
  934. if (!ptr)
  935. return -1;
  936. memset(ptr + _st_epoll_data->fd_data_size, 0, (n - _st_epoll_data->fd_data_size) * sizeof(_epoll_fd_data_t));
  937. _st_epoll_data->fd_data = ptr;
  938. _st_epoll_data->fd_data_size = n;
  939. return 0;
  940. }
  941. ST_HIDDEN void _st_epoll_evtlist_expand(void)
  942. {
  943. struct epoll_event *ptr;
  944. int n = _st_epoll_data->evtlist_size;
  945. while (_st_epoll_data->evtlist_cnt > n)
  946. n <<= 1;
  947. ptr = (struct epoll_event *)realloc(_st_epoll_data->evtlist, n * sizeof(struct epoll_event));
  948. if (ptr) {
  949. _st_epoll_data->evtlist = ptr;
  950. _st_epoll_data->evtlist_size = n;
  951. }
  952. }
  953. ST_HIDDEN void _st_epoll_pollset_del(struct pollfd *pds, int npds)
  954. {
  955. struct epoll_event ev;
  956. struct pollfd *pd;
  957. struct pollfd *epd = pds + npds;
  958. int old_events, events, op;
  959. /*
  960. * It's more or less OK if deleting fails because a descriptor
  961. * will either be closed or deleted in dispatch function after
  962. * it fires.
  963. */
  964. for (pd = pds; pd < epd; pd++) {
  965. old_events = _ST_EPOLL_EVENTS(pd->fd);
  966. if (pd->events & POLLIN)
  967. _ST_EPOLL_READ_CNT(pd->fd)--;
  968. if (pd->events & POLLOUT)
  969. _ST_EPOLL_WRITE_CNT(pd->fd)--;
  970. if (pd->events & POLLPRI)
  971. _ST_EPOLL_EXCEP_CNT(pd->fd)--;
  972. events = _ST_EPOLL_EVENTS(pd->fd);
  973. /*
  974. * The _ST_EPOLL_REVENTS check below is needed so we can use
  975. * this function inside dispatch(). Outside of dispatch()
  976. * _ST_EPOLL_REVENTS is always zero for all descriptors.
  977. */
  978. if (events != old_events && _ST_EPOLL_REVENTS(pd->fd) == 0) {
  979. op = events ? EPOLL_CTL_MOD : EPOLL_CTL_DEL;
  980. ev.events = events;
  981. ev.data.fd = pd->fd;
  982. if (epoll_ctl(_st_epoll_data->epfd, op, pd->fd, &ev) == 0 && op == EPOLL_CTL_DEL) {
  983. _st_epoll_data->evtlist_cnt--;
  984. }
  985. }
  986. }
  987. }
/*
 * Register an array of pollfd entries with the epoll set.  Bumps the
 * per-fd interest counters and issues EPOLL_CTL_ADD/MOD where the
 * combined interest for a descriptor changed.  Returns 0 on success,
 * -1 with errno set on failure; on failure all partial state is rolled
 * back, so the call is all-or-nothing.
 */
ST_HIDDEN int _st_epoll_pollset_add(struct pollfd *pds, int npds)
{
    struct epoll_event ev;
    int i, fd;
    int old_events, events, op;

    /* Do as many checks as possible up front */
    for (i = 0; i < npds; i++) {
        fd = pds[i].fd;
        /* Only POLLIN/POLLOUT/POLLPRI interest is supported */
        if (fd < 0 || !pds[i].events ||
            (pds[i].events & ~(POLLIN | POLLOUT | POLLPRI))) {
            errno = EINVAL;
            return -1;
        }
        if (fd >= _st_epoll_data->fd_data_size && _st_epoll_fd_data_expand(fd) < 0)
            return -1;
    }

    for (i = 0; i < npds; i++) {
        fd = pds[i].fd;
        old_events = _ST_EPOLL_EVENTS(fd);

        if (pds[i].events & POLLIN)
            _ST_EPOLL_READ_CNT(fd)++;
        if (pds[i].events & POLLOUT)
            _ST_EPOLL_WRITE_CNT(fd)++;
        if (pds[i].events & POLLPRI)
            _ST_EPOLL_EXCEP_CNT(fd)++;

        events = _ST_EPOLL_EVENTS(fd);
        if (events != old_events) {
            op = old_events ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
            ev.events = events;
            ev.data.fd = fd;
            /* EEXIST on ADD is tolerated: the fd may already be in the
             * set (e.g. after a dup), in which case the interest is the
             * same and nothing more is needed. */
            if (epoll_ctl(_st_epoll_data->epfd, op, fd, &ev) < 0 && (op != EPOLL_CTL_ADD || errno != EEXIST))
                break;
            if (op == EPOLL_CTL_ADD) {
                _st_epoll_data->evtlist_cnt++;
                if (_st_epoll_data->evtlist_cnt > _st_epoll_data->evtlist_size)
                    _st_epoll_evtlist_expand();
            }
        }
    }

    if (i < npds) {
        /* Error */
        int err = errno;
        /* Unroll the state: i + 1 entries had their counters bumped,
         * including the one whose epoll_ctl() just failed. */
        _st_epoll_pollset_del(pds, i + 1);
        errno = err;
        return -1;
    }

    return 0;
}
/*
 * Core scheduler hook for the epoll event system: wait for I/O or the
 * nearest sleep-queue deadline, translate fired epoll events back into
 * poll(2) revents for each waiting poll queue, and move any thread
 * whose descriptors fired onto the run queue.
 */
ST_HIDDEN void _st_epoll_dispatch(void)
{
    st_utime_t min_timeout;
    _st_clist_t *q;
    _st_pollq_t *pq;
    struct pollfd *pds, *epds;
    struct epoll_event ev;
    int timeout, nfd, i, osfd, notify;
    int events, op;
    short revents;

    /* Convert the earliest sleep deadline into a millisecond timeout;
     * block indefinitely if no thread is sleeping. */
    if (_ST_SLEEPQ == NULL) {
        timeout = -1;
    } else {
        min_timeout = (_ST_SLEEPQ->due <= _ST_LAST_CLOCK) ? 0 : (_ST_SLEEPQ->due - _ST_LAST_CLOCK);
        timeout = (int) (min_timeout / 1000);
    }

    if (_st_epoll_data->pid != getpid()) {
        /* We probably forked, reinitialize epoll set */
        close(_st_epoll_data->epfd);
        _st_epoll_data->epfd = epoll_create(_st_epoll_data->fd_hint);
        if (_st_epoll_data->epfd < 0) {
            /* There is nothing we can do here, will retry later */
            return;
        }
        fcntl(_st_epoll_data->epfd, F_SETFD, FD_CLOEXEC);
        _st_epoll_data->pid = getpid();

        /* Put all descriptors on ioq into new epoll set */
        memset(_st_epoll_data->fd_data, 0, _st_epoll_data->fd_data_size * sizeof(_epoll_fd_data_t));
        _st_epoll_data->evtlist_cnt = 0;
        for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
            pq = _ST_POLLQUEUE_PTR(q);
            _st_epoll_pollset_add(pq->pds, pq->npds);
        }
    }

    /* Check for I/O operations */
    nfd = epoll_wait(_st_epoll_data->epfd, _st_epoll_data->evtlist, _st_epoll_data->evtlist_size, timeout);

    if (nfd > 0) {
        /* Pass 1: cache each fired descriptor's events in the per-fd
         * revents slot so the queue walk below can look them up by fd. */
        for (i = 0; i < nfd; i++) {
            osfd = _st_epoll_data->evtlist[i].data.fd;
            _ST_EPOLL_REVENTS(osfd) = _st_epoll_data->evtlist[i].events;
            if (_ST_EPOLL_REVENTS(osfd) & (EPOLLERR | EPOLLHUP)) {
                /* Also set I/O bits on error */
                _ST_EPOLL_REVENTS(osfd) |= _ST_EPOLL_EVENTS(osfd);
            }
        }

        /* Pass 2: walk every poll queue, fill in poll(2)-style revents,
         * and wake the owning thread if anything it waits on fired. */
        for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
            pq = _ST_POLLQUEUE_PTR(q);
            notify = 0;
            epds = pq->pds + pq->npds;

            for (pds = pq->pds; pds < epds; pds++) {
                if (_ST_EPOLL_REVENTS(pds->fd) == 0) {
                    pds->revents = 0;
                    continue;
                }
                osfd = pds->fd;
                events = pds->events;
                revents = 0;
                /* Report only what this pollfd asked for, plus
                 * unconditional error/hangup indications. */
                if ((events & POLLIN) && (_ST_EPOLL_REVENTS(osfd) & EPOLLIN))
                    revents |= POLLIN;
                if ((events & POLLOUT) && (_ST_EPOLL_REVENTS(osfd) & EPOLLOUT))
                    revents |= POLLOUT;
                if ((events & POLLPRI) && (_ST_EPOLL_REVENTS(osfd) & EPOLLPRI))
                    revents |= POLLPRI;
                if (_ST_EPOLL_REVENTS(osfd) & EPOLLERR)
                    revents |= POLLERR;
                if (_ST_EPOLL_REVENTS(osfd) & EPOLLHUP)
                    revents |= POLLHUP;
                pds->revents = revents;
                if (revents) {
                    notify = 1;
                }
            }
            if (notify) {
                ST_REMOVE_LINK(&pq->links);
                pq->on_ioq = 0;
                /*
                 * Here we will only delete/modify descriptors that
                 * didn't fire (see comments in _st_epoll_pollset_del()).
                 */
                _st_epoll_pollset_del(pq->pds, pq->npds);

                if (pq->thread->flags & _ST_FL_ON_SLEEPQ)
                    _ST_DEL_SLEEPQ(pq->thread);
                pq->thread->state = _ST_ST_RUNNABLE;
                _ST_ADD_RUNQ(pq->thread);
            }
        }

        /* Pass 3: clear the revents cache and bring the epoll set in
         * line for the descriptors that fired (pollset_del skipped
         * them above because their revents were non-zero). */
        for (i = 0; i < nfd; i++) {
            /* Delete/modify descriptors that fired */
            osfd = _st_epoll_data->evtlist[i].data.fd;
            _ST_EPOLL_REVENTS(osfd) = 0;
            events = _ST_EPOLL_EVENTS(osfd);
            op = events ? EPOLL_CTL_MOD : EPOLL_CTL_DEL;
            ev.events = events;
            ev.data.fd = osfd;
            if (epoll_ctl(_st_epoll_data->epfd, op, osfd, &ev) == 0 && op == EPOLL_CTL_DEL) {
                _st_epoll_data->evtlist_cnt--;
            }
        }
    }
}
  1137. ST_HIDDEN int _st_epoll_fd_new(int osfd)
  1138. {
  1139. if (osfd >= _st_epoll_data->fd_data_size && _st_epoll_fd_data_expand(osfd) < 0)
  1140. return -1;
  1141. return 0;
  1142. }
  1143. ST_HIDDEN int _st_epoll_fd_close(int osfd)
  1144. {
  1145. if (_ST_EPOLL_READ_CNT(osfd) || _ST_EPOLL_WRITE_CNT(osfd) || _ST_EPOLL_EXCEP_CNT(osfd)) {
  1146. errno = EBUSY;
  1147. return -1;
  1148. }
  1149. return 0;
  1150. }
/*
 * Descriptor limit imposed by the epoll event system itself.
 * Zero means epoll adds no limit beyond the process fd limit.
 */
ST_HIDDEN int _st_epoll_fd_getlimit(void)
{
    /* zero means no specific limit */
    return 0;
}
  1156. /*
  1157. * Check if epoll functions are just stubs.
  1158. */
  1159. ST_HIDDEN int _st_epoll_is_supported(void)
  1160. {
  1161. struct epoll_event ev;
  1162. ev.events = EPOLLIN;
  1163. ev.data.ptr = NULL;
  1164. /* Guaranteed to fail */
  1165. epoll_ctl(-1, EPOLL_CTL_ADD, -1, &ev);
  1166. return (errno != ENOSYS);
  1167. }
/* Operations table for the epoll event system (selected via ST_EVENTSYS_ALT). */
static _st_eventsys_t _st_epoll_eventsys = {
    "epoll",               /* name */
    ST_EVENTSYS_ALT,       /* val */
    _st_epoll_init,        /* init */
    _st_epoll_dispatch,    /* dispatch */
    _st_epoll_pollset_add, /* pollset_add */
    _st_epoll_pollset_del, /* pollset_del */
    _st_epoll_fd_new,      /* fd_new */
    _st_epoll_fd_close,    /* fd_close */
    _st_epoll_fd_getlimit  /* fd_getlimit */
};
  1179. #endif /* MD_HAVE_EPOLL */
  1180. /*****************************************
  1181. * Public functions
  1182. */
/*
 * Select the event notification mechanism.  Must be called before an
 * event system has been chosen; fails with EBUSY once one is set and
 * with EINVAL for an unknown eventsys value.  Returns 0 on success.
 *
 * NOTE(review): for ST_EVENTSYS_ALT built with epoll only, if
 * _st_epoll_is_supported() reports stubs, _st_eventsys is left NULL
 * and 0 is still returned — presumably a default is chosen later at
 * init time; confirm against the caller of this function.
 */
int st_set_eventsys(int eventsys)
{
    if (_st_eventsys) {
        /* An event system was already selected */
        errno = EBUSY;
        return -1;
    }

    switch (eventsys) {
    case ST_EVENTSYS_DEFAULT:
#ifdef USE_POLL
        _st_eventsys = &_st_poll_eventsys;
#else
        _st_eventsys = &_st_select_eventsys;
#endif
        break;
    case ST_EVENTSYS_SELECT:
        _st_eventsys = &_st_select_eventsys;
        break;
#ifdef MD_HAVE_POLL
    case ST_EVENTSYS_POLL:
        _st_eventsys = &_st_poll_eventsys;
        break;
#endif
    case ST_EVENTSYS_ALT:
        /* Prefer kqueue where built in, otherwise runtime-checked epoll */
#if defined (MD_HAVE_KQUEUE)
        _st_eventsys = &_st_kq_eventsys;
#elif defined (MD_HAVE_EPOLL)
        if (_st_epoll_is_supported())
            _st_eventsys = &_st_epoll_eventsys;
#endif
        break;
    default:
        errno = EINVAL;
        return -1;
    }

    return 0;
}
  1219. int st_get_eventsys(void)
  1220. {
  1221. return _st_eventsys ? _st_eventsys->val : -1;
  1222. }
  1223. const char *st_get_eventsys_name(void)
  1224. {
  1225. return _st_eventsys ? _st_eventsys->name : "";
  1226. }