Ruby 3.3.2p78 (2024-05-30 revision e5a195edf62fe1bf7146a191da13fa1c4fecbd71)
thread_pthread_mn.c
1// included by "thread_pthread.c"
2
3#if USE_MN_THREADS
4
5static void timer_thread_unregister_waiting(rb_thread_t *th, int fd, enum thread_sched_waiting_flag flags);
6
7static bool
8timer_thread_cancel_waiting(rb_thread_t *th)
9{
10 bool canceled = false;
11
12 if (th->sched.waiting_reason.flags) {
13 rb_native_mutex_lock(&timer_th.waiting_lock);
14 {
15 if (th->sched.waiting_reason.flags) {
16 canceled = true;
17 ccan_list_del_init(&th->sched.waiting_reason.node);
18 if (th->sched.waiting_reason.flags & (thread_sched_waiting_io_read | thread_sched_waiting_io_write)) {
19 timer_thread_unregister_waiting(th, th->sched.waiting_reason.data.fd, th->sched.waiting_reason.flags);
20 }
21 th->sched.waiting_reason.flags = thread_sched_waiting_none;
22 }
23 }
24 rb_native_mutex_unlock(&timer_th.waiting_lock);
25 }
26
27 return canceled;
28}
29
30static void
31ubf_event_waiting(void *ptr)
32{
33 rb_thread_t *th = (rb_thread_t *)ptr;
34 struct rb_thread_sched *sched = TH_SCHED(th);
35
36 RUBY_DEBUG_LOG("th:%u", rb_th_serial(th));
37
38 VM_ASSERT(th->nt == NULL || !th_has_dedicated_nt(th));
39
40 // clear the unblock function so this UBF runs only once; safe because th->interrupt_lock is already acquired.
41 th->unblock.func = NULL;
42 th->unblock.arg = NULL;
43
44 bool canceled = timer_thread_cancel_waiting(th);
45
46 thread_sched_lock(sched, th);
47 {
48 if (sched->running == th) {
49 RUBY_DEBUG_LOG("not waiting yet");
50 }
51 else if (canceled) {
52 thread_sched_to_ready_common(sched, th, true, false);
53 }
54 else {
55 RUBY_DEBUG_LOG("already not waiting");
56 }
57 }
58 thread_sched_unlock(sched, th);
59}
60
61static bool timer_thread_register_waiting(rb_thread_t *th, int fd, enum thread_sched_waiting_flag flags, rb_hrtime_t *rel);
62
63// return true if timed out
64static bool
65thread_sched_wait_events(struct rb_thread_sched *sched, rb_thread_t *th, int fd, enum thread_sched_waiting_flag events, rb_hrtime_t *rel)
66{
67 VM_ASSERT(!th_has_dedicated_nt(th)); // on SNT
68
69 volatile bool timedout = false, need_cancel = false;
70
71 if (timer_thread_register_waiting(th, fd, events, rel)) {
72 RUBY_DEBUG_LOG("wait fd:%d", fd);
73
74 RB_VM_SAVE_MACHINE_CONTEXT(th);
75 setup_ubf(th, ubf_event_waiting, (void *)th);
76
77 RB_INTERNAL_THREAD_HOOK(RUBY_INTERNAL_THREAD_EVENT_SUSPENDED, th);
78
79 thread_sched_lock(sched, th);
80 {
81 if (th->sched.waiting_reason.flags == thread_sched_waiting_none) {
82 // already awakened
83 }
84 else if (RUBY_VM_INTERRUPTED(th->ec)) {
85 need_cancel = true;
86 }
87 else {
88 RUBY_DEBUG_LOG("sleep");
89
90 th->status = THREAD_STOPPED_FOREVER;
91 thread_sched_wakeup_next_thread(sched, th, true);
92 thread_sched_wait_running_turn(sched, th, true);
93
94 RUBY_DEBUG_LOG("wakeup");
95 }
96
97 timedout = th->sched.waiting_reason.data.result == 0;
98 }
99 thread_sched_unlock(sched, th);
100
101 if (need_cancel) {
102 timer_thread_cancel_waiting(th);
103 }
104
105 setup_ubf(th, NULL, NULL); // TODO: maybe it is already NULL?
106
107 th->status = THREAD_RUNNABLE;
108 }
109 else {
110 RUBY_DEBUG_LOG("can not wait fd:%d", fd);
111 return false;
112 }
113
114 VM_ASSERT(sched->running == th);
115
116 return timedout;
117}
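
/*
 * Hypothetical caller sketch: block an M:N (SNT) thread until `fd` becomes
 * readable or a relative timeout expires. The helper name and the 100ms
 * value are made up for illustration; the real callers live in thread.c and
 * thread_pthread.c.
 */
#if 0
static bool
example_wait_readable_100ms(rb_thread_t *th, int fd)
{
    rb_hrtime_t rel = 100 * RB_HRTIME_PER_MSEC; // rb_hrtime_t is in nanoseconds

    // true means the timeout expired before fd became readable
    return thread_sched_wait_events(TH_SCHED(th), th, fd,
                                    thread_sched_waiting_io_read, &rel);
}
#endif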
118
120
121static int
122get_sysconf_page_size(void)
123{
124 static long page_size = 0;
125
126 if (UNLIKELY(page_size == 0)) {
127 page_size = sysconf(_SC_PAGESIZE);
128 VM_ASSERT(page_size < INT_MAX);
129 }
130 return (int)page_size;
131}
132
133#define MSTACK_CHUNK_SIZE (512 * 1024 * 1024) // 512MB
134#define MSTACK_PAGE_SIZE get_sysconf_page_size()
135#define MSTACK_CHUNK_PAGE_NUM (MSTACK_CHUNK_SIZE / MSTACK_PAGE_SIZE - 1) // 1 is start redzone
136
137// 512MB chunk
138// 131,072 pages (> 65,536)
139// 0th page is Redzone. Start from 1st page.
140
141/*
142 * <--> machine stack + vm stack
143 * ----------------------------------
144 * |HD...|RZ| ... |RZ| ... ... |RZ|
145 * <------------- 512MB ------------->
146 */
147
148static struct nt_stack_chunk_header {
149 struct nt_stack_chunk_header *prev_chunk;
150 struct nt_stack_chunk_header *prev_free_chunk;
151
152 uint16_t start_page;
153 uint16_t stack_count;
154 uint16_t uninitialized_stack_count;
155
156 uint16_t free_stack_pos;
157 uint16_t free_stack[];
158} *nt_stack_chunks = NULL,
159 *nt_free_stack_chunks = NULL;
160
161struct nt_machine_stack_footer {
162 struct nt_stack_chunk_header *ch;
163 size_t index;
164};
165
166static rb_nativethread_lock_t nt_machine_stack_lock = RB_NATIVETHREAD_LOCK_INIT;
167
168#include <sys/mman.h>
169
170// vm_stack_size + machine_stack_size + 1 * (guard page size)
171static inline size_t
172nt_thread_stack_size(void)
173{
174 static size_t msz;
175 if (LIKELY(msz > 0)) return msz;
176
177 rb_vm_t *vm = GET_VM();
178 int sz = (int)(vm->default_params.thread_vm_stack_size + vm->default_params.thread_machine_stack_size + MSTACK_PAGE_SIZE);
179 int page_num = roomof(sz, MSTACK_PAGE_SIZE);
180 msz = (size_t)page_num * MSTACK_PAGE_SIZE;
181 return msz;
182}
183
184static struct nt_stack_chunk_header *
185nt_alloc_thread_stack_chunk(void)
186{
187 int mmap_flags = MAP_ANONYMOUS | MAP_PRIVATE;
188#if defined(MAP_STACK) && !defined(__FreeBSD__) && !defined(__FreeBSD_kernel__)
189 mmap_flags |= MAP_STACK;
190#endif
191
192 const char *m = (void *)mmap(NULL, MSTACK_CHUNK_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
193 if (m == MAP_FAILED) {
194 return NULL;
195 }
196
197 size_t msz = nt_thread_stack_size();
198 int header_page_cnt = 1;
199 int stack_count = ((MSTACK_CHUNK_PAGE_NUM - header_page_cnt) * MSTACK_PAGE_SIZE) / msz;
200 int ch_size = sizeof(struct nt_stack_chunk_header) + sizeof(uint16_t) * stack_count;
201
202 if (ch_size > MSTACK_PAGE_SIZE * header_page_cnt) {
203 header_page_cnt = (ch_size + MSTACK_PAGE_SIZE - 1) / MSTACK_PAGE_SIZE;
204 stack_count = ((MSTACK_CHUNK_PAGE_NUM - header_page_cnt) * MSTACK_PAGE_SIZE) / msz;
205 }
206
207 VM_ASSERT(stack_count <= UINT16_MAX);
208
209 struct nt_stack_chunk_header *ch = (struct nt_stack_chunk_header *)m;
210
211 ch->start_page = header_page_cnt;
212 ch->prev_chunk = nt_stack_chunks;
213 ch->prev_free_chunk = nt_free_stack_chunks;
214 ch->uninitialized_stack_count = ch->stack_count = (uint16_t)stack_count;
215 ch->free_stack_pos = 0;
216
217 RUBY_DEBUG_LOG("ch:%p start_page:%d stack_cnt:%d stack_size:%d", ch, (int)ch->start_page, (int)ch->stack_count, (int)msz);
218
219 return ch;
220}
221
222static void *
223nt_stack_chunk_get_stack_start(struct nt_stack_chunk_header *ch, size_t idx)
224{
225 const char *m = (char *)ch;
226 return (void *)(m + ch->start_page * MSTACK_PAGE_SIZE + idx * nt_thread_stack_size());
227}
228
229static struct nt_machine_stack_footer *
230nt_stack_chunk_get_msf(const rb_vm_t *vm, const char *mstack)
231{
232 // TODO: stack direction
233 const size_t msz = vm->default_params.thread_machine_stack_size;
234 return (struct nt_machine_stack_footer *)&mstack[msz - sizeof(struct nt_machine_stack_footer)];
235}
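
// The footer occupies the last sizeof(struct nt_machine_stack_footer) bytes of
// the machine stack area, so nt_free_stack() can recover the owning chunk and
// slot index from nothing but the mstack pointer it is handed.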
236
237static void *
238nt_stack_chunk_get_stack(const rb_vm_t *vm, struct nt_stack_chunk_header *ch, size_t idx, void **vm_stack, void **machine_stack)
239{
240 // TODO: only support stack going down
241 // [VM ... <GUARD> machine stack ...]
242
243 const char *vstack, *mstack;
244 const char *guard_page;
245 vstack = nt_stack_chunk_get_stack_start(ch, idx);
246 guard_page = vstack + vm->default_params.thread_vm_stack_size;
247 mstack = guard_page + MSTACK_PAGE_SIZE;
248
249 struct nt_machine_stack_footer *msf = nt_stack_chunk_get_msf(vm, mstack);
250 msf->ch = ch;
251 msf->index = idx;
252
253#if 0
254 RUBY_DEBUG_LOG("msf:%p vstack:%p-%p guard_page:%p-%p mstack:%p-%p", msf,
255 vstack, (void *)(guard_page-1),
256 guard_page, (void *)(mstack-1),
257 mstack, (void *)(msf));
258#endif
259
260 *vm_stack = (void *)vstack;
261 *machine_stack = (void *)mstack;
262
263 return (void *)guard_page;
264}
265
267static void
268nt_stack_chunk_dump(void)
269{
270 struct nt_stack_chunk_header *ch;
271 int i;
272
273 fprintf(stderr, "** nt_stack_chunks\n");
274 ch = nt_stack_chunks;
275 for (i=0; ch; i++, ch = ch->prev_chunk) {
276 fprintf(stderr, "%d %p free_pos:%d\n", i, (void *)ch, (int)ch->free_stack_pos);
277 }
278
279 fprintf(stderr, "** nt_free_stack_chunks\n");
280 ch = nt_free_stack_chunks;
281 for (i=0; ch; i++, ch = ch->prev_free_chunk) {
282 fprintf(stderr, "%d %p free_pos:%d\n", i, (void *)ch, (int)ch->free_stack_pos);
283 }
284}
285
286static int
287nt_guard_page(const char *p, size_t len)
288{
289 if (mprotect((void *)p, len, PROT_NONE) != -1) {
290 return 0;
291 }
292 else {
293 return errno;
294 }
295}
296
297static int
298nt_alloc_stack(rb_vm_t *vm, void **vm_stack, void **machine_stack)
299{
300 int err = 0;
301
302 rb_native_mutex_lock(&nt_machine_stack_lock);
303 {
304 retry:
305 if (nt_free_stack_chunks) {
306 struct nt_stack_chunk_header *ch = nt_free_stack_chunks;
307 if (ch->free_stack_pos > 0) {
308 RUBY_DEBUG_LOG("free_stack_pos:%d", ch->free_stack_pos);
309 nt_stack_chunk_get_stack(vm, ch, ch->free_stack[--ch->free_stack_pos], vm_stack, machine_stack);
310 }
311 else if (ch->uninitialized_stack_count > 0) {
312 RUBY_DEBUG_LOG("uninitialized_stack_count:%d", ch->uninitialized_stack_count);
313
314 size_t idx = ch->stack_count - ch->uninitialized_stack_count--;
315 void *guard_page = nt_stack_chunk_get_stack(vm, ch, idx, vm_stack, machine_stack);
316 err = nt_guard_page(guard_page, MSTACK_PAGE_SIZE);
317 }
318 else {
319 nt_free_stack_chunks = ch->prev_free_chunk;
320 ch->prev_free_chunk = NULL;
321 goto retry;
322 }
323 }
324 else {
325 struct nt_stack_chunk_header *p = nt_alloc_thread_stack_chunk();
326 if (p == NULL) {
327 err = errno;
328 }
329 else {
330 nt_free_stack_chunks = nt_stack_chunks = p;
331 goto retry;
332 }
333 }
334 }
335 rb_native_mutex_unlock(&nt_machine_stack_lock);
336
337 return err;
338}
339
340static void
341nt_free_stack(void *mstack)
342{
343 if (!mstack) return;
344
345 rb_native_mutex_lock(&nt_machine_stack_lock);
346 {
347 struct nt_machine_stack_footer *msf = nt_stack_chunk_get_msf(GET_VM(), mstack);
348 struct nt_stack_chunk_header *ch = msf->ch;
349 int idx = (int)msf->index;
350 void *stack = nt_stack_chunk_get_stack_start(ch, idx);
351
352 RUBY_DEBUG_LOG("stack:%p mstack:%p ch:%p index:%d", stack, mstack, ch, idx);
353
354 if (ch->prev_free_chunk == NULL) {
355 ch->prev_free_chunk = nt_free_stack_chunks;
356 nt_free_stack_chunks = ch;
357 }
358 ch->free_stack[ch->free_stack_pos++] = idx;
359
360 // clear the stack pages
361#if defined(MADV_FREE)
362 int r = madvise(stack, nt_thread_stack_size(), MADV_FREE);
363#elif defined(MADV_DONTNEED)
364 int r = madvise(stack, nt_thread_stack_size(), MADV_DONTNEED);
365#else
366 int r = 0;
367#endif
368
369 if (r != 0) rb_bug("madvise errno:%d", errno);
370 }
371 rb_native_mutex_unlock(&nt_machine_stack_lock);
372}
373
374static int
375native_thread_check_and_create_shared(rb_vm_t *vm)
376{
377 bool need_to_make = false;
378
379 rb_native_mutex_lock(&vm->ractor.sched.lock);
380 {
381 unsigned int snt_cnt = vm->ractor.sched.snt_cnt;
382 if (!vm->ractor.main_ractor->threads.sched.enable_mn_threads) snt_cnt++; // do not need snt for main ractor
383
384 if (((int)snt_cnt < MINIMUM_SNT) ||
385 (snt_cnt < vm->ractor.cnt &&
386 snt_cnt < vm->ractor.sched.max_cpu)) {
387
388 RUBY_DEBUG_LOG("added snt:%u dnt:%u ractor_cnt:%u grq_cnt:%u",
389 vm->ractor.sched.snt_cnt,
390 vm->ractor.sched.dnt_cnt,
391 vm->ractor.cnt,
392 vm->ractor.sched.grq_cnt);
393
394 vm->ractor.sched.snt_cnt++;
395 need_to_make = true;
396 }
397 else {
398 RUBY_DEBUG_LOG("snt:%d ractor_cnt:%d", (int)vm->ractor.sched.snt_cnt, (int)vm->ractor.cnt);
399 }
400 }
401 rb_native_mutex_unlock(&vm->ractor.sched.lock);
402
403 if (need_to_make) {
404 struct rb_native_thread *nt = native_thread_alloc();
405 nt->vm = vm;
406 return native_thread_create0(nt);
407 }
408 else {
409 return 0;
410 }
411}
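
// Illustration of the condition above (made-up numbers): once MINIMUM_SNT is
// satisfied, SNTs are added only while snt_cnt is below both the ractor count
// and max_cpu. With max_cpu = 8, three ractors lead to at most 3 SNTs, while
// twenty ractors are capped at 8 SNTs (modulo the main-ractor adjustment above).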
412
413static COROUTINE
414co_start(struct coroutine_context *from, struct coroutine_context *self)
415{
416 rb_thread_t *th = (rb_thread_t *)self->argument;
417 struct rb_thread_sched *sched = TH_SCHED(th);
418 VM_ASSERT(th->nt != NULL);
419 VM_ASSERT(th == sched->running);
420 VM_ASSERT(sched->lock_owner == NULL);
421
422 // RUBY_DEBUG_LOG("th:%u", rb_th_serial(th));
423
424 thread_sched_set_lock_owner(sched, th);
425 thread_sched_add_running_thread(TH_SCHED(th), th);
426 thread_sched_unlock(sched, th);
427 {
428 RB_INTERNAL_THREAD_HOOK(RUBY_INTERNAL_THREAD_EVENT_RESUMED, th);
429 call_thread_start_func_2(th);
430 }
431 thread_sched_lock(sched, NULL);
432
433 RUBY_DEBUG_LOG("terminated th:%d", (int)th->serial);
434
435 // Thread is terminated
436
437 VM_ASSERT(!th_has_dedicated_nt(th));
438
439 rb_vm_t *vm = th->vm;
440 bool has_ready_ractor = vm->ractor.sched.grq_cnt > 0; // at least this ractor is not queued
441
442 rb_thread_t *next_th = sched->running;
443 struct rb_native_thread *nt = th->nt;
444 native_thread_assign(NULL, th);
445 rb_ractor_set_current_ec(th->ractor, NULL);
446
447 if (!has_ready_ractor && next_th && !next_th->nt) {
448 // switch to the next thread
449 thread_sched_set_lock_owner(sched, NULL);
450 thread_sched_switch0(th->sched.context, next_th, nt);
451 th->sched.finished = true;
452 }
453 else {
454 // switch to the next Ractor
455 th->sched.finished = true;
456 coroutine_transfer(self, nt->nt_context);
457 }
458 rb_bug("unreachable");
459}
460
461static int
462native_thread_create_shared(rb_thread_t *th)
463{
464 // setup coroutine
465 rb_vm_t *vm = th->vm;
466 void *vm_stack = NULL, *machine_stack = NULL;
467 int err = nt_alloc_stack(vm, &vm_stack, &machine_stack);
468 if (err) return err;
469
470 VM_ASSERT(vm_stack < machine_stack);
471
472 // setup vm stack
473 size_t vm_stack_words = th->vm->default_params.thread_vm_stack_size/sizeof(VALUE);
474 rb_ec_initialize_vm_stack(th->ec, vm_stack, vm_stack_words);
475
476 // setup machine stack
477 size_t machine_stack_size = vm->default_params.thread_machine_stack_size - sizeof(struct nt_machine_stack_footer);
478 th->ec->machine.stack_start = (void *)((uintptr_t)machine_stack + machine_stack_size);
479 th->ec->machine.stack_maxsize = machine_stack_size; // TODO
480 th->sched.context_stack = machine_stack;
481
482 th->sched.context = ruby_xmalloc(sizeof(struct coroutine_context));
483 coroutine_initialize(th->sched.context, co_start, machine_stack, machine_stack_size);
484 th->sched.context->argument = th;
485
486 RUBY_DEBUG_LOG("th:%u vm_stack:%p machine_stack:%p", rb_th_serial(th), vm_stack, machine_stack);
487 thread_sched_to_ready(TH_SCHED(th), th);
488
489 // setup nt
490 return native_thread_check_and_create_shared(th->vm);
491}
492
493#else // USE_MN_THREADS
494
495static int
496native_thread_create_shared(rb_thread_t *th)
497{
498 rb_bug("unreachable");
499}
500
501static bool
502thread_sched_wait_events(struct rb_thread_sched *sched, rb_thread_t *th, int fd, enum thread_sched_waiting_flag events, rb_hrtime_t *rel)
503{
504 rb_bug("unreachable");
505}
506
507#endif // USE_MN_THREADS
508
510#if (HAVE_SYS_EPOLL_H || HAVE_SYS_EVENT_H) && USE_MN_THREADS
511
512static bool
513fd_readable_nonblock(int fd)
514{
515 struct pollfd pfd = {
516 .fd = fd,
517 .events = POLLIN,
518 };
519 return poll(&pfd, 1, 0) != 0;
520}
521
522static bool
523fd_writable_nonblock(int fd)
524{
525 struct pollfd pfd = {
526 .fd = fd,
527 .events = POLLOUT,
528 };
529 return poll(&pfd, 1, 0) != 0;
530}
531
532static void
533verify_waiting_list(void)
534{
535#if VM_CHECK_MODE > 0
536 rb_thread_t *wth, *prev_wth = NULL;
537 ccan_list_for_each(&timer_th.waiting, wth, sched.waiting_reason.node) {
538 // fprintf(stderr, "verify_waiting_list th:%u abs:%lu\n", rb_th_serial(wth), (unsigned long)wth->sched.waiting_reason.data.timeout);
539 if (prev_wth) {
540 rb_hrtime_t timeout = wth->sched.waiting_reason.data.timeout;
541 rb_hrtime_t prev_timeout = prev_wth->sched.waiting_reason.data.timeout;
542 VM_ASSERT(timeout == 0 || prev_timeout <= timeout);
543 }
544 prev_wth = wth;
545 }
546#endif
547}
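
// The invariant checked above: timer_th.waiting is kept sorted by absolute
// timeout, with no-timeout entries (timeout == 0) appended at the tail. For
// example, the timeout sequence [10, 25, 40, 0, 0] passes, whereas
// [10, 40, 25] would trip the assertion.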
548
549#if HAVE_SYS_EVENT_H // kqueue helpers
550
551static enum thread_sched_waiting_flag
552kqueue_translate_filter_to_flags(int16_t filter)
553{
554 switch (filter) {
555 case EVFILT_READ:
556 return thread_sched_waiting_io_read;
557 case EVFILT_WRITE:
558 return thread_sched_waiting_io_write;
559 case EVFILT_TIMER:
560 return thread_sched_waiting_timeout;
561 default:
562 rb_bug("kevent filter:%d not supported", filter);
563 }
564}
565
566static int
567kqueue_wait(rb_vm_t *vm)
568{
569 struct timespec calculated_timeout;
570 struct timespec *timeout = NULL;
571 int timeout_ms = timer_thread_set_timeout(vm);
572
573 if (timeout_ms >= 0) {
574 calculated_timeout.tv_sec = timeout_ms / 1000;
575 calculated_timeout.tv_nsec = (timeout_ms % 1000) * 1000000;
576 timeout = &calculated_timeout;
577 }
578
579 return kevent(timer_th.event_fd, NULL, 0, timer_th.finished_events, KQUEUE_EVENTS_MAX, timeout);
580}
581
582static void
583kqueue_create(void)
584{
585 if ((timer_th.event_fd = kqueue()) == -1) rb_bug("kqueue creation failed (errno:%d)", errno);
586 int flags = fcntl(timer_th.event_fd, F_GETFD);
587 if (flags == -1) {
588 rb_bug("kqueue GETFD failed (errno:%d)", errno);
589 }
590
591 flags |= FD_CLOEXEC;
592 if (fcntl(timer_th.event_fd, F_SETFD, flags) == -1) {
593 rb_bug("kqueue SETFD failed (errno:%d)", errno);
594 }
595}
596
597static void
598kqueue_unregister_waiting(int fd, enum thread_sched_waiting_flag flags)
599{
600 if (flags) {
601 struct kevent ke[2];
602 int num_events = 0;
603
604 if (flags & thread_sched_waiting_io_read) {
605 EV_SET(&ke[num_events], fd, EVFILT_READ, EV_DELETE, 0, 0, NULL);
606 num_events++;
607 }
608 if (flags & thread_sched_waiting_io_write) {
609 EV_SET(&ke[num_events], fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL);
610 num_events++;
611 }
612 if (kevent(timer_th.event_fd, ke, num_events, NULL, 0, NULL) == -1) {
613 perror("kevent");
614 rb_bug("unregister/kevent fails. errno:%d", errno);
615 }
616 }
617}
618
619static bool
620kqueue_already_registered(int fd)
621{
622 rb_thread_t *wth, *found_wth = NULL;
623 ccan_list_for_each(&timer_th.waiting, wth, sched.waiting_reason.node) {
624 // Similar to EEXIST in epoll_ctl, but more strict because it checks fd rather than flags
625 // for simplicity
626 if (wth->sched.waiting_reason.flags && wth->sched.waiting_reason.data.fd == fd) {
627 found_wth = wth;
628 break;
629 }
630 }
631 return found_wth != NULL;
632}
633
634#endif // HAVE_SYS_EVENT_H
635
636// return false if the fd is not waitable or does not need waiting.
637static bool
638timer_thread_register_waiting(rb_thread_t *th, int fd, enum thread_sched_waiting_flag flags, rb_hrtime_t *rel)
639{
640 RUBY_DEBUG_LOG("th:%u fd:%d flag:%d rel:%lu", rb_th_serial(th), fd, flags, rel ? (unsigned long)*rel : 0);
641
642 VM_ASSERT(th == NULL || TH_SCHED(th)->running == th);
643 VM_ASSERT(flags != 0);
644
645 rb_hrtime_t abs = 0; // 0 means no timeout
646
647 if (rel) {
648 if (*rel > 0) {
649 flags |= thread_sched_waiting_timeout;
650 }
651 else {
652 return false;
653 }
654 }
659
660#if HAVE_SYS_EVENT_H
661 struct kevent ke[2];
662 int num_events = 0;
663#else
664 uint32_t epoll_events = 0;
665#endif
666 if (flags & thread_sched_waiting_timeout) {
667 VM_ASSERT(rel != NULL);
668 abs = rb_hrtime_add(rb_hrtime_now(), *rel);
669 }
670
671 if (flags & thread_sched_waiting_io_read) {
672 if (!(flags & thread_sched_waiting_io_force) && fd_readable_nonblock(fd)) {
673 RUBY_DEBUG_LOG("fd_readable_nonblock");
674 return false;
675 }
676 else {
677 VM_ASSERT(fd >= 0);
678#if HAVE_SYS_EVENT_H
679 EV_SET(&ke[num_events], fd, EVFILT_READ, EV_ADD, 0, 0, (void *)th);
680 num_events++;
681#else
682 epoll_events |= EPOLLIN;
683#endif
684 }
685 }
686
687 if (flags & thread_sched_waiting_io_write) {
688 if (!(flags & thread_sched_waiting_io_force) && fd_writable_nonblock(fd)) {
689 RUBY_DEBUG_LOG("fd_writable_nonblock");
690 return false;
691 }
692 else {
693 VM_ASSERT(fd >= 0);
694#if HAVE_SYS_EVENT_H
695 EV_SET(&ke[num_events], fd, EVFILT_WRITE, EV_ADD, 0, 0, (void *)th);
696 num_events++;
697#else
698 epoll_events |= EPOLLOUT;
699#endif
700 }
701 }
702
703 rb_native_mutex_lock(&timer_th.waiting_lock);
704 {
705#if HAVE_SYS_EVENT_H
706 if (num_events > 0) {
707 if (kqueue_already_registered(fd)) {
708 rb_native_mutex_unlock(&timer_th.waiting_lock);
709 return false;
710 }
711
712 if (kevent(timer_th.event_fd, ke, num_events, NULL, 0, NULL) == -1) {
713 RUBY_DEBUG_LOG("failed (%d)", errno);
714
715 switch (errno) {
716 case EBADF:
717 // the fd is closed?
718 case EINTR:
719 // signal received? is there a sensible way to handle this?
720 default:
721 perror("kevent");
722 rb_bug("register/kevent failed(fd:%d, errno:%d)", fd, errno);
723 }
724 }
725 RUBY_DEBUG_LOG("kevent(add, fd:%d) success", fd);
726 }
727#else
728 if (epoll_events) {
729 struct epoll_event event = {
730 .events = epoll_events,
731 .data = {
732 .ptr = (void *)th,
733 },
734 };
735 if (epoll_ctl(timer_th.event_fd, EPOLL_CTL_ADD, fd, &event) == -1) {
736 RUBY_DEBUG_LOG("failed (%d)", errno);
737
738 switch (errno) {
739 case EBADF:
740 // the fd is closed?
741 case EPERM:
742 // the fd doesn't support epoll
743 case EEXIST:
744 // the fd is already registered by another thread
745 rb_native_mutex_unlock(&timer_th.waiting_lock);
746 return false;
747 default:
748 perror("epoll_ctl");
749 rb_bug("register/epoll_ctl failed(fd:%d, errno:%d)", fd, errno);
750 }
751 }
752 RUBY_DEBUG_LOG("epoll_ctl(add, fd:%d, events:%d) success", fd, epoll_events);
753 }
754#endif
755
756 if (th) {
757 VM_ASSERT(th->sched.waiting_reason.flags == thread_sched_waiting_none);
758
759 // setup waiting information
760 {
761 th->sched.waiting_reason.flags = flags;
762 th->sched.waiting_reason.data.timeout = abs;
763 th->sched.waiting_reason.data.fd = fd;
764 th->sched.waiting_reason.data.result = 0;
765 }
766
767 if (abs == 0) { // no timeout
768 VM_ASSERT(!(flags & thread_sched_waiting_timeout));
769 ccan_list_add_tail(&timer_th.waiting, &th->sched.waiting_reason.node);
770 }
771 else {
772 RUBY_DEBUG_LOG("abs:%lu", abs);
773 VM_ASSERT(flags & thread_sched_waiting_timeout);
774
775 // insert th into the sorted list (TODO: this is O(n))
776 rb_thread_t *wth, *prev_wth = NULL;
777
778 ccan_list_for_each(&timer_th.waiting, wth, sched.waiting_reason.node) {
779 if ((wth->sched.waiting_reason.flags & thread_sched_waiting_timeout) &&
780 wth->sched.waiting_reason.data.timeout < abs) {
781 prev_wth = wth;
782 }
783 else {
784 break;
785 }
786 }
787
788 if (prev_wth) {
789 ccan_list_add_after(&timer_th.waiting, &prev_wth->sched.waiting_reason.node, &th->sched.waiting_reason.node);
790 }
791 else {
792 ccan_list_add(&timer_th.waiting, &th->sched.waiting_reason.node);
793 }
794
795 verify_waiting_list();
796
797 // update timeout seconds
798 timer_thread_wakeup();
799 }
800 }
801 else {
802 VM_ASSERT(abs == 0);
803 }
804 }
805 rb_native_mutex_unlock(&timer_th.waiting_lock);
806
807 return true;
808}
809
810static void
811timer_thread_unregister_waiting(rb_thread_t *th, int fd, enum thread_sched_waiting_flag flags)
812{
813 RUBY_DEBUG_LOG("th:%u fd:%d", rb_th_serial(th), fd);
814#if HAVE_SYS_EVENT_H
815 kqueue_unregister_waiting(fd, flags);
816#else
817 // Linux 2.6.9 or later is needed to pass NULL as data.
818 if (epoll_ctl(timer_th.event_fd, EPOLL_CTL_DEL, fd, NULL) == -1) {
819 switch (errno) {
820 case EBADF:
821 // just ignore. maybe fd is closed.
822 break;
823 default:
824 perror("epoll_ctl");
825 rb_bug("unregister/epoll_ctl fails. errno:%d", errno);
826 }
827 }
828#endif
829}
830
831static void
832timer_thread_setup_mn(void)
833{
834#if HAVE_SYS_EVENT_H
835 kqueue_create();
836 RUBY_DEBUG_LOG("kqueue_fd:%d", timer_th.event_fd);
837#else
838 if ((timer_th.event_fd = epoll_create1(EPOLL_CLOEXEC)) == -1) rb_bug("epoll_create (errno:%d)", errno);
839 RUBY_DEBUG_LOG("epoll_fd:%d", timer_th.event_fd);
840#endif
841 RUBY_DEBUG_LOG("comm_fds:%d/%d", timer_th.comm_fds[0], timer_th.comm_fds[1]);
842
843 timer_thread_register_waiting(NULL, timer_th.comm_fds[0], thread_sched_waiting_io_read | thread_sched_waiting_io_force, NULL);
844}
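
// comm_fds[0] is registered with th == NULL and thread_sched_waiting_io_force,
// so the readable-right-now pre-check is skipped and the read end of the
// communication pipe stays registered for the timer thread's lifetime; writing
// to the other end (see timer_thread_wakeup()) interrupts event_wait() below.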
845
846static int
847event_wait(rb_vm_t *vm)
848{
849#if HAVE_SYS_EVENT_H
850 int r = kqueue_wait(vm);
851#else
852 int r = epoll_wait(timer_th.event_fd, timer_th.finished_events, EPOLL_EVENTS_MAX, timer_thread_set_timeout(vm));
853#endif
854 return r;
855}
856
857/*
858 * The purpose of the timer thread:
859 *
860 * (1) Periodic checking
861 * (1-1) Provide time slice for active NTs
862 * (1-2) Check NT shortage
863 * (1-3) Periodic UBF (global)
864 * (1-4) Lazy GRQ deq start
865 * (2) Receive notification
866 * (2-1) async I/O termination
867 * (2-2) timeout
868 * (2-2-1) sleep(n)
869 * (2-2-2) timeout(n), I/O, ...
870 */
871static void
872timer_thread_polling(rb_vm_t *vm)
873{
874 int r = event_wait(vm);
875
876 RUBY_DEBUG_LOG("r:%d errno:%d", r, errno);
877
878 switch (r) {
879 case 0: // timeout
880 RUBY_DEBUG_LOG("timeout%s", "");
881
882 ractor_sched_lock(vm, NULL);
883 {
884 // (1-1) timeslice
885 timer_thread_check_timeslice(vm);
886
887 // (1-4) lazy grq deq
888 if (vm->ractor.sched.grq_cnt > 0) {
889 RUBY_DEBUG_LOG("GRQ cnt: %u", vm->ractor.sched.grq_cnt);
890 rb_native_cond_signal(&vm->ractor.sched.cond);
891 }
892 }
893 ractor_sched_unlock(vm, NULL);
894
895 // (1-2)
896 native_thread_check_and_create_shared(vm);
897
898 break;
899
900 case -1:
901 switch (errno) {
902 case EINTR:
903 // simply retry
904 break;
905 default:
906 perror("event_wait");
907 rb_bug("event_wait errno:%d", errno);
908 }
909 break;
910
911 default:
912 RUBY_DEBUG_LOG("%d event(s)", r);
913
914#if HAVE_SYS_EVENT_H
915 for (int i=0; i<r; i++) {
916 rb_thread_t *th = (rb_thread_t *)timer_th.finished_events[i].udata;
917 int fd = (int)timer_th.finished_events[i].ident;
918 int16_t filter = timer_th.finished_events[i].filter;
919
920 if (th == NULL) {
921 // wakeup timerthread
922 RUBY_DEBUG_LOG("comm from fd:%d", timer_th.comm_fds[1]);
923 consume_communication_pipe(timer_th.comm_fds[0]);
924 } else {
925 // wakeup specific thread by IO
926 RUBY_DEBUG_LOG("io event. wakeup_th:%u event:%s%s",
927 rb_th_serial(th),
928 (filter == EVFILT_READ) ? "read/" : "",
929 (filter == EVFILT_WRITE) ? "write/" : "");
930
931 rb_native_mutex_lock(&timer_th.waiting_lock);
932 {
933 if (th->sched.waiting_reason.flags) {
934 // delete from chain
935 ccan_list_del_init(&th->sched.waiting_reason.node);
936 timer_thread_unregister_waiting(th, fd, kqueue_translate_filter_to_flags(filter));
937
938 th->sched.waiting_reason.flags = thread_sched_waiting_none;
939 th->sched.waiting_reason.data.fd = -1;
940 th->sched.waiting_reason.data.result = filter;
941
942 timer_thread_wakeup_thread(th);
943 } else {
944 // already released
945 }
946 }
947 rb_native_mutex_unlock(&timer_th.waiting_lock);
948 }
949 }
950#else
951 for (int i=0; i<r; i++) {
952 rb_thread_t *th = (rb_thread_t *)timer_th.finished_events[i].data.ptr;
953
954 if (th == NULL) {
955 // wakeup timerthread
956 RUBY_DEBUG_LOG("comm from fd:%d", timer_th.comm_fds[1]);
957 consume_communication_pipe(timer_th.comm_fds[0]);
958 }
959 else {
960 // wakeup specific thread by IO
961 uint32_t events = timer_th.finished_events[i].events;
962
963 RUBY_DEBUG_LOG("io event. wakeup_th:%u event:%s%s%s%s%s%s",
964 rb_th_serial(th),
965 (events & EPOLLIN) ? "in/" : "",
966 (events & EPOLLOUT) ? "out/" : "",
967 (events & EPOLLRDHUP) ? "RDHUP/" : "",
968 (events & EPOLLPRI) ? "pri/" : "",
969 (events & EPOLLERR) ? "err/" : "",
970 (events & EPOLLHUP) ? "hup/" : "");
971
972 rb_native_mutex_lock(&timer_th.waiting_lock);
973 {
974 if (th->sched.waiting_reason.flags) {
975 // delete from chain
976 ccan_list_del_init(&th->sched.waiting_reason.node);
977 timer_thread_unregister_waiting(th, th->sched.waiting_reason.data.fd, th->sched.waiting_reason.flags);
978
979 th->sched.waiting_reason.flags = thread_sched_waiting_none;
980 th->sched.waiting_reason.data.fd = -1;
981 th->sched.waiting_reason.data.result = (int)events;
982
983 timer_thread_wakeup_thread(th);
984 }
985 else {
986 // already released
987 }
988 }
989 rb_native_mutex_unlock(&timer_th.waiting_lock);
990 }
991 }
992#endif
993 }
994}
995
996#else // HAVE_SYS_EPOLL_H || HAVE_SYS_EVENT_H
997
998static void
999timer_thread_setup_mn(void)
1000{
1001 // do nothing
1002}
1003
1004static void
1005timer_thread_polling(rb_vm_t *vm)
1006{
1007 int timeout = timer_thread_set_timeout(vm);
1008
1009 struct pollfd pfd = {
1010 .fd = timer_th.comm_fds[0],
1011 .events = POLLIN,
1012 };
1013
1014 int r = poll(&pfd, 1, timeout);
1015
1016 switch (r) {
1017 case 0: // timeout
1018 rb_native_mutex_lock(&vm->ractor.sched.lock);
1019 {
1020 // (1-1) timeslice
1021 timer_thread_check_timeslice(vm);
1022 }
1023 rb_native_mutex_unlock(&vm->ractor.sched.lock);
1024 break;
1025
1026 case -1: // error
1027 switch (errno) {
1028 case EINTR:
1029 // simply retry
1030 break;
1031 default:
1032 perror("poll");
1033 rb_bug("poll errno:%d", errno);
1034 break;
1035 }
 break; // avoid falling through to the pipe-consuming case after EINTR
1036
1037 case 1:
1038 consume_communication_pipe(timer_th.comm_fds[0]);
1039 break;
1040
1041 default:
1042 rb_bug("unreachbale");
1043 }
1044}
1045
1046#endif // HAVE_SYS_EPOLL_H || HAVE_SYS_EVENT_H