Ruby 3.3.2p78 (2024-05-30 revision e5a195edf62fe1bf7146a191da13fa1c4fecbd71)
extension.c
1#include "prism/extension.h"
2
3// NOTE: this file should contain only bindings. All non-trivial logic should be
4// in libprism so it can be shared with the various callers.
5
6VALUE rb_cPrism;
7VALUE rb_cPrismNode;
8VALUE rb_cPrismSource;
9VALUE rb_cPrismToken;
10VALUE rb_cPrismLocation;
11
12VALUE rb_cPrismComment;
13VALUE rb_cPrismInlineComment;
14VALUE rb_cPrismEmbDocComment;
15VALUE rb_cPrismMagicComment;
16VALUE rb_cPrismParseError;
17VALUE rb_cPrismParseWarning;
18VALUE rb_cPrismParseResult;
19
20ID rb_option_id_filepath;
21ID rb_option_id_encoding;
22ID rb_option_id_line;
23ID rb_option_id_frozen_string_literal;
24ID rb_option_id_verbose;
25ID rb_option_id_scopes;
26
27/******************************************************************************/
28/* IO of Ruby code */
29/******************************************************************************/
30
36static const char *
37check_string(VALUE value) {
38 // If the value is nil, then we don't need to do anything.
39 if (NIL_P(value)) {
40 return NULL;
41 }
42
43 // Check if the value is a string. If it's not, then raise a type error.
44 if (!RB_TYPE_P(value, T_STRING)) {
45 rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(value));
46 }
47
48 // Otherwise, return the value as a C string.
49 return RSTRING_PTR(value);
50}
51
55static void
56input_load_string(pm_string_t *input, VALUE string) {
57 // Check if the string is a string. If it's not, then raise a type error.
58 if (!RB_TYPE_P(string, T_STRING)) {
59 rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(string));
60 }
61
62 pm_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string));
63}
64
65/******************************************************************************/
66/* Building C options from Ruby options */
67/******************************************************************************/
68
72static void
73build_options_scopes(pm_options_t *options, VALUE scopes) {
74 // Check if the value is an array. If it's not, then raise a type error.
75 if (!RB_TYPE_P(scopes, T_ARRAY)) {
76 rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scopes));
77 }
78
79 // Initialize the scopes array.
80 size_t scopes_count = RARRAY_LEN(scopes);
81 pm_options_scopes_init(options, scopes_count);
82
83 // Iterate over the scopes and add them to the options.
84 for (size_t scope_index = 0; scope_index < scopes_count; scope_index++) {
85 VALUE scope = rb_ary_entry(scopes, scope_index);
86
87 // Check that the scope is an array. If it's not, then raise a type
88 // error.
89 if (!RB_TYPE_P(scope, T_ARRAY)) {
90 rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scope));
91 }
92
93 // Initialize the scope array.
94 size_t locals_count = RARRAY_LEN(scope);
95 pm_options_scope_t *options_scope = &options->scopes[scope_index];
96 pm_options_scope_init(options_scope, locals_count);
97
98 // Iterate over the locals and add them to the scope.
99 for (size_t local_index = 0; local_index < locals_count; local_index++) {
100 VALUE local = rb_ary_entry(scope, local_index);
101
102 // Check that the local is a symbol. If it's not, then raise a
103 // type error.
104 if (!RB_TYPE_P(local, T_SYMBOL)) {
105 rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Symbol)", rb_obj_class(local));
106 }
107
108 // Add the local to the scope.
109 pm_string_t *scope_local = &options_scope->locals[local_index];
110 const char *name = rb_id2name(SYM2ID(local));
111 pm_string_constant_init(scope_local, name, strlen(name));
112 }
113 }
114}
115
119static int
120build_options_i(VALUE key, VALUE value, VALUE argument) {
121 pm_options_t *options = (pm_options_t *) argument;
122 ID key_id = SYM2ID(key);
123
124 if (key_id == rb_option_id_filepath) {
125 if (!NIL_P(value)) pm_options_filepath_set(options, check_string(value));
126 } else if (key_id == rb_option_id_encoding) {
127 if (!NIL_P(value)) pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value)));
128 } else if (key_id == rb_option_id_line) {
129 if (!NIL_P(value)) pm_options_line_set(options, NUM2INT(value));
130 } else if (key_id == rb_option_id_frozen_string_literal) {
131 if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, value == Qtrue);
132 } else if (key_id == rb_option_id_verbose) {
133 pm_options_suppress_warnings_set(options, value != Qtrue);
134 } else if (key_id == rb_option_id_scopes) {
135 if (!NIL_P(value)) build_options_scopes(options, value);
136 } else {
137 rb_raise(rb_eArgError, "unknown keyword: %"PRIsVALUE, key);
138 }
139
140 return ST_CONTINUE;
141}
142
149 pm_options_t *options;
150 VALUE keywords;
151};
152
157static VALUE
158build_options(VALUE argument) {
159 struct build_options_data *data = (struct build_options_data *) argument;
160 rb_hash_foreach(data->keywords, build_options_i, (VALUE) data->options);
161 return Qnil;
162}
163
167static void
168extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) {
169 options->line = 1; // default
170 if (!NIL_P(keywords)) {
171 struct build_options_data data = { .options = options, .keywords = keywords };
172 struct build_options_data *argument = &data;
173
174 int state = 0;
175 rb_protect(build_options, (VALUE) argument, &state);
176
177 if (state != 0) {
178 pm_options_free(options);
179 rb_jump_tag(state);
180 }
181 }
182
183 if (!NIL_P(filepath)) {
184 if (!RB_TYPE_P(filepath, T_STRING)) {
185 pm_options_free(options);
186 rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(filepath));
187 }
188
189 pm_options_filepath_set(options, RSTRING_PTR(filepath));
190 }
191}
192
196static void
197string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
198 VALUE string;
199 VALUE keywords;
200 rb_scan_args(argc, argv, "1:", &string, &keywords);
201
202 extract_options(options, Qnil, keywords);
203 input_load_string(input, string);
204}
205
209static bool
210file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
211 VALUE filepath;
212 VALUE keywords;
213 rb_scan_args(argc, argv, "1:", &filepath, &keywords);
214
215 extract_options(options, filepath, keywords);
216
217 if (!pm_string_mapped_init(input, (const char *) pm_string_source(&options->filepath))) {
218 pm_options_free(options);
219 return false;
220 }
221
222 return true;
223}
224
225/******************************************************************************/
226/* Serializing the AST */
227/******************************************************************************/
228
232static VALUE
233dump_input(pm_string_t *input, const pm_options_t *options) {
234 pm_buffer_t buffer;
235 if (!pm_buffer_init(&buffer)) {
236 rb_raise(rb_eNoMemError, "failed to allocate memory");
237 }
238
239 pm_parser_t parser;
240 pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
241
242 pm_node_t *node = pm_parse(&parser);
243 pm_serialize(&parser, node, &buffer);
244
245 VALUE result = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer));
246 pm_node_destroy(&parser, node);
247 pm_buffer_free(&buffer);
248 pm_parser_free(&parser);
249
250 return result;
251}
252
260static VALUE
261dump(int argc, VALUE *argv, VALUE self) {
262 pm_string_t input;
263 pm_options_t options = { 0 };
264 string_options(argc, argv, &input, &options);
265
266#ifdef PRISM_DEBUG_MODE_BUILD
267 size_t length = pm_string_length(&input);
268 char* dup = malloc(length);
269 memcpy(dup, pm_string_source(&input), length);
270 pm_string_constant_init(&input, dup, length);
271#endif
272
273 VALUE value = dump_input(&input, &options);
274
275#ifdef PRISM_DEBUG_MODE_BUILD
276 free(dup);
277#endif
278
279 pm_string_free(&input);
280 pm_options_free(&options);
281
282 return value;
283}
284
292static VALUE
293dump_file(int argc, VALUE *argv, VALUE self) {
294 pm_string_t input;
295 pm_options_t options = { 0 };
296 if (!file_options(argc, argv, &input, &options)) return Qnil;
297
298 VALUE value = dump_input(&input, &options);
299 pm_string_free(&input);
300 pm_options_free(&options);
301
302 return value;
303}
304
305/******************************************************************************/
306/* Extracting values for the parse result */
307/******************************************************************************/
308
312static VALUE
313parser_comments(pm_parser_t *parser, VALUE source) {
314 VALUE comments = rb_ary_new();
315
316 for (pm_comment_t *comment = (pm_comment_t *) parser->comment_list.head; comment != NULL; comment = (pm_comment_t *) comment->node.next) {
317 VALUE location_argv[] = {
318 source,
319 LONG2FIX(comment->location.start - parser->start),
320 LONG2FIX(comment->location.end - comment->location.start)
321 };
322
323 VALUE type = (comment->type == PM_COMMENT_EMBDOC) ? rb_cPrismEmbDocComment : rb_cPrismInlineComment;
324 VALUE comment_argv[] = { rb_class_new_instance(3, location_argv, rb_cPrismLocation) };
325 rb_ary_push(comments, rb_class_new_instance(1, comment_argv, type));
326 }
327
328 return comments;
329}
330
334static VALUE
335parser_magic_comments(pm_parser_t *parser, VALUE source) {
336 VALUE magic_comments = rb_ary_new();
337
339 VALUE key_loc_argv[] = {
340 source,
341 LONG2FIX(magic_comment->key_start - parser->start),
342 LONG2FIX(magic_comment->key_length)
343 };
344
345 VALUE value_loc_argv[] = {
346 source,
347 LONG2FIX(magic_comment->value_start - parser->start),
348 LONG2FIX(magic_comment->value_length)
349 };
350
351 VALUE magic_comment_argv[] = {
352 rb_class_new_instance(3, key_loc_argv, rb_cPrismLocation),
353 rb_class_new_instance(3, value_loc_argv, rb_cPrismLocation)
354 };
355
356 rb_ary_push(magic_comments, rb_class_new_instance(2, magic_comment_argv, rb_cPrismMagicComment));
357 }
358
359 return magic_comments;
360}
361
366static VALUE
367parser_data_loc(const pm_parser_t *parser, VALUE source) {
368 if (parser->data_loc.end == NULL) {
369 return Qnil;
370 } else {
371 VALUE argv[] = {
372 source,
373 LONG2FIX(parser->data_loc.start - parser->start),
374 LONG2FIX(parser->data_loc.end - parser->data_loc.start)
375 };
376
377 return rb_class_new_instance(3, argv, rb_cPrismLocation);
378 }
379}
380
384static VALUE
385parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
386 VALUE errors = rb_ary_new();
387 pm_diagnostic_t *error;
388
389 for (error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
390 VALUE location_argv[] = {
391 source,
392 LONG2FIX(error->location.start - parser->start),
393 LONG2FIX(error->location.end - error->location.start)
394 };
395
396 VALUE error_argv[] = {
397 rb_enc_str_new_cstr(error->message, encoding),
398 rb_class_new_instance(3, location_argv, rb_cPrismLocation)
399 };
400
401 rb_ary_push(errors, rb_class_new_instance(2, error_argv, rb_cPrismParseError));
402 }
403
404 return errors;
405}
406
410static VALUE
411parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
412 VALUE warnings = rb_ary_new();
413 pm_diagnostic_t *warning;
414
415 for (warning = (pm_diagnostic_t *) parser->warning_list.head; warning != NULL; warning = (pm_diagnostic_t *) warning->node.next) {
416 VALUE location_argv[] = {
417 source,
418 LONG2FIX(warning->location.start - parser->start),
419 LONG2FIX(warning->location.end - warning->location.start)
420 };
421
422 VALUE warning_argv[] = {
423 rb_enc_str_new_cstr(warning->message, encoding),
424 rb_class_new_instance(3, location_argv, rb_cPrismLocation)
425 };
426
427 rb_ary_push(warnings, rb_class_new_instance(2, warning_argv, rb_cPrismParseWarning));
428 }
429
430 return warnings;
431}
432
433/******************************************************************************/
434/* Lexing Ruby code */
435/******************************************************************************/
436
442typedef struct {
443 VALUE source;
444 VALUE tokens;
445 rb_encoding *encoding;
447
453static void
454parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
455 parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
456
457 VALUE yields = rb_ary_new_capa(2);
458 rb_ary_push(yields, pm_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source));
459 rb_ary_push(yields, INT2FIX(parser->lex_state));
460
461 rb_ary_push(parse_lex_data->tokens, yields);
462}
463
469static void
470parse_lex_encoding_changed_callback(pm_parser_t *parser) {
471 parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
472 parse_lex_data->encoding = rb_enc_find(parser->encoding->name);
473
474 // Since the encoding changed, we need to go back and change the encoding of
475 // the tokens that were already lexed. This is only going to end up being
476 // one or two tokens, since the encoding can only change at the top of the
477 // file.
478 VALUE tokens = parse_lex_data->tokens;
479 for (long index = 0; index < RARRAY_LEN(tokens); index++) {
480 VALUE yields = rb_ary_entry(tokens, index);
481 VALUE token = rb_ary_entry(yields, 0);
482
483 VALUE value = rb_ivar_get(token, rb_intern("@value"));
484 rb_enc_associate(value, parse_lex_data->encoding);
485 ENC_CODERANGE_CLEAR(value);
486 }
487}
488
493static VALUE
494parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nodes) {
495 pm_parser_t parser;
496 pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
497 pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
498
499 VALUE offsets = rb_ary_new();
500 VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), ULONG2NUM(parser.start_line), offsets };
501 VALUE source = rb_class_new_instance(3, source_argv, rb_cPrismSource);
502
503 parse_lex_data_t parse_lex_data = {
504 .source = source,
505 .tokens = rb_ary_new(),
506 .encoding = rb_utf8_encoding()
507 };
508
509 parse_lex_data_t *data = &parse_lex_data;
510 pm_lex_callback_t lex_callback = (pm_lex_callback_t) {
511 .data = (void *) data,
512 .callback = parse_lex_token,
513 };
514
515 parser.lex_callback = &lex_callback;
516 pm_node_t *node = pm_parse(&parser);
517
518 // Here we need to update the source range to have the correct newline
519 // offsets. We do it here because we've already created the object and given
520 // it over to all of the tokens.
521 for (size_t index = 0; index < parser.newline_list.size; index++) {
522 rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index]));
523 }
524
525 VALUE value;
526 if (return_nodes) {
527 value = rb_ary_new_capa(2);
528 rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding));
529 rb_ary_push(value, parse_lex_data.tokens);
530 } else {
531 value = parse_lex_data.tokens;
532 }
533
534 VALUE result_argv[] = {
535 value,
536 parser_comments(&parser, source),
537 parser_magic_comments(&parser, source),
538 parser_data_loc(&parser, source),
539 parser_errors(&parser, parse_lex_data.encoding, source),
540 parser_warnings(&parser, parse_lex_data.encoding, source),
541 source
542 };
543
544 pm_node_destroy(&parser, node);
545 pm_parser_free(&parser);
546 return rb_class_new_instance(7, result_argv, rb_cPrismParseResult);
547}
548
556static VALUE
557lex(int argc, VALUE *argv, VALUE self) {
558 pm_string_t input;
559 pm_options_t options = { 0 };
560 string_options(argc, argv, &input, &options);
561
562 VALUE result = parse_lex_input(&input, &options, false);
563 pm_string_free(&input);
564 pm_options_free(&options);
565
566 return result;
567}
568
576static VALUE
577lex_file(int argc, VALUE *argv, VALUE self) {
578 pm_string_t input;
579 pm_options_t options = { 0 };
580 if (!file_options(argc, argv, &input, &options)) return Qnil;
581
582 VALUE value = parse_lex_input(&input, &options, false);
583 pm_string_free(&input);
584 pm_options_free(&options);
585
586 return value;
587}
588
589/******************************************************************************/
590/* Parsing Ruby code */
591/******************************************************************************/
592
596static VALUE
597parse_input(pm_string_t *input, const pm_options_t *options) {
598 pm_parser_t parser;
599 pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
600
601 pm_node_t *node = pm_parse(&parser);
602 rb_encoding *encoding = rb_enc_find(parser.encoding->name);
603
604 VALUE source = pm_source_new(&parser, encoding);
605 VALUE result_argv[] = {
606 pm_ast_new(&parser, node, encoding),
607 parser_comments(&parser, source),
608 parser_magic_comments(&parser, source),
609 parser_data_loc(&parser, source),
610 parser_errors(&parser, encoding, source),
611 parser_warnings(&parser, encoding, source),
612 source
613 };
614
615 VALUE result = rb_class_new_instance(7, result_argv, rb_cPrismParseResult);
616
617 pm_node_destroy(&parser, node);
618 pm_parser_free(&parser);
619
620 return result;
621}
622
643static VALUE
644parse(int argc, VALUE *argv, VALUE self) {
645 pm_string_t input;
646 pm_options_t options = { 0 };
647 string_options(argc, argv, &input, &options);
648
649#ifdef PRISM_DEBUG_MODE_BUILD
650 size_t length = pm_string_length(&input);
651 char* dup = malloc(length);
652 memcpy(dup, pm_string_source(&input), length);
653 pm_string_constant_init(&input, dup, length);
654#endif
655
656 VALUE value = parse_input(&input, &options);
657
658#ifdef PRISM_DEBUG_MODE_BUILD
659 free(dup);
660#endif
661
662 pm_string_free(&input);
663 pm_options_free(&options);
664 return value;
665}
666
674static VALUE
675parse_file(int argc, VALUE *argv, VALUE self) {
676 pm_string_t input;
677 pm_options_t options = { 0 };
678 if (!file_options(argc, argv, &input, &options)) return Qnil;
679
680 VALUE value = parse_input(&input, &options);
681 pm_string_free(&input);
682 pm_options_free(&options);
683
684 return value;
685}
686
690static VALUE
691parse_input_comments(pm_string_t *input, const pm_options_t *options) {
692 pm_parser_t parser;
693 pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
694
695 pm_node_t *node = pm_parse(&parser);
696 rb_encoding *encoding = rb_enc_find(parser.encoding->name);
697
698 VALUE source = pm_source_new(&parser, encoding);
699 VALUE comments = parser_comments(&parser, source);
700
701 pm_node_destroy(&parser, node);
702 pm_parser_free(&parser);
703
704 return comments;
705}
706
714static VALUE
715parse_comments(int argc, VALUE *argv, VALUE self) {
716 pm_string_t input;
717 pm_options_t options = { 0 };
718 string_options(argc, argv, &input, &options);
719
720 VALUE result = parse_input_comments(&input, &options);
721 pm_string_free(&input);
722 pm_options_free(&options);
723
724 return result;
725}
726
734static VALUE
735parse_file_comments(int argc, VALUE *argv, VALUE self) {
736 pm_string_t input;
737 pm_options_t options = { 0 };
738 if (!file_options(argc, argv, &input, &options)) return Qnil;
739
740 VALUE value = parse_input_comments(&input, &options);
741 pm_string_free(&input);
742 pm_options_free(&options);
743
744 return value;
745}
746
761static VALUE
762parse_lex(int argc, VALUE *argv, VALUE self) {
763 pm_string_t input;
764 pm_options_t options = { 0 };
765 string_options(argc, argv, &input, &options);
766
767 VALUE value = parse_lex_input(&input, &options, true);
768 pm_string_free(&input);
769 pm_options_free(&options);
770
771 return value;
772}
773
788static VALUE
789parse_lex_file(int argc, VALUE *argv, VALUE self) {
790 pm_string_t input;
791 pm_options_t options = { 0 };
792 if (!file_options(argc, argv, &input, &options)) return Qnil;
793
794 VALUE value = parse_lex_input(&input, &options, true);
795 pm_string_free(&input);
796 pm_options_free(&options);
797
798 return value;
799}
800
804static VALUE
805parse_input_success_p(pm_string_t *input, const pm_options_t *options) {
806 pm_parser_t parser;
807 pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
808
809 pm_node_t *node = pm_parse(&parser);
810 pm_node_destroy(&parser, node);
811
812 VALUE result = parser.error_list.size == 0 ? Qtrue : Qfalse;
813 pm_parser_free(&parser);
814
815 return result;
816}
817
825static VALUE
826parse_success_p(int argc, VALUE *argv, VALUE self) {
827 pm_string_t input;
828 pm_options_t options = { 0 };
829 string_options(argc, argv, &input, &options);
830
831 VALUE result = parse_input_success_p(&input, &options);
832 pm_string_free(&input);
833 pm_options_free(&options);
834
835 return result;
836}
837
845static VALUE
846parse_file_success_p(int argc, VALUE *argv, VALUE self) {
847 pm_string_t input;
848 pm_options_t options = { 0 };
849 if (!file_options(argc, argv, &input, &options)) return Qnil;
850
851 VALUE result = parse_input_success_p(&input, &options);
852 pm_string_free(&input);
853 pm_options_free(&options);
854
855 return result;
856}
857
858/******************************************************************************/
859/* Utility functions exposed to make testing easier */
860/******************************************************************************/
861
870static VALUE
871named_captures(VALUE self, VALUE source) {
872 pm_string_list_t string_list = { 0 };
873
874 if (!pm_regexp_named_capture_group_names((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, PM_ENCODING_UTF_8_ENTRY)) {
875 pm_string_list_free(&string_list);
876 return Qnil;
877 }
878
879 VALUE names = rb_ary_new();
880 for (size_t index = 0; index < string_list.length; index++) {
881 const pm_string_t *string = &string_list.strings[index];
882 rb_ary_push(names, rb_str_new((const char *) pm_string_source(string), pm_string_length(string)));
883 }
884
885 pm_string_list_free(&string_list);
886 return names;
887}
888
895static VALUE
896memsize(VALUE self, VALUE string) {
897 pm_parser_t parser;
898 size_t length = RSTRING_LEN(string);
899 pm_parser_init(&parser, (const uint8_t *) RSTRING_PTR(string), length, NULL);
900
901 pm_node_t *node = pm_parse(&parser);
902 pm_memsize_t memsize;
903 pm_node_memsize(node, &memsize);
904
905 pm_node_destroy(&parser, node);
906 pm_parser_free(&parser);
907
908 VALUE result = rb_hash_new();
909 rb_hash_aset(result, ID2SYM(rb_intern("length")), INT2FIX(length));
910 rb_hash_aset(result, ID2SYM(rb_intern("memsize")), INT2FIX(memsize.memsize));
911 rb_hash_aset(result, ID2SYM(rb_intern("node_count")), INT2FIX(memsize.node_count));
912 return result;
913}
914
922static VALUE
923profile_file(VALUE self, VALUE filepath) {
924 pm_string_t input;
925
926 const char *checked = check_string(filepath);
927 if (!pm_string_mapped_init(&input, checked)) return Qnil;
928
929 pm_options_t options = { 0 };
930 pm_options_filepath_set(&options, checked);
931
932 pm_parser_t parser;
933 pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), &options);
934
935 pm_node_t *node = pm_parse(&parser);
936 pm_node_destroy(&parser, node);
937 pm_parser_free(&parser);
938 pm_options_free(&options);
939 pm_string_free(&input);
940
941 return Qnil;
942}
943
951static VALUE
952inspect_node(VALUE self, VALUE source) {
953 pm_string_t input;
954 input_load_string(&input, source);
955
956 pm_parser_t parser;
957 pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), NULL);
958
959 pm_node_t *node = pm_parse(&parser);
960 pm_buffer_t buffer = { 0 };
961
962 pm_prettyprint(&buffer, &parser, node);
963
964 rb_encoding *encoding = rb_enc_find(parser.encoding->name);
965 VALUE string = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding);
966
967 pm_buffer_free(&buffer);
968 pm_node_destroy(&parser, node);
969 pm_parser_free(&parser);
970
971 return string;
972}
973
974/******************************************************************************/
975/* Initialization of the extension */
976/******************************************************************************/
977
981RUBY_FUNC_EXPORTED void
982Init_prism(void) {
983 // Make sure that the prism library version matches the expected version.
984 // Otherwise something was compiled incorrectly.
985 if (strcmp(pm_version(), EXPECTED_PRISM_VERSION) != 0) {
986 rb_raise(
988 "The prism library version (%s) does not match the expected version (%s)",
989 pm_version(),
990 EXPECTED_PRISM_VERSION
991 );
992 }
993
994 // Grab up references to all of the constants that we're going to need to
995 // reference throughout this extension.
996 rb_cPrism = rb_define_module("Prism");
997 rb_cPrismNode = rb_define_class_under(rb_cPrism, "Node", rb_cObject);
998 rb_cPrismSource = rb_define_class_under(rb_cPrism, "Source", rb_cObject);
999 rb_cPrismToken = rb_define_class_under(rb_cPrism, "Token", rb_cObject);
1000 rb_cPrismLocation = rb_define_class_under(rb_cPrism, "Location", rb_cObject);
1001 rb_cPrismComment = rb_define_class_under(rb_cPrism, "Comment", rb_cObject);
1002 rb_cPrismInlineComment = rb_define_class_under(rb_cPrism, "InlineComment", rb_cPrismComment);
1003 rb_cPrismEmbDocComment = rb_define_class_under(rb_cPrism, "EmbDocComment", rb_cPrismComment);
1004 rb_cPrismMagicComment = rb_define_class_under(rb_cPrism, "MagicComment", rb_cObject);
1005 rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
1006 rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);
1007 rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cObject);
1008
1009 // Intern all of the options that we support so that we don't have to do it
1010 // every time we parse.
1011 rb_option_id_filepath = rb_intern_const("filepath");
1012 rb_option_id_encoding = rb_intern_const("encoding");
1013 rb_option_id_line = rb_intern_const("line");
1014 rb_option_id_frozen_string_literal = rb_intern_const("frozen_string_literal");
1015 rb_option_id_verbose = rb_intern_const("verbose");
1016 rb_option_id_scopes = rb_intern_const("scopes");
1017
1021 rb_define_const(rb_cPrism, "VERSION", rb_str_new2(EXPECTED_PRISM_VERSION));
1022
1028 rb_define_const(rb_cPrism, "BACKEND", ID2SYM(rb_intern("CEXT")));
1029
1030 // First, the functions that have to do with lexing and parsing.
1031 rb_define_singleton_method(rb_cPrism, "dump", dump, -1);
1032 rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, -1);
1033 rb_define_singleton_method(rb_cPrism, "lex", lex, -1);
1034 rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, -1);
1035 rb_define_singleton_method(rb_cPrism, "parse", parse, -1);
1036 rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, -1);
1037 rb_define_singleton_method(rb_cPrism, "parse_comments", parse_comments, -1);
1038 rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, -1);
1039 rb_define_singleton_method(rb_cPrism, "parse_lex", parse_lex, -1);
1040 rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, -1);
1041 rb_define_singleton_method(rb_cPrism, "parse_success?", parse_success_p, -1);
1042 rb_define_singleton_method(rb_cPrism, "parse_file_success?", parse_file_success_p, -1);
1043
1044 // Next, the functions that will be called by the parser to perform various
1045 // internal tasks. We expose these to make them easier to test.
1046 VALUE rb_cPrismDebug = rb_define_module_under(rb_cPrism, "Debug");
1047 rb_define_singleton_method(rb_cPrismDebug, "named_captures", named_captures, 1);
1048 rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1);
1049 rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
1050 rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1);
1051
1052 // Next, initialize the other APIs.
1053 Init_prism_api_node();
1054 Init_prism_pack();
1055}
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super)
Defines a class under the namespace of outer.
Definition class.c:1002
VALUE rb_define_module(const char *name)
Defines a top-level module.
Definition class.c:1085
VALUE rb_define_module_under(VALUE outer, const char *name)
Defines a module under the namespace of outer.
Definition class.c:1109
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves argument from argc and argv to given VALUE references according to the format string.
Definition class.c:2626
#define rb_str_new2
Old name of rb_str_new_cstr.
Definition string.h:1675
#define T_STRING
Old name of RUBY_T_STRING.
Definition value_type.h:78
#define INT2FIX
Old name of RB_INT2FIX.
Definition long.h:48
#define ID2SYM
Old name of RB_ID2SYM.
Definition symbol.h:44
#define ULONG2NUM
Old name of RB_ULONG2NUM.
Definition long.h:60
#define SYM2ID
Old name of RB_SYM2ID.
Definition symbol.h:45
#define LONG2FIX
Old name of RB_INT2FIX.
Definition long.h:49
#define Qtrue
Old name of RUBY_Qtrue.
#define NUM2INT
Old name of RB_NUM2INT.
Definition int.h:44
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define T_ARRAY
Old name of RUBY_T_ARRAY.
Definition value_type.h:56
#define NIL_P
Old name of RB_NIL_P.
#define T_SYMBOL
Old name of RUBY_T_SYMBOL.
Definition value_type.h:80
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
Definition coderange.h:187
VALUE rb_eNoMemError
NoMemoryError exception.
Definition error.c:1355
VALUE rb_eTypeError
TypeError exception.
Definition error.c:1344
VALUE rb_eRuntimeError
RuntimeError exception.
Definition error.c:1342
VALUE rb_class_new_instance(int argc, const VALUE *argv, VALUE klass)
Allocates, then initialises an instance of the given class.
Definition object.c:2099
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
Definition object.c:215
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
Definition string.c:962
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
Definition string.h:1498
VALUE rb_ivar_get(VALUE obj, ID name)
Identical to rb_iv_get(), except it accepts the name as an ID instead of a C string.
Definition variable.c:1340
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
Definition symbol.h:276
void rb_define_const(VALUE klass, const char *name, VALUE val)
Defines a Ruby level constant under a namespace.
Definition variable.c:3690
VALUE type(ANYARGS)
ANYARGS-ed function type.
void rb_hash_foreach(VALUE q, int_type *w, VALUE e)
Iteration over the given hash.
#define PM_ENCODING_UTF_8_ENTRY
This is the default UTF-8 encoding.
Definition encoding.h:229
#define RARRAY_LEN
Just another name of rb_array_len
Definition rarray.h:51
We need a struct here to pass through rb_protect and it has to be a single value.
Definition extension.c:148
This struct gets stored in the parser and passed in to the lex callback any time a new token is found...
Definition extension.c:442
A pm_buffer_t is a simple memory buffer that stores data in a contiguous block of memory.
Definition pm_buffer.h:21
This is a node in the linked list of comments that we've found while parsing.
Definition parser.h:387
This struct represents a diagnostic generated during parsing.
Definition diagnostic.h:22
pm_location_t location
The location of the diagnostic in the source.
Definition diagnostic.h:27
const char * message
The message associated with the diagnostic.
Definition diagnostic.h:30
pm_list_node_t node
The embedded base node.
Definition diagnostic.h:24
const char * name
The name of the encoding.
Definition encoding.h:56
When you are lexing through a file, the lexer needs all of the information that the parser additional...
Definition parser.h:435
void * data
This opaque pointer is used to provide whatever information the user deemed necessary to the callback...
Definition parser.h:441
struct pm_list_node * next
A pointer to the next node in the list.
Definition pm_list.h:48
pm_list_node_t * head
A pointer to the head of the list.
Definition pm_list.h:60
size_t size
The size of the list.
Definition pm_list.h:57
const uint8_t * start
A pointer to the start location of the range in the source.
Definition ast.h:545
const uint8_t * end
A pointer to the end location of the range in the source.
Definition ast.h:548
This is a node in the linked list of magic comments that we've found while parsing.
Definition parser.h:404
This struct stores the information gathered by the pm_node_memsize function.
Definition node.h:33
size_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
This is the base structure that represents a node in the syntax tree.
Definition ast.h:1061
A scope of locals surrounding the code that is being parsed.
Definition options.h:19
pm_string_t * locals
The names of the locals in the scope.
Definition options.h:24
The options that can be passed to the parser.
Definition options.h:30
pm_options_scope_t * scopes
The scopes surrounding the code that is being parsed.
Definition options.h:56
int32_t line
The line within the file that the parse starts on.
Definition options.h:38
pm_string_t filepath
The name of the file that is currently being parsed.
Definition options.h:32
This struct represents the overall parser.
Definition parser.h:489
pm_lex_state_t lex_state
The current state of the lexer.
Definition parser.h:491
const pm_encoding_t * encoding
The encoding functions for the current file is attached to the parser as it's parsing so that it can ...
Definition parser.h:584
pm_list_t magic_comment_list
The list of magic comments that have been found while parsing.
Definition parser.h:563
pm_lex_callback_t * lex_callback
This is an optional callback that can be attached to the parser that will be called whenever a new to...
Definition parser.h:603
pm_location_t data_loc
The optional location of the END keyword and its contents.
Definition parser.h:566
const uint8_t * start
The pointer to the start of the source.
Definition parser.h:533
pm_list_t error_list
The list of errors that have been found while parsing.
Definition parser.h:572
pm_list_t warning_list
The list of warnings that have been found while parsing.
Definition parser.h:569
int32_t start_line
The line number at the start of the parse.
Definition parser.h:638
pm_list_t comment_list
The list of comments that have been found while parsing.
Definition parser.h:560
pm_newline_list_t newline_list
This is the list of newline offsets in the source file.
Definition parser.h:618
A list of strings.
pm_string_t * strings
A pointer to the start of the string list.
size_t length
The length of the string list.
A generic string type that can have various ownership semantics.
Definition pm_string.h:30
This struct represents a token in the Ruby source.
Definition ast.h:528
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
Definition value.h:52
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40