1#include "prism/extension.h"
10VALUE rb_cPrismLocation;
12VALUE rb_cPrismComment;
13VALUE rb_cPrismInlineComment;
14VALUE rb_cPrismEmbDocComment;
15VALUE rb_cPrismMagicComment;
16VALUE rb_cPrismParseError;
17VALUE rb_cPrismParseWarning;
18VALUE rb_cPrismParseResult;
20ID rb_option_id_filepath;
21ID rb_option_id_encoding;
23ID rb_option_id_frozen_string_literal;
24ID rb_option_id_verbose;
25ID rb_option_id_scopes;
37check_string(
VALUE value) {
49 return RSTRING_PTR(value);
62 pm_string_constant_init(input, RSTRING_PTR(
string), RSTRING_LEN(
string));
75 if (!RB_TYPE_P(scopes,
T_ARRAY)) {
81 pm_options_scopes_init(options, scopes_count);
84 for (
size_t scope_index = 0; scope_index < scopes_count; scope_index++) {
85 VALUE scope = rb_ary_entry(scopes, scope_index);
89 if (!RB_TYPE_P(scope,
T_ARRAY)) {
96 pm_options_scope_init(options_scope, locals_count);
99 for (
size_t local_index = 0; local_index < locals_count; local_index++) {
100 VALUE local = rb_ary_entry(scope, local_index);
110 const char *name = rb_id2name(
SYM2ID(local));
111 pm_string_constant_init(scope_local, name, strlen(name));
124 if (key_id == rb_option_id_filepath) {
125 if (!
NIL_P(value)) pm_options_filepath_set(options, check_string(value));
126 }
else if (key_id == rb_option_id_encoding) {
127 if (!
NIL_P(value)) pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value)));
128 }
else if (key_id == rb_option_id_line) {
129 if (!
NIL_P(value)) pm_options_line_set(options,
NUM2INT(value));
130 }
else if (key_id == rb_option_id_frozen_string_literal) {
131 if (!
NIL_P(value)) pm_options_frozen_string_literal_set(options, value ==
Qtrue);
132 }
else if (key_id == rb_option_id_verbose) {
133 pm_options_suppress_warnings_set(options, value !=
Qtrue);
134 }
else if (key_id == rb_option_id_scopes) {
135 if (!
NIL_P(value)) build_options_scopes(options, value);
137 rb_raise(rb_eArgError,
"unknown keyword: %"PRIsVALUE, key);
158build_options(
VALUE argument) {
170 if (!
NIL_P(keywords)) {
175 rb_protect(build_options, (
VALUE) argument, &state);
178 pm_options_free(options);
183 if (!
NIL_P(filepath)) {
184 if (!RB_TYPE_P(filepath,
T_STRING)) {
185 pm_options_free(options);
189 pm_options_filepath_set(options, RSTRING_PTR(filepath));
202 extract_options(options,
Qnil, keywords);
203 input_load_string(input,
string);
215 extract_options(options, filepath, keywords);
217 if (!pm_string_mapped_init(input, (
const char *) pm_string_source(&options->
filepath))) {
218 pm_options_free(options);
235 if (!pm_buffer_init(&buffer)) {
240 pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
243 pm_serialize(&parser, node, &buffer);
245 VALUE result =
rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer));
246 pm_node_destroy(&parser, node);
247 pm_buffer_free(&buffer);
248 pm_parser_free(&parser);
264 string_options(argc, argv, &input, &options);
266#ifdef PRISM_DEBUG_MODE_BUILD
267 size_t length = pm_string_length(&input);
268 char* dup = malloc(length);
269 memcpy(dup, pm_string_source(&input), length);
270 pm_string_constant_init(&input, dup, length);
273 VALUE value = dump_input(&input, &options);
275#ifdef PRISM_DEBUG_MODE_BUILD
279 pm_string_free(&input);
280 pm_options_free(&options);
296 if (!file_options(argc, argv, &input, &options))
return Qnil;
298 VALUE value = dump_input(&input, &options);
299 pm_string_free(&input);
300 pm_options_free(&options);
314 VALUE comments = rb_ary_new();
317 VALUE location_argv[] = {
320 LONG2FIX(comment->location.end - comment->location.start)
323 VALUE type = (comment->type == PM_COMMENT_EMBDOC) ? rb_cPrismEmbDocComment : rb_cPrismInlineComment;
336 VALUE magic_comments = rb_ary_new();
339 VALUE key_loc_argv[] = {
345 VALUE value_loc_argv[] = {
351 VALUE magic_comment_argv[] = {
359 return magic_comments;
386 VALUE errors = rb_ary_new();
390 VALUE location_argv[] = {
396 VALUE error_argv[] = {
412 VALUE warnings = rb_ary_new();
416 VALUE location_argv[] = {
422 VALUE warning_argv[] = {
457 VALUE yields = rb_ary_new_capa(2);
458 rb_ary_push(yields, pm_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source));
461 rb_ary_push(parse_lex_data->tokens, yields);
470parse_lex_encoding_changed_callback(
pm_parser_t *parser) {
472 parse_lex_data->encoding = rb_enc_find(parser->
encoding->
name);
478 VALUE tokens = parse_lex_data->tokens;
479 for (
long index = 0; index <
RARRAY_LEN(tokens); index++) {
480 VALUE yields = rb_ary_entry(tokens, index);
481 VALUE token = rb_ary_entry(yields, 0);
484 rb_enc_associate(value, parse_lex_data->encoding);
496 pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
497 pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
499 VALUE offsets = rb_ary_new();
505 .tokens = rb_ary_new(),
506 .encoding = rb_utf8_encoding()
511 .
data = (
void *) data,
512 .callback = parse_lex_token,
527 value = rb_ary_new_capa(2);
528 rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding));
529 rb_ary_push(value, parse_lex_data.tokens);
531 value = parse_lex_data.tokens;
534 VALUE result_argv[] = {
536 parser_comments(&parser, source),
537 parser_magic_comments(&parser, source),
538 parser_data_loc(&parser, source),
539 parser_errors(&parser, parse_lex_data.encoding, source),
540 parser_warnings(&parser, parse_lex_data.encoding, source),
544 pm_node_destroy(&parser, node);
545 pm_parser_free(&parser);
560 string_options(argc, argv, &input, &options);
562 VALUE result = parse_lex_input(&input, &options,
false);
563 pm_string_free(&input);
564 pm_options_free(&options);
580 if (!file_options(argc, argv, &input, &options))
return Qnil;
582 VALUE value = parse_lex_input(&input, &options,
false);
583 pm_string_free(&input);
584 pm_options_free(&options);
599 pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
604 VALUE source = pm_source_new(&parser, encoding);
605 VALUE result_argv[] = {
606 pm_ast_new(&parser, node, encoding),
607 parser_comments(&parser, source),
608 parser_magic_comments(&parser, source),
609 parser_data_loc(&parser, source),
610 parser_errors(&parser, encoding, source),
611 parser_warnings(&parser, encoding, source),
617 pm_node_destroy(&parser, node);
618 pm_parser_free(&parser);
647 string_options(argc, argv, &input, &options);
649#ifdef PRISM_DEBUG_MODE_BUILD
650 size_t length = pm_string_length(&input);
651 char* dup = malloc(length);
652 memcpy(dup, pm_string_source(&input), length);
653 pm_string_constant_init(&input, dup, length);
656 VALUE value = parse_input(&input, &options);
658#ifdef PRISM_DEBUG_MODE_BUILD
662 pm_string_free(&input);
663 pm_options_free(&options);
675parse_file(
int argc,
VALUE *argv,
VALUE self) {
678 if (!file_options(argc, argv, &input, &options))
return Qnil;
680 VALUE value = parse_input(&input, &options);
681 pm_string_free(&input);
682 pm_options_free(&options);
693 pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
698 VALUE source = pm_source_new(&parser, encoding);
699 VALUE comments = parser_comments(&parser, source);
701 pm_node_destroy(&parser, node);
702 pm_parser_free(&parser);
715parse_comments(
int argc,
VALUE *argv,
VALUE self) {
718 string_options(argc, argv, &input, &options);
720 VALUE result = parse_input_comments(&input, &options);
721 pm_string_free(&input);
722 pm_options_free(&options);
735parse_file_comments(
int argc,
VALUE *argv,
VALUE self) {
738 if (!file_options(argc, argv, &input, &options))
return Qnil;
740 VALUE value = parse_input_comments(&input, &options);
741 pm_string_free(&input);
742 pm_options_free(&options);
765 string_options(argc, argv, &input, &options);
767 VALUE value = parse_lex_input(&input, &options,
true);
768 pm_string_free(&input);
769 pm_options_free(&options);
789parse_lex_file(
int argc,
VALUE *argv,
VALUE self) {
792 if (!file_options(argc, argv, &input, &options))
return Qnil;
794 VALUE value = parse_lex_input(&input, &options,
true);
795 pm_string_free(&input);
796 pm_options_free(&options);
807 pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
810 pm_node_destroy(&parser, node);
813 pm_parser_free(&parser);
826parse_success_p(
int argc,
VALUE *argv,
VALUE self) {
829 string_options(argc, argv, &input, &options);
831 VALUE result = parse_input_success_p(&input, &options);
832 pm_string_free(&input);
833 pm_options_free(&options);
846parse_file_success_p(
int argc,
VALUE *argv,
VALUE self) {
849 if (!file_options(argc, argv, &input, &options))
return Qnil;
851 VALUE result = parse_input_success_p(&input, &options);
852 pm_string_free(&input);
853 pm_options_free(&options);
874 if (!pm_regexp_named_capture_group_names((
const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), &string_list,
false,
PM_ENCODING_UTF_8_ENTRY)) {
875 pm_string_list_free(&string_list);
879 VALUE names = rb_ary_new();
880 for (
size_t index = 0; index < string_list.
length; index++) {
882 rb_ary_push(names,
rb_str_new((
const char *) pm_string_source(
string), pm_string_length(
string)));
885 pm_string_list_free(&string_list);
898 size_t length = RSTRING_LEN(
string);
899 pm_parser_init(&parser, (
const uint8_t *) RSTRING_PTR(
string), length, NULL);
903 pm_node_memsize(node, &memsize);
905 pm_node_destroy(&parser, node);
906 pm_parser_free(&parser);
908 VALUE result = rb_hash_new();
909 rb_hash_aset(result,
ID2SYM(rb_intern(
"length")),
INT2FIX(length));
910 rb_hash_aset(result,
ID2SYM(rb_intern(
"memsize")),
INT2FIX(memsize.memsize));
911 rb_hash_aset(result,
ID2SYM(rb_intern(
"node_count")),
INT2FIX(memsize.node_count));
926 const char *checked = check_string(filepath);
927 if (!pm_string_mapped_init(&input, checked))
return Qnil;
930 pm_options_filepath_set(&options, checked);
933 pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), &options);
936 pm_node_destroy(&parser, node);
937 pm_parser_free(&parser);
938 pm_options_free(&options);
939 pm_string_free(&input);
954 input_load_string(&input, source);
957 pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), NULL);
962 pm_prettyprint(&buffer, &parser, node);
965 VALUE string = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding);
967 pm_buffer_free(&buffer);
968 pm_node_destroy(&parser, node);
969 pm_parser_free(&parser);
981RUBY_FUNC_EXPORTED
void
985 if (strcmp(pm_version(), EXPECTED_PRISM_VERSION) != 0) {
988 "The prism library version (%s) does not match the expected version (%s)",
990 EXPECTED_PRISM_VERSION
1014 rb_option_id_frozen_string_literal =
rb_intern_const(
"frozen_string_literal");
1053 Init_prism_api_node();
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super)
Defines a class under the namespace of outer.
VALUE rb_define_module(const char *name)
Defines a top-level module.
VALUE rb_define_module_under(VALUE outer, const char *name)
Defines a module under the namespace of outer.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves argument from argc and argv to given VALUE references according to the format string.
#define rb_str_new2
Old name of rb_str_new_cstr.
#define T_STRING
Old name of RUBY_T_STRING.
#define INT2FIX
Old name of RB_INT2FIX.
#define ID2SYM
Old name of RB_ID2SYM.
#define ULONG2NUM
Old name of RB_ULONG2NUM.
#define SYM2ID
Old name of RB_SYM2ID.
#define LONG2FIX
Old name of RB_INT2FIX.
#define Qtrue
Old name of RUBY_Qtrue.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define T_ARRAY
Old name of RUBY_T_ARRAY.
#define NIL_P
Old name of RB_NIL_P.
#define T_SYMBOL
Old name of RUBY_T_SYMBOL.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
VALUE rb_eNoMemError
NoMemoryError exception.
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_class_new_instance(int argc, const VALUE *argv, VALUE klass)
Allocates, then initialises an instance of the given class.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
VALUE rb_ivar_get(VALUE obj, ID name)
Identical to rb_iv_get(), except it accepts the name as an ID instead of a C string.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
void rb_define_const(VALUE klass, const char *name, VALUE val)
Defines a Ruby level constant under a namespace.
VALUE type(ANYARGS)
ANYARGS-ed function type.
void rb_hash_foreach(VALUE q, int_type *w, VALUE e)
Iteration over the given hash.
#define PM_ENCODING_UTF_8_ENTRY
This is the default UTF-8 encoding.
#define RARRAY_LEN
Just another name for rb_array_len.
We need a struct here to pass through rb_protect and it has to be a single value.
This struct gets stored in the parser and passed in to the lex callback any time a new token is found...
A pm_buffer_t is a simple memory buffer that stores data in a contiguous block of memory.
This struct represents a diagnostic generated during parsing.
pm_location_t location
The location of the diagnostic in the source.
const char * message
The message associated with the diagnostic.
pm_list_node_t node
The embedded base node.
const char * name
The name of the encoding.
When you are lexing through a file, the lexer needs all of the information that the parser additional...
void * data
This opaque pointer is used to provide whatever information the user deemed necessary to the callback...
struct pm_list_node * next
A pointer to the next node in the list.
pm_list_node_t * head
A pointer to the head of the list.
size_t size
The size of the list.
const uint8_t * start
A pointer to the start location of the range in the source.
const uint8_t * end
A pointer to the end location of the range in the source.
This struct stores the information gathered by the pm_node_memsize function.
size_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
This is the base structure that represents a node in the syntax tree.
A scope of locals surrounding the code that is being parsed.
pm_string_t * locals
The names of the locals in the scope.
The options that can be passed to the parser.
pm_options_scope_t * scopes
The scopes surrounding the code that is being parsed.
int32_t line
The line within the file that the parse starts on.
pm_string_t filepath
The name of the file that is currently being parsed.
This struct represents the overall parser.
pm_lex_state_t lex_state
The current state of the lexer.
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
pm_list_t magic_comment_list
The list of magic comments that have been found while parsing.
pm_lex_callback_t * lex_callback
This is an optional callback that can be attached to the parser that will be called whenever a new to...
pm_location_t data_loc
The optional location of the __END__ keyword and its contents.
const uint8_t * start
The pointer to the start of the source.
pm_list_t error_list
The list of errors that have been found while parsing.
pm_list_t warning_list
The list of warnings that have been found while parsing.
int32_t start_line
The line number at the start of the parse.
pm_list_t comment_list
The list of comments that have been found while parsing.
pm_newline_list_t newline_list
This is the list of newline offsets in the source file.
pm_string_t * strings
A pointer to the start of the string list.
size_t length
The length of the string list.
A generic string type that can have various ownership semantics.
This struct represents a token in the Ruby source.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.