14#include "ruby/internal/config.h"
24#include "debug_counter.h"
28#include "internal/array.h"
29#include "internal/compar.h"
30#include "internal/compilers.h"
31#include "internal/encoding.h"
32#include "internal/error.h"
33#include "internal/gc.h"
34#include "internal/numeric.h"
35#include "internal/object.h"
36#include "internal/proc.h"
37#include "internal/re.h"
38#include "internal/sanitizers.h"
39#include "internal/string.h"
40#include "internal/transcode.h"
45#include "ruby_assert.h"
48#if defined HAVE_CRYPT_R
49# if defined HAVE_CRYPT_H
52#elif !defined HAVE_CRYPT
53# include "missing/crypt.h"
54# define HAVE_CRYPT_R 1
/* Shorthand accessors: BEG(no)/END(no) expand to element `no` of the `beg` /
 * `end` array reached through a pointer named `regs`, so an identifier `regs`
 * must be in scope at every use site.
 * NOTE(review): presumably these are match-register start/end offsets --
 * confirm against the callers. */
57#define BEG(no) (regs->beg[(no)])
58#define END(no) (regs->end[(no)])
61#undef rb_usascii_str_new
65#undef rb_usascii_str_new_cstr
66#undef rb_utf8_str_new_cstr
67#undef rb_enc_str_new_cstr
68#undef rb_external_str_new_cstr
69#undef rb_locale_str_new_cstr
70#undef rb_str_dup_frozen
71#undef rb_str_buf_new_cstr
/* NOTE(review): presumably an upper bound on the byte length of a single
 * character; no use is visible in this part of the file -- confirm. */
101#define RUBY_MAX_CHAR_LEN 16
/* String-private names for generic FL_USERn flag bits. */
/* Set by STR_SET_SHARED() on the string that another string's
 * as.heap.aux.shared is made to point at. */
102#define STR_SHARED_ROOT FL_USER5
/* Set by STR_SET_SHARED() on the shared root when RBASIC_CLASS(root) == 0. */
103#define STR_BORROWED FL_USER6
/* Tested by rb_check_lockedtmp(). */
104#define STR_TMPLOCK FL_USER7
/* Set by str_new_static(); code that releases a heap buffer (e.g.
 * str_discard()) skips strings carrying this flag. */
105#define STR_NOFREE FL_USER18
/* STR_SET_SHARED() does nothing for a string carrying this flag. */
106#define STR_FAKESTR FL_USER19
108#define STR_SET_NOEMBED(str) do {\
109 FL_SET((str), STR_NOEMBED);\
110 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
/* Clear both STR_NOEMBED and STR_NOFREE on str. Only flag bits are touched;
 * the length and the buffer contents are left to the caller. */
112#define STR_SET_EMBED(str) FL_UNSET((str), (STR_NOEMBED|STR_NOFREE))
114#define STR_SET_LEN(str, n) do { \
115 RSTRING(str)->len = (n); \
119str_enc_fastpath(
VALUE str)
123 case ENCINDEX_ASCII_8BIT:
125 case ENCINDEX_US_ASCII:
/* Terminator length for str: 1 when str_enc_fastpath(str) is true, otherwise
 * rb_enc_mbminlen() of the encoding looked up from ENCODING_GET(str).
 * The argument appears twice in the expansion, so it must be free of side
 * effects. */
132#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
133#define TERM_FILL(ptr, termlen) do {\
134 char *const term_fill_ptr = (ptr);\
135 const int term_fill_len = (termlen);\
136 *term_fill_ptr = '\0';\
137 if (UNLIKELY(term_fill_len > 1))\
138 memset(term_fill_ptr, 0, term_fill_len);\
141#define RESIZE_CAPA(str,capacity) do {\
142 const int termlen = TERM_LEN(str);\
143 RESIZE_CAPA_TERM(str,capacity,termlen);\
145#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
146 if (STR_EMBED_P(str)) {\
147 if (str_embed_capa(str) < capacity + termlen) {\
148 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
149 const long tlen = RSTRING_LEN(str);\
150 memcpy(tmp, RSTRING_PTR(str), tlen);\
151 RSTRING(str)->as.heap.ptr = tmp;\
152 RSTRING(str)->len = tlen;\
153 STR_SET_NOEMBED(str);\
154 RSTRING(str)->as.heap.aux.capa = (capacity);\
158 assert(!FL_TEST((str), STR_SHARED)); \
159 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
160 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
161 RSTRING(str)->as.heap.aux.capa = (capacity);\
165#define STR_SET_SHARED(str, shared_str) do { \
166 if (!FL_TEST(str, STR_FAKESTR)) { \
167 assert(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
168 assert(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
169 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
170 FL_SET((str), STR_SHARED); \
171 FL_SET((shared_str), STR_SHARED_ROOT); \
172 if (RBASIC_CLASS((shared_str)) == 0) \
173 FL_SET_RAW((shared_str), STR_BORROWED); \
/* Raw as.heap.ptr field of str.
 * NOTE(review): presumably only meaningful while the string is not embedded
 * (as.embed is the alternative layout) -- confirm. */
177#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
/* Byte size used when freeing/reallocating the heap buffer: as.heap.aux.capa
 * plus TERM_LEN(str), i.e. aux.capa itself excludes the terminator. */
178#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
/* Thin alias for get_encoding(str). */
181#define STR_ENC_GET(str) get_encoding(str)
/* Default SHARABLE_MIDDLE_SUBSTRING to 0 when the build has not defined it. */
183#if !defined SHARABLE_MIDDLE_SUBSTRING
184# define SHARABLE_MIDDLE_SUBSTRING 0
186#if !SHARABLE_MIDDLE_SUBSTRING
/* A substring is sharable only when it runs to the end: beg + len == end. */
187#define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
/* Variant that treats every substring as sharable; arguments are ignored. */
189#define SHARABLE_SUBSTRING_P(beg, len, end) 1
194str_embed_capa(
VALUE str)
196 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.ary);
200rb_str_reembeddable_p(
VALUE str)
202 return !
FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
206rb_str_embed_size(
long capa)
212rb_str_size_as_embedded(
VALUE str)
215 if (STR_EMBED_P(str)) {
216 real_size = rb_str_embed_size(
RSTRING(str)->
len) + TERM_LEN(str);
220 else if (rb_str_reembeddable_p(str)) {
221 real_size = rb_str_embed_size(
RSTRING(str)->as.heap.aux.capa) + TERM_LEN(str);
224 real_size =
sizeof(
struct RString);
230STR_EMBEDDABLE_P(
long len,
long termlen)
232 return rb_gc_size_allocatable_p(rb_str_embed_size(
len + termlen));
/* Forward declarations for static helpers defined later in this file. */
/* copy_encoding selects whether the terminator length follows orig's encoding
 * (otherwise 1) and whether encoding/coderange are copied from orig. */
237static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
/* Allocates a heap string, marks it STR_NOFREE and associates the encoding
 * index encindex. */
238static VALUE str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex);
/* str_make_independent() calls this with expand == 0 and
 * termlen == TERM_LEN(str). */
240static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
/* Runs rb_check_lockedtmp(str) before a modification. */
241static inline void str_modifiable(
VALUE str);
245str_make_independent(
VALUE str)
247 long len = RSTRING_LEN(str);
248 int termlen = TERM_LEN(str);
249 str_make_independent_expand((str),
len, 0L, termlen);
/* Forward declaration: rb_str_make_independent() just below calls this before
 * its definition further down in the file. */
252static inline int str_dependent_p(
VALUE str);
255rb_str_make_independent(
VALUE str)
257 if (str_dependent_p(str)) {
258 str_make_independent(str);
263rb_str_make_embedded(
VALUE str)
268 char *buf =
RSTRING(str)->as.heap.ptr;
272 STR_SET_LEN(str,
len);
275 memcpy(RSTRING_PTR(str), buf,
len);
279 TERM_FILL(
RSTRING(str)->
as.embed.ary +
len, TERM_LEN(str));
283rb_debug_rstring_null_ptr(
const char *func)
285 fprintf(stderr,
"%s is returning NULL!! "
286 "SIGSEGV is highly expected to follow immediately.\n"
287 "If you could reproduce, attach your debugger here, "
288 "and look at the passed string.\n",
293static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
296get_encoding(
VALUE str)
302mustnot_broken(
VALUE str)
304 if (is_broken_string(str)) {
305 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str)));
310mustnot_wchar(
VALUE str)
314 rb_raise(rb_eArgError,
"wide char encoding: %s", rb_enc_name(enc));
/* Forward declaration; the definition follows later in this file. */
320static VALUE register_fstring(
VALUE str,
bool copy);
/* True when str has no FL_EXIVAR flag and its class is exactly rb_cString.
 * The argument appears twice in the expansion. */
327#define BARE_STRING_P(str) (!FL_ANY_RAW(str, FL_EXIVAR) && RBASIC_CLASS(str) == rb_cString)
335fstr_update_callback(st_data_t *key, st_data_t *value, st_data_t data,
int existing)
345 if (rb_objspace_garbage_object_p(str)) {
357 rb_enc_copy(new_str, str);
370 if (STR_SHARED_P(str)) {
372 str_make_independent(str);
375 if (!BARE_STRING_P(str)) {
379 RBASIC(str)->flags |= RSTRING_FSTR;
381 *key = *value = arg->fstr = str;
395 if (
FL_TEST(str, RSTRING_FSTR))
398 bare = BARE_STRING_P(str);
400 if (STR_EMBED_P(str)) {
405 if (
FL_TEST_RAW(str, STR_SHARED_ROOT | STR_SHARED) == STR_SHARED_ROOT) {
412 rb_str_resize(str, RSTRING_LEN(str));
414 fstr = register_fstring(str, FALSE);
417 str_replace_shared_without_enc(str, fstr);
425register_fstring(
VALUE str,
bool copy)
432 st_table *frozen_strings = rb_vm_fstring_table();
435 st_update(frozen_strings, (st_data_t)str, fstr_update_callback, (st_data_t)&args);
436 }
while (UNDEF_P(args.fstr));
448setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
464 return (
VALUE)fake_str;
473 return setup_fake_str(fake_str, name,
len, rb_enc_to_index(enc));
482rb_fstring_new(
const char *ptr,
long len)
485 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII), FALSE);
492 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc), FALSE);
496rb_fstring_cstr(
const char *
ptr)
498 return rb_fstring_new(
ptr, strlen(
ptr));
502fstring_set_class_i(st_data_t key, st_data_t val, st_data_t arg)
512 const char *aptr, *bptr;
515 return (alen != blen ||
517 memcmp(aptr, bptr, alen) != 0);
521single_byte_optimizable(
VALUE str)
529 enc = STR_ENC_GET(str);
540static inline const char *
541search_nonascii(
const char *p,
const char *e)
543 const uintptr_t *s, *t;
545#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
546# if SIZEOF_UINTPTR_T == 8
547# define NONASCII_MASK UINT64_C(0x8080808080808080)
548# elif SIZEOF_UINTPTR_T == 4
549# define NONASCII_MASK UINT32_C(0x80808080)
551# error "don't know what to do."
554# if SIZEOF_UINTPTR_T == 8
555# define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
556# elif SIZEOF_UINTPTR_T == 4
557# define NONASCII_MASK 0x80808080UL
559# error "don't know what to do."
563 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
564#if !UNALIGNED_WORD_ACCESS
565 if ((uintptr_t)p % SIZEOF_VOIDP) {
566 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
571 case 7:
if (p[-7]&0x80)
return p-7;
572 case 6:
if (p[-6]&0x80)
return p-6;
573 case 5:
if (p[-5]&0x80)
return p-5;
574 case 4:
if (p[-4]&0x80)
return p-4;
576 case 3:
if (p[-3]&0x80)
return p-3;
577 case 2:
if (p[-2]&0x80)
return p-2;
578 case 1:
if (p[-1]&0x80)
return p-1;
583#if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
584#define aligned_ptr(value) \
585 __builtin_assume_aligned((value), sizeof(uintptr_t))
587#define aligned_ptr(value) (uintptr_t *)(value)
590 t = (uintptr_t *)(e - (SIZEOF_VOIDP-1));
593 if (*s & NONASCII_MASK) {
594#ifdef WORDS_BIGENDIAN
595 return (
const char *)s + (nlz_intptr(*s&NONASCII_MASK)>>3);
597 return (
const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3);
607 case 7:
if (e[-7]&0x80)
return e-7;
608 case 6:
if (e[-6]&0x80)
return e-6;
609 case 5:
if (e[-5]&0x80)
return e-5;
610 case 4:
if (e[-4]&0x80)
return e-4;
612 case 3:
if (e[-3]&0x80)
return e-3;
613 case 2:
if (e[-2]&0x80)
return e-2;
614 case 1:
if (e[-1]&0x80)
return e-1;
622 const char *e = p +
len;
624 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
626 p = search_nonascii(p, e);
630 if (rb_enc_asciicompat(enc)) {
631 p = search_nonascii(p, e);
634 int ret = rb_enc_precise_mbclen(p, e, enc);
638 p = search_nonascii(p, e);
644 int ret = rb_enc_precise_mbclen(p, e, enc);
660 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
663 p = search_nonascii(p, e);
667 else if (rb_enc_asciicompat(enc)) {
668 p = search_nonascii(p, e);
674 int ret = rb_enc_precise_mbclen(p, e, enc);
681 p = search_nonascii(p, e);
687 int ret = rb_enc_precise_mbclen(p, e, enc);
712 rb_enc_set_index(str1, rb_enc_get_index(str2));
720rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
725 str_enc_copy(dest, src);
726 if (RSTRING_LEN(dest) == 0) {
727 if (!rb_enc_asciicompat(STR_ENC_GET(src)))
738 if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
739 search_nonascii(RSTRING_PTR(dest), RSTRING_END(dest)))
750rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
752 str_enc_copy(dest, src);
759 return coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
765 return enc_coderange_scan(str, enc);
774 cr = enc_coderange_scan(str, get_encoding(str));
785 if (!rb_enc_asciicompat(enc))
787 else if (is_ascii_string(str))
793str_mod_check(
VALUE s,
const char *p,
long len)
795 if (RSTRING_PTR(s) != p || RSTRING_LEN(s) !=
len){
801str_capacity(
VALUE str,
const int termlen)
803 if (STR_EMBED_P(str)) {
804 return str_embed_capa(str) - termlen;
806 else if (
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
810 return RSTRING(str)->as.heap.aux.capa;
817 return str_capacity(str, TERM_LEN(str));
821must_not_null(
const char *
ptr)
824 rb_raise(rb_eArgError,
"NULL pointer given");
831 size_t size = rb_str_embed_size(
capa);
833 assert(rb_gc_size_allocatable_p(size));
842str_alloc_heap(
VALUE klass)
851empty_str_alloc(
VALUE klass)
853 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
854 VALUE str = str_alloc_embed(klass, 0);
855 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
860str_new0(
VALUE klass,
const char *
ptr,
long len,
int termlen)
865 rb_raise(rb_eArgError,
"negative string size (or size too big)");
868 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
870 if (STR_EMBEDDABLE_P(
len, termlen)) {
871 str = str_alloc_embed(klass,
len + termlen);
877 str = str_alloc_heap(klass);
883 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
886 memcpy(RSTRING_PTR(str),
ptr,
len);
888 STR_SET_LEN(str,
len);
889 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
896 return str_new0(klass,
ptr,
len, 1);
917 rb_enc_associate_index(str, rb_utf8_encindex());
929 rb_enc_associate(str, enc);
941 __msan_unpoison_string(
ptr);
957 rb_enc_associate_index(str, rb_utf8_encindex());
966 rb_raise(rb_eArgError,
"wchar encoding given");
968 return rb_enc_str_new(
ptr, strlen(
ptr), enc);
972str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex)
977 rb_raise(rb_eArgError,
"negative string size (or size too big)");
981 rb_encoding *enc = rb_enc_get_from_index(encindex);
985 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
986 str = str_alloc_heap(klass);
990 RBASIC(str)->flags |= STR_NOFREE;
992 rb_enc_associate_index(str, encindex);
1020static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1022 int ecflags,
VALUE ecopts);
1027 int encidx = rb_enc_to_index(enc);
1028 if (rb_enc_get_index(str) == encidx)
1029 return is_ascii_string(str);
1040 if (!to)
return str;
1041 if (!from) from = rb_enc_get(str);
1042 if (from == to)
return str;
1043 if ((rb_enc_asciicompat(to) && is_enc_ascii_string(str, from)) ||
1044 rb_is_ascii8bit_enc(to)) {
1045 if (STR_ENC_GET(str) != to) {
1046 str = rb_str_dup(str);
1047 rb_enc_associate(str, to);
1053 newstr = str_cat_conv_enc_opts(rb_str_buf_new(
len), 0,
ptr,
len,
1054 from, to, ecflags, ecopts);
1055 if (
NIL_P(newstr)) {
1063rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1068 olen = RSTRING_LEN(newstr);
1069 if (ofs < -olen || olen < ofs)
1071 if (ofs < 0) ofs += olen;
1073 STR_SET_LEN(newstr, ofs);
1074 return rb_str_cat(newstr,
ptr,
len);
1077 rb_str_modify(newstr);
1078 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1086 STR_SET_LEN(str, 0);
1087 rb_enc_associate(str, enc);
1088 rb_str_cat(str,
ptr,
len);
1093str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1095 int ecflags,
VALUE ecopts)
1100 VALUE econv_wrapper;
1101 const unsigned char *start, *sp;
1102 unsigned char *dest, *dp;
1103 size_t converted_output = (size_t)ofs;
1108 RBASIC_CLEAR_CLASS(econv_wrapper);
1110 if (!ec)
return Qnil;
1113 sp = (
unsigned char*)
ptr;
1115 while ((dest = (
unsigned char*)RSTRING_PTR(newstr)),
1116 (dp = dest + converted_output),
1120 size_t converted_input = sp - start;
1121 size_t rest =
len - converted_input;
1122 converted_output = dp - dest;
1123 rb_str_set_len(newstr, converted_output);
1124 if (converted_input && converted_output &&
1125 rest < (LONG_MAX / converted_output)) {
1126 rest = (rest * converted_output) / converted_input;
1131 olen += rest < 2 ? 2 : rest;
1132 rb_str_resize(newstr, olen);
1138 len = dp - (
unsigned char*)RSTRING_PTR(newstr);
1139 rb_str_set_len(newstr,
len);
1140 rb_enc_associate(newstr, to);
1159 const int eidx = rb_enc_to_index(eenc);
1162 return rb_enc_str_new(
ptr,
len, eenc);
1166 if ((eidx == rb_ascii8bit_encindex()) ||
1167 (eidx == rb_usascii_encindex() && search_nonascii(
ptr,
ptr +
len))) {
1171 ienc = rb_default_internal_encoding();
1172 if (!ienc || eenc == ienc) {
1173 return rb_enc_str_new(
ptr,
len, eenc);
1177 if ((eidx == rb_ascii8bit_encindex()) ||
1178 (eidx == rb_usascii_encindex()) ||
1179 (rb_enc_asciicompat(eenc) && !search_nonascii(
ptr,
ptr +
len))) {
1180 return rb_enc_str_new(
ptr,
len, ienc);
1183 str = rb_enc_str_new(NULL, 0, ienc);
1186 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0,
ptr,
len, eenc, 0,
Qnil))) {
1187 rb_str_initialize(str,
ptr,
len, eenc);
1195 int eidx = rb_enc_to_index(eenc);
1196 if (eidx == rb_usascii_encindex() &&
1197 !is_ascii_string(str)) {
1198 rb_enc_associate_index(str, rb_ascii8bit_encindex());
1201 rb_enc_associate_index(str, eidx);
1236rb_filesystem_str_new_cstr(
const char *
ptr)
1260str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1262 const int termlen = TERM_LEN(str);
1267 if (str_embed_capa(str2) >=
len + termlen) {
1268 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1269 STR_SET_EMBED(str2);
1270 memcpy(ptr2, RSTRING_PTR(str),
len);
1271 TERM_FILL(ptr2+
len, termlen);
1275 if (STR_SHARED_P(str)) {
1276 root =
RSTRING(str)->as.heap.aux.shared;
1280 root = rb_str_new_frozen(str);
1284 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1286 rb_fatal(
"about to free a possible shared root");
1288 char *ptr2 = STR_HEAP_PTR(str2);
1290 ruby_sized_xfree(ptr2, STR_HEAP_SIZE(str2));
1293 FL_SET(str2, STR_NOEMBED);
1295 STR_SET_SHARED(str2, root);
1298 STR_SET_LEN(str2,
len);
1306 str_replace_shared_without_enc(str2, str);
1307 rb_enc_cr_str_exact_copy(str2, str);
1314 return str_replace_shared(str_alloc_heap(klass), str);
1331rb_str_new_frozen_String(
VALUE orig)
1338rb_str_tmp_frozen_acquire(
VALUE orig)
1341 return str_new_frozen_buffer(0, orig, FALSE);
1345rb_str_tmp_frozen_no_embed_acquire(
VALUE orig)
1347 if (
OBJ_FROZEN_RAW(orig) && !STR_EMBED_P(orig) && !rb_str_reembeddable_p(orig))
return orig;
1348 if (STR_SHARED_P(orig) && !STR_EMBED_P(
RSTRING(orig)->
as.heap.aux.shared))
return rb_str_tmp_frozen_acquire(orig);
1350 VALUE str = str_alloc_heap(0);
1353 FL_SET(str, STR_SHARED_ROOT);
1355 size_t capa = str_capacity(orig, TERM_LEN(orig));
1361 if (STR_EMBED_P(orig) ||
FL_TEST_RAW(orig, STR_SHARED | STR_SHARED_ROOT)) {
1362 RSTRING(str)->as.heap.ptr = rb_xmalloc_mul_add_mul(
sizeof(
char),
capa,
sizeof(
char), TERM_LEN(orig));
1369 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1370 RBASIC(orig)->flags &= ~STR_NOFREE;
1371 STR_SET_SHARED(orig, str);
1381rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1386 if (STR_EMBED_P(tmp)) {
1395 assert(RSTRING_LEN(orig) == RSTRING_LEN(tmp));
1399 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1400 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1405 STR_SET_LEN(tmp, 0);
1413 return str_new_frozen_buffer(klass, orig, TRUE);
1419 assert(!STR_EMBED_P(orig));
1420 assert(!STR_SHARED_P(orig));
1422 VALUE str = str_alloc_heap(klass);
1423 STR_SET_LEN(str, RSTRING_LEN(orig));
1424 RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
1425 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1426 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1427 RBASIC(orig)->flags &= ~STR_NOFREE;
1428 STR_SET_SHARED(orig, str);
1435str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1439 long len = RSTRING_LEN(orig);
1440 int termlen = copy_encoding ? TERM_LEN(orig) : 1;
1442 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, termlen)) {
1443 str = str_new0(klass, RSTRING_PTR(orig),
len, termlen);
1444 assert(STR_EMBED_P(str));
1449 long ofs =
RSTRING(orig)->as.heap.ptr - RSTRING_PTR(
shared);
1450 long rest = RSTRING_LEN(
shared) - ofs - RSTRING_LEN(orig);
1453 assert(ofs + rest <= RSTRING_LEN(
shared));
1456 if ((ofs > 0) || (rest > 0) ||
1459 str = str_new_shared(klass,
shared);
1460 assert(!STR_EMBED_P(str));
1461 RSTRING(str)->as.heap.ptr += ofs;
1462 STR_SET_LEN(str, RSTRING_LEN(str) - (ofs + rest));
1470 else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) {
1471 str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig));
1473 memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig));
1474 STR_SET_LEN(str, RSTRING_LEN(orig));
1475 TERM_FILL(RSTRING_END(str), TERM_LEN(orig));
1478 str = heap_str_make_shared(klass, orig);
1482 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
1494str_new_empty_String(
VALUE str)
1497 rb_enc_copy(v, str);
/* Floor for a requested buffer capacity: a later `capa < STR_BUF_MIN_SIZE`
 * check in this file raises capa to this value. */
1501#define STR_BUF_MIN_SIZE 63
1506 if (STR_EMBEDDABLE_P(
capa, 1)) {
1514 RSTRING(str)->as.heap.ptr[0] =
'\0';
1525 str = rb_str_buf_new(
len);
1534 return str_new(0, 0,
len);
1540 if (
FL_TEST(str, RSTRING_FSTR)) {
1541 st_data_t fstr = (st_data_t)str;
1545 st_delete(rb_vm_fstring_table(), &fstr, NULL);
1546 RB_DEBUG_COUNTER_INC(obj_str_fstr);
1551 if (STR_EMBED_P(str)) {
1552 RB_DEBUG_COUNTER_INC(obj_str_embed);
1554 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
1555 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
1556 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
1559 RB_DEBUG_COUNTER_INC(obj_str_ptr);
1560 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
1564RUBY_FUNC_EXPORTED
size_t
1565rb_str_memsize(
VALUE str)
1567 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
1568 return STR_HEAP_SIZE(str);
1578 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
/* Forward declarations. str_discard() is defined further down in this file;
 * str_shared_replace() is invoked right below, but only when str != str2. */
1581static inline void str_discard(
VALUE str);
1582static void str_shared_replace(
VALUE str,
VALUE str2);
1587 if (str != str2) str_shared_replace(str, str2);
1598 enc = STR_ENC_GET(str2);
1603 STR_SET_LEN(str, RSTRING_LEN(str2));
1605 if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) {
1607 memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (
size_t)RSTRING_LEN(str2) + termlen);
1608 rb_enc_associate(str, enc);
1612 if (STR_EMBED_P(str2)) {
1613 assert(!
FL_TEST(str2, STR_SHARED));
1614 long len = RSTRING_LEN(str2);
1615 assert(
len + termlen <= str_embed_capa(str2));
1617 char *new_ptr =
ALLOC_N(
char,
len + termlen);
1618 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
1619 RSTRING(str2)->as.heap.ptr = new_ptr;
1620 STR_SET_LEN(str2,
len);
1622 STR_SET_NOEMBED(str2);
1625 STR_SET_NOEMBED(str);
1627 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1629 if (
FL_TEST(str2, STR_SHARED)) {
1631 STR_SET_SHARED(str,
shared);
1634 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
1638 STR_SET_EMBED(str2);
1639 RSTRING_PTR(str2)[0] = 0;
1640 STR_SET_LEN(str2, 0);
1641 rb_enc_associate(str, enc);
1655 return rb_obj_as_string_result(str, obj);
1671 len = RSTRING_LEN(str2);
1672 if (STR_SHARED_P(str2)) {
1675 STR_SET_NOEMBED(str);
1676 STR_SET_LEN(str,
len);
1677 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1678 STR_SET_SHARED(str,
shared);
1679 rb_enc_cr_str_exact_copy(str, str2);
1682 str_replace_shared(str, str2);
1691 size_t size = rb_str_embed_size(
capa);
1693 assert(rb_gc_size_allocatable_p(size));
1713 const VALUE flag_mask =
1719 if (STR_EMBED_P(str)) {
1720 long len = RSTRING_LEN(str);
1722 assert(STR_EMBED_P(dup));
1723 assert(str_embed_capa(dup) >=
len + 1);
1729 root =
RSTRING(str)->as.heap.aux.shared;
1731 else if (UNLIKELY(!(flags &
FL_FREEZE))) {
1732 root = str = str_new_frozen(klass, str);
1735 assert(!STR_SHARED_P(root));
1738 RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str);
1739 FL_SET(root, STR_SHARED_ROOT);
1741 flags |= RSTRING_NOEMBED | STR_SHARED;
1744 STR_SET_LEN(dup, RSTRING_LEN(str));
1747 encidx = rb_enc_get_index(str);
1748 flags &= ~ENCODING_MASK;
1751 if (encidx) rb_enc_associate_index(dup, encidx);
1759 if (STR_EMBED_P(str)) {
1760 dup = ec_str_alloc_embed(ec, klass, RSTRING_LEN(str) + TERM_LEN(str));
1763 dup = ec_str_alloc_heap(ec, klass);
1766 return str_duplicate_setup(klass, str, dup);
1773 if (STR_EMBED_P(str)) {
1774 dup = str_alloc_embed(klass, RSTRING_LEN(str) + TERM_LEN(str));
1777 dup = str_alloc_heap(klass);
1780 return str_duplicate_setup(klass, str, dup);
1791rb_str_dup_m(
VALUE str)
1793 if (LIKELY(BARE_STRING_P(str))) {
1804 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
1811 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
1812 return ec_str_duplicate(ec,
rb_cString, str);
1827 static ID keyword_ids[2];
1828 VALUE orig, opt, venc, vcapa;
1833 if (!keyword_ids[0]) {
1834 keyword_ids[0] = rb_id_encoding();
1835 CONST_ID(keyword_ids[1],
"capacity");
1843 if (!UNDEF_P(venc) && !
NIL_P(venc)) {
1844 enc = rb_to_encoding(venc);
1846 if (!UNDEF_P(vcapa) && !
NIL_P(vcapa)) {
1851 if (
capa < STR_BUF_MIN_SIZE) {
1852 capa = STR_BUF_MIN_SIZE;
1856 len = RSTRING_LEN(orig);
1860 if (orig == str) n = 0;
1862 str_modifiable(str);
1863 if (STR_EMBED_P(str) ||
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
1865 const size_t size = (size_t)
capa + termlen;
1866 const char *
const old_ptr = RSTRING_PTR(str);
1867 const size_t osize = RSTRING_LEN(str) + TERM_LEN(str);
1868 char *new_ptr =
ALLOC_N(
char, size);
1869 if (STR_EMBED_P(str))
RUBY_ASSERT(osize <= str_embed_capa(str));
1870 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
1872 RSTRING(str)->as.heap.ptr = new_ptr;
1874 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
1875 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
1876 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
1878 STR_SET_LEN(str,
len);
1881 memcpy(
RSTRING(str)->
as.heap.ptr, RSTRING_PTR(orig),
len);
1882 rb_enc_cr_str_exact_copy(str, orig);
1884 FL_SET(str, STR_NOEMBED);
1891 rb_enc_associate(str, enc);
1903rb_str_s_new(
int argc,
VALUE *argv,
VALUE klass)
1909 static ID keyword_ids[2];
1919 keyword_ids[0] = rb_id_encoding();
1920 CONST_ID(keyword_ids[1],
"capacity");
1922 encoding = kwargs[0];
1923 capacity = kwargs[1];
1934 if (UNDEF_P(encoding)) {
1936 encoding = rb_obj_encoding(orig);
1940 if (!UNDEF_P(encoding)) {
1941 enc = rb_to_encoding(encoding);
1946 if (UNDEF_P(capacity)) {
1948 VALUE empty_str = str_new(klass,
"", 0);
1950 rb_enc_associate(empty_str, enc);
1954 VALUE copy = str_duplicate(klass, orig);
1955 rb_enc_associate(copy, enc);
1968 if (orig_capa >
capa) {
1973 long fake_len =
capa - termlen;
1978 VALUE str = str_new0(klass, NULL, fake_len, termlen);
1979 STR_SET_LEN(str, 0);
1980 TERM_FILL(RSTRING_PTR(str), termlen);
1983 rb_enc_associate(str, enc);
1987 rb_str_buf_append(str, orig);
/* True unless the top two bits of c are 10 (bytes 0x80..0xBF), i.e. c is not
 * a UTF-8 continuation byte. ASCII bytes therefore count as lead bytes too,
 * so counting "lead bytes" counts characters. */
1994#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
2009static inline uintptr_t
2010count_utf8_lead_bytes_with_word(
const uintptr_t *s)
2015 d = (d>>6) | (~d>>7);
2016 d &= NONASCII_MASK >> 7;
2019#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
2021 return rb_popcount_intptr(d);
2025# if SIZEOF_VOIDP == 8
2034enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
2040 long diff = (long)(e - p);
2046 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2047 const uintptr_t *s, *t;
2048 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
2049 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2050 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2051 while (p < (
const char *)s) {
2052 if (is_utf8_lead_byte(*p))
len++;
2056 len += count_utf8_lead_bytes_with_word(s);
2059 p = (
const char *)s;
2062 if (is_utf8_lead_byte(*p))
len++;
2068 else if (rb_enc_asciicompat(enc)) {
2073 q = search_nonascii(p, e);
2079 p += rb_enc_fast_mbclen(p, e, enc);
2086 q = search_nonascii(p, e);
2092 p += rb_enc_mbclen(p, e, enc);
2099 for (c=0; p<e; c++) {
2100 p += rb_enc_mbclen(p, e, enc);
2115rb_enc_strlen_cr(
const char *p,
const char *e,
rb_encoding *enc,
int *cr)
2123 long diff = (long)(e - p);
2126 else if (rb_enc_asciicompat(enc)) {
2130 q = search_nonascii(p, e);
2138 ret = rb_enc_precise_mbclen(p, e, enc);
2153 for (c=0; p<e; c++) {
2154 ret = rb_enc_precise_mbclen(p, e, enc);
2178 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2179 if (!enc) enc = STR_ENC_GET(str);
2180 p = RSTRING_PTR(str);
2181 e = RSTRING_END(str);
2185 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2190 return enc_strlen(p, e, enc, cr);
2197 return str_strlen(str, NULL);
2211 return LONG2NUM(str_strlen(str, NULL));
2223rb_str_bytesize(
VALUE str)
2241rb_str_empty(
VALUE str)
2243 return RBOOL(RSTRING_LEN(str) == 0);
2261 char *ptr1, *ptr2, *ptr3;
2266 enc = rb_enc_check_str(str1, str2);
2270 if (len1 > LONG_MAX - len2) {
2271 rb_raise(rb_eArgError,
"string size too big");
2273 str3 = str_new0(
rb_cString, 0, len1+len2, termlen);
2274 ptr3 = RSTRING_PTR(str3);
2275 memcpy(ptr3, ptr1, len1);
2276 memcpy(ptr3+len1, ptr2, len2);
2277 TERM_FILL(&ptr3[len1+len2], termlen);
2293 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2296 int enc1 = rb_enc_get_index(str1);
2297 int enc2 = rb_enc_get_index(str2);
2302 else if (enc2 < 0) {
2305 else if (enc1 != enc2) {
2308 else if (len1 > LONG_MAX - len2) {
2341 rb_enc_copy(str2, str);
2346 rb_raise(rb_eArgError,
"negative argument");
2348 if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
2349 if (STR_EMBEDDABLE_P(
len, 1)) {
2351 memset(RSTRING_PTR(str2), 0,
len + 1);
2358 STR_SET_LEN(str2,
len);
2359 rb_enc_copy(str2, str);
2362 if (
len && LONG_MAX/
len < RSTRING_LEN(str)) {
2363 rb_raise(rb_eArgError,
"argument too big");
2366 len *= RSTRING_LEN(str);
2367 termlen = TERM_LEN(str);
2369 ptr2 = RSTRING_PTR(str2);
2371 n = RSTRING_LEN(str);
2372 memcpy(ptr2, RSTRING_PTR(str), n);
2373 while (n <=
len/2) {
2374 memcpy(ptr2 + n, ptr2, n);
2377 memcpy(ptr2 + n, ptr2,
len-n);
2379 STR_SET_LEN(str2,
len);
2380 TERM_FILL(&ptr2[
len], termlen);
2381 rb_enc_cr_str_copy_for_substr(str2, str);
2407 VALUE tmp = rb_check_array_type(arg);
2416rb_check_lockedtmp(
VALUE str)
2418 if (
FL_TEST(str, STR_TMPLOCK)) {
2424str_modifiable(
VALUE str)
2426 rb_check_lockedtmp(str);
2431str_dependent_p(
VALUE str)
2433 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2442str_independent(
VALUE str)
2444 str_modifiable(str);
2445 return !str_dependent_p(str);
2449str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
2457 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
2462 STR_SET_LEN(str,
len);
2467 oldptr = RSTRING_PTR(str);
2469 memcpy(
ptr, oldptr,
len);
2471 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
2474 STR_SET_NOEMBED(str);
2475 FL_UNSET(str, STR_SHARED|STR_NOFREE);
2476 TERM_FILL(
ptr +
len, termlen);
2478 STR_SET_LEN(str,
len);
2485 if (!str_independent(str))
2486 str_make_independent(str);
2493 int termlen = TERM_LEN(str);
2494 long len = RSTRING_LEN(str);
2497 rb_raise(rb_eArgError,
"negative expanding string size");
2499 if (expand >= LONG_MAX -
len) {
2500 rb_raise(rb_eArgError,
"string size too big");
2503 if (!str_independent(str)) {
2504 str_make_independent_expand(str,
len, expand, termlen);
2506 else if (expand > 0) {
2507 RESIZE_CAPA_TERM(str,
len + expand, termlen);
2514str_modify_keep_cr(
VALUE str)
2516 if (!str_independent(str))
2517 str_make_independent(str);
2524str_discard(
VALUE str)
2526 str_modifiable(str);
2527 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2528 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
2529 RSTRING(str)->as.heap.ptr = 0;
2530 STR_SET_LEN(str, 0);
2541 if (!rb_enc_asciicompat(enc)) {
2561 return RSTRING_PTR(str);
2565zero_filled(
const char *s,
int n)
2567 for (; n > 0; --n) {
2574str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
2576 const char *e = s +
len;
2578 for (; s + minlen <= e; s += rb_enc_mbclen(s, e, enc)) {
2579 if (zero_filled(s, minlen))
return s;
2585str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
2590 if (str_dependent_p(str)) {
2591 if (!zero_filled(s +
len, termlen))
2592 str_make_independent_expand(str,
len, 0L, termlen);
2595 TERM_FILL(s +
len, termlen);
2598 return RSTRING_PTR(str);
2602rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
2604 long capa = str_capacity(str, oldtermlen) + oldtermlen;
2605 long len = RSTRING_LEN(str);
2609 rb_check_lockedtmp(str);
2610 str_make_independent_expand(str,
len, 0L, termlen);
2612 else if (str_dependent_p(str)) {
2613 if (termlen > oldtermlen)
2614 str_make_independent_expand(str,
len, 0L, termlen);
2617 if (!STR_EMBED_P(str)) {
2619 assert(!
FL_TEST((str), STR_SHARED));
2622 if (termlen > oldtermlen) {
2623 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
2631str_null_check(
VALUE str,
int *w)
2633 char *s = RSTRING_PTR(str);
2634 long len = RSTRING_LEN(str);
2640 if (str_null_char(s,
len, minlen, enc)) {
2643 return str_fill_term(str, s,
len, minlen);
2646 if (!s || memchr(s, 0,
len)) {
2650 s = str_fill_term(str, s,
len, minlen);
2656rb_str_to_cstr(
VALUE str)
2659 return str_null_check(str, &w);
2667 char *s = str_null_check(str, &w);
2670 rb_raise(rb_eArgError,
"string contains null char");
2672 rb_raise(rb_eArgError,
"string contains null byte");
2678rb_str_fill_terminator(
VALUE str,
const int newminlen)
2680 char *s = RSTRING_PTR(str);
2681 long len = RSTRING_LEN(str);
2682 return str_fill_term(str, s,
len, newminlen);
2688 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
2712str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
2721 else if (rb_enc_asciicompat(enc)) {
2722 const char *p2, *e2;
2725 while (p < e && 0 < nth) {
2732 p2 = search_nonascii(p, e2);
2741 n = rb_enc_mbclen(p, e, enc);
2752 while (p < e && nth--) {
2753 p += rb_enc_mbclen(p, e, enc);
2764 return str_nth_len(p, e, &nth, enc);
2768str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
2773 p = str_nth_len(p, e, &nth, enc);
/*
 * Looks up position nth inside [p, e) with str_nth, forwarding enc and
 * singlebyte unchanged.
 *
 * When str_nth returns NULL, the length of the whole span (e - p) is
 * returned.
 * NOTE(review): the return for a non-NULL result is not shown here;
 * presumably it is the distance from p to that position -- confirm.
 */
2782str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
2784 const char *pp = str_nth(p, e, nth, enc, singlebyte);
2785 if (!pp)
return e - p;
2792 return str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
2793 STR_ENC_GET(str), single_byte_optimizable(str));
2798str_utf8_nth(
const char *p,
const char *e,
long *nthp)
2801 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
2802 const uintptr_t *s, *t;
2803 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
2804 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2805 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2806 while (p < (
const char *)s) {
2807 if (is_utf8_lead_byte(*p)) nth--;
2811 nth -= count_utf8_lead_bytes_with_word(s);
2813 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
2817 if (is_utf8_lead_byte(*p)) {
2818 if (nth == 0)
break;
2828str_utf8_offset(
const char *p,
const char *e,
long nth)
2830 const char *pp = str_utf8_nth(p, e, &nth);
2839 if (single_byte_optimizable(str) || pos < 0)
2842 char *p = RSTRING_PTR(str);
2843 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
2848str_subseq(
VALUE str,
long beg,
long len)
2854 assert(beg+
len <= RSTRING_LEN(str));
2856 const int termlen = TERM_LEN(str);
2857 if (!SHARABLE_SUBSTRING_P(beg,
len, RSTRING_LEN(str))) {
2864 if (str_embed_capa(str2) >=
len + termlen) {
2865 char *ptr2 =
RSTRING(str2)->as.embed.ary;
2866 STR_SET_EMBED(str2);
2867 memcpy(ptr2, RSTRING_PTR(str) + beg,
len);
2868 TERM_FILL(ptr2+
len, termlen);
2870 STR_SET_LEN(str2,
len);
2874 str_replace_shared(str2, str);
2875 assert(!STR_EMBED_P(str2));
2877 RSTRING(str2)->as.heap.ptr += beg;
2878 if (RSTRING_LEN(str2) >
len) {
2879 STR_SET_LEN(str2,
len);
2889 VALUE str2 = str_subseq(str, beg,
len);
2890 rb_enc_cr_str_copy_for_substr(str2, str);
2899 long blen = RSTRING_LEN(str);
2901 char *p, *s = RSTRING_PTR(str), *e = s + blen;
2903 if (
len < 0)
return 0;
2907 if (single_byte_optimizable(str)) {
2908 if (beg > blen)
return 0;
2911 if (beg < 0)
return 0;
2913 if (
len > blen - beg)
2915 if (
len < 0)
return 0;
2920 if (
len > -beg)
len = -beg;
2923 while (beg-- >
len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
2926 while (
len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
2932 slen = str_strlen(str, enc);
2934 if (beg < 0)
return 0;
2936 if (
len == 0)
goto end;
2939 else if (beg > 0 && beg > RSTRING_LEN(str)) {
2943 if (beg > str_strlen(str, enc))
return 0;
2948 enc == rb_utf8_encoding()) {
2949 p = str_utf8_nth(s, e, &beg);
2950 if (beg > 0)
return 0;
2951 len = str_utf8_offset(p, e,
len);
2957 p = s + beg * char_sz;
2961 else if (
len * char_sz > e - p)
2966 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
2967 if (beg > 0)
return 0;
2971 len = str_offset(p, e,
len, enc, 0);
/*
 * Forward declaration: str_substr is called before its definition appears.
 * The `empty` flag controls zero-length results: the definition returns
 * Qnil when len is 0 and empty is false.
 */
2979static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
2984 return str_substr(str, beg,
len, TRUE);
2988str_substr(
VALUE str,
long beg,
long len,
int empty)
2992 if (!p)
return Qnil;
2993 if (!
len && !empty)
return Qnil;
2995 beg = p - RSTRING_PTR(str);
2997 VALUE str2 = str_subseq(str, beg,
len);
2998 rb_enc_cr_str_copy_for_substr(str2, str);
3007 rb_str_resize(str, RSTRING_LEN(str));
3008 return rb_obj_freeze(str);
3024 return rb_str_dup(str);
3053str_uminus(
VALUE str)
3056 str = rb_str_dup(str);
3058 return rb_fstring(str);
/* From here on, the name rb_str_dup_frozen expands to rb_str_new_frozen
 * (the name was #undef'ed near the top of this file). */
3062#define rb_str_dup_frozen rb_str_new_frozen
3067 if (
FL_TEST(str, STR_TMPLOCK)) {
3070 FL_SET(str, STR_TMPLOCK);
3077 if (!
FL_TEST(str, STR_TMPLOCK)) {
3084RUBY_FUNC_EXPORTED
VALUE
3095 const int termlen = TERM_LEN(str);
3097 str_modifiable(str);
3098 if (STR_SHARED_P(str)) {
3101 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3102 rb_bug(
"probable buffer overflow: %ld for %ld",
len,
capa);
3109 else if (
len > RSTRING_LEN(str)) {
3112 const char *
const prev_end = RSTRING_END(str);
3113 const char *
const new_end = RSTRING_PTR(str) +
len;
3123 else if (
len < RSTRING_LEN(str)) {
3131 STR_SET_LEN(str,
len);
3132 TERM_FILL(&RSTRING_PTR(str)[
len], termlen);
3139 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3142 int independent = str_independent(str);
3143 long slen = RSTRING_LEN(str);
3151 const int termlen = TERM_LEN(str);
3152 if (STR_EMBED_P(str)) {
3153 if (
len == slen)
return str;
3154 if (str_embed_capa(str) >=
len + termlen) {
3155 STR_SET_LEN(str,
len);
3159 str_make_independent_expand(str, slen,
len - slen, termlen);
3161 else if (str_embed_capa(str) >=
len + termlen) {
3162 char *
ptr = STR_HEAP_PTR(str);
3164 if (slen >
len) slen =
len;
3167 STR_SET_LEN(str,
len);
3168 if (independent) ruby_xfree(
ptr);
3171 else if (!independent) {
3172 if (
len == slen)
return str;
3173 str_make_independent_expand(str, slen,
len - slen, termlen);
3177 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3178 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3181 else if (
len == slen)
return str;
3182 STR_SET_LEN(str,
len);
3189str_buf_cat4(
VALUE str,
const char *
ptr,
long len,
bool keep_cr)
3192 str_modify_keep_cr(str);
3197 if (
len == 0)
return 0;
3199 long total, olen,
off = -1;
3201 const int termlen = TERM_LEN(str);
3204 if (
ptr >= sptr &&
ptr <= sptr + olen) {
3208 long capa = str_capacity(str, termlen);
3210 if (olen > LONG_MAX -
len) {
3211 rb_raise(rb_eArgError,
"string sizes too big");
3215 if (total >= LONG_MAX / 2) {
3218 while (total >
capa) {
3221 RESIZE_CAPA_TERM(str,
capa, termlen);
3222 sptr = RSTRING_PTR(str);
3227 memcpy(sptr + olen,
ptr,
len);
3228 STR_SET_LEN(str, total);
3229 TERM_FILL(sptr + total, termlen);
/*
 * Shorthand forms of str_buf_cat4 that always pass false for keep_cr.
 *   str_buf_cat(str, ptr, len) - append len bytes starting at ptr.
 *   str_buf_cat2(str, ptr)     - length is taken from rb_strlen_lit(ptr);
 *                                presumably ptr must be a string literal --
 *                                confirm against rb_strlen_lit's definition.
 */
3234#define str_buf_cat(str, ptr, len) str_buf_cat4((str), (ptr), len, false)
3235#define str_buf_cat2(str, ptr) str_buf_cat4((str), (ptr), rb_strlen_lit(ptr), false)
3240 if (
len == 0)
return str;
3242 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3244 return str_buf_cat(str,
ptr,
len);
3259rb_enc_cr_str_buf_cat(
VALUE str,
const char *
ptr,
long len,
3260 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3269 if (str_encindex == ptr_encindex) {
3271 ptr_cr = coderange_scan(
ptr,
len, rb_enc_from_index(ptr_encindex));
3275 str_enc = rb_enc_from_index(str_encindex);
3276 ptr_enc = rb_enc_from_index(ptr_encindex);
3277 if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
3280 if (RSTRING_LEN(str) == 0) {
3289 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
3298 *ptr_cr_ret = ptr_cr;
3300 if (str_encindex != ptr_encindex &&
3303 str_enc = rb_enc_from_index(str_encindex);
3304 ptr_enc = rb_enc_from_index(ptr_encindex);
3309 res_encindex = str_encindex;
3314 res_encindex = str_encindex;
3318 res_encindex = ptr_encindex;
3323 res_encindex = str_encindex;
3330 res_encindex = str_encindex;
3336 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3338 str_buf_cat(str,
ptr,
len);
3344 rb_enc_name(str_enc), rb_enc_name(ptr_enc));
3351 return rb_enc_cr_str_buf_cat(str,
ptr,
len,
3361 if (rb_enc_asciicompat(enc)) {
3362 return rb_enc_cr_str_buf_cat(str,
ptr, strlen(
ptr),
3368 unsigned int c = (
unsigned char)*
ptr;
3369 int len = rb_enc_codelen(c, enc);
3370 rb_enc_mbcput(c, buf, enc);
3371 rb_enc_cr_str_buf_cat(str, buf,
len,
3384 if (str_enc_fastpath(str)) {
3388 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3394 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3405 rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
3417 return rb_str_buf_append(str, str2);
3421rb_str_concat_literals(
size_t num,
const VALUE *strary)
3425 unsigned long len = 1;
3430 for (i = 0; i < num; ++i) {
len += RSTRING_LEN(strary[i]); }
3431 str = rb_str_buf_new(
len);
3432 str_enc_copy_direct(str, strary[0]);
3434 for (i = s; i < num; ++i) {
3435 const VALUE v = strary[i];
3438 rb_str_buf_append(str, v);
3439 if (encidx != ENCINDEX_US_ASCII) {
3441 rb_enc_set_index(str, encidx);
3466rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
3468 str_modifiable(str);
3473 else if (argc > 1) {
3476 rb_enc_copy(arg_str, str);
3477 for (i = 0; i < argc; i++) {
3480 rb_str_buf_append(str, arg_str);
3512 if (rb_num_to_uint(str2, &code) == 0) {
3525 encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
3528 buf[0] = (char)code;
3529 rb_str_cat(str1, buf, 1);
3530 if (encidx != rb_enc_to_index(enc)) {
3531 rb_enc_associate_index(str1, encidx);
3536 long pos = RSTRING_LEN(str1);
3541 switch (
len = rb_enc_codelen(code, enc)) {
3542 case ONIGERR_INVALID_CODE_POINT_VALUE:
3543 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
3545 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
3551 rb_enc_mbcput(code, buf, enc);
3552 if (rb_enc_precise_mbclen(buf, buf +
len + 1, enc) !=
len) {
3553 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
3555 rb_str_resize(str1, pos+
len);
3556 memcpy(RSTRING_PTR(str1) + pos, buf,
len);
/*
 * Given a string's encoding and a code that is about to be appended as a
 * single byte, works out which encoding index the string should carry.
 *
 * In the branch shown, only ASCII-8BIT and US-ASCII encodings are handled:
 * a US-ASCII encoding combined with code > 127 yields ENCINDEX_ASCII_8BIT.
 * NOTE(review): the remaining return paths are not shown here.
 */
3569rb_ascii8bit_appendable_encoding_index(
rb_encoding *enc,
unsigned int code)
3571 int encidx = rb_enc_to_index(enc);
3573 if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
3578 if (encidx == ENCINDEX_US_ASCII && code > 127) {
3579 return ENCINDEX_ASCII_8BIT;
3602rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
3604 str_modifiable(str);
3609 else if (argc > 1) {
3612 rb_enc_copy(arg_str, str);
3613 for (i = 0; i < argc; i++) {
3625 st_index_t h =
rb_memhash((
const void *)RSTRING_PTR(str), RSTRING_LEN(str));
3627 if (e && !is_ascii_string(str)) {
3637 const char *ptr1, *ptr2;
3640 return (len1 != len2 ||
3642 memcmp(ptr1, ptr2, len1) != 0);
3656rb_str_hash_m(
VALUE str)
/* Evaluates to the smaller of a and b (a when they compare equal).
 * Function-like macro: an argument may be evaluated twice, so do not pass
 * expressions with side effects. */
3662#define lesser(a,b) (((a)>(b))?(b):(a))
3670 if (RSTRING_LEN(str1) == 0)
return TRUE;
3671 if (RSTRING_LEN(str2) == 0)
return TRUE;
3674 if (idx1 == idx2)
return TRUE;
3679 if (rb_enc_asciicompat(rb_enc_from_index(idx2)))
3683 if (rb_enc_asciicompat(rb_enc_from_index(idx1)))
3693 const char *ptr1, *ptr2;
3696 if (str1 == str2)
return 0;
3699 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
3708 if (len1 > len2)
return 1;
3711 if (retval > 0)
return 1;
3738 if (str1 == str2)
return Qtrue;
3745 return rb_str_eql_internal(str1, str2);
3769 if (str1 == str2)
return Qtrue;
3771 return rb_str_eql_internal(str1, str2);
3802 return rb_invcmp(str1, str2);
3844 return str_casecmp(str1, s);
3852 const char *p1, *p1end, *p2, *p2end;
3854 enc = rb_enc_compatible(str1, str2);
3859 p1 = RSTRING_PTR(str1); p1end = RSTRING_END(str1);
3860 p2 = RSTRING_PTR(str2); p2end = RSTRING_END(str2);
3861 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
3862 while (p1 < p1end && p2 < p2end) {
3864 unsigned int c1 =
TOLOWER(*p1 & 0xff);
3865 unsigned int c2 =
TOLOWER(*p2 & 0xff);
3867 return INT2FIX(c1 < c2 ? -1 : 1);
3874 while (p1 < p1end && p2 < p2end) {
3875 int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
3876 int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
3878 if (0 <= c1 && 0 <= c2) {
3882 return INT2FIX(c1 < c2 ? -1 : 1);
3886 l1 = rb_enc_mbclen(p1, p1end, enc);
3887 l2 = rb_enc_mbclen(p2, p2end, enc);
3888 len = l1 < l2 ? l1 : l2;
3889 r = memcmp(p1, p2,
len);
3891 return INT2FIX(r < 0 ? -1 : 1);
3893 return INT2FIX(l1 < l2 ? -1 : 1);
3899 if (RSTRING_LEN(str1) == RSTRING_LEN(str2))
return INT2FIX(0);
3900 if (RSTRING_LEN(str1) > RSTRING_LEN(str2))
return INT2FIX(1);
3934 return str_casecmp_p(str1, s);
3941 VALUE folded_str1, folded_str2;
3942 VALUE fold_opt = sym_fold;
3944 enc = rb_enc_compatible(str1, str2);
3949 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
3950 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
3952 return rb_str_eql(folded_str1, folded_str2);
3956strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
3957 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
3959 const char *search_start = str_ptr;
3960 long pos, search_len = str_len - offset;
3964 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
3965 if (pos < 0)
return pos;
3967 if (t == search_start + pos)
break;
3968 search_len -= t - search_start;
3969 if (search_len <= 0)
return -1;
3970 offset += t - search_start;
3973 return pos + offset;
/*
 * Two entry points onto rb_strseq_index that differ only in its in_byte
 * argument:
 *   rb_str_index     - in_byte = 0
 *   rb_str_byteindex - in_byte = 1
 */
3977#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
3978#define rb_str_byteindex(str, sub, offset) rb_strseq_index(str, sub, offset, 1)
/*
 * Searches str for sub starting from offset and returns the result of
 * strseq_core, or -1 on the early-out paths below.
 *
 * str     - string to search in
 * sub     - string to search for
 * offset  - starting position; converted through str_offset when in_byte
 *           is zero
 * in_byte - non-zero: lengths are taken as byte lengths;
 *           zero: lengths are taken from str_strlen (str's byte length is
 *           still used when str is single-byte optimizable)
 *
 * Early outs visible here:
 *   - sub is a broken string                        -> -1
 *   - str is shorter than sub in bytes              -> -1
 *   - adjusted offset is still negative             -> -1
 *   - fewer units remain after offset than sub has  -> -1
 *   - sub is empty                                  -> offset
 *
 * NOTE(review): the conditions guarding the offset adjustment are not shown
 * here; presumably `offset += str_len_char` applies only to a negative
 * offset -- confirm.
 */
3981rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
3983 const char *str_ptr, *str_ptr_end, *sub_ptr;
3984 long str_len, sub_len;
/* rb_enc_check supplies the encoding used for every length/offset call below. */
3987 enc = rb_enc_check(str, sub);
3988 if (is_broken_string(sub))
return -1;
3990 str_ptr = RSTRING_PTR(str);
3991 str_ptr_end = RSTRING_END(str);
3992 str_len = RSTRING_LEN(str);
3993 sub_ptr = RSTRING_PTR(sub);
3994 sub_len = RSTRING_LEN(sub);
/* Byte-length comparison: a longer needle cannot fit. */
3996 if (str_len < sub_len)
return -1;
3999 long str_len_char, sub_len_char;
4000 int single_byte = single_byte_optimizable(str);
4001 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
4002 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
4004 offset += str_len_char;
4005 if (offset < 0)
return -1;
4007 if (str_len_char - offset < sub_len_char)
return -1;
/* Outside byte mode, offset is replaced by the value str_offset computes. */
4008 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
4011 if (sub_len == 0)
return offset;
4014 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
4028rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
4035 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4036 long slen = str_strlen(str, enc);
4038 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4050 pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
4051 enc, single_byte_optimizable(str));
4062 pos = rb_str_index(str, sub, pos);
4076str_ensure_byte_pos(
VALUE str,
long pos)
4078 const char *s = RSTRING_PTR(str);
4079 const char *e = RSTRING_END(str);
4080 const char *p = s + pos;
4081 if (!at_char_boundary(s, p, e, rb_enc_get(str))) {
4083 "offset %ld does not land on character boundary", pos);
4129rb_str_byteindex_m(
int argc,
VALUE *argv,
VALUE str)
4135 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4136 long slen = RSTRING_LEN(str);
4138 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4149 str_ensure_byte_pos(str, pos);
4161 pos = rb_str_byteindex(str, sub, pos);
4162 if (pos >= 0)
return LONG2NUM(pos);
4171 char *hit, *adjusted;
4173 long slen, searchlen;
4176 sbeg = RSTRING_PTR(str);
4177 slen = RSTRING_LEN(sub);
4178 if (slen == 0)
return s - sbeg;
4179 e = RSTRING_END(str);
4180 t = RSTRING_PTR(sub);
4182 searchlen = s - sbeg + 1;
4185 hit = memrchr(sbeg, c, searchlen);
4188 if (hit != adjusted) {
4189 searchlen = adjusted - sbeg;
4192 if (memcmp(hit, t, slen) == 0)
4194 searchlen = adjusted - sbeg;
4195 }
while (searchlen > 0);
4206 sbeg = RSTRING_PTR(str);
4207 e = RSTRING_END(str);
4208 t = RSTRING_PTR(sub);
4209 slen = RSTRING_LEN(sub);
4212 if (memcmp(s, t, slen) == 0) {
4215 if (s <= sbeg)
break;
4216 s = rb_enc_prev_char(sbeg, s, e, enc);
4232 enc = rb_enc_check(str, sub);
4233 if (is_broken_string(sub))
return -1;
4234 singlebyte = single_byte_optimizable(str);
4235 len = singlebyte ? RSTRING_LEN(str) : str_strlen(str, enc);
4236 slen = str_strlen(sub, enc);
4239 if (
len < slen)
return -1;
4240 if (
len - pos < slen) pos =
len - slen;
4241 if (
len == 0)
return pos;
4243 sbeg = RSTRING_PTR(str);
4246 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4252 s = str_nth(sbeg, RSTRING_END(str), pos, enc, singlebyte);
4253 return str_rindex(str, sub, s, enc);
4314rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
4319 long pos,
len = str_strlen(str, enc);
4321 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4323 if (pos < 0 && (pos +=
len) < 0) {
4329 if (pos >
len) pos =
len;
4337 pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
4338 enc, single_byte_optimizable(str));
4349 pos = rb_str_rindex(str, sub, pos);
4359rb_str_byterindex(
VALUE str,
VALUE sub,
long pos)
4365 enc = rb_enc_check(str, sub);
4366 if (is_broken_string(sub))
return -1;
4367 len = RSTRING_LEN(str);
4368 slen = RSTRING_LEN(sub);
4371 if (
len < slen)
return -1;
4372 if (
len - pos < slen) pos =
len - slen;
4373 if (
len == 0)
return pos;
4375 sbeg = RSTRING_PTR(str);
4378 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4385 return str_rindex(str, sub, s, enc);
4450rb_str_byterindex_m(
int argc,
VALUE *argv,
VALUE str)
4454 long pos,
len = RSTRING_LEN(str);
4456 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4458 if (pos < 0 && (pos +=
len) < 0) {
4464 if (pos >
len) pos =
len;
4470 str_ensure_byte_pos(str, pos);
4482 pos = rb_str_byterindex(str, sub, pos);
4483 if (pos >= 0)
return LONG2NUM(pos);
4519 switch (OBJ_BUILTIN_TYPE(y)) {
4571rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
4578 result = rb_funcallv(get_pat(re), rb_intern(
"match"), argc, argv);
4610rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
4614 re = get_pat(argv[0]);
4615 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
4624static enum neighbor_char
4632 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
4634 return NEIGHBOR_NOT_CHAR;
4638 if (!l)
return NEIGHBOR_NOT_CHAR;
4639 if (l !=
len)
return NEIGHBOR_WRAPPED;
4640 rb_enc_mbcput(c, p, enc);
4641 r = rb_enc_precise_mbclen(p, p +
len, enc);
4643 return NEIGHBOR_NOT_CHAR;
4645 return NEIGHBOR_FOUND;
4648 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
4651 return NEIGHBOR_WRAPPED;
4652 ++((
unsigned char*)p)[i];
4653 l = rb_enc_precise_mbclen(p, p+
len, enc);
4657 return NEIGHBOR_FOUND;
4660 memset(p+l, 0xff,
len-l);
4666 for (len2 =
len-1; 0 < len2; len2--) {
4667 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
4671 memset(p+len2+1, 0xff,
len-(len2+1));
4676static enum neighbor_char
4683 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
4685 return NEIGHBOR_NOT_CHAR;
4688 if (!c)
return NEIGHBOR_NOT_CHAR;
4691 if (!l)
return NEIGHBOR_NOT_CHAR;
4692 if (l !=
len)
return NEIGHBOR_WRAPPED;
4693 rb_enc_mbcput(c, p, enc);
4694 r = rb_enc_precise_mbclen(p, p +
len, enc);
4696 return NEIGHBOR_NOT_CHAR;
4698 return NEIGHBOR_FOUND;
4701 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
4704 return NEIGHBOR_WRAPPED;
4705 --((
unsigned char*)p)[i];
4706 l = rb_enc_precise_mbclen(p, p+
len, enc);
4710 return NEIGHBOR_FOUND;
4713 memset(p+l, 0,
len-l);
4719 for (len2 =
len-1; 0 < len2; len2--) {
4720 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
4724 memset(p+len2+1, 0,
len-(len2+1));
4738static enum neighbor_char
4739enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
4741 enum neighbor_char ret;
4745 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
4749 const int max_gaps = 1;
4753 ctype = ONIGENC_CTYPE_DIGIT;
4755 ctype = ONIGENC_CTYPE_ALPHA;
4757 return NEIGHBOR_NOT_CHAR;
4760 for (
try = 0;
try <= max_gaps; ++
try) {
4761 ret = enc_succ_char(p,
len, enc);
4762 if (ret == NEIGHBOR_FOUND) {
4765 return NEIGHBOR_FOUND;
4772 ret = enc_pred_char(p,
len, enc);
4773 if (ret == NEIGHBOR_FOUND) {
4787 return NEIGHBOR_NOT_CHAR;
4790 if (ctype != ONIGENC_CTYPE_DIGIT) {
4792 return NEIGHBOR_WRAPPED;
4796 enc_succ_char(carry,
len, enc);
4797 return NEIGHBOR_WRAPPED;
4865 str =
rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
4866 rb_enc_cr_str_copy_for_substr(str, orig);
4867 return str_succ(str);
4874 char *sbeg, *s, *e, *last_alnum = 0;
4875 int found_alnum = 0;
4877 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
4878 long carry_pos = 0, carry_len = 1;
4879 enum neighbor_char neighbor = NEIGHBOR_FOUND;
4881 slen = RSTRING_LEN(str);
4882 if (slen == 0)
return str;
4884 enc = STR_ENC_GET(str);
4885 sbeg = RSTRING_PTR(str);
4886 s = e = sbeg + slen;
4888 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
4889 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
4895 l = rb_enc_precise_mbclen(s, e, enc);
4896 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
4897 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
4898 neighbor = enc_succ_alnum_char(s, l, enc, carry);
4900 case NEIGHBOR_NOT_CHAR:
4902 case NEIGHBOR_FOUND:
4904 case NEIGHBOR_WRAPPED:
4909 carry_pos = s - sbeg;
4914 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
4915 enum neighbor_char neighbor;
4916 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
4917 l = rb_enc_precise_mbclen(s, e, enc);
4918 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
4919 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
4921 neighbor = enc_succ_char(tmp, l, enc);
4923 case NEIGHBOR_FOUND:
4927 case NEIGHBOR_WRAPPED:
4930 case NEIGHBOR_NOT_CHAR:
4933 if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
4935 enc_succ_char(s, l, enc);
4937 if (!rb_enc_asciicompat(enc)) {
4938 MEMCPY(carry, s,
char, l);
4941 carry_pos = s - sbeg;
4945 RESIZE_CAPA(str, slen + carry_len);
4946 sbeg = RSTRING_PTR(str);
4947 s = sbeg + carry_pos;
4948 memmove(s + carry_len, s, slen - carry_pos);
4949 memmove(s, carry, carry_len);
4951 STR_SET_LEN(str, slen);
4966rb_str_succ_bang(
VALUE str)
4974all_digits_p(
const char *s,
long len)
5028 VALUE end, exclusive;
5032 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
5038 VALUE current, after_end;
5045 enc = rb_enc_check(beg, end);
5046 ascii = (is_ascii_string(beg) && is_ascii_string(end));
5048 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
5049 char c = RSTRING_PTR(beg)[0];
5050 char e = RSTRING_PTR(end)[0];
5052 if (c > e || (excl && c == e))
return beg;
5054 if ((*each)(rb_enc_str_new(&c, 1, enc), arg))
break;
5055 if (!excl && c == e)
break;
5057 if (excl && c == e)
break;
5062 if (ascii &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
ISDIGIT(RSTRING_PTR(end)[0]) &&
5063 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg)) &&
5064 all_digits_p(RSTRING_PTR(end), RSTRING_LEN(end))) {
5069 b = rb_str_to_inum(beg, 10, FALSE);
5070 e = rb_str_to_inum(end, 10, FALSE);
5077 if (excl && bi == ei)
break;
5078 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5083 ID op = excl ?
'<' : idLE;
5084 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
5089 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5090 b = rb_funcallv(b, succ, 0, 0);
5097 if (n > 0 || (excl && n == 0))
return beg;
5099 after_end = rb_funcallv(end, succ, 0, 0);
5104 next = rb_funcallv(current, succ, 0, 0);
5105 if ((*each)(current, arg))
break;
5106 if (
NIL_P(next))
break;
5110 if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
5125 if (is_ascii_string(beg) &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
5126 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg))) {
5127 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
5129 b = rb_str_to_inum(beg, 10, FALSE);
5135 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5143 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5144 b = rb_funcallv(b, succ, 0, 0);
5150 VALUE next = rb_funcallv(current, succ, 0, 0);
5151 if ((*each)(current, arg))
break;
5154 if (RSTRING_LEN(current) == 0)
5165 if (!
rb_equal(str, *argp))
return 0;
5173 beg = rb_str_new_frozen(beg);
5175 end = rb_str_new_frozen(end);
5179 if (rb_enc_asciicompat(STR_ENC_GET(beg)) &&
5180 rb_enc_asciicompat(STR_ENC_GET(end)) &&
5181 rb_enc_asciicompat(STR_ENC_GET(val))) {
5182 const char *bp = RSTRING_PTR(beg);
5183 const char *ep = RSTRING_PTR(end);
5184 const char *vp = RSTRING_PTR(val);
5185 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1) {
5186 if (RSTRING_LEN(val) == 0 || RSTRING_LEN(val) > 1)
5194 if (b <= v && v < e)
return Qtrue;
5195 return RBOOL(!
RTEST(exclusive) && v == e);
5202 all_digits_p(bp, RSTRING_LEN(beg)) &&
5203 all_digits_p(ep, RSTRING_LEN(end))) {
5208 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
5210 return RBOOL(
NIL_P(val));
5232 else if (RB_TYPE_P(indx,
T_REGEXP)) {
5233 return rb_str_subpat(str, indx,
INT2FIX(0));
5235 else if (RB_TYPE_P(indx,
T_STRING)) {
5236 if (rb_str_index(str, indx, 0) != -1)
5242 long beg,
len = str_strlen(str, NULL);
5254 return str_substr(str, idx, 1, FALSE);
5273rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
5276 if (RB_TYPE_P(argv[0],
T_REGEXP)) {
5277 return rb_str_subpat(str, argv[0], argv[1]);
5286 return rb_str_aref(str, argv[0]);
5292 char *ptr = RSTRING_PTR(str);
5293 long olen = RSTRING_LEN(str), nlen;
5295 str_modifiable(str);
5296 if (
len > olen)
len = olen;
5298 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
5300 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
5302 ptr =
RSTRING(str)->as.embed.ary;
5303 memmove(ptr, oldptr +
len, nlen);
5304 if (fl == STR_NOEMBED)
xfree(oldptr);
5307 if (!STR_SHARED_P(str)) {
5309 rb_enc_cr_str_exact_copy(shared, str);
5314 STR_SET_LEN(str, nlen);
5316 if (!SHARABLE_MIDDLE_SUBSTRING) {
5317 TERM_FILL(ptr + nlen, TERM_LEN(str));
5324rb_str_update_1(
VALUE str,
long beg,
long len,
VALUE val,
long vbeg,
long vlen)
5330 if (beg == 0 && vlen == 0) {
5335 str_modify_keep_cr(str);
5339 RESIZE_CAPA(str, slen + vlen -
len);
5340 sptr = RSTRING_PTR(str);
5349 memmove(sptr + beg + vlen,
5351 slen - (beg +
len));
5353 if (vlen < beg &&
len < 0) {
5357 memmove(sptr + beg, RSTRING_PTR(val) + vbeg, vlen);
5360 STR_SET_LEN(str, slen);
5361 TERM_FILL(&sptr[slen], TERM_LEN(str));
5368 rb_str_update_1(str, beg,
len, val, 0, RSTRING_LEN(val));
5377 int singlebyte = single_byte_optimizable(str);
5383 enc = rb_enc_check(str, val);
5384 slen = str_strlen(str, enc);
5386 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
5393 assert(beg <= slen);
5394 if (
len > slen - beg) {
5397 p = str_nth(RSTRING_PTR(str), RSTRING_END(str), beg, enc, singlebyte);
5398 if (!p) p = RSTRING_END(str);
5399 e = str_nth(p, RSTRING_END(str),
len, enc, singlebyte);
5400 if (!e) e = RSTRING_END(str);
5402 beg = p - RSTRING_PTR(str);
5404 rb_str_update_0(str, beg,
len, val);
5405 rb_enc_associate(str, enc);
5416 long start, end,
len;
5426 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
5430 nth += regs->num_regs;
5440 enc = rb_enc_check_str(str, val);
5441 rb_str_update_0(str, start,
len, val);
5442 rb_enc_associate(str, enc);
5450 switch (
TYPE(indx)) {
5452 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
5456 beg = rb_str_index(str, indx, 0);
5510rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
5513 if (RB_TYPE_P(argv[0],
T_REGEXP)) {
5514 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
5522 return rb_str_aset(str, argv[0], argv[1]);
5582rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
5590 str_modify_keep_cr(str);
5598 if ((nth += regs->num_regs) <= 0)
return Qnil;
5600 else if (nth >= regs->num_regs)
return Qnil;
5602 len = END(nth) - beg;
5605 else if (argc == 2) {
5614 beg = p - RSTRING_PTR(str);
5617 else if (RB_TYPE_P(indx,
T_STRING)) {
5618 beg = rb_str_index(str, indx, 0);
5619 if (beg == -1)
return Qnil;
5620 len = RSTRING_LEN(indx);
5632 beg = p - RSTRING_PTR(str);
5641 beg = p - RSTRING_PTR(str);
5645 rb_enc_cr_str_copy_for_substr(result, str);
5653 char *sptr = RSTRING_PTR(str);
5654 long slen = RSTRING_LEN(str);
5655 if (beg +
len > slen)
5659 slen - (beg +
len));
5661 STR_SET_LEN(str, slen);
5662 TERM_FILL(&sptr[slen], TERM_LEN(str));
5673 switch (OBJ_BUILTIN_TYPE(pat)) {
5692get_pat_quoted(
VALUE pat,
int check)
5696 switch (OBJ_BUILTIN_TYPE(pat)) {
5710 if (check && is_broken_string(pat)) {
5711 rb_exc_raise(rb_reg_check_preprocess(pat));
5717rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
5720 pos = rb_str_byteindex(str, pat, pos);
5721 if (set_backref_str) {
5723 str = rb_str_new_frozen_String(str);
5724 rb_backref_set_string(str, pos, RSTRING_LEN(pat));
5733 return rb_reg_search0(pat, str, pos, 0, set_backref_str);
5753rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
5767 hash = rb_check_hash_type(argv[1]);
5773 pat = get_pat_quoted(argv[0], 1);
5775 str_modifiable(str);
5776 beg = rb_pat_search(pat, str, 0, 1);
5790 end0 = beg0 + RSTRING_LEN(pat);
5799 if (iter || !
NIL_P(hash)) {
5800 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
5803 repl = rb_obj_as_string(
rb_yield(match0));
5806 repl = rb_hash_aref(hash, rb_str_subseq(str, beg0, end0 - beg0));
5807 repl = rb_obj_as_string(repl);
5809 str_mod_check(str, p,
len);
5816 enc = rb_enc_compatible(str, repl);
5819 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
5823 rb_enc_name(str_enc),
5824 rb_enc_name(STR_ENC_GET(repl)));
5826 enc = STR_ENC_GET(repl);
5829 rb_enc_associate(str, enc);
5839 rlen = RSTRING_LEN(repl);
5840 len = RSTRING_LEN(str);
5842 RESIZE_CAPA(str,
len + rlen - plen);
5844 p = RSTRING_PTR(str);
5846 memmove(p + beg0 + rlen, p + beg0 + plen,
len - beg0 - plen);
5848 rp = RSTRING_PTR(repl);
5849 memmove(p + beg0, rp, rlen);
5851 STR_SET_LEN(str,
len);
5852 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
5881 rb_str_sub_bang(argc, argv, str);
5886str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
5889 long beg, beg0, end0;
5890 long offset, blen, slen,
len, last;
5891 enum {STR, ITER, MAP} mode = STR;
5893 int need_backref = -1;
5903 hash = rb_check_hash_type(argv[1]);
5912 rb_error_arity(argc, 1, 2);
5915 pat = get_pat_quoted(argv[0], 1);
5916 beg = rb_pat_search(pat, str, 0, need_backref);
5918 if (bang)
return Qnil;
5923 blen = RSTRING_LEN(str) + 30;
5924 dest = rb_str_buf_new(blen);
5925 sp = RSTRING_PTR(str);
5926 slen = RSTRING_LEN(str);
5928 str_enc = STR_ENC_GET(str);
5929 rb_enc_associate(dest, str_enc);
5937 end0 = beg0 + RSTRING_LEN(pat);
5948 val = rb_obj_as_string(
rb_yield(match0));
5951 val = rb_hash_aref(hash, rb_str_subseq(str, beg0, end0 - beg0));
5952 val = rb_obj_as_string(val);
5954 str_mod_check(str, sp, slen);
5959 else if (need_backref) {
5961 if (need_backref < 0) {
5962 need_backref = val != repl;
5969 len = beg0 - offset;
5971 rb_enc_str_buf_cat(dest, cp,
len, str_enc);
5974 rb_str_buf_append(dest, val);
5983 if (RSTRING_LEN(str) <= end0)
break;
5984 len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0, RSTRING_END(str), str_enc);
5985 rb_enc_str_buf_cat(dest, RSTRING_PTR(str)+end0,
len, str_enc);
5986 offset = end0 +
len;
5988 cp = RSTRING_PTR(str) + offset;
5989 if (offset > RSTRING_LEN(str))
break;
5990 beg = rb_pat_search(pat, str, offset, need_backref);
5994 if (RSTRING_LEN(str) > offset) {
5995 rb_enc_str_buf_cat(dest, cp, RSTRING_LEN(str) - offset, str_enc);
5997 rb_pat_search(pat, str, last, 1);
5999 str_shared_replace(str, dest);
6027rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
6029 str_modify_keep_cr(str);
6030 return str_gsub(argc, argv, str, 1);
6053 return str_gsub(argc, argv, str, 0);
6071 str_modifiable(str);
6072 if (str == str2)
return str;
6076 return str_replace(str, str2);
6091rb_str_clear(
VALUE str)
6095 STR_SET_LEN(str, 0);
6096 RSTRING_PTR(str)[0] = 0;
6097 if (rb_enc_asciicompat(STR_ENC_GET(str)))
6116rb_str_chr(
VALUE str)
6140 pos += RSTRING_LEN(str);
6141 if (pos < 0 || RSTRING_LEN(str) <= pos)
6144 return INT2FIX((
unsigned char)RSTRING_PTR(str)[pos]);
6163 long len = RSTRING_LEN(str);
6164 char *ptr, *head, *left = 0;
6168 if (pos < -
len ||
len <= pos)
6175 char byte = (char)(
NUM2INT(w) & 0xFF);
6177 if (!str_independent(str))
6178 str_make_independent(str);
6179 enc = STR_ENC_GET(str);
6180 head = RSTRING_PTR(str);
6182 if (!STR_EMBED_P(str)) {
6189 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6197 width = rb_enc_precise_mbclen(left, head+
len, enc);
6199 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6215str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
6217 long n = RSTRING_LEN(str);
6219 if (beg > n ||
len < 0)
return Qnil;
6222 if (beg < 0)
return Qnil;
6227 if (!empty)
return Qnil;
6231 VALUE str2 = str_subseq(str, beg,
len);
6233 str_enc_copy_direct(str2, str);
6235 if (RSTRING_LEN(str2) == 0) {
6236 if (!rb_enc_asciicompat(STR_ENC_GET(str)))
6264 long beg,
len = RSTRING_LEN(str);
6272 return str_byte_substr(str, beg,
len, TRUE);
6277 return str_byte_substr(str, idx, 1, FALSE);
6324rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
6329 return str_byte_substr(str, beg,
len, TRUE);
6332 return str_byte_aref(str, argv[0]);
6336str_check_beg_len(
VALUE str,
long *beg,
long *
len)
6338 long end, slen = RSTRING_LEN(str);
6341 if ((slen < *beg) || ((*beg < 0) && (*beg + slen < 0))) {
6348 assert(*beg <= slen);
6349 if (*
len > slen - *beg) {
6353 str_ensure_byte_pos(str, *beg);
6354 str_ensure_byte_pos(str, end);
/*
 * Replaces a byte range of str with (a byte range of) another string.
 *
 * Accepts exactly 2, 3 or 5 arguments; any other count raises
 * ArgumentError. Two places raise TypeError when argv[0] / argv[2] is
 * not a Range (the guarding conditions are not shown here).
 *
 * After both ranges are validated with str_check_beg_len(), the
 * encodings are checked for compatibility, str is prepared for
 * modification, the bytes are replaced via rb_str_update_1(), and the
 * resulting encoding is associated with str.
 */
6379rb_str_bytesplice(
int argc,
VALUE *argv,
VALUE str)
6381 long beg,
len, vbeg, vlen;
6387 if (!(argc == 2 || argc == 3 || argc == 5)) {
6388 rb_raise(rb_eArgError,
"wrong number of arguments (given %d, expected 2, 3, or 5)", argc);
6392 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6393 rb_builtin_class_name(argv[0]));
6400 vlen = RSTRING_LEN(val);
6405 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6406 rb_builtin_class_name(argv[2]));
6418 vlen = RSTRING_LEN(val);
/* Validate the destination range, then the source range. */
6426 str_check_beg_len(str, &beg, &
len);
6427 str_check_beg_len(val, &vbeg, &vlen);
6428 enc = rb_enc_check(str, val);
6429 str_modify_keep_cr(str);
6430 rb_str_update_1(str, beg,
len, val, vbeg, vlen);
6431 rb_enc_associate(str, enc);
/*
 * Returns a reversed copy of str.
 *
 * Strings of length 0 or 1 are simply duplicated. For longer strings
 * the visible code distinguishes a single-byte-optimizable path from
 * paths that measure each character (rb_enc_fast_mbclen /
 * rb_enc_mbclen). The result `rev` receives the same length and
 * encoding as str.
 */
6449rb_str_reverse(
VALUE str)
6456 if (RSTRING_LEN(str) <= 1)
return str_duplicate(
rb_cString, str);
6457 enc = STR_ENC_GET(str);
6459 s = RSTRING_PTR(str); e = RSTRING_END(str);
/* p starts at the end of the destination buffer. */
6460 p = RSTRING_END(rev);
6463 if (RSTRING_LEN(str) > 1) {
6464 if (single_byte_optimizable(str)) {
6471 int clen = rb_enc_fast_mbclen(s, e, enc);
6479 cr = rb_enc_asciicompat(enc) ?
6482 int clen = rb_enc_mbclen(s, e, enc);
6491 STR_SET_LEN(rev, RSTRING_LEN(str));
6492 str_enc_copy_direct(rev, str);
/*
 * Reverses str in place.
 *
 * For strings longer than one byte: the single-byte-optimizable case
 * works directly on the buffer between s (first byte) and e (last
 * byte); otherwise the contents are replaced by the result of
 * rb_str_reverse(). A further str_modify_keep_cr() call is visible on
 * another path whose condition is not shown here.
 */
6512rb_str_reverse_bang(
VALUE str)
6514 if (RSTRING_LEN(str) > 1) {
6515 if (single_byte_optimizable(str)) {
6518 str_modify_keep_cr(str);
6519 s = RSTRING_PTR(str);
6520 e = RSTRING_END(str) - 1;
6528 str_shared_replace(str, rb_str_reverse(str));
6532 str_modify_keep_cr(str);
6557 i = rb_str_index(str, arg, 0);
6559 return RBOOL(i != -1);
6601 rb_raise(rb_eArgError,
"invalid radix %d", base);
6603 return rb_str_to_inum(str, base, FALSE);
6627rb_str_to_f(
VALUE str)
6642rb_str_to_s(
VALUE str)
6654 char s[RUBY_MAX_CHAR_LEN];
6655 int n = rb_enc_codelen(c, enc);
6657 rb_enc_mbcput(c, s, enc);
6658 rb_enc_str_buf_cat(str, s, n, enc);
/* Size limit passed to snprintf() when formatting one escaped character. */
6662#define CHAR_ESC_LEN 13
/*
 * Formats code point c as an escape sequence into a local buffer.
 *
 * result    - destination string
 * c         - code point to escape
 * unicode_p - NOTE(review): presumably selects the \u forms over the
 *             \x forms; the selecting condition is not visible here
 *
 * Visible formats: "%c", "\u%04X" (c < 0x10000), "\u{%X}", "\x%02X",
 * and "\x{%X}". The formatted length is then taken with strlen().
 */
6665rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
/* One byte larger than the limit handed to snprintf(). */
6667 char buf[CHAR_ESC_LEN + 1];
6675 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
6677 else if (c < 0x10000) {
6678 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
6681 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
6686 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
6689 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
6692 l = (int)strlen(buf);
/*
 * Maps a control character to its backslash escape as a C string.
 *
 * Visible mappings: NUL, LF, CR, TAB, FF, VT (013), BS (010), BEL (007),
 * ESC (033) and DEL (0x7f, rendered as "\c?"). The result for any other
 * value is not shown here.
 */
6698ruby_escaped_char(
int c)
6701 case '\0':
return "\\0";
6702 case '\n':
return "\\n";
6703 case '\r':
return "\\r";
6704 case '\t':
return "\\t";
6705 case '\f':
return "\\f";
6706 case '\013':
return "\\v";
6707 case '\010':
return "\\b";
6708 case '\007':
return "\\a";
6709 case '\033':
return "\\e";
6710 case '\x7f':
return "\\c?";
/*
 * Builds an escaped copy of str in a fresh buffer `result`.
 *
 * The scan keeps two cursors: p (current position) and prev (start of
 * the not-yet-copied run). Whenever something must be escaped, the
 * pending run [prev, p) is appended first, then the escape:
 *   - a byte at a position where rb_enc_precise_mbclen() is consulted
 *     is written as "\xNN" (the exact condition is not shown here);
 *   - characters known to ruby_escaped_char() use its string;
 *   - others go through rb_str_buf_cat_escaped_char().
 * Any remaining run is appended at the end.
 */
6716rb_str_escape(
VALUE str)
6720 const char *p = RSTRING_PTR(str);
6721 const char *pend = RSTRING_END(str);
6722 const char *prev = p;
6723 char buf[CHAR_ESC_LEN + 1];
6724 VALUE result = rb_str_buf_new(0);
6725 int unicode_p = rb_enc_unicode_p(enc);
6726 int asciicompat = rb_enc_asciicompat(enc);
6731 int n = rb_enc_precise_mbclen(p, pend, enc);
6733 if (p > prev) str_buf_cat(result, prev, p - prev);
6736 n = (int)(pend - p);
6738 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
6739 str_buf_cat(result, buf, strlen(buf));
6747 cc = ruby_escaped_char(c);
/* p has already advanced past the character, hence the "p - n". */
6749 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6750 str_buf_cat(result, cc, strlen(cc));
6756 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6757 rb_str_buf_cat_escaped_char(result, c, unicode_p);
6761 if (p > prev) str_buf_cat(result, prev, p - prev);
6785 const char *p, *pend, *prev;
6786 char buf[CHAR_ESC_LEN + 1];
6787 VALUE result = rb_str_buf_new(0);
6788 rb_encoding *resenc = rb_default_internal_encoding();
6789 int unicode_p = rb_enc_unicode_p(enc);
6790 int asciicompat = rb_enc_asciicompat(enc);
6792 if (resenc == NULL) resenc = rb_default_external_encoding();
6793 if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
6794 rb_enc_associate(result, resenc);
6795 str_buf_cat2(result,
"\"");
6797 p = RSTRING_PTR(str); pend = RSTRING_END(str);
6803 n = rb_enc_precise_mbclen(p, pend, enc);
6805 if (p > prev) str_buf_cat(result, prev, p - prev);
6808 n = (int)(pend - p);
6810 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
6811 str_buf_cat(result, buf, strlen(buf));
6819 if ((asciicompat || unicode_p) &&
6820 (c ==
'"'|| c ==
'\\' ||
6825 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
6826 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6827 str_buf_cat2(result,
"\\");
6828 if (asciicompat || enc == resenc) {
6834 case '\n': cc =
'n';
break;
6835 case '\r': cc =
'r';
break;
6836 case '\t': cc =
't';
break;
6837 case '\f': cc =
'f';
break;
6838 case '\013': cc =
'v';
break;
6839 case '\010': cc =
'b';
break;
6840 case '\007': cc =
'a';
break;
6841 case 033: cc =
'e';
break;
6842 default: cc = 0;
break;
6845 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6848 str_buf_cat(result, buf, 2);
6865 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6866 rb_str_buf_cat_escaped_char(result, c, unicode_p);
6871 if (p > prev) str_buf_cat(result, prev, p - prev);
6872 str_buf_cat2(result,
"\"");
/*
 * True when p is still before e and *p is one of '$', '@' or '{'.
 * Note that p is evaluated more than once.
 */
6877#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
6897 int encidx = rb_enc_get_index(str);
6900 const char *p, *pend;
6903 int u8 = (encidx == rb_utf8_encindex());
6904 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
6907 if (!rb_enc_asciicompat(enc)) {
6909 len += strlen(enc->name);
6912 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
6915 unsigned char c = *p++;
6918 case '"':
case '\\':
6919 case '\n':
case '\r':
6920 case '\t':
case '\f':
6921 case '\013':
case '\010':
case '\007':
case '\033':
6926 clen = IS_EVSTR(p, pend) ? 2 : 1;
6934 if (u8 && c > 0x7F) {
6935 int n = rb_enc_precise_mbclen(p-1, pend, enc);
6940 else if (cc <= 0xFFFFF)
6953 if (clen > LONG_MAX -
len) {
6960 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
6961 q = RSTRING_PTR(result); qend = q +
len + 1;
6965 unsigned char c = *p++;
6967 if (c ==
'"' || c ==
'\\') {
6971 else if (c ==
'#') {
6972 if (IS_EVSTR(p, pend)) *q++ =
'\\';
6975 else if (c ==
'\n') {
6979 else if (c ==
'\r') {
6983 else if (c ==
'\t') {
6987 else if (c ==
'\f') {
6991 else if (c ==
'\013') {
6995 else if (c ==
'\010') {
6999 else if (c ==
'\007') {
7003 else if (c ==
'\033') {
7013 int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
7018 snprintf(q, qend-q,
"u%04X", cc);
7020 snprintf(q, qend-q,
"u{%X}", cc);
7025 snprintf(q, qend-q,
"x%02X", c);
7031 if (!rb_enc_asciicompat(enc)) {
7032 snprintf(q, qend-q, nonascii_suffix, enc->name);
7033 encidx = rb_ascii8bit_encindex();
7036 rb_enc_associate_index(result, encidx);
7042unescape_ascii(
unsigned int c)
/*
 * Decodes one backslash escape while undumping and appends the decoded
 * bytes to `undumped`.
 *
 * undumped - output string being built
 * ss       - in/out cursor into the dumped text
 * s_end    - end of the dumped text
 * penc     - in/out current encoding of the output
 * utf8     - in/out flag (NOTE(review): presumably set once a \u escape is seen)
 * binary   - in/out flag (NOTE(review): presumably set once a \x escape is seen)
 *
 * Visible behaviour: single characters are appended as-is or after
 * unescape_ascii(); code points in the surrogate range 0xd800..0xdfff
 * enter a branch whose body is not shown; an empty hex run or one
 * longer than 6 digits likewise; code points are encoded with
 * rb_enc_mbcput() into a 6-byte scratch buffer before being appended.
 */
7066undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
7068 const char *s = *ss;
7072 unsigned char buf[6];
7079 rb_str_cat(undumped, s, 1);
7090 *buf = unescape_ascii(*s);
7091 rb_str_cat(undumped, (
char *)buf, 1);
/* Switch the output to UTF-8 when it is not already. */
7102 if (enc_utf8 == NULL) enc_utf8 = rb_utf8_encoding();
7103 if (*penc != enc_utf8) {
7105 rb_enc_associate(undumped, enc_utf8);
7122 if (hexlen == 0 || hexlen > 6) {
7128 if (0xd800 <= c && c <= 0xdfff) {
7131 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7132 rb_str_cat(undumped, (
char *)buf, codelen);
7141 if (0xd800 <= c && c <= 0xdfff) {
7144 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7145 rb_str_cat(undumped, (
char *)buf, codelen);
7161 rb_str_cat(undumped, (
char *)buf, 1);
/* Appends two bytes starting at the one before s. */
7165 rb_str_cat(undumped, s-1, 2);
7172static VALUE rb_str_is_ascii_only_p(
VALUE str);
/*
 * Parses a dumped string and rebuilds the original string.
 *
 * Checks visible below: ASCII-only input, str_null_check(), a minimum
 * length of 2 and a leading double quote. After the quoted body, an
 * optional ".dup" and a ".force_encoding(\"NAME\")" suffix are
 * recognised; the named encoding is looked up with
 * rb_enc_find_index2() and associated with the result.
 *
 * Raises RuntimeError for a malformed dump, and also at the visible
 * rb_raise() whose message is about a Unicode escape combined with
 * force_encoding.
 */
7190str_undump(
VALUE str)
7192 const char *s = RSTRING_PTR(str);
7193 const char *s_end = RSTRING_END(str);
/* Result starts out empty, in encoding enc. */
7195 VALUE undumped = rb_enc_str_new(s, 0L, enc);
7197 bool binary =
false;
7201 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
7204 if (!str_null_check(str, &w)) {
7207 if (RSTRING_LEN(str) < 2)
goto invalid_format;
7208 if (*s !=
'"')
goto invalid_format;
7226 static const char force_encoding_suffix[] =
".force_encoding(\"";
7227 static const char dup_suffix[] =
".dup";
7228 const char *encname;
/* sizeof - 1 drops the literal's terminating NUL. */
7233 size =
sizeof(dup_suffix) - 1;
7234 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
7236 size =
sizeof(force_encoding_suffix) - 1;
7237 if (s_end - s <= size)
goto invalid_format;
7238 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
7242 rb_raise(
rb_eRuntimeError,
"dumped string contained Unicode escape but used force_encoding");
/* Find the quote that closes the encoding name; exactly "\")" must remain. */
7246 s = memchr(s,
'"', s_end-s);
7248 if (!s)
goto invalid_format;
7249 if (s_end - s != 2)
goto invalid_format;
7250 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
7252 encidx = rb_enc_find_index2(encname, (
long)size);
7256 rb_enc_associate_index(undumped, encidx);
7266 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
7269 rb_str_cat(undumped, s++, 1);
7277 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
7283 if (rb_enc_dummy_p(enc)) {
/*
 * Obtains the encoding of str and passes it through
 * rb_str_check_dummy_enc() (how enc is obtained, and what is returned,
 * is not shown here).
 */
7290str_true_enc(
VALUE str)
7293 rb_str_check_dummy_enc(enc);
/*
 * Folds the option symbols in argv into the case-mapping flag set.
 *
 * Visible rules:
 *   - :turkic sets ONIGENC_CASE_FOLD_TURKISH_AZERI and may be followed
 *     by :lithuanian; :lithuanian sets ONIGENC_CASE_FOLD_LITHUANIAN and
 *     may be followed by :turkic. Any other second option raises
 *     ArgumentError ("invalid second option").
 *   - :ascii sets ONIGENC_CASE_ASCII_ONLY.
 *   - :fold is accepted only when the flags request downcasing without
 *     upcasing; DOWNCASE is then swapped for FOLD via XOR.
 *   - Anything else raises ArgumentError ("invalid option"), and too
 *     many arguments raises "too many options".
 */
7297static OnigCaseFoldType
7298check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
7303 rb_raise(rb_eArgError,
"too many options");
7304 if (argv[0]==sym_turkic) {
7305 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7307 if (argv[1]==sym_lithuanian)
7308 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7310 rb_raise(rb_eArgError,
"invalid second option");
7313 else if (argv[0]==sym_lithuanian) {
7314 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7316 if (argv[1]==sym_turkic)
7317 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7319 rb_raise(rb_eArgError,
"invalid second option");
7323 rb_raise(rb_eArgError,
"too many options");
7324 else if (argv[0]==sym_ascii)
7325 flags |= ONIGENC_CASE_ASCII_ONLY;
7326 else if (argv[0]==sym_fold) {
7327 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
7328 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
7330 rb_raise(rb_eArgError,
"option :fold only allowed for downcasing");
7333 rb_raise(rb_eArgError,
"invalid option");
7340 if ((flags & ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() ||
rb_enc_mbmaxlen(enc) == 1))
7346#define CASE_MAPPING_ADDITIONAL_LENGTH 20
7347#ifndef CASEMAP_DEBUG
7348# define CASEMAP_DEBUG 0
7356 OnigUChar space[FLEX_ARY_LEN];
/*
 * Releases a singly linked chain of mapping buffers.
 *
 * The next pointer is read before the node is freed, so the walk never
 * touches released memory; each node is freed with its recorded capa.
 */
7360mapping_buffer_free(
void *p)
7364 while (current_buffer) {
7365 previous_buffer = current_buffer;
7366 current_buffer = current_buffer->next;
7367 ruby_sized_xfree(previous_buffer, previous_buffer->capa);
7373 {0, mapping_buffer_free,},
7374 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
/*
 * Body of the encoding-aware case-mapping routine (its header is not
 * visible in this view; presumably rb_str_casemap, which callers invoke
 * as rb_str_casemap(str, &flags, enc)).
 *
 * The source is converted chunk by chunk through enc->case_map() into a
 * chain of buffers anchored at buffer_anchor. If the mapper reports a
 * negative length, the chain is freed and ArgumentError is raised.
 * With a single buffer the result is built straight from it; otherwise
 * all buffers are concatenated into the target. The target receives
 * the source's encoding.
 */
7382 const OnigUChar *source_current, *source_end;
7383 int target_length = 0;
7384 VALUE buffer_anchor;
7387 size_t buffer_count = 0;
7388 int buffer_length_or_invalid;
/* Nothing to map for an empty string: return a duplicate. */
7390 if (RSTRING_LEN(source) == 0)
return str_duplicate(
rb_cString, source);
7392 source_current = (OnigUChar*)RSTRING_PTR(source);
7393 source_end = (OnigUChar*)RSTRING_END(source);
7397 while (source_current < source_end) {
/* Each further buffer is sized as remaining bytes times the buffer ordinal, plus slack. */
7399 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
7400 if (CASEMAP_DEBUG) {
7401 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n",
capa);
/* Link the new buffer at the tail and advance the tail slot. */
7404 *pre_buffer = current_buffer;
7405 pre_buffer = &current_buffer->next;
7406 current_buffer->next = NULL;
7407 current_buffer->capa =
capa;
7408 buffer_length_or_invalid = enc->case_map(flags,
7409 &source_current, source_end,
7410 current_buffer->space,
7411 current_buffer->space+current_buffer->capa,
7413 if (buffer_length_or_invalid < 0) {
/* Free the whole chain before raising. */
7414 current_buffer =
DATA_PTR(buffer_anchor);
7416 mapping_buffer_free(current_buffer);
7417 rb_raise(rb_eArgError,
"input string invalid");
7419 target_length += current_buffer->used = buffer_length_or_invalid;
7421 if (CASEMAP_DEBUG) {
7422 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
7425 if (buffer_count==1) {
7426 target =
rb_str_new((
const char*)current_buffer->space, target_length);
7429 char *target_current;
7432 target_current = RSTRING_PTR(target);
7433 current_buffer =
DATA_PTR(buffer_anchor);
/* Concatenate every buffer's used bytes into the target. */
7434 while (current_buffer) {
7435 memcpy(target_current, current_buffer->space, current_buffer->used);
7436 target_current += current_buffer->used;
7437 current_buffer = current_buffer->next;
7440 current_buffer =
DATA_PTR(buffer_anchor);
7442 mapping_buffer_free(current_buffer);
7447 str_enc_copy_direct(target, source);
7456 const OnigUChar *source_current, *source_end;
7457 OnigUChar *target_current, *target_end;
7458 long old_length = RSTRING_LEN(source);
7459 int length_or_invalid;
7461 if (old_length == 0)
return Qnil;
7463 source_current = (OnigUChar*)RSTRING_PTR(source);
7464 source_end = (OnigUChar*)RSTRING_END(source);
7465 if (source == target) {
7466 target_current = (OnigUChar*)source_current;
7467 target_end = (OnigUChar*)source_end;
7470 target_current = (OnigUChar*)RSTRING_PTR(target);
7471 target_end = (OnigUChar*)RSTRING_END(target);
7474 length_or_invalid = onigenc_ascii_only_case_map(flags,
7475 &source_current, source_end,
7476 target_current, target_end, enc);
7477 if (length_or_invalid < 0)
7478 rb_raise(rb_eArgError,
"input string invalid");
7479 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
7480 fprintf(stderr,
"problem with rb_str_ascii_casemap"
7481 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7482 rb_raise(rb_eArgError,
"internal problem with rb_str_ascii_casemap"
7483 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7486 str_enc_copy(target, source);
/*
 * Upcases ASCII letters of str in place, byte by byte.
 *
 * Each byte in 'a'..'z' is replaced by the corresponding byte in
 * 'A'..'Z'. `modified` starts out false (NOTE(review): presumably set
 * when a byte changes and returned; neither is shown here).
 */
7492upcase_single(
VALUE str)
7494 char *s = RSTRING_PTR(str), *send = RSTRING_END(str);
7495 bool modified =
false;
/* Read as unsigned so bytes >= 0x80 never compare as negative. */
7498 unsigned int c = *(
unsigned char*)s;
7500 if (
'a' <= c && c <=
'z') {
7501 *s =
'A' + (c -
'a');
/*
 * Upcases str in place.
 *
 * Options are folded into `flags` by check_case_options(). Three
 * strategies are visible:
 *   1. the single-byte fast path (upcase_single), which sets
 *      ONIGENC_CASE_MODIFIED itself when a byte changed;
 *   2. ASCII-only mapping in place via rb_str_ascii_casemap();
 *   3. full mapping via rb_str_casemap(), whose result replaces str.
 * Returns str when ONIGENC_CASE_MODIFIED ended up set (the other return
 * is not shown here).
 */
7529rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
7532 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7534 flags = check_case_options(argc, argv, flags);
7535 str_modify_keep_cr(str);
7536 enc = str_true_enc(str);
7537 if (case_option_single_p(flags, enc, str)) {
7538 if (upcase_single(str))
7539 flags |= ONIGENC_CASE_MODIFIED;
7541 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7542 rb_str_ascii_casemap(str, str, &flags, enc);
7544 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7546 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * Returns an upcased copy of str.
 *
 * On the single-byte fast path a byte copy of str is created and given
 * str's encoding. The ASCII-only path maps from str into `ret` with
 * rb_str_ascii_casemap(); the general path takes the result of
 * rb_str_casemap().
 */
7568rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
7571 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7574 flags = check_case_options(argc, argv, flags);
7575 enc = str_true_enc(str);
7576 if (case_option_single_p(flags, enc, str)) {
7577 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
7578 str_enc_copy_direct(ret, str);
7581 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
7583 rb_str_ascii_casemap(str, ret, &flags, enc);
7586 ret = rb_str_casemap(str, &flags, enc);
/*
 * Downcases ASCII letters of str in place, byte by byte.
 *
 * Each byte in 'A'..'Z' is replaced by the corresponding byte in
 * 'a'..'z'. `modified` starts out false (NOTE(review): presumably set
 * when a byte changes and returned; neither is shown here).
 */
7593downcase_single(
VALUE str)
7595 char *s = RSTRING_PTR(str), *send = RSTRING_END(str);
7596 bool modified =
false;
/* Read as unsigned so bytes >= 0x80 never compare as negative. */
7599 unsigned int c = *(
unsigned char*)s;
7601 if (
'A' <= c && c <=
'Z') {
7602 *s =
'a' + (c -
'A');
/*
 * Downcases str in place.
 *
 * Options are folded into `flags` by check_case_options(). Three
 * strategies are visible:
 *   1. the single-byte fast path (downcase_single), which sets
 *      ONIGENC_CASE_MODIFIED itself when a byte changed;
 *   2. ASCII-only mapping in place via rb_str_ascii_casemap();
 *   3. full mapping via rb_str_casemap(), whose result replaces str.
 * Returns str when ONIGENC_CASE_MODIFIED ended up set (the other return
 * is not shown here).
 */
7631rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
7634 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
7636 flags = check_case_options(argc, argv, flags);
7637 str_modify_keep_cr(str);
7638 enc = str_true_enc(str);
7639 if (case_option_single_p(flags, enc, str)) {
7640 if (downcase_single(str))
7641 flags |= ONIGENC_CASE_MODIFIED;
7643 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7644 rb_str_ascii_casemap(str, str, &flags, enc);
7646 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7648 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * Returns a downcased copy of str.
 *
 * On the single-byte fast path a byte copy of str is created, given
 * str's encoding and downcased in place with downcase_single(). The
 * ASCII-only path maps from str into `ret` with rb_str_ascii_casemap();
 * the general path takes the result of rb_str_casemap().
 */
7670rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
7673 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
7676 flags = check_case_options(argc, argv, flags);
7677 enc = str_true_enc(str);
7678 if (case_option_single_p(flags, enc, str)) {
7679 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
7680 str_enc_copy_direct(ret, str);
7681 downcase_single(ret);
7683 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
7685 rb_str_ascii_casemap(str, ret, &flags, enc);
7688 ret = rb_str_casemap(str, &flags, enc);
/*
 * Capitalizes str in place (flags start as UPCASE | TITLECASE).
 *
 * Returns Qnil immediately for an empty string or a NULL buffer.
 * Otherwise either the ASCII-only mapper runs in place, or the result
 * of rb_str_casemap() replaces str. Returns str when
 * ONIGENC_CASE_MODIFIED ended up set (the other return is not shown
 * here).
 */
7716rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
7719 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
7721 flags = check_case_options(argc, argv, flags);
7722 str_modify_keep_cr(str);
7723 enc = str_true_enc(str);
7724 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
7725 if (flags&ONIGENC_CASE_ASCII_ONLY)
7726 rb_str_ascii_casemap(str, str, &flags, enc);
7728 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7730 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * Returns a capitalized version of str (flags start as
 * UPCASE | TITLECASE).
 *
 * The ASCII-only path maps from str into `ret`; the general path takes
 * the result of rb_str_casemap().
 */
7754rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
7757 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
7760 flags = check_case_options(argc, argv, flags);
7761 enc = str_true_enc(str);
/*
 * NOTE(review): the empty-string path returns the receiver itself, not
 * a duplicate (rb_str_swapcase uses str_duplicate() in the same
 * situation) -- confirm this aliasing is intended.
 */
7762 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str;
7763 if (flags&ONIGENC_CASE_ASCII_ONLY) {
7765 rb_str_ascii_casemap(str, ret, &flags, enc);
7768 ret = rb_str_casemap(str, &flags, enc);
/*
 * Swaps the case of str in place (flags start as UPCASE | DOWNCASE).
 *
 * Either the ASCII-only mapper runs in place, or the result of
 * rb_str_casemap() replaces str. Returns str when
 * ONIGENC_CASE_MODIFIED ended up set (the other return is not shown
 * here).
 */
7795rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
7798 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
7800 flags = check_case_options(argc, argv, flags);
7801 str_modify_keep_cr(str);
7802 enc = str_true_enc(str);
7803 if (flags&ONIGENC_CASE_ASCII_ONLY)
7804 rb_str_ascii_casemap(str, str, &flags, enc);
7806 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7808 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * Returns a case-swapped copy of str (flags start as
 * UPCASE | DOWNCASE).
 *
 * An empty string (or NULL buffer) yields a duplicate of str. The
 * ASCII-only path maps from str into `ret`; the general path takes the
 * result of rb_str_casemap().
 */
7832rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
7835 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
7838 flags = check_case_options(argc, argv, flags);
7839 enc = str_true_enc(str);
7840 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str_duplicate(
rb_cString, str);
7841 if (flags&ONIGENC_CASE_ASCII_ONLY) {
7843 rb_str_ascii_casemap(str, ret, &flags, enc);
7846 ret = rb_str_casemap(str, &flags, enc);
7851typedef unsigned char *USTR;
7855 unsigned int now, max;
7867 if (t->p == t->pend)
return -1;
7868 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
7871 t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
7873 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
7875 if (t->p < t->pend) {
7876 unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
7879 if (t->now < 0x80 && c < 0x80) {
7880 rb_raise(rb_eArgError,
7881 "invalid range \"%c-%c\" in string transliteration",
7885 rb_raise(rb_eArgError,
"invalid range in string transliteration");
7889 else if (t->now < c) {
7898 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
7899 if (t->now == t->max) {
7904 if (t->now < t->max) {
7920 const unsigned int errc = -1;
7921 unsigned int trans[256];
7923 struct tr trsrc, trrepl;
7925 unsigned int c, c0, last = 0;
7926 int modify = 0, i, l;
7927 unsigned char *s, *send;
7929 int singlebyte = single_byte_optimizable(str);
7933#define CHECK_IF_ASCII(c) \
7934 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
7935 (cr = ENC_CODERANGE_VALID) : 0)
7939 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
7940 if (RSTRING_LEN(repl) == 0) {
7941 return rb_str_delete_bang(1, &src, str);
7945 e1 = rb_enc_check(str, src);
7946 e2 = rb_enc_check(str, repl);
7951 enc = rb_enc_check(src, repl);
7953 trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
7954 if (RSTRING_LEN(src) > 1 &&
7955 rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) ==
'^' &&
7956 trsrc.p + l < trsrc.pend) {
7960 trrepl.p = RSTRING_PTR(repl);
7961 trrepl.pend = trrepl.p + RSTRING_LEN(repl);
7962 trsrc.gen = trrepl.gen = 0;
7963 trsrc.now = trrepl.now = 0;
7964 trsrc.max = trrepl.max = 0;
7967 for (i=0; i<256; i++) {
7970 while ((c = trnext(&trsrc, enc)) != errc) {
7975 if (!hash) hash = rb_hash_new();
7979 while ((c = trnext(&trrepl, enc)) != errc)
7982 for (i=0; i<256; i++) {
7983 if (trans[i] != errc) {
7991 for (i=0; i<256; i++) {
7994 while ((c = trnext(&trsrc, enc)) != errc) {
7995 r = trnext(&trrepl, enc);
7996 if (r == errc) r = trrepl.now;
7999 if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
8002 if (!hash) hash = rb_hash_new();
8010 str_modify_keep_cr(str);
8011 s = (
unsigned char *)RSTRING_PTR(str); send = (
unsigned char *)RSTRING_END(str);
8015 long offset, max = RSTRING_LEN(str);
8016 unsigned int save = -1;
8017 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8022 c0 = c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, e1);
8023 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8032 if (cflag) c = last;
8035 else if (cflag) c = errc;
8041 if (c != (
unsigned int)-1) {
8047 tlen = rb_enc_codelen(c, enc);
8053 if (enc != e1) may_modify = 1;
8055 if ((offset = t - buf) + tlen > max) {
8056 size_t MAYBE_UNUSED(old) = max + termlen;
8057 max = offset + tlen + (send - s);
8058 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8061 rb_enc_mbcput(c, t, enc);
8062 if (may_modify && memcmp(s, t, tlen) != 0) {
8068 if (!STR_EMBED_P(str)) {
8069 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8071 TERM_FILL((
char *)t, termlen);
8072 RSTRING(str)->as.heap.ptr = (
char *)buf;
8073 STR_SET_LEN(str, t - buf);
8074 STR_SET_NOEMBED(str);
8075 RSTRING(str)->as.heap.aux.capa = max;
8079 c = (
unsigned char)*s;
8080 if (trans[c] != errc) {
8097 long offset, max = (long)((send - s) * 1.2);
8098 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8102 c0 = c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, e1);
8103 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8111 if (cflag) c = last;
8114 else if (cflag) c = errc;
8118 c = cflag ? last : errc;
8121 tlen = rb_enc_codelen(c, enc);
8126 if (enc != e1) may_modify = 1;
8128 if ((offset = t - buf) + tlen > max) {
8129 size_t MAYBE_UNUSED(old) = max + termlen;
8130 max = offset + tlen + (long)((send - s) * 1.2);
8131 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8135 rb_enc_mbcput(c, t, enc);
8136 if (may_modify && memcmp(s, t, tlen) != 0) {
8144 if (!STR_EMBED_P(str)) {
8145 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8147 TERM_FILL((
char *)t, termlen);
8148 RSTRING(str)->as.heap.ptr = (
char *)buf;
8149 STR_SET_LEN(str, t - buf);
8150 STR_SET_NOEMBED(str);
8151 RSTRING(str)->as.heap.aux.capa = max;
8157 rb_enc_associate(str, enc);
8176 return tr_trans(str, src, repl, 0);
8223 tr_trans(str, src, repl, 0);
/* Number of per-byte entries: one for every unsigned char value. */
8227#define TR_TABLE_MAX (UCHAR_MAX+1)
/* Table size: the per-byte entries plus one extra slot at index TR_TABLE_MAX. */
8228#define TR_TABLE_SIZE (TR_TABLE_MAX+1)
/*
 * Merges the character set described by str into a lookup table.
 *
 * str    - character-set specification, scanned with trnext()
 * stable - TR_TABLE_SIZE bytes; entries 0..TR_TABLE_MAX-1 are per-byte
 *          membership, and entry TR_TABLE_MAX is an extra flag that
 *          this function stores cflag into or clears
 * first  - non-zero on the first specification of a series
 * (further parameters are not visible in this view)
 *
 * Code points below TR_TABLE_MAX are recorded in the local `buf`;
 * others go into a hash (`table`). At the end each stable[i] is ANDed
 * with buf[i], so successive calls intersect the sets.
 */
8230tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
8233 const unsigned int errc = -1;
8234 char buf[TR_TABLE_MAX];
8237 VALUE table = 0, ptable = 0;
8238 int i, l, cflag = 0;
8240 tr.p = RSTRING_PTR(str);
tr.pend =
tr.p + RSTRING_LEN(str);
8241 tr.gen =
tr.now =
tr.max = 0;
/* A leading '^' on a spec longer than one byte is detected here. */
8243 if (RSTRING_LEN(str) > 1 && rb_enc_ascget(
tr.p,
tr.pend, &l, enc) ==
'^') {
8248 for (i=0; i<TR_TABLE_MAX; i++) {
8251 stable[TR_TABLE_MAX] = cflag;
8253 else if (stable[TR_TABLE_MAX] && !cflag) {
8254 stable[TR_TABLE_MAX] = 0;
8256 for (i=0; i<TR_TABLE_MAX; i++) {
/* errc is the end-of-input sentinel compared against trnext()'s result. */
8260 while ((c = trnext(&
tr, enc)) != errc) {
8261 if (c < TR_TABLE_MAX) {
8262 buf[(
unsigned char)c] = !cflag;
8267 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
8270 table = ptable ? ptable : rb_hash_new();
8274 table = rb_hash_new();
8279 if (table && (!ptable || (cflag ^ !
NIL_P(rb_hash_aref(ptable, key))))) {
8280 rb_hash_aset(table, key,
Qtrue);
8284 for (i=0; i<TR_TABLE_MAX; i++) {
8285 stable[i] = stable[i] && buf[i];
8287 if (!table && !cflag) {
/*
 * Tests whether code point c is selected by a table built with
 * tr_setup_table().
 *
 * c     - code point to test
 * table - TR_TABLE_SIZE entries; indices below TR_TABLE_MAX are looked
 *         up directly
 * del   - hash consulted for larger code points (0 when absent)
 * nodel - hash consulted for larger code points (0 when absent)
 *
 * For c >= TR_TABLE_MAX the visible checks look c up in del and nodel;
 * the fallback result is the extra slot table[TR_TABLE_MAX].
 */
8294tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
8296 if (c < TR_TABLE_MAX) {
8297 return table[c] != 0;
8303 if (!
NIL_P(rb_hash_lookup(del, v)) &&
8304 (!nodel ||
NIL_P(rb_hash_lookup(nodel, v)))) {
8308 else if (nodel && !
NIL_P(rb_hash_lookup(nodel, v))) {
8311 return table[TR_TABLE_MAX] ? TRUE : FALSE;
/*
 * Deletes from str, in place, the characters selected by the argument
 * specifications.
 *
 * Returns Qnil immediately for an empty string or NULL buffer. Every
 * argument is merged into one table with tr_setup_table() (the first
 * one with first == true). The string is then compacted with a read
 * cursor s and a write cursor t: characters for which tr_find() is
 * false are kept, being re-encoded at t when t has fallen behind s.
 * Finally the terminator is rewritten and the length shortened.
 * Returns str when `modify` is set (the other return is not shown
 * here).
 */
8325rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
8327 char squeez[TR_TABLE_SIZE];
8330 VALUE del = 0, nodel = 0;
8332 int i, ascompat, cr;
8334 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8336 for (i=0; i<argc; i++) {
8340 enc = rb_enc_check(str, s);
8341 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8344 str_modify_keep_cr(str);
8345 ascompat = rb_enc_asciicompat(enc);
8346 s = t = RSTRING_PTR(str);
8347 send = RSTRING_END(str);
/* Bytes below 0x80 in an ASCII-compatible encoding take a separate branch. */
8353 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8364 c = rb_enc_codepoint_len(s, send, &clen, enc);
8366 if (tr_find(c, squeez, del, nodel)) {
8370 if (t != s) rb_enc_mbcput(c, t, enc);
8377 TERM_FILL(t, TERM_LEN(str));
8378 STR_SET_LEN(str, t - RSTRING_PTR(str));
8381 if (modify)
return str;
/*
 * Non-destructive counterpart of rb_str_delete_bang().
 * NOTE(review): presumably `str` has been rebound to a copy before the
 * in-place call below; that line is not visible here.
 */
8401rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
8404 rb_str_delete_bang(argc, argv, str);
/*
 * Squeezes runs of identical characters in str, in place.
 *
 * With arguments (argc > 0) the affected characters are restricted to
 * those selected by the table built from all arguments. Returns Qnil
 * for an empty string or NULL buffer.
 *
 * A character is kept when it differs from the previously seen one
 * (`save`) or, with arguments, when it is not in the selected set.
 * Three scanning variants are visible: raw bytes, ASCII bytes (< 0x80)
 * in an ASCII-compatible encoding, and decoded code points. The
 * terminator is rewritten afterwards, and the length is updated only if
 * it changed. Returns str when `modify` is set (the other return is not
 * shown here).
 */
8418rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
8420 char squeez[TR_TABLE_SIZE];
8422 VALUE del = 0, nodel = 0;
8423 unsigned char *s, *send, *t;
8425 int ascompat, singlebyte = single_byte_optimizable(str);
8429 enc = STR_ENC_GET(str);
8432 for (i=0; i<argc; i++) {
8436 enc = rb_enc_check(str, s);
8437 if (singlebyte && !single_byte_optimizable(s))
8439 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8443 str_modify_keep_cr(str);
8444 s = t = (
unsigned char *)RSTRING_PTR(str);
8445 if (!s || RSTRING_LEN(str) == 0)
return Qnil;
8446 send = (
unsigned char *)RSTRING_END(str);
8448 ascompat = rb_enc_asciicompat(enc);
8452 unsigned int c = *s++;
8453 if (c != save || (argc > 0 && !squeez[c])) {
8463 if (ascompat && (c = *s) < 0x80) {
8464 if (c != save || (argc > 0 && !squeez[c])) {
8470 c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, enc);
8472 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
8473 if (t != s) rb_enc_mbcput(c, t, enc);
8482 TERM_FILL((
char *)t, TERM_LEN(str));
8483 if ((
char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
8484 STR_SET_LEN(str, (
char *)t - RSTRING_PTR(str));
8488 if (modify)
return str;
/*
 * Non-destructive counterpart of rb_str_squeeze_bang().
 * NOTE(review): presumably `str` has been rebound to a copy before the
 * in-place call below; that line is not visible here.
 */
8511rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
8514 rb_str_squeeze_bang(argc, argv, str);
8532 return tr_trans(str, src, repl, 1);
8555 tr_trans(str, src, repl, 1);
/*
 * Counts the characters of str selected by the argument specifications.
 *
 * Fast path: when the first specification is a single byte in an
 * ASCII-compatible encoding, reverse matching is allowed for it and str
 * is not broken, the bytes of str are compared directly against that
 * byte.
 *
 * General path: all specifications are merged with tr_setup_table()
 * (the first with TRUE, the rest with FALSE) and each character is
 * tested with tr_find(); ASCII bytes (< 0x80) in an ASCII-compatible
 * encoding take a separate branch.
 *
 * Both paths return INT2FIX(0) for an empty string or NULL buffer.
 */
8584rb_str_count(
int argc,
VALUE *argv,
VALUE str)
8586 char table[TR_TABLE_SIZE];
8588 VALUE del = 0, nodel = 0, tstr;
8598 enc = rb_enc_check(str, tstr);
8601 if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
8602 (ptstr = RSTRING_PTR(tstr),
8603 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
8604 !is_broken_string(str)) {
8606 unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
8608 s = RSTRING_PTR(str);
8609 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
8610 send = RSTRING_END(str);
8612 if (*(
unsigned char*)s++ == c) n++;
8618 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
8619 for (i=1; i<argc; i++) {
8622 enc = rb_enc_check(str, tstr);
8623 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
8626 s = RSTRING_PTR(str);
8627 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
8628 send = RSTRING_END(str);
8629 ascompat = rb_enc_asciicompat(enc);
8633 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8641 c = rb_enc_codepoint_len(s, send, &clen, enc);
8642 if (tr_find(c, table, del, nodel)) {
/*
 * Validates a field-separator value. The only path visible here returns
 * 0 when val is nil.
 */
8653rb_fs_check(
VALUE val)
8657 if (
NIL_P(val))
return 0;
/*
 * Byte-indexed whitespace table: the only non-zero entries are indices
 * 9..13 (TAB, LF, VT, FF, CR) and 32 (space).
 */
8662static const char isspacetable[256] = {
8663 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
8664 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8665 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8666 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8667 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8668 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8669 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8670 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8671 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8672 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8673 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8674 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8675 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8676 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8677 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8678 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
/* The cast keeps the index in 0..255 even where plain char is signed. */
8681#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
/*
 * Emits one field produced by a split, deferring empty fields.
 *
 * result      - array collecting fields (NOTE(review): the rb_yield()
 *               path is presumably taken when it is 0; the condition is
 *               not visible here)
 * str         - string being split
 * beg, len    - byte range of the field
 * empty_count - number of pending empty fields; a negative value turns
 *               the deferral off
 *
 * When deferral is active and the field is empty, only the counter is
 * increased and returned. Otherwise pending empty strings are flushed
 * (pushed or yielded) before the field itself is cut out with
 * rb_str_subseq() and emitted.
 */
8684split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
8686 if (empty_count >= 0 &&
len == 0) {
8687 return empty_count + 1;
8689 if (empty_count > 0) {
8693 rb_ary_push(result, str_new_empty_String(str));
8694 }
while (--empty_count > 0);
8698 rb_yield(str_new_empty_String(str));
8699 }
while (--empty_count > 0);
8702 str = rb_str_subseq(str, beg,
len);
8704 rb_ary_push(result, str);
8713 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
/*
 * Classifies a literal split pattern.
 *
 * Visible outcomes: SPLIT_TYPE_CHARS (condition not shown here);
 * SPLIT_TYPE_AWK when the pattern is exactly one space -- compared
 * bytewise for ASCII-compatible encodings, via rb_enc_ascget()
 * otherwise; default_type in every remaining case.
 */
8717literal_split_pattern(
VALUE spat, split_type_t default_type)
8725 return SPLIT_TYPE_CHARS;
8727 else if (rb_enc_asciicompat(enc)) {
8728 if (
len == 1 && ptr[0] ==
' ') {
8729 return SPLIT_TYPE_AWK;
8734 if (rb_enc_ascget(ptr, ptr +
len, &l, enc) ==
' ' &&
len == l) {
8735 return SPLIT_TYPE_AWK;
8738 return default_type;
/*
 * Splits str into fields.
 *
 * Takes up to two optional arguments (pattern and limit, scanned with
 * "02"). A non-positive limit is treated as no limit. When no pattern
 * is given, rb_fs is used; a nil rb_fs selects awk-style splitting, and
 * a value rejected by rb_fs_check() raises TypeError.
 *
 * Four strategies are visible, chosen by split_type:
 *   SPLIT_TYPE_AWK    - whitespace splitting, with a bytewise loop for
 *                       ASCII strings and a code-point loop otherwise
 *   SPLIT_TYPE_STRING - literal separator search with rb_memsearch()
 *   SPLIT_TYPE_CHARS  - one field per character
 *   (otherwise)       - regexp matching, also emitting capture groups
 *
 * Fields are emitted through SPLIT_STR(), which threads empty_count
 * through split_string(). Returns `result` when it is non-zero,
 * otherwise str.
 */
8751rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
8756 split_type_t split_type;
8757 long beg, end, i = 0, empty_count = -1;
8762 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
8764 if (lim <= 0) limit =
Qnil;
8765 else if (lim == 1) {
8766 if (RSTRING_LEN(str) == 0)
/* Deferral of empty fields is switched on only without a limit. */
8777 if (
NIL_P(limit) && !lim) empty_count = 0;
8779 enc = STR_ENC_GET(str);
8780 split_type = SPLIT_TYPE_REGEXP;
8782 spat = get_pat_quoted(spat, 0);
8784 else if (
NIL_P(spat = rb_fs)) {
8785 split_type = SPLIT_TYPE_AWK;
8787 else if (!(spat = rb_fs_check(spat))) {
8788 rb_raise(
rb_eTypeError,
"value of $; must be String or Regexp");
8793 if (split_type != SPLIT_TYPE_AWK) {
8798 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
8799 if (split_type == SPLIT_TYPE_AWK) {
8801 split_type = SPLIT_TYPE_STRING;
8806 mustnot_broken(spat);
8807 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
/* Emits the field [beg, beg+len) and keeps empty_count up to date. */
8815#define SPLIT_STR(beg, len) (empty_count = split_string(result, str, beg, len, empty_count))
8818 char *ptr = RSTRING_PTR(str);
8819 char *eptr = RSTRING_END(str);
8820 if (split_type == SPLIT_TYPE_AWK) {
8825 if (result) result = rb_ary_new();
8827 if (is_ascii_string(str)) {
8828 while (ptr < eptr) {
8829 c = (
unsigned char)*ptr++;
8831 if (ascii_isspace(c)) {
8837 if (!
NIL_P(limit) && lim <= i)
break;
8840 else if (ascii_isspace(c)) {
8841 SPLIT_STR(beg, end-beg);
8844 if (!
NIL_P(limit)) ++i;
8852 while (ptr < eptr) {
8855 c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
8864 if (!
NIL_P(limit) && lim <= i)
break;
8868 SPLIT_STR(beg, end-beg);
8871 if (!
NIL_P(limit)) ++i;
8879 else if (split_type == SPLIT_TYPE_STRING) {
8880 char *str_start = ptr;
8881 char *substr_start = ptr;
8882 char *sptr = RSTRING_PTR(spat);
8883 long slen = RSTRING_LEN(spat);
8885 if (result) result = rb_ary_new();
8886 mustnot_broken(str);
8887 enc = rb_enc_check(str, spat);
8888 while (ptr < eptr &&
8889 (end =
rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
8892 if (t != ptr + end) {
8896 SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
8899 if (!
NIL_P(limit) && lim <= ++i)
break;
8901 beg = ptr - str_start;
8903 else if (split_type == SPLIT_TYPE_CHARS) {
8904 char *str_start = ptr;
8907 if (result) result = rb_ary_new_capa(RSTRING_LEN(str));
8908 mustnot_broken(str);
8909 enc = rb_enc_get(str);
8910 while (ptr < eptr &&
8911 (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
8912 SPLIT_STR(ptr - str_start, n);
8914 if (!
NIL_P(limit) && lim <= ++i)
break;
8916 beg = ptr - str_start;
8919 if (result) result = rb_ary_new();
8920 long len = RSTRING_LEN(str);
8928 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (void)0)) {
8933 if (start == end && BEG(0) == END(0)) {
8938 else if (last_null == 1) {
8939 SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
8946 start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
8952 SPLIT_STR(beg, end-beg);
8953 beg = start = END(0);
/* Capture groups that took part in the match are emitted as fields too. */
8957 for (idx=1; idx < regs->num_regs; idx++) {
8958 if (BEG(idx) == -1)
continue;
8959 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
8961 if (!
NIL_P(limit) && lim <= ++i)
break;
8963 if (match) rb_match_unbusy(match);
/* Emit the trailing remainder after the last separator. */
8965 if (RSTRING_LEN(str) > 0 && (!
NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
8966 SPLIT_STR(beg, RSTRING_LEN(str)-beg);
8969 return result ? result : str;
8979 return rb_str_split_m(1, &sep, str);
8982#define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
8988 rb_ary_push(ary, e);
8997#define ENUM_ELEM(ary, e) enumerator_element(ary, e)
/*
 * Steps back over a trailing line terminator in [p, e).
 *
 * Visible steps: the character before e is located with
 * rb_enc_prev_char(); after a second step back, the preceding character
 * is tested for '\r'. The returned position and the intermediate
 * conditions are not shown here (NOTE(review): presumably e is moved
 * back over "\n" and then over a preceding "\r").
 */
9000chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
9002 const char *prev = rb_enc_prev_char(p, e, e, enc);
9005 prev = rb_enc_prev_char(p, e, e, enc);
9006 if (prev && rb_enc_ascget(prev, e, NULL, enc) ==
'\r')
9018 RSTRING_LEN(rs) != 1 ||
9019 RSTRING_PTR(rs)[0] !=
'\n')) {
/* From this point on, every use of `rb_rs` expands to a call to get_rs(). */
9025#define rb_rs get_rs()
9032 const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
9033 long pos,
len, rslen;
9039 static ID keywords[1];
9044 chomp = (!UNDEF_P(chomp) &&
RTEST(chomp));
9048 if (!ENUM_ELEM(ary, str)) {
9056 if (!RSTRING_LEN(str))
goto end;
9057 str = rb_str_new_frozen(str);
9058 ptr = subptr = RSTRING_PTR(str);
9059 pend = RSTRING_END(str);
9060 len = RSTRING_LEN(str);
9062 rslen = RSTRING_LEN(rs);
9064 if (rs == rb_default_rs)
9065 enc = rb_enc_get(str);
9067 enc = rb_enc_check(str, rs);
9072 const char *eol = NULL;
9074 while (subend < pend) {
9075 long chomp_rslen = 0;
9077 if (rb_enc_ascget(subend, pend, &n, enc) !=
'\r')
9079 rslen = n + rb_enc_mbclen(subend + n, pend, enc);
9081 if (eol == subend)
break;
9085 chomp_rslen = -rslen;
9089 if (!subptr) subptr = subend;
9093 }
while (subend < pend);
9095 if (rslen == 0) chomp_rslen = 0;
9096 line = rb_str_subseq(str, subptr - ptr,
9097 subend - subptr + (chomp ? chomp_rslen : rslen));
9098 if (ENUM_ELEM(ary, line)) {
9099 str_mod_check(str, ptr,
len);
9101 subptr = eol = NULL;
9106 rsptr = RSTRING_PTR(rs);
9113 if ((rs == rb_default_rs) && !rb_enc_asciicompat(enc)) {
9116 rsptr = RSTRING_PTR(rs);
9117 rslen = RSTRING_LEN(rs);
9120 while (subptr < pend) {
9121 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
9125 if (hit != adjusted) {
9129 subend = hit += rslen;
9132 subend = chomp_newline(subptr, subend, enc);
9138 line = rb_str_subseq(str, subptr - ptr, subend - subptr);
9139 if (ENUM_ELEM(ary, line)) {
9140 str_mod_check(str, ptr,
len);
9145 if (subptr != pend) {
9148 pend = chomp_newline(subptr, pend, enc);
9150 else if (pend - subptr >= rslen &&
9151 memcmp(pend - rslen, rsptr, rslen) == 0) {
9155 line = rb_str_subseq(str, subptr - ptr, pend - subptr);
9156 ENUM_ELEM(ary, line);
9177rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
9180 return rb_str_enumerate_lines(argc, argv, str, 0);
9193rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
9195 VALUE ary = WANTARRAY(
"lines", 0);
9196 return rb_str_enumerate_lines(argc, argv, str, ary);
9210 for (i=0; i<RSTRING_LEN(str); i++) {
9211 ENUM_ELEM(ary,
INT2FIX((
unsigned char)RSTRING_PTR(str)[i]));
9229rb_str_each_byte(
VALUE str)
9232 return rb_str_enumerate_bytes(str, 0);
9244rb_str_bytes(
VALUE str)
9246 VALUE ary = WANTARRAY(
"bytes", RSTRING_LEN(str));
9247 return rb_str_enumerate_bytes(str, ary);
9264 str = rb_str_new_frozen(str);
9265 ptr = RSTRING_PTR(str);
9266 len = RSTRING_LEN(str);
9267 enc = rb_enc_get(str);
9270 for (i = 0; i <
len; i += n) {
9271 n = rb_enc_fast_mbclen(ptr + i, ptr +
len, enc);
9272 ENUM_ELEM(ary, rb_str_subseq(str, i, n));
9276 for (i = 0; i <
len; i += n) {
9277 n = rb_enc_mbclen(ptr + i, ptr +
len, enc);
9278 ENUM_ELEM(ary, rb_str_subseq(str, i, n));
9298rb_str_each_char(
VALUE str)
9301 return rb_str_enumerate_chars(str, 0);
9313rb_str_chars(
VALUE str)
9316 return rb_str_enumerate_chars(str, ary);
9320rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
9325 const char *ptr, *end;
9328 if (single_byte_optimizable(str))
9329 return rb_str_enumerate_bytes(str, ary);
9331 str = rb_str_new_frozen(str);
9332 ptr = RSTRING_PTR(str);
9333 end = RSTRING_END(str);
9334 enc = STR_ENC_GET(str);
9337 c = rb_enc_codepoint_len(ptr, end, &n, enc);
9358rb_str_each_codepoint(
VALUE str)
9361 return rb_str_enumerate_codepoints(str, 0);
9373rb_str_codepoints(
VALUE str)
9376 return rb_str_enumerate_codepoints(str, ary);
9382 int encidx = rb_enc_to_index(enc);
9384 const OnigUChar source_ascii[] =
"\\X";
9385 const OnigUChar *source = source_ascii;
9386 size_t source_len =
sizeof(source_ascii) - 1;
/*
 * Helpers that expand a code unit `x` into a comma-separated list of
 * OnigUChar values, suitable for use inside an array initializer.
 *   CHARS_16BE: (x >> 8) first, then x        -- two entries
 *   CHARS_16LE: x first, then (x >> 8)        -- two entries
 *   CHARS_32BE: CHARS_16BE of (x >> 16), then CHARS_16BE of x -- four entries
 *   CHARS_32LE: CHARS_16LE of x, then CHARS_16LE of (x >> 16) -- four entries
 * Each entry is produced by a cast to OnigUChar.
 */
9389#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
9390#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
9391#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
9392#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
9393#define CASE_UTF(e) \
9394 case ENCINDEX_UTF_##e: { \
9395 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9396 source = source_UTF_##e; \
9397 source_len = sizeof(source_UTF_##e); \
9400 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
9408 regex_t *reg_grapheme_cluster;
9410 int r = onig_new(®_grapheme_cluster, source, source + source_len,
9411 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
9413 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
9414 onig_error_code_to_str(message, r, &einfo);
9415 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
9418 return reg_grapheme_cluster;
9424 int encidx = rb_enc_to_index(enc);
9425 static regex_t *reg_grapheme_cluster_utf8 = NULL;
9427 if (encidx == rb_utf8_encindex()) {
9428 if (!reg_grapheme_cluster_utf8) {
9429 reg_grapheme_cluster_utf8 = get_reg_grapheme_cluster(enc);
9432 return reg_grapheme_cluster_utf8;
9441 size_t grapheme_cluster_count = 0;
9443 const char *ptr, *end;
9445 if (!rb_enc_unicode_p(enc)) {
9449 bool cached_reg_grapheme_cluster =
true;
9450 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9451 if (!reg_grapheme_cluster) {
9452 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9453 cached_reg_grapheme_cluster =
false;
9456 ptr = RSTRING_PTR(str);
9457 end = RSTRING_END(str);
9460 OnigPosition
len = onig_match(reg_grapheme_cluster,
9461 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9462 (
const OnigUChar *)ptr, NULL, 0);
9463 if (
len <= 0)
break;
9464 grapheme_cluster_count++;
9468 if (!cached_reg_grapheme_cluster) {
9469 onig_free(reg_grapheme_cluster);
9472 return SIZET2NUM(grapheme_cluster_count);
9476rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
9480 const char *ptr0, *ptr, *end;
9482 if (!rb_enc_unicode_p(enc)) {
9483 return rb_str_enumerate_chars(str, ary);
9486 if (!ary) str = rb_str_new_frozen(str);
9488 bool cached_reg_grapheme_cluster =
true;
9489 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9490 if (!reg_grapheme_cluster) {
9491 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9492 cached_reg_grapheme_cluster =
false;
9495 ptr0 = ptr = RSTRING_PTR(str);
9496 end = RSTRING_END(str);
9499 OnigPosition
len = onig_match(reg_grapheme_cluster,
9500 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9501 (
const OnigUChar *)ptr, NULL, 0);
9502 if (
len <= 0)
break;
9503 ENUM_ELEM(ary, rb_str_subseq(str, ptr-ptr0,
len));
9507 if (!cached_reg_grapheme_cluster) {
9508 onig_free(reg_grapheme_cluster);
9528rb_str_each_grapheme_cluster(
VALUE str)
9531 return rb_str_enumerate_grapheme_clusters(str, 0);
9543rb_str_grapheme_clusters(
VALUE str)
9546 return rb_str_enumerate_grapheme_clusters(str, ary);
9550chopped_length(
VALUE str)
9553 const char *p, *p2, *beg, *end;
9555 beg = RSTRING_PTR(str);
9556 end = beg + RSTRING_LEN(str);
9557 if (beg >= end)
return 0;
9558 p = rb_enc_prev_char(beg, end, end, enc);
9560 if (p > beg && rb_enc_ascget(p, end, 0, enc) ==
'\n') {
9561 p2 = rb_enc_prev_char(beg, p, end, enc);
9562 if (p2 && rb_enc_ascget(p2, end, 0, enc) ==
'\r') p = p2;
9578rb_str_chop_bang(
VALUE str)
9580 str_modify_keep_cr(str);
9581 if (RSTRING_LEN(str) > 0) {
9583 len = chopped_length(str);
9584 STR_SET_LEN(str,
len);
9585 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
9604rb_str_chop(
VALUE str)
9606 return rb_str_subseq(str, 0, chopped_length(str));
9610smart_chomp(
VALUE str,
const char *e,
const char *p)
9621 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
9629 if (--e > p && *(e-1) ==
'\r') {
9646 char *pp, *e, *rsptr;
9648 char *
const p = RSTRING_PTR(str);
9649 long len = RSTRING_LEN(str);
9651 if (
len == 0)
return 0;
9653 if (rs == rb_default_rs) {
9654 return smart_chomp(str, e, p);
9657 enc = rb_enc_get(str);
9668 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
9675 while (e > p && *(e-1) ==
'\n') {
9677 if (e > p && *(e-1) ==
'\r')
9683 if (rslen >
len)
return len;
9685 enc = rb_enc_get(rs);
9686 newline = rsptr[rslen-1];
9689 if (newline ==
'\n')
9690 return smart_chomp(str, e, p);
9694 return smart_chomp(str, e, p);
9698 enc = rb_enc_check(str, rs);
9699 if (is_broken_string(rs)) {
9703 if (p[
len-1] == newline &&
9705 memcmp(rsptr, pp, rslen) == 0)) {
9706 if (at_char_boundary(p, pp, e, enc))
9719chomp_rs(
int argc,
const VALUE *argv)
9735 long olen = RSTRING_LEN(str);
9736 long len = chompped_length(str, rs);
9738 str_modify_keep_cr(str);
9739 STR_SET_LEN(str,
len);
9740 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
9757rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
9760 str_modifiable(str);
9761 if (RSTRING_LEN(str) == 0 && argc < 2)
return Qnil;
9762 rs = chomp_rs(argc, argv);
9764 return rb_str_chomp_string(str, rs);
9777rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
9779 VALUE rs = chomp_rs(argc, argv);
9781 return rb_str_subseq(str, 0, chompped_length(str, rs));
9787 const char *
const start = s;
9789 if (!s || s >= e)
return 0;
9792 if (single_byte_optimizable(str)) {
9793 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
9798 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
9818rb_str_lstrip_bang(
VALUE str)
9824 str_modify_keep_cr(str);
9825 enc = STR_ENC_GET(str);
9827 loffset = lstrip_offset(str, start, start+olen, enc);
9829 long len = olen-loffset;
9830 s = start + loffset;
9831 memmove(start, s,
len);
9832 STR_SET_LEN(str,
len);
9856rb_str_lstrip(
VALUE str)
9861 loffset = lstrip_offset(str, start, start+
len, STR_ENC_GET(str));
9862 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
9863 return rb_str_subseq(str, loffset,
len - loffset);
9871 rb_str_check_dummy_enc(enc);
9875 if (!s || s >= e)
return 0;
9879 if (single_byte_optimizable(str)) {
9881 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
9886 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
9906rb_str_rstrip_bang(
VALUE str)
9912 str_modify_keep_cr(str);
9913 enc = STR_ENC_GET(str);
9915 roffset = rstrip_offset(str, start, start+olen, enc);
9917 long len = olen - roffset;
9919 STR_SET_LEN(str,
len);
9943rb_str_rstrip(
VALUE str)
9949 enc = STR_ENC_GET(str);
9951 roffset = rstrip_offset(str, start, start+olen, enc);
9953 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
9954 return rb_str_subseq(str, 0, olen-roffset);
9969rb_str_strip_bang(
VALUE str)
9972 long olen, loffset, roffset;
9975 str_modify_keep_cr(str);
9976 enc = STR_ENC_GET(str);
9978 loffset = lstrip_offset(str, start, start+olen, enc);
9979 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
9981 if (loffset > 0 || roffset > 0) {
9982 long len = olen-roffset;
9985 memmove(start, start + loffset,
len);
9987 STR_SET_LEN(str,
len);
10011rb_str_strip(
VALUE str)
10014 long olen, loffset, roffset;
10018 loffset = lstrip_offset(str, start, start+olen, enc);
10019 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10021 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
10022 return rb_str_subseq(str, loffset, olen-loffset-roffset);
10026scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
10029 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
10035 end = pos + RSTRING_LEN(pat);
10049 if (RSTRING_LEN(str) > end)
10050 *start = end + rb_enc_fast_mbclen(RSTRING_PTR(str) + end,
10051 RSTRING_END(str), enc);
10059 if (!regs || regs->num_regs == 1) {
10060 result = rb_str_subseq(str, pos, end - pos);
10065 for (
int i = 1; i < regs->num_regs; i++) {
10068 s = rb_str_subseq(str, BEG(i), END(i)-BEG(i));
10071 rb_ary_push(result, s);
10126 long last = -1, prev = 0;
10127 char *p = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
10129 pat = get_pat_quoted(pat, 1);
10130 mustnot_broken(str);
10132 VALUE ary = rb_ary_new();
10134 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
10137 rb_ary_push(ary, result);
10139 if (last >= 0) rb_pat_search(pat, str, last, 1);
10144 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
10148 str_mod_check(str, p,
len);
10150 if (last >= 0) rb_pat_search(pat, str, last, 1);
10174rb_str_hex(
VALUE str)
10176 return rb_str_to_inum(str, 16, FALSE);
10201rb_str_oct(
VALUE str)
10203 return rb_str_to_inum(str, -8, FALSE);
10206#ifndef HAVE_CRYPT_R
10211 rb_nativethread_lock_t lock;
10212} crypt_mutex = {PTHREAD_MUTEX_INITIALIZER};
10215crypt_mutex_initialize(
void)
10286# define CRYPT_END() ALLOCV_END(databuf)
10288 extern char *crypt(
const char *,
const char *);
10289# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
10292 const char *s, *saltp;
10295 char salt_8bit_clean[3];
10299 mustnot_wchar(str);
10300 mustnot_wchar(salt);
10302 saltp = RSTRING_PTR(salt);
10303 if (RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
10304 rb_raise(rb_eArgError,
"salt too short (need >=2 bytes)");
10308 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
10309 salt_8bit_clean[0] = saltp[0] & 0x7f;
10310 salt_8bit_clean[1] = saltp[1] & 0x7f;
10311 salt_8bit_clean[2] =
'\0';
10312 saltp = salt_8bit_clean;
10317# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
10318 data->initialized = 0;
10320 res = crypt_r(s, saltp, data);
10322 crypt_mutex_initialize();
10324 res = crypt(s, saltp);
10365 char *ptr, *p, *pend;
10368 unsigned long sum0 = 0;
10373 ptr = p = RSTRING_PTR(str);
10374 len = RSTRING_LEN(str);
10380 str_mod_check(str, ptr,
len);
10383 sum0 += (
unsigned char)*p;
10394 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
10395 sum0 &= (((
unsigned long)1)<<bits)-1;
10415rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
10419 long width,
len, flen = 1, fclen = 1;
10422 const char *f =
" ";
10423 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
10425 int singlebyte = 1, cr;
10429 enc = STR_ENC_GET(str);
10434 enc = rb_enc_check(str, pad);
10435 f = RSTRING_PTR(pad);
10436 flen = RSTRING_LEN(pad);
10437 fclen = str_strlen(pad, enc);
10438 singlebyte = single_byte_optimizable(pad);
10439 if (flen == 0 || fclen == 0) {
10440 rb_raise(rb_eArgError,
"zero width padding");
10443 len = str_strlen(str, enc);
10444 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
10446 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
10450 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
10451 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
10453 size = RSTRING_LEN(str);
10454 if ((
len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
10455 (
len *= flen) >= LONG_MAX - llen2 - rlen2 ||
10456 (
len += llen2 + rlen2) >= LONG_MAX - size) {
10457 rb_raise(rb_eArgError,
"argument too big");
10461 p = RSTRING_PTR(res);
10463 memset(p, *f, llen);
10467 while (llen >= fclen) {
10473 memcpy(p, f, llen2);
10477 memcpy(p, RSTRING_PTR(str), size);
10480 memset(p, *f, rlen);
10484 while (rlen >= fclen) {
10490 memcpy(p, f, rlen2);
10494 TERM_FILL(p, termlen);
10495 STR_SET_LEN(res, p-RSTRING_PTR(res));
10496 rb_enc_associate(res, enc);
10518rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
10520 return rb_str_justify(argc, argv, str,
'l');
10534rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
10536 return rb_str_justify(argc, argv, str,
'r');
10551rb_str_center(
int argc,
VALUE *argv,
VALUE str)
10553 return rb_str_justify(argc, argv, str,
'c');
10569 sep = get_pat_quoted(sep, 0);
10578 sep = rb_str_subseq(str, pos, END(0) - pos);
10581 pos = rb_str_index(str, sep, 0);
10582 if (pos < 0)
goto failed;
10584 return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
10586 rb_str_subseq(str, pos+RSTRING_LEN(sep),
10587 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
10590 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
10604 long pos = RSTRING_LEN(str);
10606 sep = get_pat_quoted(sep, 0);
10615 sep = rb_str_subseq(str, pos, END(0) - pos);
10619 pos = rb_str_rindex(str, sep, pos);
10625 return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
10627 rb_str_subseq(str, pos+RSTRING_LEN(sep),
10628 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
10630 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
10642rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
10646 for (i=0; i<argc; i++) {
10647 VALUE tmp = argv[i];
10649 if (rb_reg_start_with_p(tmp, str))
10653 const char *p, *s, *e;
10658 enc = rb_enc_check(str, tmp);
10659 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
10660 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
10661 p = RSTRING_PTR(str);
10664 if (!at_char_right_boundary(p, s, e, enc))
10666 if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
10682rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
10686 for (i=0; i<argc; i++) {
10687 VALUE tmp = argv[i];
10688 const char *p, *s, *e;
10693 enc = rb_enc_check(str, tmp);
10694 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
10695 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
10696 p = RSTRING_PTR(str);
10699 if (!at_char_boundary(p, s, e, enc))
10701 if (memcmp(s, RSTRING_PTR(tmp), tlen) == 0)
10717deleted_prefix_length(
VALUE str,
VALUE prefix)
10719 const char *strptr, *prefixptr;
10720 long olen, prefixlen;
10725 if (!is_broken_string(prefix) ||
10726 !rb_enc_asciicompat(enc) ||
10727 !rb_enc_asciicompat(rb_enc_get(prefix))) {
10728 enc = rb_enc_check(str, prefix);
10732 prefixlen = RSTRING_LEN(prefix);
10733 if (prefixlen <= 0)
return 0;
10734 olen = RSTRING_LEN(str);
10735 if (olen < prefixlen)
return 0;
10736 strptr = RSTRING_PTR(str);
10737 prefixptr = RSTRING_PTR(prefix);
10738 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
10739 if (is_broken_string(prefix)) {
10740 if (!is_broken_string(str)) {
10744 const char *strend = strptr + olen;
10745 const char *after_prefix = strptr + prefixlen;
10746 if (!at_char_right_boundary(strptr, after_prefix, strend, enc)) {
10766rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
10769 str_modify_keep_cr(str);
10771 prefixlen = deleted_prefix_length(str, prefix);
10772 if (prefixlen <= 0)
return Qnil;
10786rb_str_delete_prefix(
VALUE str,
VALUE prefix)
10790 prefixlen = deleted_prefix_length(str, prefix);
10791 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
10793 return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
10806deleted_suffix_length(
VALUE str,
VALUE suffix)
10808 const char *strptr, *suffixptr;
10809 long olen, suffixlen;
10813 if (is_broken_string(suffix))
return 0;
10814 enc = rb_enc_check(str, suffix);
10817 suffixlen = RSTRING_LEN(suffix);
10818 if (suffixlen <= 0)
return 0;
10819 olen = RSTRING_LEN(str);
10820 if (olen < suffixlen)
return 0;
10821 strptr = RSTRING_PTR(str);
10822 suffixptr = RSTRING_PTR(suffix);
10823 const char *strend = strptr + olen;
10824 const char *before_suffix = strend - suffixlen;
10825 if (memcmp(before_suffix, suffixptr, suffixlen) != 0)
return 0;
10826 if (!at_char_boundary(strptr, before_suffix, strend, enc))
return 0;
10841rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
10843 long olen, suffixlen,
len;
10844 str_modifiable(str);
10846 suffixlen = deleted_suffix_length(str, suffix);
10847 if (suffixlen <= 0)
return Qnil;
10849 olen = RSTRING_LEN(str);
10850 str_modify_keep_cr(str);
10851 len = olen - suffixlen;
10852 STR_SET_LEN(str,
len);
10853 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10869rb_str_delete_suffix(
VALUE str,
VALUE suffix)
10873 suffixlen = deleted_suffix_length(str, suffix);
10874 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
10876 return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
10883 rb_raise(
rb_eTypeError,
"value of %"PRIsVALUE
" must be String", rb_id2str(
id));
10891 val = rb_fs_check(val);
10894 "value of %"PRIsVALUE
" must be String or Regexp",
10898 rb_warn_deprecated(
"`$;'", NULL);
10915 str_modifiable(str);
10918 int idx = rb_enc_to_index(encoding);
10925 rb_enc_associate_index(str, idx);
10949 if (STR_EMBED_P(str)) {
10950 str2 = str_alloc_embed(
rb_cString, RSTRING_LEN(str) + TERM_LEN(str));
10955 str_replace_shared_without_enc(str2, str);
10957 if (rb_enc_asciicompat(STR_ENC_GET(str))) {
10990rb_str_valid_encoding_p(
VALUE str)
11010rb_str_is_ascii_only_p(
VALUE str)
11020 static const char ellipsis[] =
"...";
11021 const long ellipsislen =
sizeof(ellipsis) - 1;
11023 const long blen = RSTRING_LEN(str);
11024 const char *
const p = RSTRING_PTR(str), *e = p + blen;
11025 VALUE estr, ret = 0;
11032 else if (
len <= ellipsislen ||
11034 if (rb_enc_asciicompat(enc)) {
11036 rb_enc_associate(ret, enc);
11043 else if (ret = rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
11044 rb_str_cat(ret, ellipsis, ellipsislen);
11048 rb_enc_from_encoding(enc), 0,
Qnil);
11061 rb_raise(rb_eArgError,
"replacement must be valid byte sequence '%+"PRIsVALUE
"'", str);
11067 rb_enc_name(enc), rb_enc_name(e));
11086 if (enc == STR_ENC_GET(str)) {
11091 return enc_str_scrub(enc, str, repl, cr);
11099 const char *rep, *p, *e, *p1, *sp;
11105 rb_raise(rb_eArgError,
"both of block and replacement given");
11112 if (!
NIL_P(repl)) {
11113 repl = str_compat_and_valid(repl, enc);
11116 if (rb_enc_dummy_p(enc)) {
11119 encidx = rb_enc_to_index(enc);
11121#define DEFAULT_REPLACE_CHAR(str) do { \
11122 static const char replace[sizeof(str)-1] = str; \
11123 rep = replace; replen = (int)sizeof(replace); \
11126 slen = RSTRING_LEN(str);
11127 p = RSTRING_PTR(str);
11128 e = RSTRING_END(str);
11132 if (rb_enc_asciicompat(enc)) {
11138 else if (!
NIL_P(repl)) {
11139 rep = RSTRING_PTR(repl);
11140 replen = RSTRING_LEN(repl);
11143 else if (encidx == rb_utf8_encindex()) {
11144 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
11148 DEFAULT_REPLACE_CHAR(
"?");
11153 p = search_nonascii(p, e);
11158 int ret = rb_enc_precise_mbclen(p, e, enc);
11172 if (
NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str));
11177 if (e - p < clen) clen = e - p;
11184 for (; clen > 1; clen--) {
11185 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11196 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11197 str_mod_check(str, sp, slen);
11198 repl = str_compat_and_valid(repl, enc);
11205 p = search_nonascii(p, e);
11220 buf = rb_str_buf_new(RSTRING_LEN(str));
11231 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11232 str_mod_check(str, sp, slen);
11233 repl = str_compat_and_valid(repl, enc);
11246 else if (!
NIL_P(repl)) {
11247 rep = RSTRING_PTR(repl);
11248 replen = RSTRING_LEN(repl);
11250 else if (encidx == ENCINDEX_UTF_16BE) {
11251 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
11253 else if (encidx == ENCINDEX_UTF_16LE) {
11254 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
11256 else if (encidx == ENCINDEX_UTF_32BE) {
11257 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
11259 else if (encidx == ENCINDEX_UTF_32LE) {
11260 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
11263 DEFAULT_REPLACE_CHAR(
"?");
11267 int ret = rb_enc_precise_mbclen(p, e, enc);
11277 if (
NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str));
11280 if (e - p < clen) clen = e - p;
11281 if (clen <= mbminlen * 2) {
11286 for (; clen > mbminlen; clen-=mbminlen) {
11287 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11297 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11298 str_mod_check(str, sp, slen);
11299 repl = str_compat_and_valid(repl, enc);
11314 buf = rb_str_buf_new(RSTRING_LEN(str));
11324 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11325 str_mod_check(str, sp, slen);
11326 repl = str_compat_and_valid(repl, enc);
11362str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
11370static ID id_normalize;
11371static ID id_normalized_p;
11372static VALUE mUnicodeNormalize;
11375unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
11377 static int UnicodeNormalizeRequired = 0;
11380 if (!UnicodeNormalizeRequired) {
11381 rb_require(
"unicode_normalize/normalize.rb");
11382 UnicodeNormalizeRequired = 1;
11386 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
11423rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
11425 return unicode_normalize_common(argc, argv, str, id_normalize);
11439rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
11441 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
11468rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
11470 return unicode_normalize_common(argc, argv, str, id_normalized_p);
/* sym_equal is simply another name for rb_obj_equal. */
11602#define sym_equal rb_obj_equal
11605sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
11609 int c = rb_enc_precise_mbclen(s, send, enc);
11621rb_str_symname_p(
VALUE sym)
11626 rb_encoding *resenc = rb_default_internal_encoding();
11628 if (resenc == NULL) resenc = rb_default_external_encoding();
11629 enc = STR_ENC_GET(sym);
11630 ptr = RSTRING_PTR(sym);
11631 len = RSTRING_LEN(sym);
11632 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) ||
len != (
long)strlen(ptr) ||
11640rb_str_quote_unprintable(
VALUE str)
11648 resenc = rb_default_internal_encoding();
11649 if (resenc == NULL) resenc = rb_default_external_encoding();
11650 enc = STR_ENC_GET(str);
11651 ptr = RSTRING_PTR(str);
11652 len = RSTRING_LEN(str);
11653 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
11654 !sym_printable(ptr, ptr +
len, enc)) {
11655 return rb_str_escape(str);
11661rb_id_quote_unprintable(
ID id)
11663 VALUE str = rb_id2str(
id);
11664 if (!rb_str_symname_p(str)) {
11665 return rb_str_escape(str);
11683sym_inspect(
VALUE sym)
11690 if (!rb_str_symname_p(str)) {
11692 len = RSTRING_LEN(str);
11693 rb_str_resize(str,
len + 1);
11694 dest = RSTRING_PTR(str);
11695 memmove(dest + 1, dest,
len);
11700 VALUE orig_str = str;
11703 str = rb_enc_str_new(0,
len + 1, enc);
11704 dest = RSTRING_PTR(str);
11705 memcpy(dest + 1, ptr,
len);
11731rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
11736 rb_raise(rb_eArgError,
"no receiver given");
11833 return rb_str_match(
rb_sym2str(sym), other);
11848sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
11850 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
11863sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
11865 return rb_str_match_m_p(argc, argv, sym);
11883 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
11894sym_length(
VALUE sym)
11908sym_empty(
VALUE sym)
11926 return rb_str_intern(rb_str_upcase(argc, argv,
rb_sym2str(sym)));
11942sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
11944 return rb_str_intern(rb_str_downcase(argc, argv,
rb_sym2str(sym)));
11958sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
11960 return rb_str_intern(rb_str_capitalize(argc, argv,
rb_sym2str(sym)));
11974sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
11976 return rb_str_intern(rb_str_swapcase(argc, argv,
rb_sym2str(sym)));
11988sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
11990 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
12003sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
12005 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
12017sym_encoding(
VALUE sym)
12023string_for_symbol(
VALUE name)
12042 name = string_for_symbol(name);
12043 return rb_intern_str(name);
12052 name = string_for_symbol(name);
12053 return rb_str_intern(name);
12076 return rb_fstring(str);
12083 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII), TRUE);
12095 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12096 rb_enc_autoload(enc);
12100 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc), TRUE);
12113 assert(rb_vm_fstring_table());
12114 st_foreach(rb_vm_fstring_table(), fstring_set_class_i,
rb_cString);
12279 rb_gc_register_address(&
rb_fs);
#define RUBY_ASSERT(expr)
Asserts that the given expression is truthy; the check is performed if and only if RUBY_DEBUG is truthy.
#define RUBY_ASSERT_ALWAYS(expr)
A variant of RUBY_ASSERT that does not depend on RUBY_DEBUG.
static enum ruby_coderange_type RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
"Mix" two code ranges into one.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_isascii(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isascii(), except it additionally takes an encoding.
static bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
Queries if the passed pointer points to a newline character.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
static VALUE RB_OBJ_FROZEN_RAW(VALUE obj)
This is an implementation detail of RB_OBJ_FROZEN().
@ RUBY_FL_FREEZE
This flag has something to do with data immutability.
void rb_include_module(VALUE klass, VALUE module)
Includes a module to a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves arguments from argc and argv into the given VALUE references according to the format string.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define NEWOBJ_OF
Old name of RB_NEWOBJ_OF.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define FL_EXIVAR
Old name of RUBY_FL_EXIVAR.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define xfree
Old name of ruby_xfree.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define rb_str_cat2
Old name of rb_str_cat_cstr.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define OBJ_FREEZE_RAW
Old name of RB_OBJ_FREEZE_RAW.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eIndexError
IndexError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords if any to the #initialize ...
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby string instead of a C string.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_obj_dup(VALUE obj)
Duplicates the given object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
static int RB_ENCODING_GET_INLINED(VALUE obj)
Queries the encoding of the passed object.
static OnigCodePoint rb_enc_mbc_to_codepoint(const char *p, const char *e, rb_encoding *enc)
Identical to rb_enc_codepoint(), except it assumes the passed character is not broken.
static int rb_enc_mbminlen(rb_encoding *enc)
Queries the minimum number of bytes that the passed encoding needs to represent a character.
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns a "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns a "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
return value of rb_econv_convert()
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because the destination buffer is full.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the g...
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
#define rb_check_frozen
Just another name of rb_check_frozen_inline
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every single bit of symbols that have ever been interned in the entire history of the current pr...
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
VALUE rb_str_to_interned_str(VALUE str)
Identical to rb_interned_str(), except it takes a Ruby string instead of a C string.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby string instead of a C string.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
#define rb_utf8_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "UTF-8" encoding.
#define rb_hash_end(h)
Just another name of st_hash_end
#define rb_hash_uint32(h, i)
Just another name of st_hash_uint32
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "defaultexternal" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
#define rb_str_buf_cat
Just another name of rb_str_cat
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
I guess there is no use case of this function in extension libraries, but this is a routine identical...
#define rb_str_buf_new_cstr(str)
Identical to rb_str_new_cstr, except done differently.
#define rb_usascii_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C string instead of a Ruby string.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C string.
#define rb_external_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "defaultexternal" encoding.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
int rb_str_comparable(VALUE str1, VALUE str2)
Checks if two strings are comparable with each other or not.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string to be a NUL-terminated ...
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
#define rb_locale_str_new_cstr(str)
Identical to rb_external_str_new_cstr, except it generates a string of "locale" encoding instead of "...
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
#define rb_str_dup_frozen
Just another name of rb_str_new_frozen
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary...
#define rb_utf8_str_new(str, len)
Identical to rb_str_new, except it generates a string of "UTF-8" encoding.
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "bin...
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE rb_sym2str(VALUE id)
Identical to rb_id2str(), except it takes an instance of rb_cSymbol rather than an ID.
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
int capa
Designed capacity of the buffer.
int off
Offset inside of ptr.
int len
Length of the buffer.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
void rb_define_hooked_variable(const char *q, VALUE *w, type *e, void_type *r)
Defines a function-backed global variable.
VALUE rb_ensure(type *q, VALUE w, type *e, VALUE r)
An equivalent of ensure clause.
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks the contents for viability as a...
static int RSTRING_LENINT(VALUE str)
Identical to RSTRING_LEN(), except it differs for the return type.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into default external encodi...
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks the contents for viability as a C stri...
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes a C string instead of a Ruby string.
#define errno
Ractor-aware version of errno.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
union RString::@50 as
String's specific fields.
struct RString::@50::@51 heap
Strings that use separated memory region for contents use this pattern.
struct RBasic basic
Basic part, including flags and class.
long capa
Capacity of *ptr.
struct RString::@50::@52 embed
Embedded contents.
long len
Length of the string, not including terminating NUL character.
union RString::@50::@51::@53 aux
Auxiliary info.
VALUE shared
Parent of the string.
char * ptr
Pointer to the contents of the string.
This is the struct that holds necessary info for a struct.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predicate failure.