Ruby 3.3.2p78 (2024-05-30 revision e5a195edf62fe1bf7146a191da13fa1c4fecbd71)
regcomp.c
1/**********************************************************************
2 regcomp.c - Onigmo (Oniguruma-mod) (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include "regparse.h"
32
33OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
34
35extern OnigCaseFoldType
36onig_get_default_case_fold_flag(void)
37{
38 return OnigDefaultCaseFoldFlag;
39}
40
41extern int
42onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
43{
44 OnigDefaultCaseFoldFlag = case_fold_flag;
45 return 0;
46}
47
48
49#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
51#endif
52
53#if 0
54static UChar*
55str_dup(UChar* s, UChar* end)
56{
57 ptrdiff_t len = end - s;
58
59 if (len > 0) {
60 UChar* r = (UChar* )xmalloc(len + 1);
61 CHECK_NULL_RETURN(r);
62 xmemcpy(r, s, len);
63 r[len] = (UChar )0;
64 return r;
65 }
66 else return NULL;
67}
68#endif
69
70static void
71swap_node(Node* a, Node* b)
72{
73 Node c;
74 c = *a; *a = *b; *b = c;
75
76 if (NTYPE(a) == NT_STR) {
77 StrNode* sn = NSTR(a);
78 if (sn->capa == 0) {
79 size_t len = sn->end - sn->s;
80 sn->s = sn->buf;
81 sn->end = sn->s + len;
82 }
83 }
84
85 if (NTYPE(b) == NT_STR) {
86 StrNode* sn = NSTR(b);
87 if (sn->capa == 0) {
88 size_t len = sn->end - sn->s;
89 sn->s = sn->buf;
90 sn->end = sn->s + len;
91 }
92 }
93}
94
95static OnigDistance
96distance_add(OnigDistance d1, OnigDistance d2)
97{
98 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
99 return ONIG_INFINITE_DISTANCE;
100 else {
101 if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
102 else return ONIG_INFINITE_DISTANCE;
103 }
104}
105
106static OnigDistance
107distance_multiply(OnigDistance d, int m)
108{
109 if (m == 0) return 0;
110
111 if (d < ONIG_INFINITE_DISTANCE / m)
112 return d * m;
113 else
114 return ONIG_INFINITE_DISTANCE;
115}
116
117static int
118bitset_is_empty(BitSetRef bs)
119{
120 int i;
121 for (i = 0; i < BITSET_SIZE; i++) {
122 if (bs[i] != 0) return 0;
123 }
124 return 1;
125}
126
127#ifdef ONIG_DEBUG
128static int
129bitset_on_num(BitSetRef bs)
130{
131 int i, n;
132
133 n = 0;
134 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
135 if (BITSET_AT(bs, i)) n++;
136 }
137 return n;
138}
139#endif
140
141// Attempt to right size allocated buffers for a regex post compile
142static void
143onig_reg_resize(regex_t *reg)
144{
145 do {
146 if (!reg->used) {
147 xfree(reg->p);
148 reg->alloc = 0;
149 reg->p = 0;
150 }
151 else if (reg->alloc > reg->used) {
152 unsigned char *new_ptr = xrealloc(reg->p, reg->used);
153 // Skip the right size optimization if memory allocation fails
154 if (new_ptr) {
155 reg->alloc = reg->used;
156 reg->p = new_ptr;
157 }
158 }
159 } while ((reg = reg->chain) != 0);
160}
161
162extern int
163onig_bbuf_init(BBuf* buf, OnigDistance size)
164{
165 if (size <= 0) {
166 size = 0;
167 buf->p = NULL;
168 }
169 else {
170 buf->p = (UChar* )xmalloc(size);
171 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
172 }
173
174 buf->alloc = (unsigned int )size;
175 buf->used = 0;
176 return 0;
177}
178
179
180#ifdef USE_SUBEXP_CALL
181
182static int
183unset_addr_list_init(UnsetAddrList* uslist, int size)
184{
185 UnsetAddr* p;
186
187 p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
188 CHECK_NULL_RETURN_MEMERR(p);
189 uslist->num = 0;
190 uslist->alloc = size;
191 uslist->us = p;
192 return 0;
193}
194
195static void
196unset_addr_list_end(UnsetAddrList* uslist)
197{
198 xfree(uslist->us);
199}
200
201static int
202unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
203{
204 UnsetAddr* p;
205 int size;
206
207 if (uslist->num >= uslist->alloc) {
208 size = uslist->alloc * 2;
209 p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
210 CHECK_NULL_RETURN_MEMERR(p);
211 uslist->alloc = size;
212 uslist->us = p;
213 }
214
215 uslist->us[uslist->num].offset = offset;
216 uslist->us[uslist->num].target = node;
217 uslist->num++;
218 return 0;
219}
220#endif /* USE_SUBEXP_CALL */
221
222
223static int
224add_opcode(regex_t* reg, int opcode)
225{
226 BBUF_ADD1(reg, opcode);
227 return 0;
228}
229
230#ifdef USE_COMBINATION_EXPLOSION_CHECK
231static int
232add_state_check_num(regex_t* reg, int num)
233{
234 StateCheckNumType n = (StateCheckNumType )num;
235
236 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
237 return 0;
238}
239#endif
240
241static int
242add_rel_addr(regex_t* reg, int addr)
243{
244 RelAddrType ra = (RelAddrType )addr;
245
246 BBUF_ADD(reg, &ra, SIZE_RELADDR);
247 return 0;
248}
249
250static int
251add_abs_addr(regex_t* reg, int addr)
252{
253 AbsAddrType ra = (AbsAddrType )addr;
254
255 BBUF_ADD(reg, &ra, SIZE_ABSADDR);
256 return 0;
257}
258
259static int
260add_length(regex_t* reg, OnigDistance len)
261{
262 LengthType l = (LengthType )len;
263
264 BBUF_ADD(reg, &l, SIZE_LENGTH);
265 return 0;
266}
267
268static int
269add_mem_num(regex_t* reg, int num)
270{
271 MemNumType n = (MemNumType )num;
272
273 BBUF_ADD(reg, &n, SIZE_MEMNUM);
274 return 0;
275}
276
277#if 0
278static int
279add_pointer(regex_t* reg, void* addr)
280{
281 PointerType ptr = (PointerType )addr;
282
283 BBUF_ADD(reg, &ptr, SIZE_POINTER);
284 return 0;
285}
286#endif
287
288static int
289add_option(regex_t* reg, OnigOptionType option)
290{
291 BBUF_ADD(reg, &option, SIZE_OPTION);
292 return 0;
293}
294
295static int
296add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
297{
298 int r;
299
300 r = add_opcode(reg, opcode);
301 if (r) return r;
302 r = add_rel_addr(reg, addr);
303 return r;
304}
305
306static int
307add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
308{
309 BBUF_ADD(reg, bytes, len);
310 return 0;
311}
312
313static int
314add_bitset(regex_t* reg, BitSetRef bs)
315{
316 BBUF_ADD(reg, bs, SIZE_BITSET);
317 return 0;
318}
319
320static int
321add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
322{
323 int r;
324
325 r = add_opcode(reg, opcode);
326 if (r) return r;
327 r = add_option(reg, option);
328 return r;
329}
330
331static int compile_length_tree(Node* node, regex_t* reg);
332static int compile_tree(Node* node, regex_t* reg);
333
334
335#define IS_NEED_STR_LEN_OP_EXACT(op) \
336 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
337 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
338
339static int
340select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
341{
342 int op;
343 OnigDistance str_len = roomof(byte_len, mb_len);
344
345 if (ignore_case) {
346 switch (str_len) {
347 case 1: op = OP_EXACT1_IC; break;
348 default: op = OP_EXACTN_IC; break;
349 }
350 }
351 else {
352 switch (mb_len) {
353 case 1:
354 switch (str_len) {
355 case 1: op = OP_EXACT1; break;
356 case 2: op = OP_EXACT2; break;
357 case 3: op = OP_EXACT3; break;
358 case 4: op = OP_EXACT4; break;
359 case 5: op = OP_EXACT5; break;
360 default: op = OP_EXACTN; break;
361 }
362 break;
363
364 case 2:
365 switch (str_len) {
366 case 1: op = OP_EXACTMB2N1; break;
367 case 2: op = OP_EXACTMB2N2; break;
368 case 3: op = OP_EXACTMB2N3; break;
369 default: op = OP_EXACTMB2N; break;
370 }
371 break;
372
373 case 3:
374 op = OP_EXACTMB3N;
375 break;
376
377 default:
378 op = OP_EXACTMBN;
379 break;
380 }
381 }
382 return op;
383}
384
385static int
386compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
387{
388 int r;
389 int saved_num_null_check = reg->num_null_check;
390
391 if (empty_info != 0) {
392 r = add_opcode(reg, OP_NULL_CHECK_START);
393 if (r) return r;
394 r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
395 if (r) return r;
396 reg->num_null_check++;
397 }
398
399 r = compile_tree(node, reg);
400 if (r) return r;
401
402 if (empty_info != 0) {
403 if (empty_info == NQ_TARGET_IS_EMPTY)
404 r = add_opcode(reg, OP_NULL_CHECK_END);
405 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
406 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
407 else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
408 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
409
410 if (r) return r;
411 r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
412 }
413 return r;
414}
415
416#ifdef USE_SUBEXP_CALL
417static int
418compile_call(CallNode* node, regex_t* reg)
419{
420 int r;
421
422 r = add_opcode(reg, OP_CALL);
423 if (r) return r;
424 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
425 node->target);
426 if (r) return r;
427 r = add_abs_addr(reg, 0 /*dummy addr.*/);
428 return r;
429}
430#endif
431
432static int
433compile_tree_n_times(Node* node, int n, regex_t* reg)
434{
435 int i, r;
436
437 for (i = 0; i < n; i++) {
438 r = compile_tree(node, reg);
439 if (r) return r;
440 }
441 return 0;
442}
443
444static int
445add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
446 regex_t* reg ARG_UNUSED, int ignore_case)
447{
448 int len;
449 int op = select_str_opcode(mb_len, byte_len, ignore_case);
450
451 len = SIZE_OPCODE;
452
453 if (op == OP_EXACTMBN) len += SIZE_LENGTH;
454 if (IS_NEED_STR_LEN_OP_EXACT(op))
455 len += SIZE_LENGTH;
456
457 len += (int )byte_len;
458 return len;
459}
460
461static int
462add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
463 regex_t* reg, int ignore_case)
464{
465 int op = select_str_opcode(mb_len, byte_len, ignore_case);
466 add_opcode(reg, op);
467
468 if (op == OP_EXACTMBN)
469 add_length(reg, mb_len);
470
471 if (IS_NEED_STR_LEN_OP_EXACT(op)) {
472 if (op == OP_EXACTN_IC)
473 add_length(reg, byte_len);
474 else
475 add_length(reg, byte_len / mb_len);
476 }
477
478 add_bytes(reg, s, byte_len);
479 return 0;
480}
481
482
483static int
484compile_length_string_node(Node* node, regex_t* reg)
485{
486 int rlen, r, len, prev_len, blen, ambig;
487 OnigEncoding enc = reg->enc;
488 UChar *p, *prev;
489 StrNode* sn;
490
491 sn = NSTR(node);
492 if (sn->end <= sn->s)
493 return 0;
494
495 ambig = NSTRING_IS_AMBIG(node);
496
497 p = prev = sn->s;
498 prev_len = enclen(enc, p, sn->end);
499 p += prev_len;
500 blen = prev_len;
501 rlen = 0;
502
503 for (; p < sn->end; ) {
504 len = enclen(enc, p, sn->end);
505 if (len == prev_len || ambig) {
506 blen += len;
507 }
508 else {
509 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
510 rlen += r;
511 prev = p;
512 blen = len;
513 prev_len = len;
514 }
515 p += len;
516 }
517 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
518 rlen += r;
519 return rlen;
520}
521
522static int
523compile_length_string_raw_node(StrNode* sn, regex_t* reg)
524{
525 if (sn->end <= sn->s)
526 return 0;
527
528 return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
529}
530
531static int
532compile_string_node(Node* node, regex_t* reg)
533{
534 int r, len, prev_len, blen, ambig;
535 OnigEncoding enc = reg->enc;
536 UChar *p, *prev, *end;
537 StrNode* sn;
538
539 sn = NSTR(node);
540 if (sn->end <= sn->s)
541 return 0;
542
543 end = sn->end;
544 ambig = NSTRING_IS_AMBIG(node);
545
546 p = prev = sn->s;
547 prev_len = enclen(enc, p, end);
548 p += prev_len;
549 blen = prev_len;
550
551 for (; p < end; ) {
552 len = enclen(enc, p, end);
553 if (len == prev_len || ambig) {
554 blen += len;
555 }
556 else {
557 r = add_compile_string(prev, prev_len, blen, reg, ambig);
558 if (r) return r;
559
560 prev = p;
561 blen = len;
562 prev_len = len;
563 }
564
565 p += len;
566 }
567 return add_compile_string(prev, prev_len, blen, reg, ambig);
568}
569
570static int
571compile_string_raw_node(StrNode* sn, regex_t* reg)
572{
573 if (sn->end <= sn->s)
574 return 0;
575
576 return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
577}
578
579static int
580add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
581{
582#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
583 add_length(reg, mbuf->used);
584 return add_bytes(reg, mbuf->p, mbuf->used);
585#else
586 int r, pad_size;
587 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
588
589 GET_ALIGNMENT_PAD_SIZE(p, pad_size);
590 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
591 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
592
593 r = add_bytes(reg, mbuf->p, mbuf->used);
594
595 /* padding for return value from compile_length_cclass_node() to be fix. */
596 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
597 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
598 return r;
599#endif
600}
601
602static int
603compile_length_cclass_node(CClassNode* cc, regex_t* reg)
604{
605 int len;
606
607 if (IS_NULL(cc->mbuf)) {
608 len = SIZE_OPCODE + SIZE_BITSET;
609 }
610 else {
611 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
612 len = SIZE_OPCODE;
613 }
614 else {
615 len = SIZE_OPCODE + SIZE_BITSET;
616 }
617#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
618 len += SIZE_LENGTH + cc->mbuf->used;
619#else
620 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
621#endif
622 }
623
624 return len;
625}
626
627static int
628compile_cclass_node(CClassNode* cc, regex_t* reg)
629{
630 int r;
631
632 if (IS_NULL(cc->mbuf)) {
633 if (IS_NCCLASS_NOT(cc))
634 add_opcode(reg, OP_CCLASS_NOT);
635 else
636 add_opcode(reg, OP_CCLASS);
637
638 r = add_bitset(reg, cc->bs);
639 }
640 else {
641 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
642 if (IS_NCCLASS_NOT(cc))
643 add_opcode(reg, OP_CCLASS_MB_NOT);
644 else
645 add_opcode(reg, OP_CCLASS_MB);
646
647 r = add_multi_byte_cclass(cc->mbuf, reg);
648 }
649 else {
650 if (IS_NCCLASS_NOT(cc))
651 add_opcode(reg, OP_CCLASS_MIX_NOT);
652 else
653 add_opcode(reg, OP_CCLASS_MIX);
654
655 r = add_bitset(reg, cc->bs);
656 if (r) return r;
657 r = add_multi_byte_cclass(cc->mbuf, reg);
658 }
659 }
660
661 return r;
662}
663
664static int
665entry_repeat_range(regex_t* reg, int id, int lower, int upper)
666{
667#define REPEAT_RANGE_ALLOC 4
668
670
671 if (reg->repeat_range_alloc == 0) {
672 p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
673 CHECK_NULL_RETURN_MEMERR(p);
674 reg->repeat_range = p;
675 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
676 }
677 else if (reg->repeat_range_alloc <= id) {
678 int n;
679 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
680 p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
681 sizeof(OnigRepeatRange) * n);
682 CHECK_NULL_RETURN_MEMERR(p);
683 reg->repeat_range = p;
684 reg->repeat_range_alloc = n;
685 }
686 else {
687 p = reg->repeat_range;
688 }
689
690 p[id].lower = lower;
691 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
692 return 0;
693}
694
695static int
696compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
697 regex_t* reg)
698{
699 int r;
700 int num_repeat = reg->num_repeat;
701
702 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
703 if (r) return r;
704 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
705 reg->num_repeat++;
706 if (r) return r;
707 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
708 if (r) return r;
709
710 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
711 if (r) return r;
712
713 r = compile_tree_empty_check(qn->target, reg, empty_info);
714 if (r) return r;
715
716 if (
717#ifdef USE_SUBEXP_CALL
718 reg->num_call > 0 ||
719#endif
720 IS_QUANTIFIER_IN_REPEAT(qn)) {
721 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
722 }
723 else {
724 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
725 }
726 if (r) return r;
727 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
728 return r;
729}
730
731static int
732is_anychar_star_quantifier(QtfrNode* qn)
733{
734 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
735 NTYPE(qn->target) == NT_CANY)
736 return 1;
737 else
738 return 0;
739}
740
741#define QUANTIFIER_EXPAND_LIMIT_SIZE 50
742#define CKN_ON (ckn > 0)
743
744#ifdef USE_COMBINATION_EXPLOSION_CHECK
745
746static int
747compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
748{
749 int len, mod_tlen, cklen;
750 int ckn;
751 int infinite = IS_REPEAT_INFINITE(qn->upper);
752 int empty_info = qn->target_empty_info;
753 int tlen = compile_length_tree(qn->target, reg);
754
755 if (tlen < 0) return tlen;
756
757 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
758
759 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
760
761 /* anychar repeat */
762 if (NTYPE(qn->target) == NT_CANY) {
763 if (qn->greedy && infinite) {
764 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
765 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
766 else
767 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
768 }
769 }
770
771 if (empty_info != 0)
772 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
773 else
774 mod_tlen = tlen;
775
776 if (infinite && qn->lower <= 1) {
777 if (qn->greedy) {
778 if (qn->lower == 1)
779 len = SIZE_OP_JUMP;
780 else
781 len = 0;
782
783 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
784 }
785 else {
786 if (qn->lower == 0)
787 len = SIZE_OP_JUMP;
788 else
789 len = 0;
790
791 len += mod_tlen + SIZE_OP_PUSH + cklen;
792 }
793 }
794 else if (qn->upper == 0) {
795 if (qn->is_referred != 0) /* /(?<n>..){0}/ */
796 len = SIZE_OP_JUMP + tlen;
797 else
798 len = 0;
799 }
800 else if (qn->upper == 1 && qn->greedy) {
801 if (qn->lower == 0) {
802 if (CKN_ON) {
803 len = SIZE_OP_STATE_CHECK_PUSH + tlen;
804 }
805 else {
806 len = SIZE_OP_PUSH + tlen;
807 }
808 }
809 else {
810 len = tlen;
811 }
812 }
813 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
814 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
815 }
816 else {
817 len = SIZE_OP_REPEAT_INC
818 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
819 if (CKN_ON)
820 len += SIZE_OP_STATE_CHECK;
821 }
822
823 return len;
824}
825
826static int
827compile_quantifier_node(QtfrNode* qn, regex_t* reg)
828{
829 int r, mod_tlen;
830 int ckn;
831 int infinite = IS_REPEAT_INFINITE(qn->upper);
832 int empty_info = qn->target_empty_info;
833 int tlen = compile_length_tree(qn->target, reg);
834
835 if (tlen < 0) return tlen;
836
837 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
838
839 if (is_anychar_star_quantifier(qn)) {
840 r = compile_tree_n_times(qn->target, qn->lower, reg);
841 if (r) return r;
842 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
843 if (IS_MULTILINE(reg->options))
844 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
845 else
846 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
847 if (r) return r;
848 if (CKN_ON) {
849 r = add_state_check_num(reg, ckn);
850 if (r) return r;
851 }
852
853 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
854 }
855 else {
856 if (IS_MULTILINE(reg->options)) {
857 r = add_opcode(reg, (CKN_ON ?
858 OP_STATE_CHECK_ANYCHAR_ML_STAR
859 : OP_ANYCHAR_ML_STAR));
860 }
861 else {
862 r = add_opcode(reg, (CKN_ON ?
863 OP_STATE_CHECK_ANYCHAR_STAR
864 : OP_ANYCHAR_STAR));
865 }
866 if (r) return r;
867 if (CKN_ON)
868 r = add_state_check_num(reg, ckn);
869
870 return r;
871 }
872 }
873
874 if (empty_info != 0)
875 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
876 else
877 mod_tlen = tlen;
878
879 if (infinite && qn->lower <= 1) {
880 if (qn->greedy) {
881 if (qn->lower == 1) {
882 r = add_opcode_rel_addr(reg, OP_JUMP,
883 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
884 if (r) return r;
885 }
886
887 if (CKN_ON) {
888 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
889 if (r) return r;
890 r = add_state_check_num(reg, ckn);
891 if (r) return r;
892 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
893 }
894 else {
895 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
896 }
897 if (r) return r;
898 r = compile_tree_empty_check(qn->target, reg, empty_info);
899 if (r) return r;
900 r = add_opcode_rel_addr(reg, OP_JUMP,
901 -(mod_tlen + (int )SIZE_OP_JUMP
902 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
903 }
904 else {
905 if (qn->lower == 0) {
906 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
907 if (r) return r;
908 }
909 r = compile_tree_empty_check(qn->target, reg, empty_info);
910 if (r) return r;
911 if (CKN_ON) {
912 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
913 if (r) return r;
914 r = add_state_check_num(reg, ckn);
915 if (r) return r;
916 r = add_rel_addr(reg,
917 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
918 }
919 else
920 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
921 }
922 }
923 else if (qn->upper == 0) {
924 if (qn->is_referred != 0) { /* /(?<n>..){0}/ */
925 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
926 if (r) return r;
927 r = compile_tree(qn->target, reg);
928 }
929 else
930 r = 0;
931 }
932 else if (qn->upper == 1 && qn->greedy) {
933 if (qn->lower == 0) {
934 if (CKN_ON) {
935 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
936 if (r) return r;
937 r = add_state_check_num(reg, ckn);
938 if (r) return r;
939 r = add_rel_addr(reg, tlen);
940 }
941 else {
942 r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
943 }
944 if (r) return r;
945 }
946
947 r = compile_tree(qn->target, reg);
948 }
949 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
950 if (CKN_ON) {
951 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
952 if (r) return r;
953 r = add_state_check_num(reg, ckn);
954 if (r) return r;
955 r = add_rel_addr(reg, SIZE_OP_JUMP);
956 }
957 else {
958 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
959 }
960
961 if (r) return r;
962 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
963 if (r) return r;
964 r = compile_tree(qn->target, reg);
965 }
966 else {
967 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
968 if (CKN_ON) {
969 if (r) return r;
970 r = add_opcode(reg, OP_STATE_CHECK);
971 if (r) return r;
972 r = add_state_check_num(reg, ckn);
973 }
974 }
975 return r;
976}
977
978#else /* USE_COMBINATION_EXPLOSION_CHECK */
979
980static int
981compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
982{
983 int len, mod_tlen;
984 int infinite = IS_REPEAT_INFINITE(qn->upper);
985 int empty_info = qn->target_empty_info;
986 int tlen = compile_length_tree(qn->target, reg);
987
988 if (tlen < 0) return tlen;
989
990 /* anychar repeat */
991 if (NTYPE(qn->target) == NT_CANY) {
992 if (qn->greedy && infinite) {
993 if (IS_NOT_NULL(qn->next_head_exact))
994 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
995 else
996 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
997 }
998 }
999
1000 if (empty_info != 0)
1001 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1002 else
1003 mod_tlen = tlen;
1004
1005 if (infinite &&
1006 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1007 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1008 len = SIZE_OP_JUMP;
1009 }
1010 else {
1011 len = tlen * qn->lower;
1012 }
1013
1014 if (qn->greedy) {
1015#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1016 if (IS_NOT_NULL(qn->head_exact))
1017 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1018 else
1019#endif
1020 if (IS_NOT_NULL(qn->next_head_exact))
1021 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1022 else
1023 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1024 }
1025 else
1026 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1027 }
1028 else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1029 len = SIZE_OP_JUMP + tlen;
1030 }
1031 else if (!infinite && qn->greedy &&
1032 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1033 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1034 len = tlen * qn->lower;
1035 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1036 }
1037 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1038 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1039 }
1040 else {
1041 len = SIZE_OP_REPEAT_INC
1042 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1043 }
1044
1045 return len;
1046}
1047
1048static int
1049compile_quantifier_node(QtfrNode* qn, regex_t* reg)
1050{
1051 int i, r, mod_tlen;
1052 int infinite = IS_REPEAT_INFINITE(qn->upper);
1053 int empty_info = qn->target_empty_info;
1054 int tlen = compile_length_tree(qn->target, reg);
1055
1056 if (tlen < 0) return tlen;
1057
1058 if (is_anychar_star_quantifier(qn)) {
1059 r = compile_tree_n_times(qn->target, qn->lower, reg);
1060 if (r) return r;
1061 if (IS_NOT_NULL(qn->next_head_exact)) {
1062 if (IS_MULTILINE(reg->options))
1063 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1064 else
1065 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1066 if (r) return r;
1067 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1068 }
1069 else {
1070 if (IS_MULTILINE(reg->options))
1071 return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1072 else
1073 return add_opcode(reg, OP_ANYCHAR_STAR);
1074 }
1075 }
1076
1077 if (empty_info != 0)
1078 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1079 else
1080 mod_tlen = tlen;
1081
1082 if (infinite &&
1083 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1084 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1085 if (qn->greedy) {
1086#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1087 if (IS_NOT_NULL(qn->head_exact))
1088 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1089 else
1090#endif
1091 if (IS_NOT_NULL(qn->next_head_exact))
1092 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1093 else
1094 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1095 }
1096 else {
1097 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1098 }
1099 if (r) return r;
1100 }
1101 else {
1102 r = compile_tree_n_times(qn->target, qn->lower, reg);
1103 if (r) return r;
1104 }
1105
1106 if (qn->greedy) {
1107#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1108 if (IS_NOT_NULL(qn->head_exact)) {
1109 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1110 mod_tlen + SIZE_OP_JUMP);
1111 if (r) return r;
1112 add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1113 r = compile_tree_empty_check(qn->target, reg, empty_info);
1114 if (r) return r;
1115 r = add_opcode_rel_addr(reg, OP_JUMP,
1116 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1117 }
1118 else
1119#endif
1120 if (IS_NOT_NULL(qn->next_head_exact)) {
1121 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1122 mod_tlen + SIZE_OP_JUMP);
1123 if (r) return r;
1124 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1125 r = compile_tree_empty_check(qn->target, reg, empty_info);
1126 if (r) return r;
1127 r = add_opcode_rel_addr(reg, OP_JUMP,
1128 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1129 }
1130 else {
1131 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1132 if (r) return r;
1133 r = compile_tree_empty_check(qn->target, reg, empty_info);
1134 if (r) return r;
1135 r = add_opcode_rel_addr(reg, OP_JUMP,
1136 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1137 }
1138 }
1139 else {
1140 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1141 if (r) return r;
1142 r = compile_tree_empty_check(qn->target, reg, empty_info);
1143 if (r) return r;
1144 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1145 }
1146 }
1147 else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1148 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1149 if (r) return r;
1150 r = compile_tree(qn->target, reg);
1151 }
1152 else if (!infinite && qn->greedy &&
1153 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1154 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1155 int n = qn->upper - qn->lower;
1156
1157 r = compile_tree_n_times(qn->target, qn->lower, reg);
1158 if (r) return r;
1159
1160 for (i = 0; i < n; i++) {
1161 r = add_opcode_rel_addr(reg, OP_PUSH,
1162 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1163 if (r) return r;
1164 r = compile_tree(qn->target, reg);
1165 if (r) return r;
1166 }
1167 }
1168 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1169 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1170 if (r) return r;
1171 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1172 if (r) return r;
1173 r = compile_tree(qn->target, reg);
1174 }
1175 else {
1176 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1177 }
1178 return r;
1179}
1180#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1181
1182static int
1183compile_length_option_node(EncloseNode* node, regex_t* reg)
1184{
1185 int tlen;
1186 OnigOptionType prev = reg->options;
1187
1188 reg->options = node->option;
1189 tlen = compile_length_tree(node->target, reg);
1190 reg->options = prev;
1191
1192 if (tlen < 0) return tlen;
1193
1194 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1195 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1196 + tlen + SIZE_OP_SET_OPTION;
1197 }
1198 else
1199 return tlen;
1200}
1201
1202static int
1203compile_option_node(EncloseNode* node, regex_t* reg)
1204{
1205 int r;
1206 OnigOptionType prev = reg->options;
1207
1208 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1209 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1210 if (r) return r;
1211 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1212 if (r) return r;
1213 r = add_opcode(reg, OP_FAIL);
1214 if (r) return r;
1215 }
1216
1217 reg->options = node->option;
1218 r = compile_tree(node->target, reg);
1219 reg->options = prev;
1220
1221 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1222 if (r) return r;
1223 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1224 }
1225 return r;
1226}
1227
1228static int
1229compile_length_enclose_node(EncloseNode* node, regex_t* reg)
1230{
1231 int len;
1232 int tlen;
1233
1234 if (node->type == ENCLOSE_OPTION)
1235 return compile_length_option_node(node, reg);
1236
1237 if (node->target) {
1238 tlen = compile_length_tree(node->target, reg);
1239 if (tlen < 0) return tlen;
1240 }
1241 else
1242 tlen = 0;
1243
1244 switch (node->type) {
1245 case ENCLOSE_MEMORY:
1246#ifdef USE_SUBEXP_CALL
1247 if (IS_ENCLOSE_CALLED(node)) {
1248 len = SIZE_OP_MEMORY_START_PUSH + tlen
1249 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1250 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1251 len += (IS_ENCLOSE_RECURSION(node)
1252 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1253 else
1254 len += (IS_ENCLOSE_RECURSION(node)
1255 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1256 }
1257 else if (IS_ENCLOSE_RECURSION(node)) {
1258 len = SIZE_OP_MEMORY_START_PUSH;
1259 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1260 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
1261 }
1262 else
1263#endif
1264 {
1265 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1266 len = SIZE_OP_MEMORY_START_PUSH;
1267 else
1268 len = SIZE_OP_MEMORY_START;
1269
1270 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1271 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1272 }
1273 break;
1274
1275 case ENCLOSE_STOP_BACKTRACK:
1276 /* Disable POP_STOP_BT optimization for simple repeat under the match cache */
1277 /* optimization because the match cache optimization pushes an extra item to */
1278 /* the stack and it breaks the assumption for this optimization. */
1279#ifndef USE_MATCH_CACHE
1280 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1281 QtfrNode* qn = NQTFR(node->target);
1282 tlen = compile_length_tree(qn->target, reg);
1283 if (tlen < 0) return tlen;
1284
1285 len = tlen * qn->lower
1286 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1287 }
1288 else {
1289#endif
1290 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1291#ifndef USE_MATCH_CACHE
1292 }
1293#endif
1294 break;
1295
1296 case ENCLOSE_CONDITION:
1297 len = SIZE_OP_CONDITION;
1298 if (NTYPE(node->target) == NT_ALT) {
1299 Node* x = node->target;
1300
1301 tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
1302 if (tlen < 0) return tlen;
1303 len += tlen + SIZE_OP_JUMP;
1304 if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1305 x = NCDR(x);
1306 tlen = compile_length_tree(NCAR(x), reg); /* no-node */
1307 if (tlen < 0) return tlen;
1308 len += tlen;
1309 if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1310 }
1311 else {
1312 return ONIGERR_PARSER_BUG;
1313 }
1314 break;
1315
1316 case ENCLOSE_ABSENT:
1317 len = SIZE_OP_PUSH_ABSENT_POS + SIZE_OP_ABSENT + tlen + SIZE_OP_ABSENT_END;
1318 break;
1319
1320 default:
1321 return ONIGERR_TYPE_BUG;
1322 break;
1323 }
1324
1325 return len;
1326}
1327
1328static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1329
1330static int
1331compile_enclose_node(EncloseNode* node, regex_t* reg)
1332{
1333 int r, len;
1334
1335 if (node->type == ENCLOSE_OPTION)
1336 return compile_option_node(node, reg);
1337
1338 switch (node->type) {
1339 case ENCLOSE_MEMORY:
1340#ifdef USE_SUBEXP_CALL
1341 if (IS_ENCLOSE_CALLED(node)) {
1342 r = add_opcode(reg, OP_CALL);
1343 if (r) return r;
1344 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1345 node->state |= NST_ADDR_FIXED;
1346 r = add_abs_addr(reg, (int )node->call_addr);
1347 if (r) return r;
1348 len = compile_length_tree(node->target, reg);
1349 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1350 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1351 len += (IS_ENCLOSE_RECURSION(node)
1352 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1353 else
1354 len += (IS_ENCLOSE_RECURSION(node)
1355 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1356
1357 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1358 if (r) return r;
1359 }
1360#endif
1361 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1362 r = add_opcode(reg, OP_MEMORY_START_PUSH);
1363 else
1364 r = add_opcode(reg, OP_MEMORY_START);
1365 if (r) return r;
1366 r = add_mem_num(reg, node->regnum);
1367 if (r) return r;
1368 r = compile_tree(node->target, reg);
1369 if (r) return r;
1370#ifdef USE_SUBEXP_CALL
1371 if (IS_ENCLOSE_CALLED(node)) {
1372 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1373 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1374 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1375 else
1376 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1377 ? OP_MEMORY_END_REC : OP_MEMORY_END));
1378
1379 if (r) return r;
1380 r = add_mem_num(reg, node->regnum);
1381 if (r) return r;
1382 r = add_opcode(reg, OP_RETURN);
1383 }
1384 else if (IS_ENCLOSE_RECURSION(node)) {
1385 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1386 r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
1387 else
1388 r = add_opcode(reg, OP_MEMORY_END_REC);
1389 if (r) return r;
1390 r = add_mem_num(reg, node->regnum);
1391 }
1392 else
1393#endif
1394 {
1395 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1396 r = add_opcode(reg, OP_MEMORY_END_PUSH);
1397 else
1398 r = add_opcode(reg, OP_MEMORY_END);
1399 if (r) return r;
1400 r = add_mem_num(reg, node->regnum);
1401 }
1402 break;
1403
1404 case ENCLOSE_STOP_BACKTRACK:
1405 /* Disable POP_STOP_BT optimization for simple repeat under the match cache */
1406 /* optimization because the match cache optimization pushes an extra item to */
1407 /* the stack and it breaks the assumption for this optimization. */
1408#ifndef USE_MATCH_CACHE
1409 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1410 QtfrNode* qn = NQTFR(node->target);
1411 r = compile_tree_n_times(qn->target, qn->lower, reg);
1412 if (r) return r;
1413
1414 len = compile_length_tree(qn->target, reg);
1415 if (len < 0) return len;
1416
1417 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
1418 if (r) return r;
1419 r = compile_tree(qn->target, reg);
1420 if (r) return r;
1421 r = add_opcode(reg, OP_POP);
1422 if (r) return r;
1423 r = add_opcode_rel_addr(reg, OP_JUMP,
1424 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1425 }
1426 else {
1427#endif
1428 r = add_opcode(reg, OP_PUSH_STOP_BT);
1429 if (r) return r;
1430 r = compile_tree(node->target, reg);
1431 if (r) return r;
1432 r = add_opcode(reg, OP_POP_STOP_BT);
1433#ifndef USE_MATCH_CACHE
1434 }
1435#endif
1436 break;
1437
1438 case ENCLOSE_CONDITION:
1439 r = add_opcode(reg, OP_CONDITION);
1440 if (r) return r;
1441 r = add_mem_num(reg, node->regnum);
1442 if (r) return r;
1443
1444 if (NTYPE(node->target) == NT_ALT) {
1445 Node* x = node->target;
1446 int len2;
1447
1448 len = compile_length_tree(NCAR(x), reg); /* yes-node */
1449 if (len < 0) return len;
1450 if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1451 x = NCDR(x);
1452 len2 = compile_length_tree(NCAR(x), reg); /* no-node */
1453 if (len2 < 0) return len2;
1454 if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1455
1456 x = node->target;
1457 r = add_rel_addr(reg, len + SIZE_OP_JUMP);
1458 if (r) return r;
1459 r = compile_tree(NCAR(x), reg); /* yes-node */
1460 if (r) return r;
1461 r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1462 if (r) return r;
1463 x = NCDR(x);
1464 r = compile_tree(NCAR(x), reg); /* no-node */
1465 }
1466 else {
1467 return ONIGERR_PARSER_BUG;
1468 }
1469 break;
1470
1471 case ENCLOSE_ABSENT:
1472 len = compile_length_tree(node->target, reg);
1473 if (len < 0) return len;
1474
1475 r = add_opcode(reg, OP_PUSH_ABSENT_POS);
1476 if (r) return r;
1477 r = add_opcode_rel_addr(reg, OP_ABSENT, len + SIZE_OP_ABSENT_END);
1478 if (r) return r;
1479 r = compile_tree(node->target, reg);
1480 if (r) return r;
1481 r = add_opcode(reg, OP_ABSENT_END);
1482 break;
1483
1484 default:
1485 return ONIGERR_TYPE_BUG;
1486 break;
1487 }
1488
1489 return r;
1490}
1491
1492static int
1493compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1494{
1495 int len;
1496 int tlen = 0;
1497
1498 if (node->target) {
1499 tlen = compile_length_tree(node->target, reg);
1500 if (tlen < 0) return tlen;
1501 }
1502
1503 switch (node->type) {
1504 case ANCHOR_PREC_READ:
1505 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1506 break;
1507 case ANCHOR_PREC_READ_NOT:
1508 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1509 break;
1510 case ANCHOR_LOOK_BEHIND:
1511 len = SIZE_OP_LOOK_BEHIND + tlen;
1512 break;
1513 case ANCHOR_LOOK_BEHIND_NOT:
1514 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
1515 break;
1516
1517 default:
1518 len = SIZE_OPCODE;
1519 break;
1520 }
1521
1522 return len;
1523}
1524
1525static int
1526compile_anchor_node(AnchorNode* node, regex_t* reg)
1527{
1528 int r, len;
1529
1530 switch (node->type) {
1531 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
1532 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
1533 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
1534 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
1535 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
1536 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1537
1538 case ANCHOR_WORD_BOUND:
1539 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1540 else r = add_opcode(reg, OP_WORD_BOUND);
1541 break;
1542 case ANCHOR_NOT_WORD_BOUND:
1543 if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1544 else r = add_opcode(reg, OP_NOT_WORD_BOUND);
1545 break;
1546#ifdef USE_WORD_BEGIN_END
1547 case ANCHOR_WORD_BEGIN:
1548 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1549 else r = add_opcode(reg, OP_WORD_BEGIN);
1550 break;
1551 case ANCHOR_WORD_END:
1552 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
1553 else r = add_opcode(reg, OP_WORD_END);
1554 break;
1555#endif
1556 case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP); break;
1557
1558 case ANCHOR_PREC_READ:
1559 r = add_opcode(reg, OP_PUSH_POS);
1560 if (r) return r;
1561 r = compile_tree(node->target, reg);
1562 if (r) return r;
1563 r = add_opcode(reg, OP_POP_POS);
1564 break;
1565
1566 case ANCHOR_PREC_READ_NOT:
1567 len = compile_length_tree(node->target, reg);
1568 if (len < 0) return len;
1569 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
1570 if (r) return r;
1571 r = compile_tree(node->target, reg);
1572 if (r) return r;
1573 r = add_opcode(reg, OP_FAIL_POS);
1574 break;
1575
1576 case ANCHOR_LOOK_BEHIND:
1577 {
1578 int n;
1579 r = add_opcode(reg, OP_LOOK_BEHIND);
1580 if (r) return r;
1581 if (node->char_len < 0) {
1582 r = get_char_length_tree(node->target, reg, &n);
1583 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1584 }
1585 else
1586 n = node->char_len;
1587 r = add_length(reg, n);
1588 if (r) return r;
1589 r = compile_tree(node->target, reg);
1590 }
1591 break;
1592
1593 case ANCHOR_LOOK_BEHIND_NOT:
1594 {
1595 int n;
1596 len = compile_length_tree(node->target, reg);
1597 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1598 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1599 if (r) return r;
1600 if (node->char_len < 0) {
1601 r = get_char_length_tree(node->target, reg, &n);
1602 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1603 }
1604 else
1605 n = node->char_len;
1606 r = add_length(reg, n);
1607 if (r) return r;
1608 r = compile_tree(node->target, reg);
1609 if (r) return r;
1610 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1611 }
1612 break;
1613
1614 default:
1615 return ONIGERR_TYPE_BUG;
1616 break;
1617 }
1618
1619 return r;
1620}
1621
1622static int
1623compile_length_tree(Node* node, regex_t* reg)
1624{
1625 int len, type, r;
1626
1627 type = NTYPE(node);
1628 switch (type) {
1629 case NT_LIST:
1630 len = 0;
1631 do {
1632 r = compile_length_tree(NCAR(node), reg);
1633 if (r < 0) return r;
1634 len += r;
1635 } while (IS_NOT_NULL(node = NCDR(node)));
1636 r = len;
1637 break;
1638
1639 case NT_ALT:
1640 {
1641 int n = 0;
1642 len = 0;
1643 do {
1644 r = compile_length_tree(NCAR(node), reg);
1645 if (r < 0) return r;
1646 len += r;
1647 n++;
1648 } while (IS_NOT_NULL(node = NCDR(node)));
1649 r = len;
1650 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1651 }
1652 break;
1653
1654 case NT_STR:
1655 if (NSTRING_IS_RAW(node))
1656 r = compile_length_string_raw_node(NSTR(node), reg);
1657 else
1658 r = compile_length_string_node(node, reg);
1659 break;
1660
1661 case NT_CCLASS:
1662 r = compile_length_cclass_node(NCCLASS(node), reg);
1663 break;
1664
1665 case NT_CTYPE:
1666 case NT_CANY:
1667 r = SIZE_OPCODE;
1668 break;
1669
1670 case NT_BREF:
1671 {
1672 BRefNode* br = NBREF(node);
1673
1674#ifdef USE_BACKREF_WITH_LEVEL
1675 if (IS_BACKREF_NEST_LEVEL(br)) {
1676 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1677 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1678 }
1679 else
1680#endif
1681 if (br->back_num == 1) {
1682 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1683 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1684 }
1685 else {
1686 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1687 }
1688 }
1689 break;
1690
1691#ifdef USE_SUBEXP_CALL
1692 case NT_CALL:
1693 r = SIZE_OP_CALL;
1694 break;
1695#endif
1696
1697 case NT_QTFR:
1698 r = compile_length_quantifier_node(NQTFR(node), reg);
1699 break;
1700
1701 case NT_ENCLOSE:
1702 r = compile_length_enclose_node(NENCLOSE(node), reg);
1703 break;
1704
1705 case NT_ANCHOR:
1706 r = compile_length_anchor_node(NANCHOR(node), reg);
1707 break;
1708
1709 default:
1710 return ONIGERR_TYPE_BUG;
1711 break;
1712 }
1713
1714 return r;
1715}
1716
1717static int
1718compile_tree(Node* node, regex_t* reg)
1719{
1720 int n, type, len, pos, r = 0;
1721
1722 type = NTYPE(node);
1723 switch (type) {
1724 case NT_LIST:
1725 do {
1726 r = compile_tree(NCAR(node), reg);
1727 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1728 break;
1729
1730 case NT_ALT:
1731 {
1732 Node* x = node;
1733 len = 0;
1734 do {
1735 len += compile_length_tree(NCAR(x), reg);
1736 if (NCDR(x) != NULL) {
1737 len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1738 }
1739 } while (IS_NOT_NULL(x = NCDR(x)));
1740 pos = reg->used + len; /* goal position */
1741
1742 do {
1743 len = compile_length_tree(NCAR(node), reg);
1744 if (IS_NOT_NULL(NCDR(node))) {
1745 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1746 if (r) break;
1747 }
1748 r = compile_tree(NCAR(node), reg);
1749 if (r) break;
1750 if (IS_NOT_NULL(NCDR(node))) {
1751 len = pos - (reg->used + SIZE_OP_JUMP);
1752 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1753 if (r) break;
1754 }
1755 } while (IS_NOT_NULL(node = NCDR(node)));
1756 }
1757 break;
1758
1759 case NT_STR:
1760 if (NSTRING_IS_RAW(node))
1761 r = compile_string_raw_node(NSTR(node), reg);
1762 else
1763 r = compile_string_node(node, reg);
1764 break;
1765
1766 case NT_CCLASS:
1767 r = compile_cclass_node(NCCLASS(node), reg);
1768 break;
1769
1770 case NT_CTYPE:
1771 {
1772 int op;
1773
1774 switch (NCTYPE(node)->ctype) {
1775 case ONIGENC_CTYPE_WORD:
1776 if (NCTYPE(node)->ascii_range != 0) {
1777 if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
1778 else op = OP_ASCII_WORD;
1779 }
1780 else {
1781 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
1782 else op = OP_WORD;
1783 }
1784 break;
1785 default:
1786 return ONIGERR_TYPE_BUG;
1787 break;
1788 }
1789 r = add_opcode(reg, op);
1790 }
1791 break;
1792
1793 case NT_CANY:
1794 if (IS_MULTILINE(reg->options))
1795 r = add_opcode(reg, OP_ANYCHAR_ML);
1796 else
1797 r = add_opcode(reg, OP_ANYCHAR);
1798 break;
1799
1800 case NT_BREF:
1801 {
1802 BRefNode* br = NBREF(node);
1803
1804#ifdef USE_BACKREF_WITH_LEVEL
1805 if (IS_BACKREF_NEST_LEVEL(br)) {
1806 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1807 if (r) return r;
1808 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1809 if (r) return r;
1810 r = add_length(reg, br->nest_level);
1811 if (r) return r;
1812
1813 goto add_bacref_mems;
1814 }
1815 else
1816#endif
1817 if (br->back_num == 1) {
1818 n = br->back_static[0];
1819 if (IS_IGNORECASE(reg->options)) {
1820 r = add_opcode(reg, OP_BACKREFN_IC);
1821 if (r) return r;
1822 r = add_mem_num(reg, n);
1823 }
1824 else {
1825 switch (n) {
1826 case 1: r = add_opcode(reg, OP_BACKREF1); break;
1827 case 2: r = add_opcode(reg, OP_BACKREF2); break;
1828 default:
1829 r = add_opcode(reg, OP_BACKREFN);
1830 if (r) return r;
1831 r = add_mem_num(reg, n);
1832 break;
1833 }
1834 }
1835 }
1836 else {
1837 int i;
1838 int* p;
1839
1840 if (IS_IGNORECASE(reg->options)) {
1841 r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1842 }
1843 else {
1844 r = add_opcode(reg, OP_BACKREF_MULTI);
1845 }
1846 if (r) return r;
1847
1848#ifdef USE_BACKREF_WITH_LEVEL
1849 add_bacref_mems:
1850#endif
1851 r = add_length(reg, br->back_num);
1852 if (r) return r;
1853 p = BACKREFS_P(br);
1854 for (i = br->back_num - 1; i >= 0; i--) {
1855 r = add_mem_num(reg, p[i]);
1856 if (r) return r;
1857 }
1858 }
1859 }
1860 break;
1861
1862#ifdef USE_SUBEXP_CALL
1863 case NT_CALL:
1864 r = compile_call(NCALL(node), reg);
1865 break;
1866#endif
1867
1868 case NT_QTFR:
1869 r = compile_quantifier_node(NQTFR(node), reg);
1870 break;
1871
1872 case NT_ENCLOSE:
1873 r = compile_enclose_node(NENCLOSE(node), reg);
1874 break;
1875
1876 case NT_ANCHOR:
1877 r = compile_anchor_node(NANCHOR(node), reg);
1878 break;
1879
1880 default:
1881#ifdef ONIG_DEBUG
1882 fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1883#endif
1884 break;
1885 }
1886
1887 return r;
1888}
1889
1890#ifdef USE_NAMED_GROUP
1891
1892static int
1893noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1894{
1895 int r = 0;
1896 Node* node = *plink;
1897
1898 switch (NTYPE(node)) {
1899 case NT_LIST:
1900 case NT_ALT:
1901 do {
1902 r = noname_disable_map(&(NCAR(node)), map, counter);
1903 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1904 break;
1905
1906 case NT_QTFR:
1907 {
1908 Node** ptarget = &(NQTFR(node)->target);
1909 Node* old = *ptarget;
1910 r = noname_disable_map(ptarget, map, counter);
1911 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1912 onig_reduce_nested_quantifier(node, *ptarget);
1913 }
1914 }
1915 break;
1916
1917 case NT_ENCLOSE:
1918 {
1919 EncloseNode* en = NENCLOSE(node);
1920 if (en->type == ENCLOSE_MEMORY) {
1921 if (IS_ENCLOSE_NAMED_GROUP(en)) {
1922 (*counter)++;
1923 map[en->regnum].new_val = *counter;
1924 en->regnum = *counter;
1925 }
1926 else if (en->regnum != 0) {
1927 *plink = en->target;
1928 en->target = NULL_NODE;
1929 onig_node_free(node);
1930 r = noname_disable_map(plink, map, counter);
1931 break;
1932 }
1933 }
1934 r = noname_disable_map(&(en->target), map, counter);
1935 }
1936 break;
1937
1938 case NT_ANCHOR:
1939 if (NANCHOR(node)->target)
1940 r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
1941 break;
1942
1943 default:
1944 break;
1945 }
1946
1947 return r;
1948}
1949
1950static int
1951renumber_node_backref(Node* node, GroupNumRemap* map, const int num_mem)
1952{
1953 int i, pos, n, old_num;
1954 int *backs;
1955 BRefNode* bn = NBREF(node);
1956
1957 if (! IS_BACKREF_NAME_REF(bn))
1958 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1959
1960 old_num = bn->back_num;
1961 if (IS_NULL(bn->back_dynamic))
1962 backs = bn->back_static;
1963 else
1964 backs = bn->back_dynamic;
1965
1966 for (i = 0, pos = 0; i < old_num; i++) {
1967 if (backs[i] > num_mem) return ONIGERR_INVALID_BACKREF;
1968 n = map[backs[i]].new_val;
1969 if (n > 0) {
1970 backs[pos] = n;
1971 pos++;
1972 }
1973 }
1974
1975 bn->back_num = pos;
1976 return 0;
1977}
1978
1979static int
1980renumber_by_map(Node* node, GroupNumRemap* map, const int num_mem)
1981{
1982 int r = 0;
1983
1984 switch (NTYPE(node)) {
1985 case NT_LIST:
1986 case NT_ALT:
1987 do {
1988 r = renumber_by_map(NCAR(node), map, num_mem);
1989 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1990 break;
1991 case NT_QTFR:
1992 r = renumber_by_map(NQTFR(node)->target, map, num_mem);
1993 break;
1994 case NT_ENCLOSE:
1995 {
1996 EncloseNode* en = NENCLOSE(node);
1997 if (en->type == ENCLOSE_CONDITION) {
1998 if (en->regnum > num_mem) return ONIGERR_INVALID_BACKREF;
1999 en->regnum = map[en->regnum].new_val;
2000 }
2001 r = renumber_by_map(en->target, map, num_mem);
2002 }
2003 break;
2004
2005 case NT_BREF:
2006 r = renumber_node_backref(node, map, num_mem);
2007 break;
2008
2009 case NT_ANCHOR:
2010 if (NANCHOR(node)->target)
2011 r = renumber_by_map(NANCHOR(node)->target, map, num_mem);
2012 break;
2013
2014 default:
2015 break;
2016 }
2017
2018 return r;
2019}
2020
2021static int
2022numbered_ref_check(Node* node)
2023{
2024 int r = 0;
2025
2026 switch (NTYPE(node)) {
2027 case NT_LIST:
2028 case NT_ALT:
2029 do {
2030 r = numbered_ref_check(NCAR(node));
2031 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2032 break;
2033 case NT_QTFR:
2034 r = numbered_ref_check(NQTFR(node)->target);
2035 break;
2036 case NT_ENCLOSE:
2037 r = numbered_ref_check(NENCLOSE(node)->target);
2038 break;
2039
2040 case NT_BREF:
2041 if (! IS_BACKREF_NAME_REF(NBREF(node)))
2042 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2043 break;
2044
2045 case NT_ANCHOR:
2046 if (NANCHOR(node)->target)
2047 r = numbered_ref_check(NANCHOR(node)->target);
2048 break;
2049
2050 default:
2051 break;
2052 }
2053
2054 return r;
2055}
2056
2057static int
2058disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
2059{
2060 int r, i, pos, counter;
2061 BitStatusType loc;
2062 GroupNumRemap* map;
2063
2064 map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
2065 CHECK_NULL_RETURN_MEMERR(map);
2066 for (i = 1; i <= env->num_mem; i++) {
2067 map[i].new_val = 0;
2068 }
2069 counter = 0;
2070 r = noname_disable_map(root, map, &counter);
2071 if (r != 0) return r;
2072
2073 r = renumber_by_map(*root, map, env->num_mem);
2074 if (r != 0) return r;
2075
2076 for (i = 1, pos = 1; i <= env->num_mem; i++) {
2077 if (map[i].new_val > 0) {
2078 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
2079 pos++;
2080 }
2081 }
2082
2083 loc = env->capture_history;
2084 BIT_STATUS_CLEAR(env->capture_history);
2085 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2086 if (BIT_STATUS_AT(loc, i)) {
2087 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
2088 }
2089 }
2090
2091 env->num_mem = env->num_named;
2092 reg->num_mem = env->num_named;
2093
2094 return onig_renumber_name_table(reg, map);
2095}
2096#endif /* USE_NAMED_GROUP */
2097
2098#ifdef USE_SUBEXP_CALL
2099static int
2100unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
2101{
2102 int i, offset;
2103 EncloseNode* en;
2104 AbsAddrType addr;
2105
2106 for (i = 0; i < uslist->num; i++) {
2107 en = NENCLOSE(uslist->us[i].target);
2108 if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
2109 addr = en->call_addr;
2110 offset = uslist->us[i].offset;
2111
2112 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2113 }
2114 return 0;
2115}
2116#endif
2117
2118#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2119static int
2120quantifiers_memory_node_info(Node* node)
2121{
2122 int r = 0;
2123
2124 switch (NTYPE(node)) {
2125 case NT_LIST:
2126 case NT_ALT:
2127 {
2128 int v;
2129 do {
2130 v = quantifiers_memory_node_info(NCAR(node));
2131 if (v > r) r = v;
2132 } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2133 }
2134 break;
2135
2136# ifdef USE_SUBEXP_CALL
2137 case NT_CALL:
2138 if (IS_CALL_RECURSION(NCALL(node))) {
2139 return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
2140 }
2141 else
2142 r = quantifiers_memory_node_info(NCALL(node)->target);
2143 break;
2144# endif
2145
2146 case NT_QTFR:
2147 {
2148 QtfrNode* qn = NQTFR(node);
2149 if (qn->upper != 0) {
2150 r = quantifiers_memory_node_info(qn->target);
2151 }
2152 }
2153 break;
2154
2155 case NT_ENCLOSE:
2156 {
2157 EncloseNode* en = NENCLOSE(node);
2158 switch (en->type) {
2159 case ENCLOSE_MEMORY:
2160 return NQ_TARGET_IS_EMPTY_MEM;
2161 break;
2162
2163 case ENCLOSE_OPTION:
2164 case ENCLOSE_STOP_BACKTRACK:
2165 case ENCLOSE_CONDITION:
2166 case ENCLOSE_ABSENT:
2167 r = quantifiers_memory_node_info(en->target);
2168 break;
2169 default:
2170 break;
2171 }
2172 }
2173 break;
2174
2175 case NT_BREF:
2176 case NT_STR:
2177 case NT_CTYPE:
2178 case NT_CCLASS:
2179 case NT_CANY:
2180 case NT_ANCHOR:
2181 default:
2182 break;
2183 }
2184
2185 return r;
2186}
2187#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2188
2189static int
2190get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
2191{
2192 OnigDistance tmin;
2193 int r = 0;
2194
2195 *min = 0;
2196 switch (NTYPE(node)) {
2197 case NT_BREF:
2198 {
2199 int i;
2200 int* backs;
2201 Node** nodes = SCANENV_MEM_NODES(env);
2202 BRefNode* br = NBREF(node);
2203 if (br->state & NST_RECURSION) break;
2204
2205 backs = BACKREFS_P(br);
2206 if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2207 r = get_min_match_length(nodes[backs[0]], min, env);
2208 if (r != 0) break;
2209 for (i = 1; i < br->back_num; i++) {
2210 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2211 r = get_min_match_length(nodes[backs[i]], &tmin, env);
2212 if (r != 0) break;
2213 if (*min > tmin) *min = tmin;
2214 }
2215 }
2216 break;
2217
2218#ifdef USE_SUBEXP_CALL
2219 case NT_CALL:
2220 if (IS_CALL_RECURSION(NCALL(node))) {
2221 EncloseNode* en = NENCLOSE(NCALL(node)->target);
2222 if (IS_ENCLOSE_MIN_FIXED(en))
2223 *min = en->min_len;
2224 }
2225 else
2226 r = get_min_match_length(NCALL(node)->target, min, env);
2227 break;
2228#endif
2229
2230 case NT_LIST:
2231 do {
2232 r = get_min_match_length(NCAR(node), &tmin, env);
2233 if (r == 0) *min += tmin;
2234 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2235 break;
2236
2237 case NT_ALT:
2238 {
2239 Node *x, *y;
2240 y = node;
2241 do {
2242 x = NCAR(y);
2243 r = get_min_match_length(x, &tmin, env);
2244 if (r != 0) break;
2245 if (y == node) *min = tmin;
2246 else if (*min > tmin) *min = tmin;
2247 } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2248 }
2249 break;
2250
2251 case NT_STR:
2252 {
2253 StrNode* sn = NSTR(node);
2254 *min = sn->end - sn->s;
2255 }
2256 break;
2257
2258 case NT_CTYPE:
2259 *min = 1;
2260 break;
2261
2262 case NT_CCLASS:
2263 case NT_CANY:
2264 *min = 1;
2265 break;
2266
2267 case NT_QTFR:
2268 {
2269 QtfrNode* qn = NQTFR(node);
2270
2271 if (qn->lower > 0) {
2272 r = get_min_match_length(qn->target, min, env);
2273 if (r == 0)
2274 *min = distance_multiply(*min, qn->lower);
2275 }
2276 }
2277 break;
2278
2279 case NT_ENCLOSE:
2280 {
2281 EncloseNode* en = NENCLOSE(node);
2282 switch (en->type) {
2283 case ENCLOSE_MEMORY:
2284 if (IS_ENCLOSE_MIN_FIXED(en))
2285 *min = en->min_len;
2286 else {
2287 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2288 *min = 0; /* recursive */
2289 else {
2290 SET_ENCLOSE_STATUS(node, NST_MARK1);
2291 r = get_min_match_length(en->target, min, env);
2292 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2293 if (r == 0) {
2294 en->min_len = *min;
2295 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
2296 }
2297 }
2298 }
2299 break;
2300
2301 case ENCLOSE_OPTION:
2302 case ENCLOSE_STOP_BACKTRACK:
2303 case ENCLOSE_CONDITION:
2304 r = get_min_match_length(en->target, min, env);
2305 break;
2306
2307 case ENCLOSE_ABSENT:
2308 break;
2309 }
2310 }
2311 break;
2312
2313 case NT_ANCHOR:
2314 default:
2315 break;
2316 }
2317
2318 return r;
2319}
2320
2321static int
2322get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
2323{
2324 OnigDistance tmax;
2325 int r = 0;
2326
2327 *max = 0;
2328 switch (NTYPE(node)) {
2329 case NT_LIST:
2330 do {
2331 r = get_max_match_length(NCAR(node), &tmax, env);
2332 if (r == 0)
2333 *max = distance_add(*max, tmax);
2334 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2335 break;
2336
2337 case NT_ALT:
2338 do {
2339 r = get_max_match_length(NCAR(node), &tmax, env);
2340 if (r == 0 && *max < tmax) *max = tmax;
2341 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2342 break;
2343
2344 case NT_STR:
2345 {
2346 StrNode* sn = NSTR(node);
2347 *max = sn->end - sn->s;
2348 }
2349 break;
2350
2351 case NT_CTYPE:
2352 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2353 break;
2354
2355 case NT_CCLASS:
2356 case NT_CANY:
2357 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2358 break;
2359
2360 case NT_BREF:
2361 {
2362 int i;
2363 int* backs;
2364 Node** nodes = SCANENV_MEM_NODES(env);
2365 BRefNode* br = NBREF(node);
2366 if (br->state & NST_RECURSION) {
2367 *max = ONIG_INFINITE_DISTANCE;
2368 break;
2369 }
2370 backs = BACKREFS_P(br);
2371 for (i = 0; i < br->back_num; i++) {
2372 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2373 r = get_max_match_length(nodes[backs[i]], &tmax, env);
2374 if (r != 0) break;
2375 if (*max < tmax) *max = tmax;
2376 }
2377 }
2378 break;
2379
2380#ifdef USE_SUBEXP_CALL
2381 case NT_CALL:
2382 if (! IS_CALL_RECURSION(NCALL(node)))
2383 r = get_max_match_length(NCALL(node)->target, max, env);
2384 else
2385 *max = ONIG_INFINITE_DISTANCE;
2386 break;
2387#endif
2388
2389 case NT_QTFR:
2390 {
2391 QtfrNode* qn = NQTFR(node);
2392
2393 if (qn->upper != 0) {
2394 r = get_max_match_length(qn->target, max, env);
2395 if (r == 0 && *max != 0) {
2396 if (! IS_REPEAT_INFINITE(qn->upper))
2397 *max = distance_multiply(*max, qn->upper);
2398 else
2399 *max = ONIG_INFINITE_DISTANCE;
2400 }
2401 }
2402 }
2403 break;
2404
2405 case NT_ENCLOSE:
2406 {
2407 EncloseNode* en = NENCLOSE(node);
2408 switch (en->type) {
2409 case ENCLOSE_MEMORY:
2410 if (IS_ENCLOSE_MAX_FIXED(en))
2411 *max = en->max_len;
2412 else {
2413 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2414 *max = ONIG_INFINITE_DISTANCE;
2415 else {
2416 SET_ENCLOSE_STATUS(node, NST_MARK1);
2417 r = get_max_match_length(en->target, max, env);
2418 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2419 if (r == 0) {
2420 en->max_len = *max;
2421 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
2422 }
2423 }
2424 }
2425 break;
2426
2427 case ENCLOSE_OPTION:
2428 case ENCLOSE_STOP_BACKTRACK:
2429 case ENCLOSE_CONDITION:
2430 r = get_max_match_length(en->target, max, env);
2431 break;
2432
2433 case ENCLOSE_ABSENT:
2434 break;
2435 }
2436 }
2437 break;
2438
2439 case NT_ANCHOR:
2440 default:
2441 break;
2442 }
2443
2444 return r;
2445}
2446
2447#define GET_CHAR_LEN_VARLEN -1
2448#define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2449
2450/* fixed size pattern node only */
2451static int
2452get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2453{
2454 int tlen;
2455 int r = 0;
2456
2457 level++;
2458 *len = 0;
2459 switch (NTYPE(node)) {
2460 case NT_LIST:
2461 do {
2462 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2463 if (r == 0)
2464 *len = (int )distance_add(*len, tlen);
2465 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2466 break;
2467
2468 case NT_ALT:
2469 {
2470 int tlen2;
2471 int varlen = 0;
2472
2473 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2474 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2475 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2476 if (r == 0) {
2477 if (tlen != tlen2)
2478 varlen = 1;
2479 }
2480 }
2481 if (r == 0) {
2482 if (varlen != 0) {
2483 if (level == 1)
2484 r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2485 else
2486 r = GET_CHAR_LEN_VARLEN;
2487 }
2488 else
2489 *len = tlen;
2490 }
2491 }
2492 break;
2493
2494 case NT_STR:
2495 {
2496 StrNode* sn = NSTR(node);
2497 UChar *s = sn->s;
2498 while (s < sn->end) {
2499 s += enclen(reg->enc, s, sn->end);
2500 (*len)++;
2501 }
2502 }
2503 break;
2504
2505 case NT_QTFR:
2506 {
2507 QtfrNode* qn = NQTFR(node);
2508 if (qn->lower == qn->upper) {
2509 r = get_char_length_tree1(qn->target, reg, &tlen, level);
2510 if (r == 0)
2511 *len = (int )distance_multiply(tlen, qn->lower);
2512 }
2513 else
2514 r = GET_CHAR_LEN_VARLEN;
2515 }
2516 break;
2517
2518#ifdef USE_SUBEXP_CALL
2519 case NT_CALL:
2520 if (! IS_CALL_RECURSION(NCALL(node)))
2521 r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2522 else
2523 r = GET_CHAR_LEN_VARLEN;
2524 break;
2525#endif
2526
2527 case NT_CTYPE:
2528 *len = 1;
2529 break;
2530
2531 case NT_CCLASS:
2532 case NT_CANY:
2533 *len = 1;
2534 break;
2535
2536 case NT_ENCLOSE:
2537 {
2538 EncloseNode* en = NENCLOSE(node);
2539 switch (en->type) {
2540 case ENCLOSE_MEMORY:
2541#ifdef USE_SUBEXP_CALL
2542 if (IS_ENCLOSE_CLEN_FIXED(en))
2543 *len = en->char_len;
2544 else {
2545 r = get_char_length_tree1(en->target, reg, len, level);
2546 if (r == 0) {
2547 en->char_len = *len;
2548 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
2549 }
2550 }
2551 break;
2552#endif
2553 case ENCLOSE_OPTION:
2554 case ENCLOSE_STOP_BACKTRACK:
2555 case ENCLOSE_CONDITION:
2556 r = get_char_length_tree1(en->target, reg, len, level);
2557 break;
2558 case ENCLOSE_ABSENT:
2559 default:
2560 break;
2561 }
2562 }
2563 break;
2564
2565 case NT_ANCHOR:
2566 break;
2567
2568 default:
2569 r = GET_CHAR_LEN_VARLEN;
2570 break;
2571 }
2572
2573 return r;
2574}
2575
2576static int
2577get_char_length_tree(Node* node, regex_t* reg, int* len)
2578{
2579 return get_char_length_tree1(node, reg, len, 0);
2580}
2581
2582/* x is not included y ==> 1 : 0 */
2583static int
2584is_not_included(Node* x, Node* y, regex_t* reg)
2585{
2586 int i;
2587 OnigDistance len;
2588 OnigCodePoint code;
2589 UChar *p;
2590 int ytype;
2591
2592 retry:
2593 ytype = NTYPE(y);
2594 switch (NTYPE(x)) {
2595 case NT_CTYPE:
2596 {
2597 switch (ytype) {
2598 case NT_CTYPE:
2599 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2600 NCTYPE(y)->not != NCTYPE(x)->not &&
2601 NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
2602 return 1;
2603 else
2604 return 0;
2605 break;
2606
2607 case NT_CCLASS:
2608 swap:
2609 {
2610 Node* tmp;
2611 tmp = x; x = y; y = tmp;
2612 goto retry;
2613 }
2614 break;
2615
2616 case NT_STR:
2617 goto swap;
2618 break;
2619
2620 default:
2621 break;
2622 }
2623 }
2624 break;
2625
2626 case NT_CCLASS:
2627 {
2628 CClassNode* xc = NCCLASS(x);
2629 switch (ytype) {
2630 case NT_CTYPE:
2631 switch (NCTYPE(y)->ctype) {
2632 case ONIGENC_CTYPE_WORD:
2633 if (NCTYPE(y)->not == 0) {
2634 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2635 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2636 if (BITSET_AT(xc->bs, i)) {
2637 if (NCTYPE(y)->ascii_range) {
2638 if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2639 }
2640 else {
2641 if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
2642 }
2643 }
2644 }
2645 return 1;
2646 }
2647 return 0;
2648 }
2649 else {
2650 if (IS_NOT_NULL(xc->mbuf)) return 0;
2651 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2652 int is_word;
2653 if (NCTYPE(y)->ascii_range)
2654 is_word = IS_CODE_SB_WORD(reg->enc, i);
2655 else
2656 is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
2657 if (! is_word) {
2658 if (!IS_NCCLASS_NOT(xc)) {
2659 if (BITSET_AT(xc->bs, i))
2660 return 0;
2661 }
2662 else {
2663 if (! BITSET_AT(xc->bs, i))
2664 return 0;
2665 }
2666 }
2667 }
2668 return 1;
2669 }
2670 break;
2671
2672 default:
2673 break;
2674 }
2675 break;
2676
2677 case NT_CCLASS:
2678 {
2679 int v;
2680 CClassNode* yc = NCCLASS(y);
2681
2682 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2683 v = BITSET_AT(xc->bs, i);
2684 if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2685 (v == 0 && IS_NCCLASS_NOT(xc))) {
2686 v = BITSET_AT(yc->bs, i);
2687 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2688 (v == 0 && IS_NCCLASS_NOT(yc)))
2689 return 0;
2690 }
2691 }
2692 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2693 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2694 return 1;
2695 return 0;
2696 }
2697 break;
2698
2699 case NT_STR:
2700 goto swap;
2701 break;
2702
2703 default:
2704 break;
2705 }
2706 }
2707 break;
2708
2709 case NT_STR:
2710 {
2711 StrNode* xs = NSTR(x);
2712 if (NSTRING_LEN(x) == 0)
2713 break;
2714
2715 switch (ytype) {
2716 case NT_CTYPE:
2717 switch (NCTYPE(y)->ctype) {
2718 case ONIGENC_CTYPE_WORD:
2719 if (NCTYPE(y)->ascii_range) {
2720 if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
2721 return NCTYPE(y)->not;
2722 else
2723 return !(NCTYPE(y)->not);
2724 }
2725 else {
2726 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2727 return NCTYPE(y)->not;
2728 else
2729 return !(NCTYPE(y)->not);
2730 }
2731 break;
2732 default:
2733 break;
2734 }
2735 break;
2736
2737 case NT_CCLASS:
2738 {
2739 CClassNode* cc = NCCLASS(y);
2740
2741 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2742 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2743 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2744 }
2745 break;
2746
2747 case NT_STR:
2748 {
2749 UChar *q;
2750 StrNode* ys = NSTR(y);
2751 len = NSTRING_LEN(x);
2752 if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2753 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2754 /* tiny version */
2755 return 0;
2756 }
2757 else {
2758 for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
2759 if (*p != *q) return 1;
2760 }
2761 }
2762 }
2763 break;
2764
2765 default:
2766 break;
2767 }
2768 }
2769 break;
2770
2771 default:
2772 break;
2773 }
2774
2775 return 0;
2776}
2777
2778static Node*
2779get_head_value_node(Node* node, int exact, regex_t* reg)
2780{
2781 Node* n = NULL_NODE;
2782
2783 switch (NTYPE(node)) {
2784 case NT_BREF:
2785 case NT_ALT:
2786 case NT_CANY:
2787#ifdef USE_SUBEXP_CALL
2788 case NT_CALL:
2789#endif
2790 break;
2791
2792 case NT_CTYPE:
2793 case NT_CCLASS:
2794 if (exact == 0) {
2795 n = node;
2796 }
2797 break;
2798
2799 case NT_LIST:
2800 n = get_head_value_node(NCAR(node), exact, reg);
2801 break;
2802
2803 case NT_STR:
2804 {
2805 StrNode* sn = NSTR(node);
2806
2807 if (sn->end <= sn->s)
2808 break;
2809
2810 if (exact != 0 &&
2811 !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2812 }
2813 else {
2814 n = node;
2815 }
2816 }
2817 break;
2818
2819 case NT_QTFR:
2820 {
2821 QtfrNode* qn = NQTFR(node);
2822 if (qn->lower > 0) {
2823#ifdef USE_OP_PUSH_OR_JUMP_EXACT
2824 if (IS_NOT_NULL(qn->head_exact))
2825 n = qn->head_exact;
2826 else
2827#endif
2828 n = get_head_value_node(qn->target, exact, reg);
2829 }
2830 }
2831 break;
2832
2833 case NT_ENCLOSE:
2834 {
2835 EncloseNode* en = NENCLOSE(node);
2836 switch (en->type) {
2837 case ENCLOSE_OPTION:
2838 {
2839 OnigOptionType options = reg->options;
2840
2841 reg->options = NENCLOSE(node)->option;
2842 n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2843 reg->options = options;
2844 }
2845 break;
2846
2847 case ENCLOSE_MEMORY:
2848 case ENCLOSE_STOP_BACKTRACK:
2849 case ENCLOSE_CONDITION:
2850 n = get_head_value_node(en->target, exact, reg);
2851 break;
2852
2853 case ENCLOSE_ABSENT:
2854 break;
2855 }
2856 }
2857 break;
2858
2859 case NT_ANCHOR:
2860 if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2861 n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2862 break;
2863
2864 default:
2865 break;
2866 }
2867
2868 return n;
2869}
2870
2871static int
2872check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2873{
2874 int type, r = 0;
2875
2876 type = NTYPE(node);
2877 if ((NTYPE2BIT(type) & type_mask) == 0)
2878 return 1;
2879
2880 switch (type) {
2881 case NT_LIST:
2882 case NT_ALT:
2883 do {
2884 r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2885 anchor_mask);
2886 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2887 break;
2888
2889 case NT_QTFR:
2890 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2891 anchor_mask);
2892 break;
2893
2894 case NT_ENCLOSE:
2895 {
2896 EncloseNode* en = NENCLOSE(node);
2897 if ((en->type & enclose_mask) == 0)
2898 return 1;
2899
2900 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2901 }
2902 break;
2903
2904 case NT_ANCHOR:
2905 type = NANCHOR(node)->type;
2906 if ((type & anchor_mask) == 0)
2907 return 1;
2908
2909 if (NANCHOR(node)->target)
2910 r = check_type_tree(NANCHOR(node)->target,
2911 type_mask, enclose_mask, anchor_mask);
2912 break;
2913
2914 default:
2915 break;
2916 }
2917 return r;
2918}
2919
2920#ifdef USE_SUBEXP_CALL
2921
2922# define RECURSION_EXIST 1
2923# define RECURSION_INFINITE 2
2924
2925static int
2926subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2927{
2928 int type;
2929 int r = 0;
2930
2931 type = NTYPE(node);
2932 switch (type) {
2933 case NT_LIST:
2934 {
2935 Node *x;
2936 OnigDistance min;
2937 int ret;
2938
2939 x = node;
2940 do {
2941 ret = subexp_inf_recursive_check(NCAR(x), env, head);
2942 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2943 r |= ret;
2944 if (head) {
2945 ret = get_min_match_length(NCAR(x), &min, env);
2946 if (ret != 0) return ret;
2947 if (min != 0) head = 0;
2948 }
2949 } while (IS_NOT_NULL(x = NCDR(x)));
2950 }
2951 break;
2952
2953 case NT_ALT:
2954 {
2955 int ret;
2956 r = RECURSION_EXIST;
2957 do {
2958 ret = subexp_inf_recursive_check(NCAR(node), env, head);
2959 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2960 r &= ret;
2961 } while (IS_NOT_NULL(node = NCDR(node)));
2962 }
2963 break;
2964
2965 case NT_QTFR:
2966 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2967 if (r == RECURSION_EXIST) {
2968 if (NQTFR(node)->lower == 0) r = 0;
2969 }
2970 break;
2971
2972 case NT_ANCHOR:
2973 {
2974 AnchorNode* an = NANCHOR(node);
2975 switch (an->type) {
2976 case ANCHOR_PREC_READ:
2977 case ANCHOR_PREC_READ_NOT:
2978 case ANCHOR_LOOK_BEHIND:
2979 case ANCHOR_LOOK_BEHIND_NOT:
2980 r = subexp_inf_recursive_check(an->target, env, head);
2981 break;
2982 }
2983 }
2984 break;
2985
2986 case NT_CALL:
2987 r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2988 break;
2989
2990 case NT_ENCLOSE:
2991 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2992 return 0;
2993 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2994 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2995 else {
2996 SET_ENCLOSE_STATUS(node, NST_MARK2);
2997 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2998 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2999 }
3000 break;
3001
3002 default:
3003 break;
3004 }
3005
3006 return r;
3007}
3008
3009static int
3010subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
3011{
3012 int type;
3013 int r = 0;
3014
3015 type = NTYPE(node);
3016 switch (type) {
3017 case NT_LIST:
3018 case NT_ALT:
3019 do {
3020 r = subexp_inf_recursive_check_trav(NCAR(node), env);
3021 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3022 break;
3023
3024 case NT_QTFR:
3025 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
3026 break;
3027
3028 case NT_ANCHOR:
3029 {
3030 AnchorNode* an = NANCHOR(node);
3031 switch (an->type) {
3032 case ANCHOR_PREC_READ:
3033 case ANCHOR_PREC_READ_NOT:
3034 case ANCHOR_LOOK_BEHIND:
3035 case ANCHOR_LOOK_BEHIND_NOT:
3036 r = subexp_inf_recursive_check_trav(an->target, env);
3037 break;
3038 }
3039 }
3040 break;
3041
3042 case NT_ENCLOSE:
3043 {
3044 EncloseNode* en = NENCLOSE(node);
3045
3046 if (IS_ENCLOSE_RECURSION(en)) {
3047 SET_ENCLOSE_STATUS(node, NST_MARK1);
3048 r = subexp_inf_recursive_check(en->target, env, 1);
3049 if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
3050 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3051 }
3052 r = subexp_inf_recursive_check_trav(en->target, env);
3053 }
3054
3055 break;
3056
3057 default:
3058 break;
3059 }
3060
3061 return r;
3062}
3063
3064static int
3065subexp_recursive_check(Node* node)
3066{
3067 int r = 0;
3068
3069 switch (NTYPE(node)) {
3070 case NT_LIST:
3071 case NT_ALT:
3072 do {
3073 r |= subexp_recursive_check(NCAR(node));
3074 } while (IS_NOT_NULL(node = NCDR(node)));
3075 break;
3076
3077 case NT_QTFR:
3078 r = subexp_recursive_check(NQTFR(node)->target);
3079 break;
3080
3081 case NT_ANCHOR:
3082 {
3083 AnchorNode* an = NANCHOR(node);
3084 switch (an->type) {
3085 case ANCHOR_PREC_READ:
3086 case ANCHOR_PREC_READ_NOT:
3087 case ANCHOR_LOOK_BEHIND:
3088 case ANCHOR_LOOK_BEHIND_NOT:
3089 r = subexp_recursive_check(an->target);
3090 break;
3091 }
3092 }
3093 break;
3094
3095 case NT_CALL:
3096 r = subexp_recursive_check(NCALL(node)->target);
3097 if (r != 0) SET_CALL_RECURSION(node);
3098 break;
3099
3100 case NT_ENCLOSE:
3101 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3102 return 0;
3103 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3104 return 1; /* recursion */
3105 else {
3106 SET_ENCLOSE_STATUS(node, NST_MARK2);
3107 r = subexp_recursive_check(NENCLOSE(node)->target);
3108 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
3109 }
3110 break;
3111
3112 default:
3113 break;
3114 }
3115
3116 return r;
3117}
3118
3119
3120static int
3121subexp_recursive_check_trav(Node* node, ScanEnv* env)
3122{
3123# define FOUND_CALLED_NODE 1
3124
3125 int type;
3126 int r = 0;
3127
3128 type = NTYPE(node);
3129 switch (type) {
3130 case NT_LIST:
3131 case NT_ALT:
3132 {
3133 int ret;
3134 do {
3135 ret = subexp_recursive_check_trav(NCAR(node), env);
3136 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3137 else if (ret < 0) return ret;
3138 } while (IS_NOT_NULL(node = NCDR(node)));
3139 }
3140 break;
3141
3142 case NT_QTFR:
3143 r = subexp_recursive_check_trav(NQTFR(node)->target, env);
3144 if (NQTFR(node)->upper == 0) {
3145 if (r == FOUND_CALLED_NODE)
3146 NQTFR(node)->is_referred = 1;
3147 }
3148 break;
3149
3150 case NT_ANCHOR:
3151 {
3152 AnchorNode* an = NANCHOR(node);
3153 switch (an->type) {
3154 case ANCHOR_PREC_READ:
3155 case ANCHOR_PREC_READ_NOT:
3156 case ANCHOR_LOOK_BEHIND:
3157 case ANCHOR_LOOK_BEHIND_NOT:
3158 r = subexp_recursive_check_trav(an->target, env);
3159 break;
3160 }
3161 }
3162 break;
3163
3164 case NT_ENCLOSE:
3165 {
3166 EncloseNode* en = NENCLOSE(node);
3167
3168 if (! IS_ENCLOSE_RECURSION(en)) {
3169 if (IS_ENCLOSE_CALLED(en)) {
3170 SET_ENCLOSE_STATUS(node, NST_MARK1);
3171 r = subexp_recursive_check(en->target);
3172 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3173 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3174 }
3175 }
3176 r = subexp_recursive_check_trav(en->target, env);
3177 if (IS_ENCLOSE_CALLED(en))
3178 r |= FOUND_CALLED_NODE;
3179 }
3180 break;
3181
3182 default:
3183 break;
3184 }
3185
3186 return r;
3187}
3188
3189static int
3190setup_subexp_call(Node* node, ScanEnv* env)
3191{
3192 int type;
3193 int r = 0;
3194
3195 type = NTYPE(node);
3196 switch (type) {
3197 case NT_LIST:
3198 do {
3199 r = setup_subexp_call(NCAR(node), env);
3200 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3201 break;
3202
3203 case NT_ALT:
3204 do {
3205 r = setup_subexp_call(NCAR(node), env);
3206 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3207 break;
3208
3209 case NT_QTFR:
3210 r = setup_subexp_call(NQTFR(node)->target, env);
3211 break;
3212 case NT_ENCLOSE:
3213 r = setup_subexp_call(NENCLOSE(node)->target, env);
3214 break;
3215
3216 case NT_CALL:
3217 {
3218 CallNode* cn = NCALL(node);
3219 Node** nodes = SCANENV_MEM_NODES(env);
3220
3221 if (cn->group_num != 0) {
3222 int gnum = cn->group_num;
3223
3224# ifdef USE_NAMED_GROUP
3225 if (env->num_named > 0 &&
3226 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3227 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
3228 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3229 }
3230# endif
3231 if (gnum > env->num_mem) {
3232 onig_scan_env_set_error_string(env,
3233 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
3234 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3235 }
3236
3237# ifdef USE_NAMED_GROUP
3238 set_call_attr:
3239# endif
3240 cn->target = nodes[cn->group_num];
3241 if (IS_NULL(cn->target)) {
3242 onig_scan_env_set_error_string(env,
3243 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3244 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3245 }
3246 SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
3247 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3248 cn->unset_addr_list = env->unset_addr_list;
3249 }
3250# ifdef USE_NAMED_GROUP
3251# ifdef USE_PERL_SUBEXP_CALL
3252 else if (cn->name == cn->name_end) {
3253 goto set_call_attr;
3254 }
3255# endif
3256 else {
3257 int *refs;
3258
3259 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3260 &refs);
3261 if (n <= 0) {
3262 onig_scan_env_set_error_string(env,
3263 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3264 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3265 }
3266 else if (n > 1 &&
3267 ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
3268 onig_scan_env_set_error_string(env,
3269 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
3270 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3271 }
3272 else {
3273 cn->group_num = refs[0];
3274 goto set_call_attr;
3275 }
3276 }
3277# endif
3278 }
3279 break;
3280
3281 case NT_ANCHOR:
3282 {
3283 AnchorNode* an = NANCHOR(node);
3284
3285 switch (an->type) {
3286 case ANCHOR_PREC_READ:
3287 case ANCHOR_PREC_READ_NOT:
3288 case ANCHOR_LOOK_BEHIND:
3289 case ANCHOR_LOOK_BEHIND_NOT:
3290 r = setup_subexp_call(an->target, env);
3291 break;
3292 }
3293 }
3294 break;
3295
3296 default:
3297 break;
3298 }
3299
3300 return r;
3301}
3302#endif
3303
3304/* divide different length alternatives in look-behind.
3305 (?<=A|B) ==> (?<=A)|(?<=B)
3306 (?<!A|B) ==> (?<!A)(?<!B)
3307*/
3308static int
3309divide_look_behind_alternatives(Node* node)
3310{
3311 Node *head, *np, *insert_node;
3312 AnchorNode* an = NANCHOR(node);
3313 int anc_type = an->type;
3314
3315 head = an->target;
3316 np = NCAR(head);
3317 swap_node(node, head);
3318 NCAR(node) = head;
3319 NANCHOR(head)->target = np;
3320
3321 np = node;
3322 while ((np = NCDR(np)) != NULL_NODE) {
3323 insert_node = onig_node_new_anchor(anc_type);
3324 CHECK_NULL_RETURN_MEMERR(insert_node);
3325 NANCHOR(insert_node)->target = NCAR(np);
3326 NCAR(np) = insert_node;
3327 }
3328
3329 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3330 np = node;
3331 do {
3332 SET_NTYPE(np, NT_LIST); /* alt -> list */
3333 } while ((np = NCDR(np)) != NULL_NODE);
3334 }
3335 return 0;
3336}
3337
3338static int
3339setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3340{
3341 int r, len;
3342 AnchorNode* an = NANCHOR(node);
3343
3344 r = get_char_length_tree(an->target, reg, &len);
3345 if (r == 0)
3346 an->char_len = len;
3347 else if (r == GET_CHAR_LEN_VARLEN)
3348 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3349 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3350 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3351 r = divide_look_behind_alternatives(node);
3352 else
3353 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3354 }
3355
3356 return r;
3357}
3358
3359static int
3360next_setup(Node* node, Node* next_node, regex_t* reg)
3361{
3362 int type;
3363
3364 retry:
3365 type = NTYPE(node);
3366 if (type == NT_QTFR) {
3367 QtfrNode* qn = NQTFR(node);
3368 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3369#ifdef USE_QTFR_PEEK_NEXT
3370 Node* n = get_head_value_node(next_node, 1, reg);
3371 /* '\0': for UTF-16BE etc... */
3372 if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3373 qn->next_head_exact = n;
3374 }
3375#endif
3376 /* automatic possessification a*b ==> (?>a*)b */
3377 if (qn->lower <= 1) {
3378 int ttype = NTYPE(qn->target);
3379 if (IS_NODE_TYPE_SIMPLE(ttype)) {
3380 Node *x, *y;
3381 x = get_head_value_node(qn->target, 0, reg);
3382 if (IS_NOT_NULL(x)) {
3383 y = get_head_value_node(next_node, 0, reg);
3384 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3385 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
3386 CHECK_NULL_RETURN_MEMERR(en);
3387 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3388 swap_node(node, en);
3389 NENCLOSE(node)->target = en;
3390 }
3391 }
3392 }
3393 }
3394 }
3395 }
3396 else if (type == NT_ENCLOSE) {
3397 EncloseNode* en = NENCLOSE(node);
3398 if (en->type == ENCLOSE_MEMORY && !IS_ENCLOSE_CALLED(en)) {
3399 node = en->target;
3400 goto retry;
3401 }
3402 }
3403 return 0;
3404}
3405
3406
3407static int
3408update_string_node_case_fold(regex_t* reg, Node *node)
3409{
3410 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3411 UChar *sbuf, *ebuf, *sp;
3412 int r, i, len;
3413 OnigDistance sbuf_size;
3414 StrNode* sn = NSTR(node);
3415
3416 end = sn->end;
3417 sbuf_size = (end - sn->s) * 2;
3418 sbuf = (UChar* )xmalloc(sbuf_size);
3419 CHECK_NULL_RETURN_MEMERR(sbuf);
3420 ebuf = sbuf + sbuf_size;
3421
3422 sp = sbuf;
3423 p = sn->s;
3424 while (p < end) {
3425 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3426 for (i = 0; i < len; i++) {
3427 if (sp >= ebuf) {
3428 UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
3429 if (IS_NULL(p)) {
3430 xfree(sbuf);
3431 return ONIGERR_MEMORY;
3432 }
3433 sbuf = p;
3434 sp = sbuf + sbuf_size;
3435 sbuf_size *= 2;
3436 ebuf = sbuf + sbuf_size;
3437 }
3438
3439 *sp++ = buf[i];
3440 }
3441 }
3442
3443 r = onig_node_str_set(node, sbuf, sp);
3444
3445 xfree(sbuf);
3446 return r;
3447}
3448
3449static int
3450expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
3451 regex_t* reg)
3452{
3453 int r;
3454 Node *node;
3455
3456 node = onig_node_new_str(s, end);
3457 if (IS_NULL(node)) return ONIGERR_MEMORY;
3458
3459 r = update_string_node_case_fold(reg, node);
3460 if (r != 0) {
3461 onig_node_free(node);
3462 return r;
3463 }
3464
3465 NSTRING_SET_AMBIG(node);
3466 NSTRING_SET_DONT_GET_OPT_INFO(node);
3467 *rnode = node;
3468 return 0;
3469}
3470
3471static int
3472is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
3473 int slen)
3474{
3475 int i;
3476
3477 for (i = 0; i < item_num; i++) {
3478 if (items[i].byte_len != slen) {
3479 return 1;
3480 }
3481 if (items[i].code_len != 1) {
3482 return 1;
3483 }
3484 }
3485 return 0;
3486}
3487
3488static int
3489expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
3490 UChar *p, int slen, UChar *end,
3491 regex_t* reg, Node **rnode)
3492{
3493 int r, i, j, len, varlen;
3494 Node *anode, *var_anode, *snode, *xnode, *an;
3495 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3496
3497 *rnode = var_anode = NULL_NODE;
3498
3499 varlen = 0;
3500 for (i = 0; i < item_num; i++) {
3501 if (items[i].byte_len != slen) {
3502 varlen = 1;
3503 break;
3504 }
3505 }
3506
3507 if (varlen != 0) {
3508 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3509 if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3510
3511 xnode = onig_node_new_list(NULL, NULL);
3512 if (IS_NULL(xnode)) goto mem_err;
3513 NCAR(var_anode) = xnode;
3514
3515 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3516 if (IS_NULL(anode)) goto mem_err;
3517 NCAR(xnode) = anode;
3518 }
3519 else {
3520 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3521 if (IS_NULL(anode)) return ONIGERR_MEMORY;
3522 }
3523
3524 snode = onig_node_new_str(p, p + slen);
3525 if (IS_NULL(snode)) goto mem_err;
3526
3527 NCAR(anode) = snode;
3528
3529 for (i = 0; i < item_num; i++) {
3530 snode = onig_node_new_str(NULL, NULL);
3531 if (IS_NULL(snode)) goto mem_err;
3532
3533 for (j = 0; j < items[i].code_len; j++) {
3534 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3535 if (len < 0) {
3536 r = len;
3537 goto mem_err2;
3538 }
3539
3540 r = onig_node_str_cat(snode, buf, buf + len);
3541 if (r != 0) goto mem_err2;
3542 }
3543
3544 an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3545 if (IS_NULL(an)) {
3546 goto mem_err2;
3547 }
3548
3549 if (items[i].byte_len != slen) {
3550 Node *rem;
3551 UChar *q = p + items[i].byte_len;
3552
3553 if (q < end) {
3554 r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3555 if (r != 0) {
3556 onig_node_free(an);
3557 goto mem_err2;
3558 }
3559
3560 xnode = onig_node_list_add(NULL_NODE, snode);
3561 if (IS_NULL(xnode)) {
3562 onig_node_free(an);
3563 onig_node_free(rem);
3564 goto mem_err2;
3565 }
3566 if (IS_NULL(onig_node_list_add(xnode, rem))) {
3567 onig_node_free(an);
3568 onig_node_free(xnode);
3569 onig_node_free(rem);
3570 goto mem_err;
3571 }
3572
3573 NCAR(an) = xnode;
3574 }
3575 else {
3576 NCAR(an) = snode;
3577 }
3578
3579 NCDR(var_anode) = an;
3580 var_anode = an;
3581 }
3582 else {
3583 NCAR(an) = snode;
3584 NCDR(anode) = an;
3585 anode = an;
3586 }
3587 }
3588
3589 return varlen;
3590
3591 mem_err2:
3592 onig_node_free(snode);
3593
3594 mem_err:
3595 onig_node_free(*rnode);
3596
3597 return ONIGERR_MEMORY;
3598}
3599
3600static int
3601expand_case_fold_string(Node* node, regex_t* reg)
3602{
3603#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3604
3605 int r, n, len, alt_num;
3606 int varlen = 0;
3607 UChar *start, *end, *p;
3608 Node *top_root, *root, *snode, *prev_node;
3609 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
3610 StrNode* sn = NSTR(node);
3611
3612 if (NSTRING_IS_AMBIG(node)) return 0;
3613
3614 start = sn->s;
3615 end = sn->end;
3616 if (start >= end) return 0;
3617
3618 r = 0;
3619 top_root = root = prev_node = snode = NULL_NODE;
3620 alt_num = 1;
3621 p = start;
3622 while (p < end) {
3623 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
3624 p, end, items);
3625 if (n < 0) {
3626 r = n;
3627 goto err;
3628 }
3629
3630 len = enclen(reg->enc, p, end);
3631
3632 varlen = is_case_fold_variable_len(n, items, len);
3633 if (n == 0 || varlen == 0) {
3634 if (IS_NULL(snode)) {
3635 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3636 onig_node_free(top_root);
3637 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3638 if (IS_NULL(root)) {
3639 onig_node_free(prev_node);
3640 goto mem_err;
3641 }
3642 }
3643
3644 prev_node = snode = onig_node_new_str(NULL, NULL);
3645 if (IS_NULL(snode)) goto mem_err;
3646 if (IS_NOT_NULL(root)) {
3647 if (IS_NULL(onig_node_list_add(root, snode))) {
3648 onig_node_free(snode);
3649 goto mem_err;
3650 }
3651 }
3652 }
3653
3654 r = onig_node_str_cat(snode, p, p + len);
3655 if (r != 0) goto err;
3656 }
3657 else {
3658 alt_num *= (n + 1);
3659 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3660
3661 if (IS_NOT_NULL(snode)) {
3662 r = update_string_node_case_fold(reg, snode);
3663 if (r == 0) {
3664 NSTRING_SET_AMBIG(snode);
3665 }
3666 }
3667 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3668 onig_node_free(top_root);
3669 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3670 if (IS_NULL(root)) {
3671 onig_node_free(prev_node);
3672 goto mem_err;
3673 }
3674 }
3675
3676 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3677 if (r < 0) goto mem_err;
3678 if (r == 1) {
3679 if (IS_NULL(root)) {
3680 top_root = prev_node;
3681 }
3682 else {
3683 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3684 onig_node_free(prev_node);
3685 goto mem_err;
3686 }
3687 }
3688
3689 root = NCAR(prev_node);
3690 }
3691 else { /* r == 0 */
3692 if (IS_NOT_NULL(root)) {
3693 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3694 onig_node_free(prev_node);
3695 goto mem_err;
3696 }
3697 }
3698 }
3699
3700 snode = NULL_NODE;
3701 }
3702
3703 p += len;
3704 }
3705 if (IS_NOT_NULL(snode)) {
3706 r = update_string_node_case_fold(reg, snode);
3707 if (r == 0) {
3708 NSTRING_SET_AMBIG(snode);
3709 }
3710 }
3711
3712 if (p < end) {
3713 Node *srem;
3714
3715 r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3716 if (r != 0) goto mem_err;
3717
3718 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3719 onig_node_free(top_root);
3720 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3721 if (IS_NULL(root)) {
3722 onig_node_free(srem);
3723 onig_node_free(prev_node);
3724 goto mem_err;
3725 }
3726 }
3727
3728 if (IS_NULL(root)) {
3729 prev_node = srem;
3730 }
3731 else {
3732 if (IS_NULL(onig_node_list_add(root, srem))) {
3733 onig_node_free(srem);
3734 goto mem_err;
3735 }
3736 }
3737 }
3738
3739 /* ending */
3740 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3741 swap_node(node, top_root);
3742 onig_node_free(top_root);
3743 return 0;
3744
3745 mem_err:
3746 r = ONIGERR_MEMORY;
3747
3748 err:
3749 onig_node_free(top_root);
3750 return r;
3751}
3752
3753
3754#ifdef USE_COMBINATION_EXPLOSION_CHECK
3755
3756# define CEC_THRES_NUM_BIG_REPEAT 512
3757# define CEC_INFINITE_NUM 0x7fffffff
3758
3759# define CEC_IN_INFINITE_REPEAT (1<<0)
3760# define CEC_IN_FINITE_REPEAT (1<<1)
3761# define CEC_CONT_BIG_REPEAT (1<<2)
3762
3763static int
3764setup_comb_exp_check(Node* node, int state, ScanEnv* env)
3765{
3766 int type;
3767 int r = state;
3768
3769 type = NTYPE(node);
3770 switch (type) {
3771 case NT_LIST:
3772 {
3773 do {
3774 r = setup_comb_exp_check(NCAR(node), r, env);
3775 } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3776 }
3777 break;
3778
3779 case NT_ALT:
3780 {
3781 int ret;
3782 do {
3783 ret = setup_comb_exp_check(NCAR(node), state, env);
3784 r |= ret;
3785 } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3786 }
3787 break;
3788
3789 case NT_QTFR:
3790 {
3791 int child_state = state;
3792 int add_state = 0;
3793 QtfrNode* qn = NQTFR(node);
3794 Node* target = qn->target;
3795 int var_num;
3796
3797 if (! IS_REPEAT_INFINITE(qn->upper)) {
3798 if (qn->upper > 1) {
3799 /* {0,1}, {1,1} are allowed */
3800 child_state |= CEC_IN_FINITE_REPEAT;
3801
3802 /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3803 if (env->backrefed_mem == 0) {
3804 if (NTYPE(qn->target) == NT_ENCLOSE) {
3805 EncloseNode* en = NENCLOSE(qn->target);
3806 if (en->type == ENCLOSE_MEMORY) {
3807 if (NTYPE(en->target) == NT_QTFR) {
3808 QtfrNode* q = NQTFR(en->target);
3809 if (IS_REPEAT_INFINITE(q->upper)
3810 && q->greedy == qn->greedy) {
3811 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3812 if (qn->upper == 1)
3813 child_state = state;
3814 }
3815 }
3816 }
3817 }
3818 }
3819 }
3820 }
3821
3822 if (state & CEC_IN_FINITE_REPEAT) {
3823 qn->comb_exp_check_num = -1;
3824 }
3825 else {
3826 if (IS_REPEAT_INFINITE(qn->upper)) {
3827 var_num = CEC_INFINITE_NUM;
3828 child_state |= CEC_IN_INFINITE_REPEAT;
3829 }
3830 else {
3831 var_num = qn->upper - qn->lower;
3832 }
3833
3834 if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3835 add_state |= CEC_CONT_BIG_REPEAT;
3836
3837 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3838 ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3839 var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3840 if (qn->comb_exp_check_num == 0) {
3841 env->num_comb_exp_check++;
3842 qn->comb_exp_check_num = env->num_comb_exp_check;
3843 if (env->curr_max_regnum > env->comb_exp_max_regnum)
3844 env->comb_exp_max_regnum = env->curr_max_regnum;
3845 }
3846 }
3847 }
3848
3849 r = setup_comb_exp_check(target, child_state, env);
3850 r |= add_state;
3851 }
3852 break;
3853
3854 case NT_ENCLOSE:
3855 {
3856 EncloseNode* en = NENCLOSE(node);
3857
3858 switch (en->type) {
3859 case ENCLOSE_MEMORY:
3860 {
3861 if (env->curr_max_regnum < en->regnum)
3862 env->curr_max_regnum = en->regnum;
3863
3864 r = setup_comb_exp_check(en->target, state, env);
3865 }
3866 break;
3867
3868 default:
3869 r = setup_comb_exp_check(en->target, state, env);
3870 break;
3871 }
3872 }
3873 break;
3874
3875# ifdef USE_SUBEXP_CALL
3876 case NT_CALL:
3877 if (IS_CALL_RECURSION(NCALL(node)))
3878 env->has_recursion = 1;
3879 else
3880 r = setup_comb_exp_check(NCALL(node)->target, state, env);
3881 break;
3882# endif
3883
3884 default:
3885 break;
3886 }
3887
3888 return r;
3889}
3890#endif
3891
3892#define IN_ALT (1<<0)
3893#define IN_NOT (1<<1)
3894#define IN_REPEAT (1<<2)
3895#define IN_VAR_REPEAT (1<<3)
3896#define IN_CALL (1<<4)
3897#define IN_RECCALL (1<<5)
3898
3899/* setup_tree does the following work.
3900 1. check empty loop. (set qn->target_empty_info)
3901 2. expand ignore-case in char class.
3902 3. set memory status bit flags. (reg->mem_stats)
3903 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3904 5. find invalid patterns in look-behind.
3905 6. expand repeated string.
3906 */
3907static int
3908setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3909{
3910 int type;
3911 int r = 0;
3912
3913restart:
3914 type = NTYPE(node);
3915 switch (type) {
3916 case NT_LIST:
3917 {
3918 Node* prev = NULL_NODE;
3919 do {
3920 r = setup_tree(NCAR(node), reg, state, env);
3921 if (IS_NOT_NULL(prev) && r == 0) {
3922 r = next_setup(prev, NCAR(node), reg);
3923 }
3924 prev = NCAR(node);
3925 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3926 }
3927 break;
3928
3929 case NT_ALT:
3930 do {
3931 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3932 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3933 break;
3934
3935 case NT_CCLASS:
3936 break;
3937
3938 case NT_STR:
3939 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3940 r = expand_case_fold_string(node, reg);
3941 }
3942 break;
3943
3944 case NT_CTYPE:
3945 case NT_CANY:
3946 break;
3947
3948#ifdef USE_SUBEXP_CALL
3949 case NT_CALL:
3950 break;
3951#endif
3952
3953 case NT_BREF:
3954 {
3955 int i;
3956 int* p;
3957 Node** nodes = SCANENV_MEM_NODES(env);
3958 BRefNode* br = NBREF(node);
3959 p = BACKREFS_P(br);
3960 for (i = 0; i < br->back_num; i++) {
3961 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
3962 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3963 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3964#ifdef USE_BACKREF_WITH_LEVEL
3965 if (IS_BACKREF_NEST_LEVEL(br)) {
3966 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3967 }
3968#endif
3969 SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3970 }
3971 }
3972 break;
3973
3974 case NT_QTFR:
3975 {
3976 OnigDistance d;
3977 QtfrNode* qn = NQTFR(node);
3978 Node* target = qn->target;
3979
3980 if ((state & IN_REPEAT) != 0) {
3981 qn->state |= NST_IN_REPEAT;
3982 }
3983
3984 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3985 r = get_min_match_length(target, &d, env);
3986 if (r) break;
3987 if (d == 0) {
3988 qn->target_empty_info = NQ_TARGET_IS_EMPTY;
3989#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3990 r = quantifiers_memory_node_info(target);
3991 if (r < 0) break;
3992 if (r > 0) {
3993 qn->target_empty_info = r;
3994 }
3995#endif
3996#if 0
3997 r = get_max_match_length(target, &d, env);
3998 if (r == 0 && d == 0) {
3999 /* ()* ==> ()?, ()+ ==> () */
4000 qn->upper = 1;
4001 if (qn->lower > 1) qn->lower = 1;
4002 if (NTYPE(target) == NT_STR) {
4003 qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
4004 }
4005 }
4006#endif
4007 }
4008 }
4009
4010 state |= IN_REPEAT;
4011 if (qn->lower != qn->upper)
4012 state |= IN_VAR_REPEAT;
4013 r = setup_tree(target, reg, state, env);
4014 if (r) break;
4015
4016 /* expand string */
4017#define EXPAND_STRING_MAX_LENGTH 100
4018 if (NTYPE(target) == NT_STR) {
4019 if (qn->lower > 1) {
4020 int i, n = qn->lower;
4021 OnigDistance len = NSTRING_LEN(target);
4022 StrNode* sn = NSTR(target);
4023 Node* np;
4024
4025 np = onig_node_new_str(sn->s, sn->end);
4026 if (IS_NULL(np)) return ONIGERR_MEMORY;
4027 NSTR(np)->flag = sn->flag;
4028
4029 for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
4030 r = onig_node_str_cat(np, sn->s, sn->end);
4031 if (r) {
4032 onig_node_free(np);
4033 return r;
4034 }
4035 }
4036 if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
4037 Node *np1, *np2;
4038
4039 qn->lower -= i;
4040 if (! IS_REPEAT_INFINITE(qn->upper))
4041 qn->upper -= i;
4042
4043 np1 = onig_node_new_list(np, NULL);
4044 if (IS_NULL(np1)) {
4045 onig_node_free(np);
4046 return ONIGERR_MEMORY;
4047 }
4048 swap_node(np1, node);
4049 np2 = onig_node_list_add(node, np1);
4050 if (IS_NULL(np2)) {
4051 onig_node_free(np1);
4052 return ONIGERR_MEMORY;
4053 }
4054 }
4055 else {
4056 swap_node(np, node);
4057 onig_node_free(np);
4058 }
4059 break; /* break case NT_QTFR: */
4060 }
4061 }
4062
4063#ifdef USE_OP_PUSH_OR_JUMP_EXACT
4064 if (qn->greedy && (qn->target_empty_info != 0)) {
4065 if (NTYPE(target) == NT_QTFR) {
4066 QtfrNode* tqn = NQTFR(target);
4067 if (IS_NOT_NULL(tqn->head_exact)) {
4068 qn->head_exact = tqn->head_exact;
4069 tqn->head_exact = NULL;
4070 }
4071 }
4072 else {
4073 qn->head_exact = get_head_value_node(qn->target, 1, reg);
4074 }
4075 }
4076#endif
4077 }
4078 break;
4079
4080 case NT_ENCLOSE:
4081 {
4082 EncloseNode* en = NENCLOSE(node);
4083
4084 switch (en->type) {
4085 case ENCLOSE_OPTION:
4086 {
4087 OnigOptionType options = reg->options;
4088 reg->options = NENCLOSE(node)->option;
4089 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4090 reg->options = options;
4091 }
4092 break;
4093
4094 case ENCLOSE_MEMORY:
4095 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
4096 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
4097 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
4098 }
4099 if (IS_ENCLOSE_CALLED(en))
4100 state |= IN_CALL;
4101 if (IS_ENCLOSE_RECURSION(en))
4102 state |= IN_RECCALL;
4103 else if ((state & IN_RECCALL) != 0)
4104 SET_CALL_RECURSION(node);
4105 r = setup_tree(en->target, reg, state, env);
4106 break;
4107
4108 case ENCLOSE_STOP_BACKTRACK:
4109 {
4110 Node* target = en->target;
4111 r = setup_tree(target, reg, state, env);
4112 if (NTYPE(target) == NT_QTFR) {
4113 QtfrNode* tqn = NQTFR(target);
4114 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4115 tqn->greedy != 0) { /* (?>a*), a*+ etc... */
4116 int qtype = NTYPE(tqn->target);
4117 if (IS_NODE_TYPE_SIMPLE(qtype))
4118 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
4119 }
4120 }
4121 }
4122 break;
4123
4124 case ENCLOSE_CONDITION:
4125#ifdef USE_NAMED_GROUP
4126 if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
4127 env->num_named > 0 &&
4128 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
4129 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
4130 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
4131 }
4132#endif
4133 if (NENCLOSE(node)->regnum > env->num_mem)
4134 return ONIGERR_INVALID_BACKREF;
4135 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4136 break;
4137
4138 case ENCLOSE_ABSENT:
4139 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4140 break;
4141 }
4142 }
4143 break;
4144
4145 case NT_ANCHOR:
4146 {
4147 AnchorNode* an = NANCHOR(node);
4148
4149 switch (an->type) {
4150 case ANCHOR_PREC_READ:
4151 r = setup_tree(an->target, reg, state, env);
4152 break;
4153 case ANCHOR_PREC_READ_NOT:
4154 r = setup_tree(an->target, reg, (state | IN_NOT), env);
4155 break;
4156
4157/* allowed node types in look-behind */
4158#define ALLOWED_TYPE_IN_LB \
4159 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4160 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4161
4162#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4163#define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
4164
4165#define ALLOWED_ANCHOR_IN_LB \
4166( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4167 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4168 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4169 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4170#define ALLOWED_ANCHOR_IN_LB_NOT \
4171( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4172 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4173 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4174 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4175
4176 case ANCHOR_LOOK_BEHIND:
4177 {
4178 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4179 ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
4180 if (r < 0) return r;
4181 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4182 if (NTYPE(node) != NT_ANCHOR) goto restart;
4183 r = setup_tree(an->target, reg, state, env);
4184 if (r != 0) return r;
4185 r = setup_look_behind(node, reg, env);
4186 }
4187 break;
4188
4189 case ANCHOR_LOOK_BEHIND_NOT:
4190 {
4191 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4192 ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
4193 if (r < 0) return r;
4194 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4195 if (NTYPE(node) != NT_ANCHOR) goto restart;
4196 r = setup_tree(an->target, reg, (state | IN_NOT), env);
4197 if (r != 0) return r;
4198 r = setup_look_behind(node, reg, env);
4199 }
4200 break;
4201 }
4202 }
4203 break;
4204
4205 default:
4206 break;
4207 }
4208
4209 return r;
4210}
4211
4212#ifndef USE_SUNDAY_QUICK_SEARCH
4213/* set skip map for Boyer-Moore search */
4214static int
4215set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4216 UChar skip[], int** int_skip, int ignore_case)
4217{
4218 OnigDistance i, len;
4219 int clen, flen, n, j, k;
4220 UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
4221 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4222 OnigEncoding enc = reg->enc;
4223
4224 len = end - s;
4225 if (len < ONIG_CHAR_TABLE_SIZE) {
4226 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len;
4227
4228 n = 0;
4229 for (i = 0; i < len - 1; i += clen) {
4230 p = s + i;
4231 if (ignore_case)
4232 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4233 p, end, items);
4234 clen = enclen(enc, p, end);
4235 if (p + clen > end)
4236 clen = (int )(end - p);
4237
4238 for (j = 0; j < n; j++) {
4239 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4240 return 1; /* different length isn't supported. */
4241 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4242 if (flen != clen)
4243 return 1; /* different length isn't supported. */
4244 }
4245 for (j = 0; j < clen; j++) {
4246 skip[s[i + j]] = (UChar )(len - 1 - i - j);
4247 for (k = 0; k < n; k++) {
4248 skip[buf[k][j]] = (UChar )(len - 1 - i - j);
4249 }
4250 }
4251 }
4252 }
4253 else {
4254# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4255 /* This should not happen. */
4256 return ONIGERR_TYPE_BUG;
4257# else
4258 if (IS_NULL(*int_skip)) {
4259 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4260 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4261 }
4262 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len;
4263
4264 n = 0;
4265 for (i = 0; i < len - 1; i += clen) {
4266 p = s + i;
4267 if (ignore_case)
4268 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4269 p, end, items);
4270 clen = enclen(enc, p, end);
4271 if (p + clen > end)
4272 clen = (int )(end - p);
4273
4274 for (j = 0; j < n; j++) {
4275 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4276 return 1; /* different length isn't supported. */
4277 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4278 if (flen != clen)
4279 return 1; /* different length isn't supported. */
4280 }
4281 for (j = 0; j < clen; j++) {
4282 (*int_skip)[s[i + j]] = (int )(len - 1 - i - j);
4283 for (k = 0; k < n; k++) {
4284 (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j);
4285 }
4286 }
4287 }
4288# endif
4289 }
4290 return 0;
4291}
4292
4293#else /* USE_SUNDAY_QUICK_SEARCH */
4294
4295/* set skip map for Sunday's quick search */
4296static int
4297set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4298 UChar skip[], int** int_skip, int ignore_case)
4299{
4300 OnigDistance i, len;
4301 int clen, flen, n, j, k;
4302 UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
4303 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4304 OnigEncoding enc = reg->enc;
4305
4306 len = end - s;
4307 if (len < ONIG_CHAR_TABLE_SIZE) {
4308 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1);
4309
4310 n = 0;
4311 for (i = 0; i < len; i += clen) {
4312 p = s + i;
4313 if (ignore_case)
4314 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4315 p, end, items);
4316 clen = enclen(enc, p, end);
4317 if (p + clen > end)
4318 clen = (int )(end - p);
4319
4320 for (j = 0; j < n; j++) {
4321 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4322 return 1; /* different length isn't supported. */
4323 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4324 if (flen != clen)
4325 return 1; /* different length isn't supported. */
4326 }
4327 for (j = 0; j < clen; j++) {
4328 skip[s[i + j]] = (UChar )(len - i - j);
4329 for (k = 0; k < n; k++) {
4330 skip[buf[k][j]] = (UChar )(len - i - j);
4331 }
4332 }
4333 }
4334 }
4335 else {
4336# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4337 /* This should not happen. */
4338 return ONIGERR_TYPE_BUG;
4339# else
4340 if (IS_NULL(*int_skip)) {
4341 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4342 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4343 }
4344 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1);
4345
4346 n = 0;
4347 for (i = 0; i < len; i += clen) {
4348 p = s + i;
4349 if (ignore_case)
4350 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4351 p, end, items);
4352 clen = enclen(enc, p, end);
4353 if (p + clen > end)
4354 clen = (int )(end - p);
4355
4356 for (j = 0; j < n; j++) {
4357 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4358 return 1; /* different length isn't supported. */
4359 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4360 if (flen != clen)
4361 return 1; /* different length isn't supported. */
4362 }
4363 for (j = 0; j < clen; j++) {
4364 (*int_skip)[s[i + j]] = (int )(len - i - j);
4365 for (k = 0; k < n; k++) {
4366 (*int_skip)[buf[k][j]] = (int )(len - i - j);
4367 }
4368 }
4369 }
4370# endif
4371 }
4372 return 0;
4373}
4374#endif /* USE_SUNDAY_QUICK_SEARCH */
4375
4376typedef struct {
4377 OnigDistance min; /* min byte length */
4378 OnigDistance max; /* max byte length */
4379} MinMaxLen;
4380
4381typedef struct {
4382 MinMaxLen mmd;
4383 OnigEncoding enc;
4384 OnigOptionType options;
4385 OnigCaseFoldType case_fold_flag;
4386 ScanEnv* scan_env;
4387} OptEnv;
4388
4389typedef struct {
4390 int left_anchor;
4391 int right_anchor;
4392} OptAncInfo;
4393
4394typedef struct {
4395 MinMaxLen mmd; /* info position */
4396 OptAncInfo anc;
4397
4398 int reach_end;
4399 int ignore_case; /* -1: unset, 0: case sensitive, 1: ignore case */
4400 int len;
4401 UChar s[OPT_EXACT_MAXLEN];
4402} OptExactInfo;
4403
4404typedef struct {
4405 MinMaxLen mmd; /* info position */
4406 OptAncInfo anc;
4407
4408 int value; /* weighted value */
4409 UChar map[ONIG_CHAR_TABLE_SIZE];
4410} OptMapInfo;
4411
4412typedef struct {
4413 MinMaxLen len;
4414
4415 OptAncInfo anc;
4416 OptExactInfo exb; /* boundary */
4417 OptExactInfo exm; /* middle */
4418 OptExactInfo expr; /* prec read (?=...) */
4419
4420 OptMapInfo map; /* boundary */
4421} NodeOptInfo;
4422
4423
4424static int
4425map_position_value(OnigEncoding enc, int i)
4426{
4427 static const short int ByteValTable[] = {
4428 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4429 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4430 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4431 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4432 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4433 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4434 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4435 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4436 };
4437
4438 if (i < numberof(ByteValTable)) {
4439 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4440 return 20;
4441 else
4442 return (int )ByteValTable[i];
4443 }
4444 else
4445 return 4; /* Take it easy. */
4446}
4447
4448static int
4449distance_value(MinMaxLen* mm)
4450{
4451 /* 1000 / (min-max-dist + 1) */
4452 static const short int dist_vals[] = {
4453 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4454 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4455 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4456 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4457 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4458 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4459 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4460 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4461 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4462 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4463 };
4464
4465 OnigDistance d;
4466
4467 if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4468
4469 d = mm->max - mm->min;
4470 if (d < numberof(dist_vals))
4471 /* return dist_vals[d] * 16 / (mm->min + 12); */
4472 return (int )dist_vals[d];
4473 else
4474 return 1;
4475}
4476
4477static int
4478comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
4479{
4480 if (v2 <= 0) return -1;
4481 if (v1 <= 0) return 1;
4482
4483 v1 *= distance_value(d1);
4484 v2 *= distance_value(d2);
4485
4486 if (v2 > v1) return 1;
4487 if (v2 < v1) return -1;
4488
4489 if (d2->min < d1->min) return 1;
4490 if (d2->min > d1->min) return -1;
4491 return 0;
4492}
4493
4494static int
4495is_equal_mml(MinMaxLen* a, MinMaxLen* b)
4496{
4497 return (a->min == b->min && a->max == b->max) ? 1 : 0;
4498}
4499
4500
4501static void
4502set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
4503{
4504 mml->min = min;
4505 mml->max = max;
4506}
4507
4508static void
4509clear_mml(MinMaxLen* mml)
4510{
4511 mml->min = mml->max = 0;
4512}
4513
4514static void
4515copy_mml(MinMaxLen* to, MinMaxLen* from)
4516{
4517 to->min = from->min;
4518 to->max = from->max;
4519}
4520
4521static void
4522add_mml(MinMaxLen* to, MinMaxLen* from)
4523{
4524 to->min = distance_add(to->min, from->min);
4525 to->max = distance_add(to->max, from->max);
4526}
4527
4528#if 0
4529static void
4530add_len_mml(MinMaxLen* to, OnigDistance len)
4531{
4532 to->min = distance_add(to->min, len);
4533 to->max = distance_add(to->max, len);
4534}
4535#endif
4536
4537static void
4538alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
4539{
4540 if (to->min > from->min) to->min = from->min;
4541 if (to->max < from->max) to->max = from->max;
4542}
4543
4544static void
4545copy_opt_env(OptEnv* to, OptEnv* from)
4546{
4547 *to = *from;
4548}
4549
4550static void
4551clear_opt_anc_info(OptAncInfo* anc)
4552{
4553 anc->left_anchor = 0;
4554 anc->right_anchor = 0;
4555}
4556
4557static void
4558copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
4559{
4560 *to = *from;
4561}
4562
4563static void
4564concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
4565 OnigDistance left_len, OnigDistance right_len)
4566{
4567 clear_opt_anc_info(to);
4568
4569 to->left_anchor = left->left_anchor;
4570 if (left_len == 0) {
4571 to->left_anchor |= right->left_anchor;
4572 }
4573
4574 to->right_anchor = right->right_anchor;
4575 if (right_len == 0) {
4576 to->right_anchor |= left->right_anchor;
4577 }
4578 else {
4579 to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
4580 }
4581}
4582
4583static int
4584is_left_anchor(int anc)
4585{
4586 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4587 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4588 anc == ANCHOR_PREC_READ_NOT)
4589 return 0;
4590
4591 return 1;
4592}
4593
4594static int
4595is_set_opt_anc_info(OptAncInfo* to, int anc)
4596{
4597 if ((to->left_anchor & anc) != 0) return 1;
4598
4599 return ((to->right_anchor & anc) != 0 ? 1 : 0);
4600}
4601
4602static void
4603add_opt_anc_info(OptAncInfo* to, int anc)
4604{
4605 if (is_left_anchor(anc))
4606 to->left_anchor |= anc;
4607 else
4608 to->right_anchor |= anc;
4609}
4610
4611static void
4612remove_opt_anc_info(OptAncInfo* to, int anc)
4613{
4614 if (is_left_anchor(anc))
4615 to->left_anchor &= ~anc;
4616 else
4617 to->right_anchor &= ~anc;
4618}
4619
4620static void
4621alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
4622{
4623 to->left_anchor &= add->left_anchor;
4624 to->right_anchor &= add->right_anchor;
4625}
4626
4627static int
4628is_full_opt_exact_info(OptExactInfo* ex)
4629{
4630 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4631}
4632
4633static void
4634clear_opt_exact_info(OptExactInfo* ex)
4635{
4636 clear_mml(&ex->mmd);
4637 clear_opt_anc_info(&ex->anc);
4638 ex->reach_end = 0;
4639 ex->ignore_case = -1; /* unset */
4640 ex->len = 0;
4641 ex->s[0] = '\0';
4642}
4643
4644static void
4645copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
4646{
4647 *to = *from;
4648}
4649
4650static void
4651concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
4652{
4653 int i, j, len;
4654 UChar *p, *end;
4655 OptAncInfo tanc;
4656
4657 if (to->ignore_case < 0)
4658 to->ignore_case = add->ignore_case;
4659 else if (to->ignore_case != add->ignore_case)
4660 return ; /* avoid */
4661
4662 p = add->s;
4663 end = p + add->len;
4664 for (i = to->len; p < end; ) {
4665 len = enclen(enc, p, end);
4666 if (i + len > OPT_EXACT_MAXLEN) break;
4667 for (j = 0; j < len && p < end; j++)
4668 to->s[i++] = *p++;
4669 }
4670
4671 to->len = i;
4672 to->reach_end = (p == end ? add->reach_end : 0);
4673
4674 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4675 if (! to->reach_end) tanc.right_anchor = 0;
4676 copy_opt_anc_info(&to->anc, &tanc);
4677}
4678
4679static void
4680concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
4681 int raw ARG_UNUSED, OnigEncoding enc)
4682{
4683 int i, j, len;
4684 UChar *p;
4685
4686 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4687 len = enclen(enc, p, end);
4688 if (i + len > OPT_EXACT_MAXLEN) break;
4689 for (j = 0; j < len && p < end; j++)
4690 to->s[i++] = *p++;
4691 }
4692
4693 to->len = i;
4694}
4695
4696static void
4697alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
4698{
4699 int i, j, len;
4700
4701 if (add->len == 0 || to->len == 0) {
4702 clear_opt_exact_info(to);
4703 return ;
4704 }
4705
4706 if (! is_equal_mml(&to->mmd, &add->mmd)) {
4707 clear_opt_exact_info(to);
4708 return ;
4709 }
4710
4711 for (i = 0; i < to->len && i < add->len; ) {
4712 if (to->s[i] != add->s[i]) break;
4713 len = enclen(env->enc, to->s + i, to->s + to->len);
4714
4715 for (j = 1; j < len; j++) {
4716 if (to->s[i+j] != add->s[i+j]) break;
4717 }
4718 if (j < len) break;
4719 i += len;
4720 }
4721
4722 if (! add->reach_end || i < add->len || i < to->len) {
4723 to->reach_end = 0;
4724 }
4725 to->len = i;
4726 if (to->ignore_case < 0)
4727 to->ignore_case = add->ignore_case;
4728 else if (add->ignore_case >= 0)
4729 to->ignore_case |= add->ignore_case;
4730
4731 alt_merge_opt_anc_info(&to->anc, &add->anc);
4732 if (! to->reach_end) to->anc.right_anchor = 0;
4733}
4734
4735static void
4736select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
4737{
4738 int v1, v2;
4739
4740 v1 = now->len;
4741 v2 = alt->len;
4742
4743 if (v2 == 0) {
4744 return ;
4745 }
4746 else if (v1 == 0) {
4747 copy_opt_exact_info(now, alt);
4748 return ;
4749 }
4750 else if (v1 <= 2 && v2 <= 2) {
4751 /* ByteValTable[x] is big value --> low price */
4752 v2 = map_position_value(enc, now->s[0]);
4753 v1 = map_position_value(enc, alt->s[0]);
4754
4755 if (now->len > 1) v1 += 5;
4756 if (alt->len > 1) v2 += 5;
4757 }
4758
4759 if (now->ignore_case <= 0) v1 *= 2;
4760 if (alt->ignore_case <= 0) v2 *= 2;
4761
4762 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4763 copy_opt_exact_info(now, alt);
4764}
4765
4766static void
4767clear_opt_map_info(OptMapInfo* map)
4768{
4769 static const OptMapInfo clean_info = {
4770 {0, 0}, {0, 0}, 0,
4771 {
4772 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4773 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4774 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4775 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4776 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4777 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4778 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4779 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4780 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4781 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4782 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4783 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4784 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4785 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4786 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4787 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4788 }
4789 };
4790
4791 xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4792}
4793
4794static void
4795copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
4796{
4797 *to = *from;
4798}
4799
4800static void
4801add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
4802{
4803 if (map->map[c] == 0) {
4804 map->map[c] = 1;
4805 map->value += map_position_value(enc, c);
4806 }
4807}
4808
4809static int
4810add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
4811 OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4812{
4813 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4814 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4815 int i, n;
4816
4817 add_char_opt_map_info(map, p[0], enc);
4818
4819 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4820 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4821 if (n < 0) return n;
4822
4823 for (i = 0; i < n; i++) {
4824 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4825 add_char_opt_map_info(map, buf[0], enc);
4826 }
4827
4828 return 0;
4829}
4830
4831static void
4832select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
4833{
4834 const int z = 1<<15; /* 32768: something big value */
4835
4836 int v1, v2;
4837
4838 if (alt->value == 0) return ;
4839 if (now->value == 0) {
4840 copy_opt_map_info(now, alt);
4841 return ;
4842 }
4843
4844 v1 = z / now->value;
4845 v2 = z / alt->value;
4846 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4847 copy_opt_map_info(now, alt);
4848}
4849
4850static int
4851comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
4852{
4853#define COMP_EM_BASE 20
4854 int ve, vm;
4855
4856 if (m->value <= 0) return -1;
4857
4858 ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4859 vm = COMP_EM_BASE * 5 * 2 / m->value;
4860 return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4861}
4862
4863static void
4864alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
4865{
4866 int i, val;
4867
4868 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4869 if (to->value == 0) return ;
4870 if (add->value == 0 || to->mmd.max < add->mmd.min) {
4871 clear_opt_map_info(to);
4872 return ;
4873 }
4874
4875 alt_merge_mml(&to->mmd, &add->mmd);
4876
4877 val = 0;
4878 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4879 if (add->map[i])
4880 to->map[i] = 1;
4881
4882 if (to->map[i])
4883 val += map_position_value(enc, i);
4884 }
4885 to->value = val;
4886
4887 alt_merge_opt_anc_info(&to->anc, &add->anc);
4888}
4889
4890static void
4891set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
4892{
4893 copy_mml(&(opt->exb.mmd), mmd);
4894 copy_mml(&(opt->expr.mmd), mmd);
4895 copy_mml(&(opt->map.mmd), mmd);
4896}
4897
4898static void
4899clear_node_opt_info(NodeOptInfo* opt)
4900{
4901 clear_mml(&opt->len);
4902 clear_opt_anc_info(&opt->anc);
4903 clear_opt_exact_info(&opt->exb);
4904 clear_opt_exact_info(&opt->exm);
4905 clear_opt_exact_info(&opt->expr);
4906 clear_opt_map_info(&opt->map);
4907}
4908
4909static void
4910copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
4911{
4912 *to = *from;
4913}
4914
4915static void
4916concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
4917{
4918 int exb_reach, exm_reach;
4919 OptAncInfo tanc;
4920
4921 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4922 copy_opt_anc_info(&to->anc, &tanc);
4923
4924 if (add->exb.len > 0 && to->len.max == 0) {
4925 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4926 to->len.max, add->len.max);
4927 copy_opt_anc_info(&add->exb.anc, &tanc);
4928 }
4929
4930 if (add->map.value > 0 && to->len.max == 0) {
4931 if (add->map.mmd.max == 0)
4932 add->map.anc.left_anchor |= to->anc.left_anchor;
4933 }
4934
4935 exb_reach = to->exb.reach_end;
4936 exm_reach = to->exm.reach_end;
4937
4938 if (add->len.max != 0)
4939 to->exb.reach_end = to->exm.reach_end = 0;
4940
4941 if (add->exb.len > 0) {
4942 if (exb_reach) {
4943 concat_opt_exact_info(&to->exb, &add->exb, enc);
4944 clear_opt_exact_info(&add->exb);
4945 }
4946 else if (exm_reach) {
4947 concat_opt_exact_info(&to->exm, &add->exb, enc);
4948 clear_opt_exact_info(&add->exb);
4949 }
4950 }
4951 select_opt_exact_info(enc, &to->exm, &add->exb);
4952 select_opt_exact_info(enc, &to->exm, &add->exm);
4953
4954 if (to->expr.len > 0) {
4955 if (add->len.max > 0) {
4956 if (to->expr.len > (int )add->len.max)
4957 to->expr.len = (int )add->len.max;
4958
4959 if (to->expr.mmd.max == 0)
4960 select_opt_exact_info(enc, &to->exb, &to->expr);
4961 else
4962 select_opt_exact_info(enc, &to->exm, &to->expr);
4963 }
4964 }
4965 else if (add->expr.len > 0) {
4966 copy_opt_exact_info(&to->expr, &add->expr);
4967 }
4968
4969 select_opt_map_info(&to->map, &add->map);
4970
4971 add_mml(&to->len, &add->len);
4972}
4973
4974static void
4975alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
4976{
4977 alt_merge_opt_anc_info (&to->anc, &add->anc);
4978 alt_merge_opt_exact_info(&to->exb, &add->exb, env);
4979 alt_merge_opt_exact_info(&to->exm, &add->exm, env);
4980 alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4981 alt_merge_opt_map_info(env->enc, &to->map, &add->map);
4982
4983 alt_merge_mml(&to->len, &add->len);
4984}
4985
4986
4987#define MAX_NODE_OPT_INFO_REF_COUNT 5
4988
4989static int
4990optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
4991{
4992 int type;
4993 int r = 0;
4994
4995 clear_node_opt_info(opt);
4996 set_bound_node_opt_info(opt, &env->mmd);
4997
4998 type = NTYPE(node);
4999 switch (type) {
5000 case NT_LIST:
5001 {
5002 OptEnv nenv;
5003 NodeOptInfo nopt;
5004 Node* nd = node;
5005
5006 copy_opt_env(&nenv, env);
5007 do {
5008 r = optimize_node_left(NCAR(nd), &nopt, &nenv);
5009 if (r == 0) {
5010 add_mml(&nenv.mmd, &nopt.len);
5011 concat_left_node_opt_info(env->enc, opt, &nopt);
5012 }
5013 } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
5014 }
5015 break;
5016
5017 case NT_ALT:
5018 {
5019 NodeOptInfo nopt;
5020 Node* nd = node;
5021
5022 do {
5023 r = optimize_node_left(NCAR(nd), &nopt, env);
5024 if (r == 0) {
5025 if (nd == node) copy_node_opt_info(opt, &nopt);
5026 else alt_merge_node_opt_info(opt, &nopt, env);
5027 }
5028 } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
5029 }
5030 break;
5031
5032 case NT_STR:
5033 {
5034 StrNode* sn = NSTR(node);
5035 OnigDistance slen = sn->end - sn->s;
5036 int is_raw = NSTRING_IS_RAW(node);
5037
5038 if (! NSTRING_IS_AMBIG(node)) {
5039 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
5040 is_raw, env->enc);
5041 opt->exb.ignore_case = 0;
5042 if (slen > 0) {
5043 add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
5044 }
5045 set_mml(&opt->len, slen, slen);
5046 }
5047 else {
5048 OnigDistance max;
5049
5050 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
5051 int n = onigenc_strlen(env->enc, sn->s, sn->end);
5052 max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * (OnigDistance)n;
5053 }
5054 else {
5055 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
5056 is_raw, env->enc);
5057 opt->exb.ignore_case = 1;
5058
5059 if (slen > 0) {
5060 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
5061 env->enc, env->case_fold_flag);
5062 if (r != 0) break;
5063 }
5064
5065 max = slen;
5066 }
5067
5068 set_mml(&opt->len, slen, max);
5069 }
5070
5071 if ((OnigDistance )opt->exb.len == slen)
5072 opt->exb.reach_end = 1;
5073 }
5074 break;
5075
5076 case NT_CCLASS:
5077 {
5078 int i, z;
5079 CClassNode* cc = NCCLASS(node);
5080
5081 /* no need to check ignore case. (set in setup_tree()) */
5082
5083 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
5084 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5085 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5086
5087 set_mml(&opt->len, min, max);
5088 }
5089 else {
5090 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5091 z = BITSET_AT(cc->bs, i);
5092 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
5093 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5094 }
5095 }
5096 set_mml(&opt->len, 1, 1);
5097 }
5098 }
5099 break;
5100
5101 case NT_CTYPE:
5102 {
5103 int i, min, max;
5104 int maxcode;
5105
5106 max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5107
5108 if (max == 1) {
5109 min = 1;
5110
5111 maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
5112 switch (NCTYPE(node)->ctype) {
5113 case ONIGENC_CTYPE_WORD:
5114 if (NCTYPE(node)->not != 0) {
5115 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5116 if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
5117 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5118 }
5119 }
5120 }
5121 else {
5122 for (i = 0; i < maxcode; i++) {
5123 if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
5124 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5125 }
5126 }
5127 }
5128 break;
5129 }
5130 }
5131 else {
5132 min = ONIGENC_MBC_MINLEN(env->enc);
5133 }
5134 set_mml(&opt->len, min, max);
5135 }
5136 break;
5137
5138 case NT_CANY:
5139 {
5140 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5141 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5142 set_mml(&opt->len, min, max);
5143 }
5144 break;
5145
5146 case NT_ANCHOR:
5147 switch (NANCHOR(node)->type) {
5148 case ANCHOR_BEGIN_BUF:
5149 case ANCHOR_BEGIN_POSITION:
5150 case ANCHOR_BEGIN_LINE:
5151 case ANCHOR_END_BUF:
5152 case ANCHOR_SEMI_END_BUF:
5153 case ANCHOR_END_LINE:
5154 case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
5155 case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
5156 add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
5157 break;
5158
5159 case ANCHOR_PREC_READ:
5160 {
5161 NodeOptInfo nopt;
5162
5163 r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5164 if (r == 0) {
5165 if (nopt.exb.len > 0)
5166 copy_opt_exact_info(&opt->expr, &nopt.exb);
5167 else if (nopt.exm.len > 0)
5168 copy_opt_exact_info(&opt->expr, &nopt.exm);
5169
5170 opt->expr.reach_end = 0;
5171
5172 if (nopt.map.value > 0)
5173 copy_opt_map_info(&opt->map, &nopt.map);
5174 }
5175 }
5176 break;
5177
5178 case ANCHOR_LOOK_BEHIND_NOT:
5179 break;
5180 }
5181 break;
5182
5183 case NT_BREF:
5184 {
5185 int i;
5186 int* backs;
5187 OnigDistance min, max, tmin, tmax;
5188 Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5189 BRefNode* br = NBREF(node);
5190
5191 if (br->state & NST_RECURSION) {
5192 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5193 break;
5194 }
5195 backs = BACKREFS_P(br);
5196 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5197 if (r != 0) break;
5198 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5199 if (r != 0) break;
5200 for (i = 1; i < br->back_num; i++) {
5201 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5202 if (r != 0) break;
5203 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5204 if (r != 0) break;
5205 if (min > tmin) min = tmin;
5206 if (max < tmax) max = tmax;
5207 }
5208 if (r == 0) set_mml(&opt->len, min, max);
5209 }
5210 break;
5211
5212#ifdef USE_SUBEXP_CALL
5213 case NT_CALL:
5214 if (IS_CALL_RECURSION(NCALL(node)))
5215 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5216 else {
5217 OnigOptionType save = env->options;
5218 env->options = NENCLOSE(NCALL(node)->target)->option;
5219 r = optimize_node_left(NCALL(node)->target, opt, env);
5220 env->options = save;
5221 }
5222 break;
5223#endif
5224
5225 case NT_QTFR:
5226 {
5227 int i;
5228 OnigDistance min, max;
5229 NodeOptInfo nopt;
5230 QtfrNode* qn = NQTFR(node);
5231
5232 r = optimize_node_left(qn->target, &nopt, env);
5233 if (r) break;
5234
5235 if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn->upper)) {
5236 if (env->mmd.max == 0 &&
5237 NTYPE(qn->target) == NT_CANY && qn->greedy) {
5238 if (IS_MULTILINE(env->options))
5239 /* implicit anchor: /.*a/ ==> /\A.*a/ */
5240 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
5241 else
5242 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
5243 }
5244 }
5245 else {
5246 if (qn->lower > 0) {
5247 copy_node_opt_info(opt, &nopt);
5248 if (nopt.exb.len > 0) {
5249 if (nopt.exb.reach_end) {
5250 for (i = 2; i <= qn->lower &&
5251 ! is_full_opt_exact_info(&opt->exb); i++) {
5252 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5253 }
5254 if (i < qn->lower) {
5255 opt->exb.reach_end = 0;
5256 }
5257 }
5258 }
5259
5260 if (qn->lower != qn->upper) {
5261 opt->exb.reach_end = 0;
5262 opt->exm.reach_end = 0;
5263 }
5264 if (qn->lower > 1)
5265 opt->exm.reach_end = 0;
5266 }
5267 }
5268
5269 min = distance_multiply(nopt.len.min, qn->lower);
5270 if (IS_REPEAT_INFINITE(qn->upper))
5271 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5272 else
5273 max = distance_multiply(nopt.len.max, qn->upper);
5274
5275 set_mml(&opt->len, min, max);
5276 }
5277 break;
5278
5279 case NT_ENCLOSE:
5280 {
5281 EncloseNode* en = NENCLOSE(node);
5282
5283 switch (en->type) {
5284 case ENCLOSE_OPTION:
5285 {
5286 OnigOptionType save = env->options;
5287
5288 env->options = en->option;
5289 r = optimize_node_left(en->target, opt, env);
5290 env->options = save;
5291 }
5292 break;
5293
5294 case ENCLOSE_MEMORY:
5295#ifdef USE_SUBEXP_CALL
5296 en->opt_count++;
5297 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
5298 OnigDistance min, max;
5299
5300 min = 0;
5301 max = ONIG_INFINITE_DISTANCE;
5302 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5303 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5304 set_mml(&opt->len, min, max);
5305 }
5306 else
5307#endif
5308 {
5309 r = optimize_node_left(en->target, opt, env);
5310
5311 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
5312 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5313 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
5314 }
5315 }
5316 break;
5317
5318 case ENCLOSE_STOP_BACKTRACK:
5319 case ENCLOSE_CONDITION:
5320 r = optimize_node_left(en->target, opt, env);
5321 break;
5322
5323 case ENCLOSE_ABSENT:
5324 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5325 break;
5326 }
5327 }
5328 break;
5329
5330 default:
5331#ifdef ONIG_DEBUG
5332 fprintf(stderr, "optimize_node_left: undefined node type %d\n",
5333 NTYPE(node));
5334#endif
5335 r = ONIGERR_TYPE_BUG;
5336 break;
5337 }
5338
5339 return r;
5340}
5341
5342static int
5343set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
5344{
5345 int r;
5346 int allow_reverse;
5347
5348 if (e->len == 0) return 0;
5349
5350 reg->exact = (UChar* )xmalloc(e->len);
5351 CHECK_NULL_RETURN_MEMERR(reg->exact);
5352 xmemcpy(reg->exact, e->s, e->len);
5353 reg->exact_end = reg->exact + e->len;
5354
5355 allow_reverse =
5356 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
5357
5358 if (e->ignore_case > 0) {
5359 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5360 r = set_bm_skip(reg->exact, reg->exact_end, reg,
5361 reg->map, &(reg->int_map), 1);
5362 if (r == 0) {
5363 reg->optimize = (allow_reverse != 0
5364 ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
5365 }
5366 else {
5367 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5368 }
5369 }
5370 else {
5371 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5372 }
5373 }
5374 else {
5375 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5376 r = set_bm_skip(reg->exact, reg->exact_end, reg,
5377 reg->map, &(reg->int_map), 0);
5378 if (r == 0) {
5379 reg->optimize = (allow_reverse != 0
5380 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
5381 }
5382 else {
5383 reg->optimize = ONIG_OPTIMIZE_EXACT;
5384 }
5385 }
5386 else {
5387 reg->optimize = ONIG_OPTIMIZE_EXACT;
5388 }
5389 }
5390
5391 reg->dmin = e->mmd.min;
5392 reg->dmax = e->mmd.max;
5393
5394 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5395 reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
5396 }
5397
5398 return 0;
5399}
5400
5401static void
5402set_optimize_map_info(regex_t* reg, OptMapInfo* m)
5403{
5404 int i;
5405
5406 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5407 reg->map[i] = m->map[i];
5408
5409 reg->optimize = ONIG_OPTIMIZE_MAP;
5410 reg->dmin = m->mmd.min;
5411 reg->dmax = m->mmd.max;
5412
5413 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5414 reg->threshold_len = (int )(reg->dmin + 1);
5415 }
5416}
5417
5418static void
5419set_sub_anchor(regex_t* reg, OptAncInfo* anc)
5420{
5421 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
5422 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
5423}
5424
5425#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5426static void print_optimize_info(FILE* f, regex_t* reg);
5427#endif
5428
5429static int
5430set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
5431{
5432
5433 int r;
5434 NodeOptInfo opt;
5435 OptEnv env;
5436
5437 env.enc = reg->enc;
5438 env.options = reg->options;
5439 env.case_fold_flag = reg->case_fold_flag;
5440 env.scan_env = scan_env;
5441 clear_mml(&env.mmd);
5442
5443 r = optimize_node_left(node, &opt, &env);
5444 if (r) return r;
5445
5446 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5447 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
5448 ANCHOR_LOOK_BEHIND);
5449
5450 if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
5451 reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
5452
5453 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
5454 ANCHOR_PREC_READ_NOT);
5455
5456 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5457 reg->anchor_dmin = opt.len.min;
5458 reg->anchor_dmax = opt.len.max;
5459 }
5460
5461 if (opt.exb.len > 0 || opt.exm.len > 0) {
5462 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5463 if (opt.map.value > 0 &&
5464 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5465 goto set_map;
5466 }
5467 else {
5468 r = set_optimize_exact_info(reg, &opt.exb);
5469 set_sub_anchor(reg, &opt.exb.anc);
5470 }
5471 }
5472 else if (opt.map.value > 0) {
5473 set_map:
5474 set_optimize_map_info(reg, &opt.map);
5475 set_sub_anchor(reg, &opt.map.anc);
5476 }
5477 else {
5478 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
5479 if (opt.len.max == 0)
5480 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
5481 }
5482
5483#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5484 print_optimize_info(stderr, reg);
5485#endif
5486 return r;
5487}
5488
5489static void
5490clear_optimize_info(regex_t* reg)
5491{
5492 reg->optimize = ONIG_OPTIMIZE_NONE;
5493 reg->anchor = 0;
5494 reg->anchor_dmin = 0;
5495 reg->anchor_dmax = 0;
5496 reg->sub_anchor = 0;
5497 reg->exact_end = (UChar* )NULL;
5498 reg->threshold_len = 0;
5499 if (IS_NOT_NULL(reg->exact)) {
5500 xfree(reg->exact);
5501 reg->exact = (UChar* )NULL;
5502 }
5503}
5504
5505#ifdef ONIG_DEBUG
5506
5507static void print_enc_string(FILE* fp, OnigEncoding enc,
5508 const UChar *s, const UChar *end)
5509{
5510 fprintf(fp, "\nPATTERN: /");
5511
5512 if (ONIGENC_MBC_MINLEN(enc) > 1) {
5513 const UChar *p;
5514 OnigCodePoint code;
5515
5516 p = s;
5517 while (p < end) {
5518 code = ONIGENC_MBC_TO_CODE(enc, p, end);
5519 if (code >= 0x80) {
5520 fprintf(fp, " 0x%04x ", (int )code);
5521 }
5522 else {
5523 fputc((int )code, fp);
5524 }
5525
5526 p += enclen(enc, p, end);
5527 }
5528 }
5529 else {
5530 while (s < end) {
5531 fputc((int )*s, fp);
5532 s++;
5533 }
5534 }
5535
5536 fprintf(fp, "/ (%s)\n", enc->name);
5537}
5538#endif /* ONIG_DEBUG */
5539
5540#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5541static void
5542print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5543{
5544 if (a == ONIG_INFINITE_DISTANCE)
5545 fputs("inf", f);
5546 else
5547 fprintf(f, "(%"PRIuPTR")", a);
5548
5549 fputs("-", f);
5550
5551 if (b == ONIG_INFINITE_DISTANCE)
5552 fputs("inf", f);
5553 else
5554 fprintf(f, "(%"PRIuPTR")", b);
5555}
5556
5557static void
5558print_anchor(FILE* f, int anchor)
5559{
5560 int q = 0;
5561
5562 fprintf(f, "[");
5563
5564 if (anchor & ANCHOR_BEGIN_BUF) {
5565 fprintf(f, "begin-buf");
5566 q = 1;
5567 }
5568 if (anchor & ANCHOR_BEGIN_LINE) {
5569 if (q) fprintf(f, ", ");
5570 q = 1;
5571 fprintf(f, "begin-line");
5572 }
5573 if (anchor & ANCHOR_BEGIN_POSITION) {
5574 if (q) fprintf(f, ", ");
5575 q = 1;
5576 fprintf(f, "begin-pos");
5577 }
5578 if (anchor & ANCHOR_END_BUF) {
5579 if (q) fprintf(f, ", ");
5580 q = 1;
5581 fprintf(f, "end-buf");
5582 }
5583 if (anchor & ANCHOR_SEMI_END_BUF) {
5584 if (q) fprintf(f, ", ");
5585 q = 1;
5586 fprintf(f, "semi-end-buf");
5587 }
5588 if (anchor & ANCHOR_END_LINE) {
5589 if (q) fprintf(f, ", ");
5590 q = 1;
5591 fprintf(f, "end-line");
5592 }
5593 if (anchor & ANCHOR_ANYCHAR_STAR) {
5594 if (q) fprintf(f, ", ");
5595 q = 1;
5596 fprintf(f, "anychar-star");
5597 }
5598 if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5599 if (q) fprintf(f, ", ");
5600 fprintf(f, "anychar-star-ml");
5601 }
5602
5603 fprintf(f, "]");
5604}
5605
5606static void
5607print_optimize_info(FILE* f, regex_t* reg)
5608{
5609 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5610 "EXACT_IC", "MAP",
5611 "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
5612
5613 fprintf(f, "optimize: %s\n", on[reg->optimize]);
5614 fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
5615 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5616 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5617 fprintf(f, "\n");
5618
5619 if (reg->optimize) {
5620 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
5621 fprintf(f, "\n");
5622 }
5623 fprintf(f, "\n");
5624
5625 if (reg->exact) {
5626 UChar *p;
5627 fprintf(f, "exact: [");
5628 for (p = reg->exact; p < reg->exact_end; p++) {
5629 fputc(*p, f);
5630 }
5631 fprintf(f, "]: length: %"PRIdPTR"\n", (reg->exact_end - reg->exact));
5632 }
5633 else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5634 int c, i, n = 0;
5635
5636 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5637 if (reg->map[i]) n++;
5638
5639 fprintf(f, "map: n=%d\n", n);
5640 if (n > 0) {
5641 c = 0;
5642 fputc('[', f);
5643 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5644 if (reg->map[i] != 0) {
5645 if (c > 0) fputs(", ", f);
5646 c++;
5647 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5648 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5649 fputc(i, f);
5650 else
5651 fprintf(f, "%d", i);
5652 }
5653 }
5654 fprintf(f, "]\n");
5655 }
5656 }
5657}
5658#endif /* ONIG_DEBUG_COMPILE || ONIG_DEBUG_MATCH */
5659
5660
5661extern void
5662onig_free_body(regex_t* reg)
5663{
5664 if (IS_NOT_NULL(reg)) {
5665 xfree(reg->p);
5666 xfree(reg->exact);
5667 xfree(reg->int_map);
5668 xfree(reg->int_map_backward);
5669 xfree(reg->repeat_range);
5670 onig_free(reg->chain);
5671
5672#ifdef USE_NAMED_GROUP
5673 onig_names_free(reg);
5674#endif
5675 }
5676}
5677
5678extern void
5679onig_free(regex_t* reg)
5680{
5681 if (IS_NOT_NULL(reg)) {
5682 onig_free_body(reg);
5683 xfree(reg);
5684 }
5685}
5686
5687static void*
5688dup_copy(const void *ptr, size_t size)
5689{
5690 void *newptr = xmalloc(size);
5691 if (IS_NOT_NULL(newptr)) {
5692 memcpy(newptr, ptr, size);
5693 }
5694 return newptr;
5695}
5696
5697extern int
5698onig_reg_copy(regex_t** nreg, regex_t* oreg)
5699{
5700 if (IS_NOT_NULL(oreg)) {
5701 regex_t *reg = *nreg = (regex_t* )xmalloc(sizeof(regex_t));
5702 if (IS_NULL(reg)) return ONIGERR_MEMORY;
5703
5704 *reg = *oreg;
5705
5706# define COPY_FAILED(mem, size) IS_NULL(reg->mem = dup_copy(reg->mem, size))
5707
5708 if (IS_NOT_NULL(reg->exact)) {
5709 size_t exact_size = reg->exact_end - reg->exact;
5710 if (COPY_FAILED(exact, exact_size))
5711 goto err;
5712 (reg)->exact_end = (reg)->exact + exact_size;
5713 }
5714
5715 if (IS_NOT_NULL(reg->int_map)) {
5716 if (COPY_FAILED(int_map, sizeof(int) * ONIG_CHAR_TABLE_SIZE))
5717 goto err_int_map;
5718 }
5719 if (IS_NOT_NULL(reg->int_map_backward)) {
5720 if (COPY_FAILED(int_map_backward, sizeof(int) * ONIG_CHAR_TABLE_SIZE))
5721 goto err_int_map_backward;
5722 }
5723 if (IS_NOT_NULL(reg->p)) {
5724 if (COPY_FAILED(p, reg->alloc))
5725 goto err_p;
5726 }
5727 if (IS_NOT_NULL(reg->repeat_range)) {
5728 if (COPY_FAILED(repeat_range, reg->repeat_range_alloc * sizeof(OnigRepeatRange)))
5729 goto err_repeat_range;
5730 }
5731 if (IS_NOT_NULL(reg->name_table)) {
5732 if (onig_names_copy(reg, oreg))
5733 goto err_name_table;
5734 }
5735 if (IS_NOT_NULL(reg->chain)) {
5736 if (onig_reg_copy(&reg->chain, reg->chain))
5737 goto err_chain;
5738 }
5739 return 0;
5740# undef COPY_FAILED
5741
5742 err_chain:
5743 onig_names_free(reg);
5744 err_name_table:
5745 xfree(reg->repeat_range);
5746 err_repeat_range:
5747 xfree(reg->p);
5748 err_p:
5749 xfree(reg->int_map_backward);
5750 err_int_map_backward:
5751 xfree(reg->int_map);
5752 err_int_map:
5753 xfree(reg->exact);
5754 err:
5755 xfree(reg);
5756 return ONIGERR_MEMORY;
5757 }
5758 return 0;
5759}
5760
5761#ifdef RUBY
5762size_t
5763onig_memsize(const regex_t *reg)
5764{
5765 size_t size = sizeof(regex_t);
5766 if (IS_NULL(reg)) return 0;
5767 if (IS_NOT_NULL(reg->p)) size += reg->alloc;
5768 if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
5769 if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5770 if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5771 if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
5772 if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
5773
5774 return size;
5775}
5776
5777size_t
5778onig_region_memsize(const OnigRegion *regs)
5779{
5780 size_t size = sizeof(*regs);
5781 if (IS_NULL(regs)) return 0;
5782 size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
5783 return size;
5784}
5785#endif
5786
5787#define REGEX_TRANSFER(to,from) do {\
5788 onig_free_body(to);\
5789 xmemcpy(to, from, sizeof(regex_t));\
5790 xfree(from);\
5791} while (0)
5792
5793#if 0
5794extern void
5795onig_transfer(regex_t* to, regex_t* from)
5796{
5797 REGEX_TRANSFER(to, from);
5798}
5799#endif
5800
5801#ifdef ONIG_DEBUG_COMPILE
5802static void print_compiled_byte_code_list(FILE* f, regex_t* reg);
5803#endif
5804#ifdef ONIG_DEBUG_PARSE_TREE
5805static void print_tree(FILE* f, Node* node);
5806#endif
5807
5808#ifdef RUBY
5809extern int
5810onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5811 OnigErrorInfo* einfo)
5812{
5813 return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0);
5814}
5815#endif
5816
5817#ifdef RUBY
5818extern int
5819onig_compile_ruby(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5820 OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
5821#else
5822extern int
5823onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5824 OnigErrorInfo* einfo)
5825#endif
5826{
5827#define COMPILE_INIT_SIZE 20
5828
5829 int r;
5830 OnigDistance init_size;
5831 Node* root;
5832 ScanEnv scan_env = {0};
5833#ifdef USE_SUBEXP_CALL
5834 UnsetAddrList uslist;
5835#endif
5836
5837 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5838
5839#ifdef RUBY
5840 scan_env.sourcefile = sourcefile;
5841 scan_env.sourceline = sourceline;
5842#endif
5843
5844#ifdef ONIG_DEBUG
5845 print_enc_string(stderr, reg->enc, pattern, pattern_end);
5846#endif
5847
5848 if (reg->alloc == 0) {
5849 init_size = (pattern_end - pattern) * 2;
5850 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5851 r = BBUF_INIT(reg, init_size);
5852 if (r != 0) goto end;
5853 }
5854 else
5855 reg->used = 0;
5856
5857 reg->num_mem = 0;
5858 reg->num_repeat = 0;
5859 reg->num_null_check = 0;
5860 reg->repeat_range_alloc = 0;
5861 reg->repeat_range = (OnigRepeatRange* )NULL;
5862#ifdef USE_COMBINATION_EXPLOSION_CHECK
5863 reg->num_comb_exp_check = 0;
5864#endif
5865
5866 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5867 if (r != 0) goto err;
5868
5869#ifdef ONIG_DEBUG_PARSE_TREE
5870# if 0
5871 fprintf(stderr, "ORIGINAL PARSE TREE:\n");
5872 print_tree(stderr, root);
5873# endif
5874#endif
5875
5876#ifdef USE_NAMED_GROUP
5877 /* mixed use named group and no-named group */
5878 if (scan_env.num_named > 0 &&
5879 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5880 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5881 if (scan_env.num_named != scan_env.num_mem)
5882 r = disable_noname_group_capture(&root, reg, &scan_env);
5883 else
5884 r = numbered_ref_check(root);
5885
5886 if (r != 0) goto err;
5887 }
5888#endif
5889
5890#ifdef USE_SUBEXP_CALL
5891 if (scan_env.num_call > 0) {
5892 r = unset_addr_list_init(&uslist, scan_env.num_call);
5893 if (r != 0) goto err;
5894 scan_env.unset_addr_list = &uslist;
5895 r = setup_subexp_call(root, &scan_env);
5896 if (r != 0) goto err_unset;
5897 r = subexp_recursive_check_trav(root, &scan_env);
5898 if (r < 0) goto err_unset;
5899 r = subexp_inf_recursive_check_trav(root, &scan_env);
5900 if (r != 0) goto err_unset;
5901
5902 reg->num_call = scan_env.num_call;
5903 }
5904 else
5905 reg->num_call = 0;
5906#endif
5907
5908 r = setup_tree(root, reg, 0, &scan_env);
5909 if (r != 0) goto err_unset;
5910
5911#ifdef ONIG_DEBUG_PARSE_TREE
5912 print_tree(stderr, root);
5913#endif
5914
5915 reg->capture_history = scan_env.capture_history;
5916 reg->bt_mem_start = scan_env.bt_mem_start;
5917 reg->bt_mem_start |= reg->capture_history;
5918 if (IS_FIND_CONDITION(reg->options))
5919 BIT_STATUS_ON_ALL(reg->bt_mem_end);
5920 else {
5921 reg->bt_mem_end = scan_env.bt_mem_end;
5922 reg->bt_mem_end |= reg->capture_history;
5923 }
5924
5925#ifdef USE_COMBINATION_EXPLOSION_CHECK
5926 if (scan_env.backrefed_mem == 0
5927# ifdef USE_SUBEXP_CALL
5928 || scan_env.num_call == 0
5929# endif
5930 ) {
5931 setup_comb_exp_check(root, 0, &scan_env);
5932# ifdef USE_SUBEXP_CALL
5933 if (scan_env.has_recursion != 0) {
5934 scan_env.num_comb_exp_check = 0;
5935 }
5936 else
5937# endif
5938 if (scan_env.comb_exp_max_regnum > 0) {
5939 int i;
5940 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5941 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5942 scan_env.num_comb_exp_check = 0;
5943 break;
5944 }
5945 }
5946 }
5947 }
5948
5949 reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5950#endif
5951
5952 clear_optimize_info(reg);
5953#ifndef ONIG_DONT_OPTIMIZE
5954 r = set_optimize_info_from_tree(root, reg, &scan_env);
5955 if (r != 0) goto err_unset;
5956#endif
5957
5958 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5959 xfree(scan_env.mem_nodes_dynamic);
5960 scan_env.mem_nodes_dynamic = (Node** )NULL;
5961 }
5962
5963 r = compile_tree(root, reg);
5964 if (r == 0) {
5965 r = add_opcode(reg, OP_END);
5966#ifdef USE_SUBEXP_CALL
5967 if (scan_env.num_call > 0) {
5968 r = unset_addr_list_fix(&uslist, reg);
5969 unset_addr_list_end(&uslist);
5970 if (r) goto err;
5971 }
5972#endif
5973
5974 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5975 reg->stack_pop_level = STACK_POP_LEVEL_ALL;
5976 else {
5977 if (reg->bt_mem_start != 0)
5978 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
5979 else
5980 reg->stack_pop_level = STACK_POP_LEVEL_FREE;
5981 }
5982 }
5983#ifdef USE_SUBEXP_CALL
5984 else if (scan_env.num_call > 0) {
5985 unset_addr_list_end(&uslist);
5986 }
5987#endif
5988 onig_node_free(root);
5989
5990#ifdef ONIG_DEBUG_COMPILE
5991# ifdef USE_NAMED_GROUP
5992 onig_print_names(stderr, reg);
5993# endif
5994 print_compiled_byte_code_list(stderr, reg);
5995#endif
5996
5997 end:
5998 onig_reg_resize(reg);
5999 return r;
6000
6001 err_unset:
6002#ifdef USE_SUBEXP_CALL
6003 if (scan_env.num_call > 0) {
6004 unset_addr_list_end(&uslist);
6005 }
6006#endif
6007 err:
6008 if (IS_NOT_NULL(scan_env.error)) {
6009 if (IS_NOT_NULL(einfo)) {
6010 einfo->enc = scan_env.enc;
6011 einfo->par = scan_env.error;
6012 einfo->par_end = scan_env.error_end;
6013 }
6014 }
6015
6016 onig_node_free(root);
6017 xfree(scan_env.mem_nodes_dynamic);
6018
6019 return r;
6020}
6021
6022static int onig_inited = 0;
6023
6024extern int
6025onig_reg_init(regex_t* reg, OnigOptionType option,
6026 OnigCaseFoldType case_fold_flag,
6027 OnigEncoding enc, const OnigSyntaxType* syntax)
6028{
6029 if (! onig_inited)
6030 onig_init();
6031
6032 if (IS_NULL(reg))
6033 return ONIGERR_INVALID_ARGUMENT;
6034
6035 if (ONIGENC_IS_UNDEF(enc))
6036 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
6037
6038 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
6039 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
6040 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
6041 }
6042
6043 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
6044 option |= syntax->options;
6045 option &= ~ONIG_OPTION_SINGLELINE;
6046 }
6047 else
6048 option |= syntax->options;
6049
6050 (reg)->enc = enc;
6051 (reg)->options = option;
6052 (reg)->syntax = syntax;
6053 (reg)->optimize = 0;
6054 (reg)->exact = (UChar* )NULL;
6055 (reg)->int_map = (int* )NULL;
6056 (reg)->int_map_backward = (int* )NULL;
6057 (reg)->chain = (regex_t* )NULL;
6058
6059 (reg)->p = (UChar* )NULL;
6060 (reg)->alloc = 0;
6061 (reg)->used = 0;
6062 (reg)->name_table = (void* )NULL;
6063
6064 (reg)->case_fold_flag = case_fold_flag;
6065
6066 (reg)->timelimit = 0;
6067
6068 return 0;
6069}
6070
6071extern int
6072onig_new_without_alloc(regex_t* reg, const UChar* pattern,
6073 const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
6074 const OnigSyntaxType* syntax, OnigErrorInfo* einfo)
6075{
6076 int r;
6077
6078 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
6079 if (r) return r;
6080
6081 r = onig_compile(reg, pattern, pattern_end, einfo);
6082 return r;
6083}
6084
6085extern int
6086onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
6087 OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
6088 OnigErrorInfo* einfo)
6089{
6090 *reg = (regex_t* )xmalloc(sizeof(regex_t));
6091 if (IS_NULL(*reg)) return ONIGERR_MEMORY;
6092
6093 int r = onig_new_without_alloc(*reg, pattern, pattern_end, option, enc, syntax, einfo);
6094 if (r) {
6095 onig_free(*reg);
6096 *reg = NULL;
6097 }
6098
6099 return r;
6100}
6101
6102extern int
6103onig_initialize(OnigEncoding encodings[] ARG_UNUSED, int n ARG_UNUSED)
6104{
6105 return onig_init();
6106}
6107
6108extern int
6109onig_init(void)
6110{
6111 if (onig_inited != 0)
6112 return 0;
6113
6114 onig_inited = 1;
6115
6116#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6117 _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
6118#endif
6119
6120 onigenc_init();
6121 /* onigenc_set_default_caseconv_table((UChar* )0); */
6122
6123#ifdef ONIG_DEBUG_STATISTICS
6124 onig_statistics_init();
6125#endif
6126
6127 return 0;
6128}
6129
6130
6131static OnigEndCallListItemType* EndCallTop;
6132
6133extern void onig_add_end_call(void (*func)(void))
6134{
6136
6137 item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
6138 if (item == 0) return ;
6139
6140 item->next = EndCallTop;
6141 item->func = func;
6142
6143 EndCallTop = item;
6144}
6145
6146static void
6147exec_end_call_list(void)
6148{
6150 void (*func)(void);
6151
6152 while (EndCallTop != 0) {
6153 func = EndCallTop->func;
6154 (*func)();
6155
6156 prev = EndCallTop;
6157 EndCallTop = EndCallTop->next;
6158 xfree(prev);
6159 }
6160}
6161
6162extern int
6163onig_end(void)
6164{
6165 exec_end_call_list();
6166
6167#ifdef ONIG_DEBUG_STATISTICS
6168 onig_print_statistics(stderr);
6169#endif
6170
6171#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6172 _CrtDumpMemoryLeaks();
6173#endif
6174
6175 onig_inited = 0;
6176
6177 return 0;
6178}
6179
6180extern int
6181onig_is_in_code_range(const UChar* p, OnigCodePoint code)
6182{
6183 OnigCodePoint n, *data;
6184 OnigCodePoint low, high, x;
6185
6186 GET_CODE_POINT(n, p);
6187 data = (OnigCodePoint* )p;
6188 data++;
6189
6190 for (low = 0, high = n; low < high; ) {
6191 x = (low + high) >> 1;
6192 if (code > data[x * 2 + 1])
6193 low = x + 1;
6194 else
6195 high = x;
6196 }
6197
6198 return ((low < n && code >= data[low * 2]) ? 1 : 0);
6199}
6200
6201extern int
6202onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
6203{
6204 int found;
6205
6206 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6207 if (IS_NULL(cc->mbuf)) {
6208 found = 0;
6209 }
6210 else {
6211 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6212 }
6213 }
6214 else {
6215 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6216 }
6217
6218 if (IS_NCCLASS_NOT(cc))
6219 return !found;
6220 else
6221 return found;
6222}
6223
6224extern int
6225onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
6226{
6227 int len;
6228
6229 if (ONIGENC_MBC_MINLEN(enc) > 1) {
6230 len = 2;
6231 }
6232 else {
6233 len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6234 }
6235 return onig_is_code_in_cc_len(len, code, cc);
6236}
6237
6238
6239#ifdef ONIG_DEBUG
6240
6241/* arguments type */
6242# define ARG_SPECIAL -1
6243# define ARG_NON 0
6244# define ARG_RELADDR 1
6245# define ARG_ABSADDR 2
6246# define ARG_LENGTH 3
6247# define ARG_MEMNUM 4
6248# define ARG_OPTION 5
6249# define ARG_STATE_CHECK 6
6250
6251OnigOpInfoType OnigOpInfo[] = {
6252 { OP_FINISH, "finish", ARG_NON },
6253 { OP_END, "end", ARG_NON },
6254 { OP_EXACT1, "exact1", ARG_SPECIAL },
6255 { OP_EXACT2, "exact2", ARG_SPECIAL },
6256 { OP_EXACT3, "exact3", ARG_SPECIAL },
6257 { OP_EXACT4, "exact4", ARG_SPECIAL },
6258 { OP_EXACT5, "exact5", ARG_SPECIAL },
6259 { OP_EXACTN, "exactn", ARG_SPECIAL },
6260 { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
6261 { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
6262 { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
6263 { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
6264 { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
6265 { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
6266 { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
6267 { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
6268 { OP_CCLASS, "cclass", ARG_SPECIAL },
6269 { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
6270 { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
6271 { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
6272 { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
6273 { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
6274 { OP_ANYCHAR, "anychar", ARG_NON },
6275 { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
6276 { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
6277 { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
6278 { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
6279 { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
6280 { OP_WORD, "word", ARG_NON },
6281 { OP_NOT_WORD, "not-word", ARG_NON },
6282 { OP_WORD_BOUND, "word-bound", ARG_NON },
6283 { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
6284 { OP_WORD_BEGIN, "word-begin", ARG_NON },
6285 { OP_WORD_END, "word-end", ARG_NON },
6286 { OP_ASCII_WORD, "ascii-word", ARG_NON },
6287 { OP_NOT_ASCII_WORD, "not-ascii-word", ARG_NON },
6288 { OP_ASCII_WORD_BOUND, "ascii-word-bound", ARG_NON },
6289 { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
6290 { OP_ASCII_WORD_BEGIN, "ascii-word-begin", ARG_NON },
6291 { OP_ASCII_WORD_END, "ascii-word-end", ARG_NON },
6292 { OP_BEGIN_BUF, "begin-buf", ARG_NON },
6293 { OP_END_BUF, "end-buf", ARG_NON },
6294 { OP_BEGIN_LINE, "begin-line", ARG_NON },
6295 { OP_END_LINE, "end-line", ARG_NON },
6296 { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
6297 { OP_BEGIN_POSITION, "begin-position", ARG_NON },
6298 { OP_BACKREF1, "backref1", ARG_NON },
6299 { OP_BACKREF2, "backref2", ARG_NON },
6300 { OP_BACKREFN, "backrefn", ARG_MEMNUM },
6301 { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
6302 { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
6303 { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
6304 { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
6305 { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
6306 { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
6307 { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
6308 { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
6309 { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
6310 { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
6311 { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
6312 { OP_SET_OPTION, "set-option", ARG_OPTION },
6313 { OP_KEEP, "keep", ARG_NON },
6314 { OP_FAIL, "fail", ARG_NON },
6315 { OP_JUMP, "jump", ARG_RELADDR },
6316 { OP_PUSH, "push", ARG_RELADDR },
6317 { OP_POP, "pop", ARG_NON },
6318 { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
6319 { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
6320 { OP_REPEAT, "repeat", ARG_SPECIAL },
6321 { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
6322 { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
6323 { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
6324 { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
6325 { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
6326 { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
6327 { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
6328 { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
6329 { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
6330 { OP_PUSH_POS, "push-pos", ARG_NON },
6331 { OP_POP_POS, "pop-pos", ARG_NON },
6332 { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
6333 { OP_FAIL_POS, "fail-pos", ARG_NON },
6334 { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
6335 { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
6336 { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
6337 { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
6338 { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
6339 { OP_PUSH_ABSENT_POS, "push-absent-pos", ARG_NON },
6340 { OP_ABSENT, "absent", ARG_RELADDR },
6341 { OP_ABSENT_END, "absent-end", ARG_NON },
6342 { OP_CALL, "call", ARG_ABSADDR },
6343 { OP_RETURN, "return", ARG_NON },
6344 { OP_CONDITION, "condition", ARG_SPECIAL },
6345 { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
6346 { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
6347 { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
6348 { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
6349 { OP_STATE_CHECK_ANYCHAR_ML_STAR,
6350 "state-check-anychar-ml*", ARG_STATE_CHECK },
6351 { -1, "", ARG_NON }
6352};
6353
6354static const char*
6355op2name(int opcode)
6356{
6357 int i;
6358
6359 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6360 if (opcode == OnigOpInfo[i].opcode)
6361 return OnigOpInfo[i].name;
6362 }
6363 return "";
6364}
6365
6366static int
6367op2arg_type(int opcode)
6368{
6369 int i;
6370
6371 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6372 if (opcode == OnigOpInfo[i].opcode)
6373 return OnigOpInfo[i].arg_type;
6374 }
6375 return ARG_SPECIAL;
6376}
6377
6378# ifdef ONIG_DEBUG_PARSE_TREE
6379static void
6380Indent(FILE* f, int indent)
6381{
6382 int i;
6383 for (i = 0; i < indent; i++) putc(' ', f);
6384}
6385# endif /* ONIG_DEBUG_PARSE_TREE */
6386
6387static void
6388p_string(FILE* f, ptrdiff_t len, UChar* s)
6389{
6390 fputs(":", f);
6391 while (len-- > 0) { fputc(*s++, f); }
6392}
6393
6394static void
6395p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
6396{
6397 int x = len * mb_len;
6398
6399 fprintf(f, ":%d:", len);
6400 while (x-- > 0) { fputc(*s++, f); }
6401}
6402
6403extern void
6404onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
6405 OnigEncoding enc)
6406{
6407 int i, n, arg_type;
6408 RelAddrType addr;
6409 LengthType len;
6410 MemNumType mem;
6411 StateCheckNumType scn;
6412 OnigCodePoint code;
6413 UChar *q;
6414
6415 fprintf(f, "[%s", op2name(*bp));
6416 arg_type = op2arg_type(*bp);
6417 if (arg_type != ARG_SPECIAL) {
6418 bp++;
6419 switch (arg_type) {
6420 case ARG_NON:
6421 break;
6422 case ARG_RELADDR:
6423 GET_RELADDR_INC(addr, bp);
6424 fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6425 break;
6426 case ARG_ABSADDR:
6427 GET_ABSADDR_INC(addr, bp);
6428 fprintf(f, ":(%d)", addr);
6429 break;
6430 case ARG_LENGTH:
6431 GET_LENGTH_INC(len, bp);
6432 fprintf(f, ":%d", len);
6433 break;
6434 case ARG_MEMNUM:
6435 mem = *((MemNumType* )bp);
6436 bp += SIZE_MEMNUM;
6437 fprintf(f, ":%d", mem);
6438 break;
6439 case ARG_OPTION:
6440 {
6441 OnigOptionType option = *((OnigOptionType* )bp);
6442 bp += SIZE_OPTION;
6443 fprintf(f, ":%d", option);
6444 }
6445 break;
6446
6447 case ARG_STATE_CHECK:
6448 scn = *((StateCheckNumType* )bp);
6449 bp += SIZE_STATE_CHECK_NUM;
6450 fprintf(f, ":%d", scn);
6451 break;
6452 }
6453 }
6454 else {
6455 switch (*bp++) {
6456 case OP_EXACT1:
6457 case OP_ANYCHAR_STAR_PEEK_NEXT:
6458 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
6459 p_string(f, 1, bp++); break;
6460 case OP_EXACT2:
6461 p_string(f, 2, bp); bp += 2; break;
6462 case OP_EXACT3:
6463 p_string(f, 3, bp); bp += 3; break;
6464 case OP_EXACT4:
6465 p_string(f, 4, bp); bp += 4; break;
6466 case OP_EXACT5:
6467 p_string(f, 5, bp); bp += 5; break;
6468 case OP_EXACTN:
6469 GET_LENGTH_INC(len, bp);
6470 p_len_string(f, len, 1, bp);
6471 bp += len;
6472 break;
6473
6474 case OP_EXACTMB2N1:
6475 p_string(f, 2, bp); bp += 2; break;
6476 case OP_EXACTMB2N2:
6477 p_string(f, 4, bp); bp += 4; break;
6478 case OP_EXACTMB2N3:
6479 p_string(f, 6, bp); bp += 6; break;
6480 case OP_EXACTMB2N:
6481 GET_LENGTH_INC(len, bp);
6482 p_len_string(f, len, 2, bp);
6483 bp += len * 2;
6484 break;
6485 case OP_EXACTMB3N:
6486 GET_LENGTH_INC(len, bp);
6487 p_len_string(f, len, 3, bp);
6488 bp += len * 3;
6489 break;
6490 case OP_EXACTMBN:
6491 {
6492 int mb_len;
6493
6494 GET_LENGTH_INC(mb_len, bp);
6495 GET_LENGTH_INC(len, bp);
6496 fprintf(f, ":%d:%d:", mb_len, len);
6497 n = len * mb_len;
6498 while (n-- > 0) { fputc(*bp++, f); }
6499 }
6500 break;
6501
6502 case OP_EXACT1_IC:
6503 len = enclen(enc, bp, bpend);
6504 p_string(f, len, bp);
6505 bp += len;
6506 break;
6507 case OP_EXACTN_IC:
6508 GET_LENGTH_INC(len, bp);
6509 p_len_string(f, len, 1, bp);
6510 bp += len;
6511 break;
6512
6513 case OP_CCLASS:
6514 n = bitset_on_num((BitSetRef )bp);
6515 bp += SIZE_BITSET;
6516 fprintf(f, ":%d", n);
6517 break;
6518
6519 case OP_CCLASS_NOT:
6520 n = bitset_on_num((BitSetRef )bp);
6521 bp += SIZE_BITSET;
6522 fprintf(f, ":%d", n);
6523 break;
6524
6525 case OP_CCLASS_MB:
6526 case OP_CCLASS_MB_NOT:
6527 GET_LENGTH_INC(len, bp);
6528 q = bp;
6529# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6530 ALIGNMENT_RIGHT(q);
6531# endif
6532 GET_CODE_POINT(code, q);
6533 bp += len;
6534 fprintf(f, ":%d:%d", (int )code, len);
6535 break;
6536
6537 case OP_CCLASS_MIX:
6538 case OP_CCLASS_MIX_NOT:
6539 n = bitset_on_num((BitSetRef )bp);
6540 bp += SIZE_BITSET;
6541 GET_LENGTH_INC(len, bp);
6542 q = bp;
6543# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6544 ALIGNMENT_RIGHT(q);
6545# endif
6546 GET_CODE_POINT(code, q);
6547 bp += len;
6548 fprintf(f, ":%d:%d:%d", n, (int )code, len);
6549 break;
6550
6551 case OP_BACKREFN_IC:
6552 mem = *((MemNumType* )bp);
6553 bp += SIZE_MEMNUM;
6554 fprintf(f, ":%d", mem);
6555 break;
6556
6557 case OP_BACKREF_MULTI_IC:
6558 case OP_BACKREF_MULTI:
6559 fputs(" ", f);
6560 GET_LENGTH_INC(len, bp);
6561 for (i = 0; i < len; i++) {
6562 GET_MEMNUM_INC(mem, bp);
6563 if (i > 0) fputs(", ", f);
6564 fprintf(f, "%d", mem);
6565 }
6566 break;
6567
6568 case OP_BACKREF_WITH_LEVEL:
6569 {
6570 OnigOptionType option;
6571 LengthType level;
6572
6573 GET_OPTION_INC(option, bp);
6574 fprintf(f, ":%d", option);
6575 GET_LENGTH_INC(level, bp);
6576 fprintf(f, ":%d", level);
6577
6578 fputs(" ", f);
6579 GET_LENGTH_INC(len, bp);
6580 for (i = 0; i < len; i++) {
6581 GET_MEMNUM_INC(mem, bp);
6582 if (i > 0) fputs(", ", f);
6583 fprintf(f, "%d", mem);
6584 }
6585 }
6586 break;
6587
6588 case OP_REPEAT:
6589 case OP_REPEAT_NG:
6590 {
6591 mem = *((MemNumType* )bp);
6592 bp += SIZE_MEMNUM;
6593 addr = *((RelAddrType* )bp);
6594 bp += SIZE_RELADDR;
6595 fprintf(f, ":%d:%d", mem, addr);
6596 }
6597 break;
6598
6599 case OP_PUSH_OR_JUMP_EXACT1:
6600 case OP_PUSH_IF_PEEK_NEXT:
6601 addr = *((RelAddrType* )bp);
6602 bp += SIZE_RELADDR;
6603 fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6604 p_string(f, 1, bp);
6605 bp += 1;
6606 break;
6607
6608 case OP_LOOK_BEHIND:
6609 GET_LENGTH_INC(len, bp);
6610 fprintf(f, ":%d", len);
6611 break;
6612
6613 case OP_PUSH_LOOK_BEHIND_NOT:
6614 GET_RELADDR_INC(addr, bp);
6615 GET_LENGTH_INC(len, bp);
6616 fprintf(f, ":%d:(%s%d)", len, (addr >= 0) ? "+" : "", addr);
6617 break;
6618
6619 case OP_STATE_CHECK_PUSH:
6620 case OP_STATE_CHECK_PUSH_OR_JUMP:
6621 scn = *((StateCheckNumType* )bp);
6622 bp += SIZE_STATE_CHECK_NUM;
6623 addr = *((RelAddrType* )bp);
6624 bp += SIZE_RELADDR;
6625 fprintf(f, ":%d:(%s%d)", scn, (addr >= 0) ? "+" : "", addr);
6626 break;
6627
6628 case OP_CONDITION:
6629 GET_MEMNUM_INC(mem, bp);
6630 GET_RELADDR_INC(addr, bp);
6631 fprintf(f, ":%d:(%s%d)", mem, (addr >= 0) ? "+" : "", addr);
6632 break;
6633
6634 default:
6635 fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6636 bp[-1]);
6637 }
6638 }
6639 fputs("]", f);
6640 if (nextp) *nextp = bp;
6641}
6642
6643# ifdef ONIG_DEBUG_COMPILE
6644static void
6645print_compiled_byte_code_list(FILE* f, regex_t* reg)
6646{
6647 int ncode;
6648 UChar* bp = reg->p;
6649 UChar* end = reg->p + reg->used;
6650
6651 fprintf(f, "code length: %d", reg->used);
6652
6653 ncode = -1;
6654 while (bp < end) {
6655 ncode++;
6656 if (ncode % 5 == 0)
6657 fprintf(f, "\n%ld:", bp - reg->p);
6658 else
6659 fprintf(f, " %ld:", bp - reg->p);
6660 onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6661 }
6662
6663 fprintf(f, "\n");
6664}
6665# endif /* ONIG_DEBUG_COMPILE */
6666
6667# ifdef ONIG_DEBUG_PARSE_TREE
6668static void
6669print_indent_tree(FILE* f, Node* node, int indent)
6670{
6671 int i, type, container_p = 0;
6672 int add = 3;
6673 UChar* p;
6674
6675 Indent(f, indent);
6676 if (IS_NULL(node)) {
6677 fprintf(f, "ERROR: null node!!!\n");
6678 exit (0);
6679 }
6680
6681 type = NTYPE(node);
6682 switch (type) {
6683 case NT_LIST:
6684 case NT_ALT:
6685 if (NTYPE(node) == NT_LIST)
6686 fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t )node);
6687 else
6688 fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t )node);
6689
6690 print_indent_tree(f, NCAR(node), indent + add);
6691 while (IS_NOT_NULL(node = NCDR(node))) {
6692 if (NTYPE(node) != type) {
6693 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6694 exit(0);
6695 }
6696 print_indent_tree(f, NCAR(node), indent + add);
6697 }
6698 break;
6699
6700 case NT_STR:
6701 fprintf(f, "<string%s:%"PRIxPTR">",
6702 (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node);
6703 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6704 if (*p >= 0x20 && *p < 0x7f)
6705 fputc(*p, f);
6706 else {
6707 fprintf(f, " 0x%02x", *p);
6708 }
6709 }
6710 break;
6711
6712 case NT_CCLASS:
6713 fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t )node);
6714 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs("not ", f);
6715 if (NCCLASS(node)->mbuf) {
6716 BBuf* bbuf = NCCLASS(node)->mbuf;
6717 OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
6718 OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
6719 fprintf(f, "%d", *data++);
6720 for (; data < end; data+=2) {
6721 fprintf(f, ",");
6722 fprintf(f, "%04x-%04x", data[0], data[1]);
6723 }
6724 }
6725 break;
6726
6727 case NT_CTYPE:
6728 fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t )node);
6729 switch (NCTYPE(node)->ctype) {
6730 case ONIGENC_CTYPE_WORD:
6731 if (NCTYPE(node)->not != 0)
6732 fputs("not word", f);
6733 else
6734 fputs("word", f);
6735 break;
6736
6737 default:
6738 fprintf(f, "ERROR: undefined ctype.\n");
6739 exit(0);
6740 }
6741 break;
6742
6743 case NT_CANY:
6744 fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t )node);
6745 break;
6746
6747 case NT_ANCHOR:
6748 fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t )node);
6749 switch (NANCHOR(node)->type) {
6750 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
6751 case ANCHOR_END_BUF: fputs("end buf", f); break;
6752 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
6753 case ANCHOR_END_LINE: fputs("end line", f); break;
6754 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
6755 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6756
6757 case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
6758 case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
6759# ifdef USE_WORD_BEGIN_END
6760 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
6761 case ANCHOR_WORD_END: fputs("word end", f); break;
6762# endif
6763 case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
6764 case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
6765 case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
6766 case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
6767 case ANCHOR_KEEP: fputs("keep",f); break;
6768
6769 default:
6770 fprintf(f, "ERROR: undefined anchor type.\n");
6771 break;
6772 }
6773 break;
6774
6775 case NT_BREF:
6776 {
6777 int* p;
6778 BRefNode* br = NBREF(node);
6779 p = BACKREFS_P(br);
6780 fprintf(f, "<backref:%"PRIxPTR">", (intptr_t )node);
6781 for (i = 0; i < br->back_num; i++) {
6782 if (i > 0) fputs(", ", f);
6783 fprintf(f, "%d", p[i]);
6784 }
6785 }
6786 break;
6787
6788# ifdef USE_SUBEXP_CALL
6789 case NT_CALL:
6790 {
6791 CallNode* cn = NCALL(node);
6792 fprintf(f, "<call:%"PRIxPTR">", (intptr_t )node);
6793 p_string(f, cn->name_end - cn->name, cn->name);
6794 }
6795 break;
6796# endif
6797
6798 case NT_QTFR:
6799 fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
6800 NQTFR(node)->lower, NQTFR(node)->upper,
6801 (NQTFR(node)->greedy ? "" : "?"));
6802 print_indent_tree(f, NQTFR(node)->target, indent + add);
6803 break;
6804
6805 case NT_ENCLOSE:
6806 fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t )node);
6807 switch (NENCLOSE(node)->type) {
6808 case ENCLOSE_OPTION:
6809 fprintf(f, "option:%d", NENCLOSE(node)->option);
6810 break;
6811 case ENCLOSE_MEMORY:
6812 fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6813 break;
6814 case ENCLOSE_STOP_BACKTRACK:
6815 fprintf(f, "stop-bt");
6816 break;
6817 case ENCLOSE_CONDITION:
6818 fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
6819 break;
6820 case ENCLOSE_ABSENT:
6821 fprintf(f, "absent");
6822 break;
6823
6824 default:
6825 break;
6826 }
6827 fprintf(f, "\n");
6828 print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6829 break;
6830
6831 default:
6832 fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6833 break;
6834 }
6835
6836 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6837 type != NT_ENCLOSE)
6838 fprintf(f, "\n");
6839
6840 if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6841
6842 fflush(f);
6843}
6844
6845static void
6846print_tree(FILE* f, Node* node)
6847{
6848 print_indent_tree(f, node, 0);
6849}
6850# endif /* ONIG_DEBUG_PARSE_TREE */
6851#endif /* ONIG_DEBUG */
#define xfree
Old name of ruby_xfree.
Definition xmalloc.h:58
#define xrealloc
Old name of ruby_xrealloc.
Definition xmalloc.h:56
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
int len
Length of the buffer.
Definition io.h:8
VALUE type(ANYARGS)
ANYARGS-ed function type.