To: vim_dev@googlegroups.com Subject: Patch 7.3.1073 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 7.3.1073 Problem: New regexp engine may run out of states. Solution: Allocate states dynamically. Also make the test report errors. Files: src/regexp_nfa.c, src/testdir/test64.in, src/testdir/test64.ok, src/testdir/test95.in *** ../vim-7.3.1072/src/regexp_nfa.c 2013-05-30 17:49:19.000000000 +0200 --- src/regexp_nfa.c 2013-05-30 18:36:12.000000000 +0200 *************** *** 233,239 **** /* helper functions used when doing re2post() ... regatom() parsing */ #define EMIT(c) do { \ ! if (post_ptr >= post_end) \ return FAIL; \ *post_ptr++ = c; \ } while (0) --- 233,239 ---- /* helper functions used when doing re2post() ... regatom() parsing */ #define EMIT(c) do { \ ! if (post_ptr >= post_end && realloc_post_list() == FAIL) \ return FAIL; \ *post_ptr++ = c; \ } while (0) *************** *** 256,266 **** nstate_max = (int)(STRLEN(expr) + 1) * NFA_POSTFIX_MULTIPLIER; /* Some items blow up in size, such as [A-z]. Add more space for that. ! * TODO: some patterns may still fail. */ nstate_max += 1000; /* Size for postfix representation of expr. */ ! postfix_size = sizeof(*post_start) * nstate_max; post_start = (int *)lalloc(postfix_size, TRUE); if (post_start == NULL) --- 256,266 ---- nstate_max = (int)(STRLEN(expr) + 1) * NFA_POSTFIX_MULTIPLIER; /* Some items blow up in size, such as [A-z]. Add more space for that. ! * When it is still not enough realloc_post_list() will be used. */ nstate_max += 1000; /* Size for postfix representation of expr. */ ! postfix_size = sizeof(int) * nstate_max; post_start = (int *)lalloc(postfix_size, TRUE); if (post_start == NULL) *************** *** 277,282 **** --- 277,307 ---- } /* + * Allocate more space for post_start. Called when + * running above the estimated number of states. + */ + static int + realloc_post_list() + { + int nstate_max = post_end - post_start; + int new_max = nstate_max + 1000; + int *new_start; + int *old_start; + + new_start = (int *)lalloc(new_max * sizeof(int), TRUE); + if (new_start == NULL) + return FAIL; + mch_memmove(new_start, post_start, nstate_max * sizeof(int)); + vim_memset(new_start + nstate_max, 0, 1000 * sizeof(int)); + old_start = post_start; + post_start = new_start; + post_ptr = new_start + (post_ptr - old_start); + post_end = post_start + new_max; + vim_free(old_start); + return OK; + } + + /* * Search between "start" and "end" and try to recognize a * character class in expanded form. For example [0-9]. * On success, return the id the character class to be emitted. *************** *** 1306,1312 **** int greedy = TRUE; /* Braces are prefixed with '-' ? */ char_u *old_regparse, *new_regparse; int c2; ! int *old_post_ptr, *my_post_start; int old_regnpar; int quest; --- 1331,1338 ---- int greedy = TRUE; /* Braces are prefixed with '-' ? */ char_u *old_regparse, *new_regparse; int c2; ! int old_post_pos; ! int my_post_start; int old_regnpar; int quest; *************** *** 1317,1323 **** * {m,n} is next */ old_regnpar = regnpar; /* store current pos in the postfix form, for \{m,n} involving 0s */ ! my_post_start = post_ptr; ret = nfa_regatom(); if (ret == FAIL) --- 1343,1349 ---- * {m,n} is next */ old_regnpar = regnpar; /* store current pos in the postfix form, for \{m,n} involving 0s */ ! my_post_start = (int)(post_ptr - post_start); ret = nfa_regatom(); if (ret == FAIL) *************** *** 1430,1443 **** if (maxval == 0) { /* Ignore result of previous call to nfa_regatom() */ ! post_ptr = my_post_start; /* NFA_SKIP_CHAR has 0-length and works everywhere */ EMIT(NFA_SKIP_CHAR); return OK; } /* Ignore previous call to nfa_regatom() */ ! post_ptr = my_post_start; /* Save pos after the repeated atom and the \{} */ new_regparse = regparse; --- 1456,1469 ---- if (maxval == 0) { /* Ignore result of previous call to nfa_regatom() */ ! post_ptr = post_start + my_post_start; /* NFA_SKIP_CHAR has 0-length and works everywhere */ EMIT(NFA_SKIP_CHAR); return OK; } /* Ignore previous call to nfa_regatom() */ ! post_ptr = post_start + my_post_start; /* Save pos after the repeated atom and the \{} */ new_regparse = regparse; *************** *** 1449,1461 **** curchr = -1; /* Restore count of parenthesis */ regnpar = old_regnpar; ! old_post_ptr = post_ptr; if (nfa_regatom() == FAIL) return FAIL; /* after "minval" times, atoms are optional */ if (i + 1 > minval) EMIT(quest); ! if (old_post_ptr != my_post_start) EMIT(NFA_CONCAT); } --- 1475,1487 ---- curchr = -1; /* Restore count of parenthesis */ regnpar = old_regnpar; ! old_post_pos = (int)(post_ptr - post_start); if (nfa_regatom() == FAIL) return FAIL; /* after "minval" times, atoms are optional */ if (i + 1 > minval) EMIT(quest); ! if (old_post_pos != my_post_start) EMIT(NFA_CONCAT); } *************** *** 1572,1580 **** nfa_regbranch() { int ch; ! int *old_post_ptr; ! old_post_ptr = post_ptr; /* First branch, possibly the only one */ if (nfa_regconcat() == FAIL) --- 1598,1606 ---- nfa_regbranch() { int ch; ! int old_post_pos; ! old_post_pos = (int)(post_ptr - post_start); /* First branch, possibly the only one */ if (nfa_regconcat() == FAIL) *************** *** 1587,1604 **** skipchr(); EMIT(NFA_NOPEN); EMIT(NFA_PREV_ATOM_NO_WIDTH); ! old_post_ptr = post_ptr; if (nfa_regconcat() == FAIL) return FAIL; /* if concat is empty, skip a input char. But do emit a node */ ! if (old_post_ptr == post_ptr) EMIT(NFA_SKIP_CHAR); EMIT(NFA_CONCAT); ch = peekchr(); } /* Even if a branch is empty, emit one node for it */ ! if (old_post_ptr == post_ptr) EMIT(NFA_SKIP_CHAR); return OK; --- 1613,1630 ---- skipchr(); EMIT(NFA_NOPEN); EMIT(NFA_PREV_ATOM_NO_WIDTH); ! old_post_pos = (int)(post_ptr - post_start); if (nfa_regconcat() == FAIL) return FAIL; /* if concat is empty, skip a input char. But do emit a node */ ! if (old_post_pos == (int)(post_ptr - post_start)) EMIT(NFA_SKIP_CHAR); EMIT(NFA_CONCAT); ch = peekchr(); } /* Even if a branch is empty, emit one node for it */ ! if (old_post_pos == (int)(post_ptr - post_start)) EMIT(NFA_SKIP_CHAR); return OK; *** ../vim-7.3.1072/src/testdir/test64.in 2013-05-30 17:05:34.000000000 +0200 --- src/testdir/test64.in 2013-05-30 18:38:49.000000000 +0200 *************** *** 348,353 **** --- 348,356 ---- :call add(tl, [2, '\_[^8-9]\+', "asfi\n9888", "asfi\n"]) :call add(tl, [2, '\_[^a]\+', "asfi\n9888", "sfi\n9888"]) :" + :"""" Requiring lots of states. + :call add(tl, [0, '[0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12}', " 12345678-1234-1234-1234-123456789012 ", "12345678-1234-1234-1234-123456789012", "1234-"]) + :" :" :"""" Run the tests :" *************** *** 361,367 **** : continue : endif : let ®expengine = engine ! : let l = matchlist(text, pat) :" check the match itself : if len(l) == 0 && len(t) > matchidx : $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"' --- 364,374 ---- : continue : endif : let ®expengine = engine ! : try ! : let l = matchlist(text, pat) ! : catch ! : $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", caused an exception: \"' . v:exception . '\"' ! : endtry :" check the match itself : if len(l) == 0 && len(t) > matchidx : $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"' *** ../vim-7.3.1072/src/testdir/test64.ok 2013-05-30 17:05:34.000000000 +0200 --- src/testdir/test64.ok 2013-05-30 18:42:43.000000000 +0200 *************** *** 740,745 **** --- 740,747 ---- OK 0 - \_[^a]\+ OK 1 - \_[^a]\+ OK 2 - \_[^a]\+ + OK 0 - [0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12} + OK 1 - [0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12} 192.168.0.1 192.168.0.1 192.168.0.1 *** ../vim-7.3.1072/src/testdir/test95.in 2013-05-26 15:14:49.000000000 +0200 --- src/testdir/test95.in 2013-05-30 18:13:59.000000000 +0200 *************** *** 85,91 **** : continue : endif : let ®expengine = engine ! : let l = matchlist(text, pat) :" check the match itself : if len(l) == 0 && len(t) > matchidx : $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"' --- 85,95 ---- : continue : endif : let ®expengine = engine ! : try ! : let l = matchlist(text, pat) ! : catch ! : $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", caused an exception: \"' . v:exception . '\"' ! : endtry :" check the match itself : if len(l) == 0 && len(t) > matchidx : $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"' *** ../vim-7.3.1072/src/version.c 2013-05-30 17:49:19.000000000 +0200 --- src/version.c 2013-05-30 18:43:08.000000000 +0200 *************** *** 730,731 **** --- 730,733 ---- { /* Add new patch number below this line */ + /**/ + 1073, /**/ -- How To Keep A Healthy Level Of Insanity: 17. When the money comes out the ATM, scream "I won!, I won! 3rd time this week!!!!!" /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ \\\ an exciting new programming language -- http://www.Zimbu.org /// \\\ help me help AIDS victims -- http://ICCF-Holland.org ///