To: vim_dev@googlegroups.com
Subject: Patch 7.4.1783
Fcc: outbox
From: Bram Moolenaar
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
------------

Patch 7.4.1783
Problem:    The old regexp engine doesn't handle character classes correctly.
            (Manuel Ortega)
Solution:   Use regmbc() instead of regc(). Add a test.
Files:      src/regexp.c, src/testdir/test_regexp_utf8.vim


*** ../vim-7.4.1782/src/regexp.c	2016-04-08 17:07:09.546160667 +0200
--- src/regexp.c	2016-04-24 14:05:46.624490513 +0200
***************
*** 2544,2555 ****
  			    case CLASS_ALNUM:
  				for (cu = 1; cu <= 255; cu++)
  				    if (isalnum(cu))
! 					regc(cu);
  				break;
  			    case CLASS_ALPHA:
  				for (cu = 1; cu <= 255; cu++)
  				    if (isalpha(cu))
! 					regc(cu);
  				break;
  			    case CLASS_BLANK:
  				regc(' ');
--- 2544,2555 ----
  			    case CLASS_ALNUM:
  				for (cu = 1; cu <= 255; cu++)
  				    if (isalnum(cu))
! 					regmbc(cu);
  				break;
  			    case CLASS_ALPHA:
  				for (cu = 1; cu <= 255; cu++)
  				    if (isalpha(cu))
! 					regmbc(cu);
  				break;
  			    case CLASS_BLANK:
  				regc(' ');
***************
*** 2558,2589 ****
  			    case CLASS_CNTRL:
  				for (cu = 1; cu <= 255; cu++)
  				    if (iscntrl(cu))
! 					regc(cu);
  				break;
  			    case CLASS_DIGIT:
  				for (cu = 1; cu <= 255; cu++)
  				    if (VIM_ISDIGIT(cu))
! 					regc(cu);
  				break;
  			    case CLASS_GRAPH:
  				for (cu = 1; cu <= 255; cu++)
  				    if (isgraph(cu))
! 					regc(cu);
  				break;
  			    case CLASS_LOWER:
  				for (cu = 1; cu <= 255; cu++)
  				    if (MB_ISLOWER(cu))
! 					regc(cu);
  				break;
  			    case CLASS_PRINT:
  				for (cu = 1; cu <= 255; cu++)
  				    if (vim_isprintc(cu))
! 					regc(cu);
  				break;
  			    case CLASS_PUNCT:
  				for (cu = 1; cu <= 255; cu++)
  				    if (ispunct(cu))
! 					regc(cu);
  				break;
  			    case CLASS_SPACE:
  				for (cu = 9; cu <= 13; cu++)
--- 2558,2589 ----
  			    case CLASS_CNTRL:
  				for (cu = 1; cu <= 255; cu++)
  				    if (iscntrl(cu))
! 					regmbc(cu);
  				break;
  			    case CLASS_DIGIT:
  				for (cu = 1; cu <= 255; cu++)
  				    if (VIM_ISDIGIT(cu))
! 					regmbc(cu);
  				break;
  			    case CLASS_GRAPH:
  				for (cu = 1; cu <= 255; cu++)
  				    if (isgraph(cu))
! 					regmbc(cu);
  				break;
  			    case CLASS_LOWER:
  				for (cu = 1; cu <= 255; cu++)
  				    if (MB_ISLOWER(cu))
! 					regmbc(cu);
  				break;
  			    case CLASS_PRINT:
  				for (cu = 1; cu <= 255; cu++)
  				    if (vim_isprintc(cu))
! 					regmbc(cu);
  				break;
  			    case CLASS_PUNCT:
  				for (cu = 1; cu <= 255; cu++)
  				    if (ispunct(cu))
! 					regmbc(cu);
  				break;
  			    case CLASS_SPACE:
  				for (cu = 9; cu <= 13; cu++)
***************
*** 2593,2604 ****
  			    case CLASS_UPPER:
  				for (cu = 1; cu <= 255; cu++)
  				    if (MB_ISUPPER(cu))
! 					regc(cu);
  				break;
  			    case CLASS_XDIGIT:
  				for (cu = 1; cu <= 255; cu++)
  				    if (vim_isxdigit(cu))
! 					regc(cu);
  				break;
  			    case CLASS_TAB:
  				regc('\t');
--- 2593,2604 ----
  			    case CLASS_UPPER:
  				for (cu = 1; cu <= 255; cu++)
  				    if (MB_ISUPPER(cu))
! 					regmbc(cu);
  				break;
  			    case CLASS_XDIGIT:
  				for (cu = 1; cu <= 255; cu++)
  				    if (vim_isxdigit(cu))
! 					regmbc(cu);
  				break;
  			    case CLASS_TAB:
  				regc('\t');
*** ../vim-7.4.1782/src/testdir/test_regexp_utf8.vim	2016-04-03 14:00:29.324148917 +0200
--- src/testdir/test_regexp_utf8.vim	2016-04-24 14:33:42.179179528 +0200
***************
*** 33,35 ****
--- 33,91 ----
    set re=2
    call s:equivalence_test()
  endfunc
+ 
+ func s:classes_test()
+   call assert_equal('Motörhead', matchstr('Motörhead', '[[:print:]]\+'))
+ 
+   let alphachars = ''
+   let lowerchars = ''
+   let upperchars = ''
+   let alnumchars = ''
+   let printchars = ''
+   let punctchars = ''
+   let xdigitchars = ''
+   let i = 1
+   while i <= 255
+     let c = nr2char(i)
+     if c =~ '[[:alpha:]]'
+       let alphachars .= c
+     endif
+     if c =~ '[[:lower:]]'
+       let lowerchars .= c
+     endif
+     if c =~ '[[:upper:]]'
+       let upperchars .= c
+     endif
+     if c =~ '[[:alnum:]]'
+       let alnumchars .= c
+     endif
+     if c =~ '[[:print:]]'
+       let printchars .= c
+     endif
+     if c =~ '[[:punct:]]'
+       let punctchars .= c
+     endif
+     if c =~ '[[:xdigit:]]'
+       let xdigitchars .= c
+     endif
+     let i += 1
+   endwhile
+ 
+   call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alphachars)
+   call assert_equal('abcdefghijklmnopqrstuvwxyzµßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', lowerchars)
+   call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ', upperchars)
+   call assert_equal('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alnumchars)
+   call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ', printchars)
+   call assert_equal('!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~', punctchars)
+   call assert_equal('0123456789ABCDEFabcdef', xdigitchars)
+ endfunc
+ 
+ func Test_classes_re1()
+   set re=1
+   call s:classes_test()
+ endfunc
+ 
+ func Test_classes_re2()
+   set re=2
+   call s:classes_test()
+ endfunc
*** ../vim-7.4.1782/src/version.c	2016-04-23 15:30:00.542243189 +0200
--- src/version.c	2016-04-24 14:34:55.926419794 +0200
***************
*** 755,756 ****
--- 755,758 ----
  {   /* Add new patch number below this line */
+ /**/
+     1783,
  /**/

-- 
If your life is a hard drive, Christ can be your backup.

 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///