From e7585f23bc5d93d688371e28815b150e0de2c069 Mon Sep 17 00:00:00 2001 From: Juan RP Date: Tue, 18 May 2010 15:53:09 +0200 Subject: [PATCH] perl: add a patch to fix a crash with invalid utf-8 characters, from LFS. --- srcpkgs/perl/patches/perl-5.10.1-utf8-1.patch | 167 ++++++++++++++++++ srcpkgs/perl/template | 3 +- 2 files changed, 168 insertions(+), 2 deletions(-) create mode 100644 srcpkgs/perl/patches/perl-5.10.1-utf8-1.patch diff --git a/srcpkgs/perl/patches/perl-5.10.1-utf8-1.patch b/srcpkgs/perl/patches/perl-5.10.1-utf8-1.patch new file mode 100644 index 0000000000..c1a56fbe76 --- /dev/null +++ b/srcpkgs/perl/patches/perl-5.10.1-utf8-1.patch @@ -0,0 +1,167 @@ +Submitted By: Robert Connolly (ashes) +Date: 2010-02-24 +Initial Package Version: 5.10.1 +Upstream Status: From upstream +Origin: Git +http://perl5.git.perl.org/perl.git/patch/0abd0d78a73da1c4d13b1c700526b7e5d03b32d4 +Description: Bug fix for invalid utf-8 characters causing Perl to crash. + +http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2009-3626 + +From 0abd0d78a73da1c4d13b1c700526b7e5d03b32d4 Mon Sep 17 00:00:00 2001 +From: Yves Orton +Date: Sun, 25 Oct 2009 20:37:08 +0100 +Subject: [PATCH] disable non-unicode case insensitive trie matching + +Also revert 8902bb05b18c9858efa90229ca1ee42b17277554 as it merely +masked one symptom of the deeper problems. + +Also fixes RT #69973, which was a segfault which was exposed by +8902bb05, see the ticket for further details. + +http://rt.perl.org/rt3//Public/Bug/Display.html?id=69973 + +At the core of this is the problem that in unicode matching a bunch +of code points have case folding rules beyond just A-Z/a-z. Since +the case folding rules are decided at runtime by the string, we can't +use the same TRIE tables for both unicode/non-unicode matching. + +Until this is reconciled or some other solution is found case insensitive +matching only gets the TRIE optimisation when the pattern is unicode. 
+ +From CaseFolding.txt: + +00B5; C; 03BC; # MICRO SIGN +00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE +00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE +00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE +00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS +00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE +00C6; C; 00E6; # LATIN CAPITAL LETTER AE +00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA +00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE +00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE +00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS +00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE +00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE +00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS +00D0; C; 00F0; # LATIN CAPITAL LETTER ETH +00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE +00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE +00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE +00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE +00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS +00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE +00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE +00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE +00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS +00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE +00DE; C; 00FE; # LATIN CAPITAL LETTER THORN +00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S +--- + ext/re/t/regop.t | 12 ++++++------ + regcomp.c | 17 +++++++++++------ + regexec.c | 9 ++------- + 3 files changed, 19 insertions(+), 19 deletions(-) + +diff --git a/ext/re/t/regop.t b/ext/re/t/regop.t +index 9118bf6..46e6ec0 100644 +--- ext/re/t/regop.t ++++ ext/re/t/regop.t +@@ -231,12 +231,12 @@ 
anchored "ABC" at 0 + #Freeing REx: "(\\.COM|\\.EXE|\\.BAT|\\.CMD|\\.VBS|\\.VBE|\\.JS|\\.JSE|\\."...... + %MATCHED% + floating ""$ at 3..4 (checking floating) +-1:1[1] 3:2[1] 5:2[64] 45:83[1] 47:84[1] 48:85[0] +-stclass EXACTF <.> minlen 3 +-Found floating substr ""$ at offset 30... +-Does not contradict STCLASS... +-Guessed: match at offset 26 +-Matching stclass EXACTF <.> against ".exe" ++#1:1[1] 3:2[1] 5:2[64] 45:83[1] 47:84[1] 48:85[0] ++#stclass EXACTF <.> minlen 3 ++#Found floating substr ""$ at offset 30... ++#Does not contradict STCLASS... ++#Guessed: match at offset 26 ++#Matching stclass EXACTF <.> against ".exe" + --- + #Compiling REx "[q]" + #size 12 nodes Got 100 bytes for offset annotations. +diff --git a/regcomp.c b/regcomp.c +index 6e9fa26..eb5f12f 100644 +--- regcomp.c ++++ regcomp.c +@@ -2833,13 +2833,18 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, + } + } else { + /* +- Currently we assume that the trie can handle unicode and ascii +- matches fold cased matches. If this proves true then the following +- define will prevent tries in this situation. +- +- #define TRIE_TYPE_IS_SAFE (UTF || optype==EXACT) +-*/ ++ Currently we do not believe that the trie logic can ++ handle case insensitive matching properly when the ++ pattern is not unicode (thus forcing unicode semantics). ++ ++ If/when this is fixed the following define can be swapped ++ in below to fully enable trie logic. 
++ + #define TRIE_TYPE_IS_SAFE 1 ++ ++*/ ++#define TRIE_TYPE_IS_SAFE (UTF || optype==EXACT) ++ + if ( last && TRIE_TYPE_IS_SAFE ) { + make_trie( pRExC_state, + startbranch, first, cur, tail, count, +diff --git a/regexec.c b/regexec.c +index 402ede3..ec09c28 100644 +--- regexec.c ++++ regexec.c +@@ -1105,16 +1105,15 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos, + + #define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, \ + uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \ +- UV uvc_unfolded = 0; \ + switch (trie_type) { \ + case trie_utf8_fold: \ + if ( foldlen>0 ) { \ +- uvc_unfolded = uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \ ++ uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \ + foldlen -= len; \ + uscan += len; \ + len=0; \ + } else { \ +- uvc_unfolded = uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \ ++ uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \ + uvc = to_uni_fold( uvc, foldbuf, &foldlen ); \ + foldlen -= UNISKIP( uvc ); \ + uscan = foldbuf + UNISKIP( uvc ); \ +@@ -1140,7 +1139,6 @@ uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \ + uvc = (UV)*uc; \ + len = 1; \ + } \ +- \ + if (uvc < 256) { \ + charid = trie->charmap[ uvc ]; \ + } \ +@@ -1153,9 +1151,6 @@ uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \ + charid = (U16)SvIV(*svpp); \ + } \ + } \ +- if (!charid && trie_type == trie_utf8_fold && !UTF) { \ +- charid = trie->charmap[uvc_unfolded]; \ +- } \ + } STMT_END + + #define REXEC_FBC_EXACTISH_CHECK(CoNd) \ +-- +1.6.5.2.74.g610f9.dirty + diff --git a/srcpkgs/perl/template b/srcpkgs/perl/template index 9ad6f59e7a..6e759897b5 100644 --- a/srcpkgs/perl/template +++ b/srcpkgs/perl/template @@ -1,6 +1,7 @@ # Template build file for 'perl'. 
pkgname=perl version=5.10.1 +revision=1 distfiles="http://www.cpan.org/src/$pkgname-$version.tar.gz" build_style=configure configure_script="./Configure" @@ -45,6 +46,4 @@ pre_configure() -Dsitelib=${p5_base}/site_perl/${p5_apiver} \ -Dvendorlib=${p5_base}/vendor_perl/${p5_apiver}" export configure_args="${args}" - - sed -i -e "s|'/bin/pwd'|'/usr/local/bin/pwd'|" lib/Cwd.pm }