diff options
Diffstat (limited to 'meta/recipes-extended/grep')
9 files changed, 0 insertions, 1612 deletions
diff --git a/meta/recipes-extended/grep/grep-2.5.1a/Makevars b/meta/recipes-extended/grep/grep-2.5.1a/Makevars deleted file mode 100644 index 8b09f53b0f..0000000000 --- a/meta/recipes-extended/grep/grep-2.5.1a/Makevars +++ /dev/null @@ -1,25 +0,0 @@ -# Makefile variables for PO directory in any package using GNU gettext. - -# Usually the message domain is the same as the package name. -DOMAIN = $(PACKAGE) - -# These two variables depend on the location of this directory. -subdir = po -top_builddir = .. - -# These options get passed to xgettext. -XGETTEXT_OPTIONS = --keyword=_ --keyword=N_ - -# This is the copyright holder that gets inserted into the header of the -# $(DOMAIN).pot file. Set this to the copyright holder of the surrounding -# package. (Note that the msgstr strings, extracted from the package's -# sources, belong to the copyright holder of the package.) Translators are -# expected to transfer the copyright for their translations to this person -# or entity, or to disclaim their copyright. The empty string stands for -# the public domain; in this case the translators are expected to disclaim -# their copyright. -COPYRIGHT_HOLDER = Free Software Foundation, Inc. - -# This is the list of locale categories, beyond LC_MESSAGES, for which the -# message catalogs shall be used. It is usually empty. -EXTRA_LOCALE_CATEGORIES = diff --git a/meta/recipes-extended/grep/grep-2.5.1a/fix-for-texinfo-5.1.patch b/meta/recipes-extended/grep/grep-2.5.1a/fix-for-texinfo-5.1.patch deleted file mode 100644 index ce595099f4..0000000000 --- a/meta/recipes-extended/grep/grep-2.5.1a/fix-for-texinfo-5.1.patch +++ /dev/null @@ -1,17 +0,0 @@ -Upstream-Status: Inappropriate [Poky Specific this is gplv2 version] - -Signed-off-by: Saul Wold <sgw@linux.intel.com> - -Index: grep-2.5.1a/doc/grep.texi -=================================================================== ---- grep-2.5.1a.orig/doc/grep.texi -+++ grep-2.5.1a/doc/grep.texi -@@ -288,7 +288,7 @@ This version number should be included i - Print a usage message briefly summarizing these command-line options - and the bug-reporting address, then exit. - --@itemx --binary-files=@var{type} -+@item --binary-files=@var{type} - @opindex --binary-files - @cindex binary files - If the first few bytes of a file indicate that the file contains binary diff --git a/meta/recipes-extended/grep/grep-2.5.1a/fix64-int-to-pointer.patch b/meta/recipes-extended/grep/grep-2.5.1a/fix64-int-to-pointer.patch deleted file mode 100644 index 3b91520fb2..0000000000 --- a/meta/recipes-extended/grep/grep-2.5.1a/fix64-int-to-pointer.patch +++ /dev/null @@ -1,17 +0,0 @@ -Always use locale.h as HAVE_LOCALE_H is no longer handled by ./configure -Upstream-Status: Inappropriate [ old version that will not be maintained ] -Signed-off-by: Alex DAMIAN <alexandru.damian@intel.com> - -diff --recursive --unified grep-2.5.1a-orig/lib/hard-locale.c grep-2.5.1a/lib/hard-locale.c ---- grep-2.5.1a-orig/lib/hard-locale.c 2001-03-04 07:33:12.000000000 +0200 -+++ grep-2.5.1a/lib/hard-locale.c 2013-03-11 17:05:52.086444891 +0200 -@@ -38,9 +38,7 @@ - # endif - #endif - --#if HAVE_LOCALE_H - # include <locale.h> --#endif - - #if HAVE_STRING_H - # include <string.h> diff --git a/meta/recipes-extended/grep/grep-2.5.1a/gettext.patch b/meta/recipes-extended/grep/grep-2.5.1a/gettext.patch deleted file mode 100644 index 57463355a7..0000000000 --- a/meta/recipes-extended/grep/grep-2.5.1a/gettext.patch +++ /dev/null @@ -1,15 +0,0 @@ -Enable operation with later versions of gettext. - -Upstream-Status: Inappropriate -RP 2012/10/19 - -Index: grep-2.5.1a/configure.in -=================================================================== ---- grep-2.5.1a.orig/configure.in 2012-10-19 12:57:51.646970204 +0000 -+++ grep-2.5.1a/configure.in 2012-10-19 12:59:49.946968803 +0000 -@@ -140,4 +140,4 @@ - AC_CHECK_LIB(pcre, pcre_exec) - fi - --AC_OUTPUT(Makefile lib/Makefile lib/posix/Makefile src/Makefile tests/Makefile po/Makefile.in intl/Makefile doc/Makefile m4/Makefile vms/Makefile bootstrap/Makefile, [sed -e "/POTFILES =/r po/POTFILES" po/Makefile.in > po/Makefile; echo timestamp > stamp-h]) -+AC_OUTPUT(Makefile lib/Makefile lib/posix/Makefile src/Makefile tests/Makefile po/Makefile.in intl/Makefile doc/Makefile m4/Makefile vms/Makefile bootstrap/Makefile, [echo timestamp > stamp-h]) diff --git a/meta/recipes-extended/grep/grep-2.5.1a/grep-CVE-2012-5667.patch b/meta/recipes-extended/grep/grep-2.5.1a/grep-CVE-2012-5667.patch deleted file mode 100644 index a40a9f30bc..0000000000 --- a/meta/recipes-extended/grep/grep-2.5.1a/grep-CVE-2012-5667.patch +++ /dev/null @@ -1,33 +0,0 @@ -The patch to fix CVE-2012-5667 -Reference: https://bugzilla.redhat.com/attachment.cgi?id=686605&action=diff - -Multiple integer overflows in GNU Grep before 2.11 might allow -context-dependent attackers to execute arbitrary code via vectors -involving a long input line that triggers a heap-based buffer overflow. - -http://web.nvd.nist.gov/view/vuln/detail?vulnId=CVE-2012-5667 - -Upstream-Status: Inappropriate [other] -This version of GNU Grep has been abandoned upstream and they are no longer -accepting patches. This is not a backport. -CVE: CVE-2012-5667 - -Signed-off-by: Ming Liu <ming.liu@windriver.com> ---- - grep.c | 7 +++---- - 1 file changed, 3 insertions(+), 4 deletions(-) - ---- a/src/grep.c 2013-05-15 13:39:33.359191769 +0800 -+++ a/src/grep.c 2013-05-15 13:50:22.609191882 +0800 -@@ -306,6 +306,11 @@ fillbuf (size_t save, struct stats const - int cc = 1; - char *readbuf; - size_t readsize; -+ const size_t max_save = INT_MAX / 2; -+ -+ /* Limit the amount of saved data to INT_MAX to fix CVE-2012-5667 */ -+ if (save > max_save) -+ error (2, 0, _("line too long")); - - /* Offset from start of buffer to start of old stuff - that we want to save. */ diff --git a/meta/recipes-extended/grep/grep-2.5.1a/grep-egrep-fgrep-Fix-LSB-NG-cases.patch b/meta/recipes-extended/grep/grep-2.5.1a/grep-egrep-fgrep-Fix-LSB-NG-cases.patch deleted file mode 100644 index 327ee56402..0000000000 --- a/meta/recipes-extended/grep/grep-2.5.1a/grep-egrep-fgrep-Fix-LSB-NG-cases.patch +++ /dev/null @@ -1,1342 +0,0 @@ -From c884dd12ec062569335702848fc5f29f436c28fa Mon Sep 17 00:00:00 2001 -From: Li xin <lixin.fnst@cn.fujitsu.com> -Date: Mon, 25 May 2015 10:15:57 +0900 -Subject: [PATCH] grep egrep fgrep: Fix LSB NG cases. - -The LSB core test requires grep egrep and fgrep can -perform pattern matching in searches without regard -to case if -i option is specified. - -Upstream-Status: backport. - -Signed-off-by: Li Xin <lixin.fnst@cn.fujitsu.com> ---- - lib/posix/regex.h | 4 + - src/dfa.c | 22 +- - src/grep.c | 96 ++++--- - src/search.c | 833 +++++++++++++++++++++++++++++++++++++++++++++--------- - 4 files changed, 768 insertions(+), 187 deletions(-) - -diff --git a/lib/posix/regex.h b/lib/posix/regex.h -index 63c2fef..7bb2b0e 100644 ---- a/lib/posix/regex.h -+++ b/lib/posix/regex.h -@@ -109,6 +109,10 @@ typedef unsigned long int reg_syntax_t; - If not set, \{, \}, {, and } are literals. */ - #define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) - -+/* If this bit is set, then ignore case when matching. -+ If not set, then case is significant. */ -+#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) -+ - /* If this bit is set, +, ? and | aren't recognized as operators. - If not set, they are. */ - #define RE_LIMITED_OPS (RE_INTERVALS << 1) -diff --git a/src/dfa.c b/src/dfa.c -index 590bfa7..27c876a 100644 ---- a/src/dfa.c -+++ b/src/dfa.c -@@ -414,7 +414,7 @@ update_mb_len_index (unsigned char const *p, int len) - - /* This function fetch a wide character, and update cur_mb_len, - used only if the current locale is a multibyte environment. */ --static wchar_t -+static wint_t - fetch_wc (char const *eoferr) - { - wchar_t wc; -@@ -423,7 +423,7 @@ fetch_wc (char const *eoferr) - if (eoferr != 0) - dfaerror (eoferr); - else -- return -1; -+ return WEOF; - } - - cur_mb_len = mbrtowc(&wc, lexptr, lexleft, &mbs); -@@ -459,7 +459,7 @@ fetch_wc (char const *eoferr) - static void - parse_bracket_exp_mb () - { -- wchar_t wc, wc1, wc2; -+ wint_t wc, wc1, wc2; - - /* Work area to build a mb_char_classes. */ - struct mb_char_classes *work_mbc; -@@ -496,7 +496,7 @@ parse_bracket_exp_mb () - work_mbc->invert = 0; - do - { -- wc1 = -1; /* mark wc1 is not initialized". */ -+ wc1 = WEOF; /* mark wc1 is not initialized". */ - - /* Note that if we're looking at some other [:...:] construct, - we just treat it as a bunch of ordinary characters. We can do -@@ -586,7 +586,7 @@ parse_bracket_exp_mb () - work_mbc->coll_elems[work_mbc->ncoll_elems++] = elem; - } - } -- wc = -1; -+ wc1 = wc = WEOF; - } - else - /* We treat '[' as a normal character here. */ -@@ -600,7 +600,7 @@ parse_bracket_exp_mb () - wc = fetch_wc(("Unbalanced [")); - } - -- if (wc1 == -1) -+ if (wc1 == WEOF) - wc1 = fetch_wc(_("Unbalanced [")); - - if (wc1 == L'-') -@@ -630,17 +630,17 @@ parse_bracket_exp_mb () - } - REALLOC_IF_NECESSARY(work_mbc->range_sts, wchar_t, - range_sts_al, work_mbc->nranges + 1); -- work_mbc->range_sts[work_mbc->nranges] = wc; -+ work_mbc->range_sts[work_mbc->nranges] = (wchar_t)wc; - REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t, - range_ends_al, work_mbc->nranges + 1); -- work_mbc->range_ends[work_mbc->nranges++] = wc2; -+ work_mbc->range_ends[work_mbc->nranges++] = (wchar_t)wc2; - } -- else if (wc != -1) -+ else if (wc != WEOF) - /* build normal characters. */ - { - REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al, - work_mbc->nchars + 1); -- work_mbc->chars[work_mbc->nchars++] = wc; -+ work_mbc->chars[work_mbc->nchars++] = (wchar_t)wc; - } - } - while ((wc = wc1) != L']'); -@@ -2552,6 +2552,8 @@ match_mb_charset (struct dfa *d, int s, position pos, int index) - } - - /* match with a character? */ -+ if (case_fold) -+ wc = towlower (wc); - for (i = 0; i<work_mbc->nchars; i++) - { - if (wc == work_mbc->chars[i]) -diff --git a/src/grep.c b/src/grep.c -index 2fb2fac..3fd4b47 100644 ---- a/src/grep.c -+++ b/src/grep.c -@@ -30,6 +30,12 @@ - # include <sys/time.h> - # include <sys/resource.h> - #endif -+#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC -+/* We can handle multibyte string. */ -+# define MBS_SUPPORT -+# include <wchar.h> -+# include <wctype.h> -+#endif - #include <stdio.h> - #include "system.h" - #include "getopt.h" -@@ -255,19 +261,6 @@ reset (int fd, char const *file, struct stats *stats) - bufbeg[-1] = eolbyte; - bufdesc = fd; - -- if (fstat (fd, &stats->stat) != 0) -- { -- error (0, errno, "fstat"); -- return 0; -- } -- if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode)) -- return 0; --#ifndef DJGPP -- if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) || S_ISBLK(stats->stat.st_mode) || S_ISSOCK(stats->stat.st_mode))) --#else -- if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) || S_ISBLK(stats->stat.st_mode))) --#endif -- return 0; - if (S_ISREG (stats->stat.st_mode)) - { - if (file) -@@ -558,33 +551,6 @@ prline (char const *beg, char const *lim, int sep) - { - size_t match_size; - size_t match_offset; -- if(match_icase) -- { -- /* Yuck, this is tricky */ -- char *buf = (char*) xmalloc (lim - beg); -- char *ibeg = buf; -- char *ilim = ibeg + (lim - beg); -- int i; -- for (i = 0; i < lim - beg; i++) -- ibeg[i] = tolower (beg[i]); -- while ((match_offset = (*execute) (ibeg, ilim-ibeg, &match_size, 1)) -- != (size_t) -1) -- { -- char const *b = beg + match_offset; -- if (b == lim) -- break; -- fwrite (beg, sizeof (char), match_offset, stdout); -- printf ("\33[%sm", grep_color); -- fwrite (b, sizeof (char), match_size, stdout); -- fputs ("\33[00m", stdout); -- beg = b + match_size; -- ibeg = ibeg + match_offset + match_size; -- } -- fwrite (beg, 1, lim - beg, stdout); -- free (buf); -- lastout = lim; -- return; -- } - while (lim-beg && (match_offset = (*execute) (beg, lim - beg, &match_size, 1)) - != (size_t) -1) - { -@@ -601,6 +567,7 @@ prline (char const *beg, char const *lim, int sep) - fputs ("\33[00m", stdout); - beg = b + match_size; - } -+ fputs ("\33[K", stdout); - } - fwrite (beg, 1, lim - beg, stdout); - if (ferror (stdout)) -@@ -623,7 +590,7 @@ prpending (char const *lim) - size_t match_size; - --pending; - if (outleft -- || (((*execute) (lastout, nl - lastout, &match_size, 0) == (size_t) -1) -+ || (((*execute) (lastout, nl + 1 - lastout, &match_size, 0) == (size_t) -1) - == !out_invert)) - prline (lastout, nl + 1, '-'); - else -@@ -895,6 +862,19 @@ grepfile (char const *file, struct stats *stats) - } - else - { -+ if (stat (file, &stats->stat) != 0) -+ { -+ suppressible_error (file, errno); -+ return 1; -+ } -+ if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode)) -+ return 1; -+#ifndef DJGPP -+ if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) || S_ISBLK(stats->stat.st_mode) || S_ISSOCK(stats->stat.st_mode) || S_ISFIFO(stats->stat.st_mode))) -+#else -+ if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) || S_ISBLK(stats->stat.st_mode))) -+#endif -+ return 1; - while ((desc = open (file, O_RDONLY)) < 0 && errno == EINTR) - continue; - -@@ -1681,9 +1661,6 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n")) - out_invert ^= 1; - match_lines = match_words = 0; - } -- else -- /* Strip trailing newline. */ -- --keycc; - } - else - if (optind < argc) -@@ -1697,6 +1674,37 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n")) - if (!install_matcher (matcher) && !install_matcher ("default")) - abort (); - -+#ifdef MBS_SUPPORT -+ if (MB_CUR_MAX != 1 && match_icase) -+ { -+ wchar_t wc; -+ mbstate_t cur_state, prev_state; -+ int i, len = strlen(keys); -+ -+ memset(&cur_state, 0, sizeof(mbstate_t)); -+ for (i = 0; i <= len ;) -+ { -+ size_t mbclen; -+ mbclen = mbrtowc(&wc, keys + i, len - i, &cur_state); -+ if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) -+ { -+ /* An invalid sequence, or a truncated multibyte character. -+ We treat it as a singlebyte character. */ -+ mbclen = 1; -+ } -+ else -+ { -+ if (iswupper((wint_t)wc)) -+ { -+ wc = towlower((wint_t)wc); -+ wcrtomb(keys + i, wc, &cur_state); -+ } -+ } -+ i += mbclen; -+ } -+ } -+#endif /* MBS_SUPPORT */ -+ - (*compile)(keys, keycc); - - if ((argc - optind > 1 && !no_filenames) || with_filenames) -diff --git a/src/search.c b/src/search.c -index 7bd233f..3c6a485 100644 ---- a/src/search.c -+++ b/src/search.c -@@ -18,9 +18,13 @@ - - /* Written August 1992 by Mike Haertel. */ - -+#ifndef _GNU_SOURCE -+# define _GNU_SOURCE 1 -+#endif - #ifdef HAVE_CONFIG_H - # include <config.h> - #endif -+#include <assert.h> - #include <sys/types.h> - #if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC - /* We can handle multibyte string. */ -@@ -31,7 +35,7 @@ - - #include "system.h" - #include "grep.h" --#include "regex.h" -+#include <regex.h> - #include "dfa.h" - #include "kwset.h" - #include "error.h" -@@ -39,6 +43,9 @@ - #ifdef HAVE_LIBPCRE - # include <pcre.h> - #endif -+#ifdef HAVE_LANGINFO_CODESET -+# include <langinfo.h> -+#endif - - #define NCHAR (UCHAR_MAX + 1) - -@@ -70,9 +77,10 @@ static kwset_t kwset; - call the regexp matcher at all. */ - static int kwset_exact_matches; - --#if defined(MBS_SUPPORT) --static char* check_multibyte_string PARAMS ((char const *buf, size_t size)); --#endif -+/* UTF-8 encoding allows some optimizations that we can't otherwise -+ assume in a multibyte encoding. */ -+static int using_utf8; -+ - static void kwsinit PARAMS ((void)); - static void kwsmusts PARAMS ((void)); - static void Gcompile PARAMS ((char const *, size_t)); -@@ -84,6 +92,15 @@ static void Pcompile PARAMS ((char const *, size_t )); - static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int)); - - void -+check_utf8 (void) -+{ -+#ifdef HAVE_LANGINFO_CODESET -+ if (strcmp (nl_langinfo (CODESET), "UTF-8") == 0) -+ using_utf8 = 1; -+#endif -+} -+ -+void - dfaerror (char const *mesg) - { - error (2, 0, mesg); -@@ -141,38 +158,6 @@ kwsmusts (void) - } - } - --#ifdef MBS_SUPPORT --/* This function allocate the array which correspond to "buf". -- Then this check multibyte string and mark on the positions which -- are not singlebyte character nor the first byte of a multibyte -- character. Caller must free the array. */ --static char* --check_multibyte_string(char const *buf, size_t size) --{ -- char *mb_properties = malloc(size); -- mbstate_t cur_state; -- int i; -- memset(&cur_state, 0, sizeof(mbstate_t)); -- memset(mb_properties, 0, sizeof(char)*size); -- for (i = 0; i < size ;) -- { -- size_t mbclen; -- mbclen = mbrlen(buf + i, size - i, &cur_state); -- -- if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) -- { -- /* An invalid sequence, or a truncated multibyte character. -- We treat it as a singlebyte character. */ -- mbclen = 1; -- } -- mb_properties[i] = mbclen; -- i += mbclen; -- } -- -- return mb_properties; --} --#endif -- - static void - Gcompile (char const *pattern, size_t size) - { -@@ -181,7 +166,8 @@ Gcompile (char const *pattern, size_t size) - size_t total = size; - char const *motif = pattern; - -- re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE); -+ check_utf8 (); -+ re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE | (match_icase ? RE_ICASE : 0)); - dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte); - - /* For GNU regex compiler we have to pass the patterns separately to detect -@@ -218,6 +204,10 @@ Gcompile (char const *pattern, size_t size) - motif = sep; - } while (sep && total != 0); - -+ /* Strip trailing newline. */ -+ if (size && pattern[size - 1] == '\n') -+ size--; -+ - /* In the match_words and match_lines cases, we use a different pattern - for the DFA matcher that will quickly throw out cases that won't work. - Then if DFA succeeds we do some hairy stuff using the regex matcher -@@ -233,7 +223,7 @@ Gcompile (char const *pattern, size_t size) - static char const line_end[] = "\\)$"; - static char const word_beg[] = "\\(^\\|[^[:alnum:]_]\\)\\("; - static char const word_end[] = "\\)\\([^[:alnum:]_]\\|$\\)"; -- char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end); -+ char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end); - size_t i; - strcpy (n, match_lines ? line_beg : word_beg); - i = strlen (n); -@@ -257,14 +247,15 @@ Ecompile (char const *pattern, size_t size) - size_t total = size; - char const *motif = pattern; - -+ check_utf8 (); - if (strcmp (matcher, "awk") == 0) - { -- re_set_syntax (RE_SYNTAX_AWK); -+ re_set_syntax (RE_SYNTAX_AWK | (match_icase ? RE_ICASE : 0)); - dfasyntax (RE_SYNTAX_AWK, match_icase, eolbyte); - } - else - { -- re_set_syntax (RE_SYNTAX_POSIX_EGREP); -+ re_set_syntax (RE_SYNTAX_POSIX_EGREP | (match_icase ? RE_ICASE : 0)); - dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte); - } - -@@ -301,6 +292,10 @@ Ecompile (char const *pattern, size_t size) - motif = sep; - } while (sep && total != 0); - -+ /* Strip trailing newline. */ -+ if (size && pattern[size - 1] == '\n') -+ size--; -+ - /* In the match_words and match_lines cases, we use a different pattern - for the DFA matcher that will quickly throw out cases that won't work. - Then if DFA succeeds we do some hairy stuff using the regex matcher -@@ -316,7 +311,7 @@ Ecompile (char const *pattern, size_t size) - static char const line_end[] = ")$"; - static char const word_beg[] = "(^|[^[:alnum:]_])("; - static char const word_end[] = ")([^[:alnum:]_]|$)"; -- char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end); -+ char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end); - size_t i; - strcpy (n, match_lines ? line_beg : word_beg); - i = strlen(n); -@@ -339,15 +334,34 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact) - char eol = eolbyte; - int backref, start, len; - struct kwsmatch kwsm; -- size_t i; -+ size_t i, ret_val; -+ static int use_dfa; -+ static int use_dfa_checked = 0; - #ifdef MBS_SUPPORT -- char *mb_properties = NULL; -+ int mb_cur_max = MB_CUR_MAX; -+ mbstate_t mbs; -+ memset (&mbs, '\0', sizeof (mbstate_t)); - #endif /* MBS_SUPPORT */ - -+ if (!use_dfa_checked) -+ { -+ char *grep_use_dfa = getenv ("GREP_USE_DFA"); -+ if (!grep_use_dfa) -+ { - #ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1 && kwset) -- mb_properties = check_multibyte_string(buf, size); -+ /* Turn off DFA when processing multibyte input. */ -+ use_dfa = (MB_CUR_MAX == 1); -+#else -+ use_dfa = 1; - #endif /* MBS_SUPPORT */ -+ } -+ else -+ { -+ use_dfa = atoi (grep_use_dfa); -+ } -+ -+ use_dfa_checked = 1; -+ } - - buflim = buf + size; - -@@ -358,47 +372,120 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact) - if (kwset) - { - /* Find a possible match using the KWset matcher. */ -- size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm); -+#ifdef MBS_SUPPORT -+ size_t bytes_left = 0; -+#endif /* MBS_SUPPORT */ -+ size_t offset; -+#ifdef MBS_SUPPORT -+ /* kwsexec doesn't work with match_icase and multibyte input. */ -+ if (match_icase && mb_cur_max > 1) -+ /* Avoid kwset */ -+ offset = 0; -+ else -+#endif /* MBS_SUPPORT */ -+ offset = kwsexec (kwset, beg, buflim - beg, &kwsm); - if (offset == (size_t) -1) -- { -+ goto failure; - #ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1) -- free(mb_properties); --#endif -- return (size_t)-1; -+ if (mb_cur_max > 1 && !using_utf8) -+ { -+ bytes_left = offset; -+ while (bytes_left) -+ { -+ size_t mlen = mbrlen (beg, bytes_left, &mbs); -+ if (mlen == (size_t) -1 || mlen == 0) -+ { -+ /* Incomplete character: treat as single-byte. */ -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ beg++; -+ bytes_left--; -+ continue; -+ } -+ -+ if (mlen == (size_t) -2) -+ /* Offset points inside multibyte character: -+ * no good. */ -+ break; -+ -+ beg += mlen; -+ bytes_left -= mlen; -+ } - } -+ else -+#endif /* MBS_SUPPORT */ - beg += offset; - /* Narrow down to the line containing the candidate, and - run it through DFA. */ - end = memchr(beg, eol, buflim - beg); - end++; - #ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0) -+ if (mb_cur_max > 1 && bytes_left) - continue; --#endif -+#endif /* MBS_SUPPORT */ - while (beg > buf && beg[-1] != eol) - --beg; -- if (kwsm.index < kwset_exact_matches) -- goto success; -- if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) -+ if ( -+#ifdef MBS_SUPPORT -+ !(match_icase && mb_cur_max > 1) && -+#endif /* MBS_SUPPORT */ -+ (kwsm.index < kwset_exact_matches)) -+ goto success_in_beg_and_end; -+ if (use_dfa && -+ dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) - continue; - } - else - { - /* No good fixed strings; start with DFA. */ -- size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref); -+#ifdef MBS_SUPPORT -+ size_t bytes_left = 0; -+#endif /* MBS_SUPPORT */ -+ size_t offset = 0; -+ if (use_dfa) -+ offset = dfaexec (&dfa, beg, buflim - beg, &backref); - if (offset == (size_t) -1) - break; - /* Narrow down to the line we've found. */ -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1 && !using_utf8) -+ { -+ bytes_left = offset; -+ while (bytes_left) -+ { -+ size_t mlen = mbrlen (beg, bytes_left, &mbs); -+ if (mlen == (size_t) -1 || mlen == 0) -+ { -+ /* Incomplete character: treat as single-byte. */ -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ beg++; -+ bytes_left--; -+ continue; -+ } -+ -+ if (mlen == (size_t) -2) -+ /* Offset points inside multibyte character: -+ * no good. */ -+ break; -+ -+ beg += mlen; -+ bytes_left -= mlen; -+ } -+ } -+ else -+#endif /* MBS_SUPPORT */ - beg += offset; - end = memchr (beg, eol, buflim - beg); - end++; -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1 && bytes_left) -+ continue; -+#endif /* MBS_SUPPORT */ - while (beg > buf && beg[-1] != eol) - --beg; - } - /* Successful, no backreferences encountered! */ -- if (!backref) -- goto success; -+ if (use_dfa && !backref) -+ goto success_in_beg_and_end; - } - else - end = beg + size; -@@ -413,14 +500,11 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact) - end - beg - 1, &(patterns[i].regs)))) - { - len = patterns[i].regs.end[0] - start; -- if (exact) -- { -- *match_size = len; -- return start; -- } -+ if (exact && !match_words) -+ goto success_in_start_and_len; - if ((!match_lines && !match_words) - || (match_lines && len == end - beg - 1)) -- goto success; -+ goto success_in_beg_and_end; - /* If -w, check if the match aligns with word boundaries. - We do this iteratively because: - (a) the line may contain more than one occurence of the -@@ -431,10 +515,114 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact) - if (match_words) - while (start >= 0) - { -- if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1])) -- && (len == end - beg - 1 -- || !WCHAR ((unsigned char) beg[start + len]))) -- goto success; -+ int lword_match = 0; -+ if (start == 0) -+ lword_match = 1; -+ else -+ { -+ assert (start > 0); -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1) -+ { -+ const char *s; -+ size_t mr; -+ wchar_t pwc; -+ -+ /* Locate the start of the multibyte character -+ before the match position (== beg + start). */ -+ if (using_utf8) -+ { -+ /* UTF-8 is a special case: scan backwards -+ until we find a 7-bit character or a -+ lead byte. */ -+ s = beg + start - 1; -+ while (s > buf -+ && (unsigned char) *s >= 0x80 -+ && (unsigned char) *s <= 0xbf) -+ --s; -+ } -+ else -+ { -+ /* Scan forwards to find the start of the -+ last complete character before the -+ match position. */ -+ size_t bytes_left = start - 1; -+ s = beg; -+ while (bytes_left > 0) -+ { -+ mr = mbrlen (s, bytes_left, &mbs); -+ if (mr == (size_t) -1 || mr == 0) -+ { -+ memset (&mbs, '\0', sizeof (mbs)); -+ s++; -+ bytes_left--; -+ continue; -+ } -+ if (mr == (size_t) -2) -+ { -+ memset (&mbs, '\0', sizeof (mbs)); -+ break; -+ } -+ s += mr; -+ bytes_left -= mr; -+ } -+ } -+ mr = mbrtowc (&pwc, s, beg + start - s, &mbs); -+ if (mr == (size_t) -2 || mr == (size_t) -1 || -+ mr == 0) -+ { -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ lword_match = 1; -+ } -+ else if (!(iswalnum (pwc) || pwc == L'_') -+ && mr == beg + start - s) -+ lword_match = 1; -+ } -+ else -+#endif /* MBS_SUPPORT */ -+ if (!WCHAR ((unsigned char) beg[start - 1])) -+ lword_match = 1; -+ } -+ -+ if (lword_match) -+ { -+ int rword_match = 0; -+ if (start + len == end - beg - 1) -+ rword_match = 1; -+ else -+ { -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1) -+ { -+ wchar_t nwc; -+ int mr; -+ -+ mr = mbtowc (&nwc, beg + start + len, -+ end - beg - start - len - 1); -+ if (mr <= 0) -+ { -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ rword_match = 1; -+ } -+ else if (!iswalnum (nwc) && nwc != L'_') -+ rword_match = 1; -+ } -+ else -+#endif /* MBS_SUPPORT */ -+ if (!WCHAR ((unsigned char) beg[start + len])) -+ rword_match = 1; -+ } -+ -+ if (rword_match) -+ { -+ if (!exact) -+ /* Returns the whole line. */ -+ goto success_in_beg_and_end; -+ else -+ /* Returns just this word match. */ -+ goto success_in_start_and_len; -+ } -+ } - if (len > 0) - { - /* Try a shorter length anchored at the same place. */ -@@ -461,26 +649,154 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact) - } - } /* for Regex patterns. */ - } /* for (beg = end ..) */ --#ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1 && mb_properties) -- free (mb_properties); --#endif /* MBS_SUPPORT */ -+ -+ failure: - return (size_t) -1; - -- success: --#ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1 && mb_properties) -- free (mb_properties); --#endif /* MBS_SUPPORT */ -- *match_size = end - beg; -- return beg - buf; -+ success_in_beg_and_end: -+ len = end - beg; -+ start = beg - buf; -+ /* FALLTHROUGH */ -+ -+ success_in_start_and_len: -+ *match_size = len; -+ return start; - } - -+#ifdef MBS_SUPPORT -+static int f_i_multibyte; /* whether we're using the new -Fi MB method */ -+static struct -+{ -+ wchar_t **patterns; -+ size_t count, maxlen; -+ unsigned char *match; -+} Fimb; -+#endif -+ - static void - Fcompile (char const *pattern, size_t size) - { -+ int mb_cur_max = MB_CUR_MAX; - char const *beg, *lim, *err; - -+ check_utf8 (); -+#ifdef MBS_SUPPORT -+ /* Support -F -i for UTF-8 input. */ -+ if (match_icase && mb_cur_max > 1) -+ { -+ mbstate_t mbs; -+ wchar_t *wcpattern = xmalloc ((size + 1) * sizeof (wchar_t)); -+ const char *patternend = pattern; -+ size_t wcsize; -+ kwset_t fimb_kwset = NULL; -+ char *starts = NULL; -+ wchar_t *wcbeg, *wclim; -+ size_t allocated = 0; -+ -+ memset (&mbs, '\0', sizeof (mbs)); -+# ifdef __GNU_LIBRARY__ -+ wcsize = mbsnrtowcs (wcpattern, &patternend, size, size, &mbs); -+ if (patternend != pattern + size) -+ wcsize = (size_t) -1; -+# else -+ { -+ char *patterncopy = xmalloc (size + 1); -+ -+ memcpy (patterncopy, pattern, size); -+ patterncopy[size] = '\0'; -+ patternend = patterncopy; -+ wcsize = mbsrtowcs (wcpattern, &patternend, size, &mbs); -+ if (patternend != patterncopy + size) -+ wcsize = (size_t) -1; -+ free (patterncopy); -+ } -+# endif -+ if (wcsize + 2 <= 2) -+ { -+fimb_fail: -+ free (wcpattern); -+ free (starts); -+ if (fimb_kwset) -+ kwsfree (fimb_kwset); -+ free (Fimb.patterns); -+ Fimb.patterns = NULL; -+ } -+ else -+ { -+ if (!(fimb_kwset = kwsalloc (NULL))) -+ error (2, 0, _("memory exhausted")); -+ -+ starts = xmalloc (mb_cur_max * 3); -+ wcbeg = wcpattern; -+ do -+ { -+ int i; -+ size_t wclen; -+ -+ if (Fimb.count >= allocated) -+ { -+ if (allocated == 0) -+ allocated = 128; -+ else -+ allocated *= 2; -+ Fimb.patterns = xrealloc (Fimb.patterns, -+ sizeof (wchar_t *) * allocated); -+ } -+ Fimb.patterns[Fimb.count++] = wcbeg; -+ for (wclim = wcbeg; -+ wclim < wcpattern + wcsize && *wclim != L'\n'; ++wclim) -+ *wclim = towlower (*wclim); -+ *wclim = L'\0'; -+ wclen = wclim - wcbeg; -+ if (wclen > Fimb.maxlen) |
