grep: Fix LSB NG Cases for gplv2 version.

-/tset/LI18NUX2K.L1/utils/egrep-tp/T.egrep-tp 5 -/tset/LI18NUX2K.L1/utils/fgrep/T.fgrep 5 -/tset/LI18NUX2K.L1/utils/grep-tp/T.grep-tp 5 The LSB core test requires grep egrep and fgrep can perform pattern matching in searches without regard to case if -i option is specified. Signed-off-by: Li Xin <lixin.fnst@cn.fujitsu.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
author: Li xin <lixin.fnst@cn.fujitsu.com> 2015-05-25 10:18:43 +0800
committer: Richard Purdie <richard.purdie@linuxfoundation.org> 2015-05-25 08:54:48 +0100
commit: d3b6aa30b3ea30d4e6a6ca923693367f66957ab0 (patch)
tree: 29b213308617c3bbd40d1722ca92f023767238b2 /meta
parent: 4434977ccb95f8f366ba133366093b8c7ef1f718 (diff)
download: openembedded-core-d3b6aa30b3ea30d4e6a6ca923693367f66957ab0.tar.gz
openembedded-core-d3b6aa30b3ea30d4e6a6ca923693367f66957ab0.tar.bz2
openembedded-core-d3b6aa30b3ea30d4e6a6ca923693367f66957ab0.zip
2 files changed, 1345 insertions, 1 deletions
diff --git a/meta/recipes-extended/grep/grep-2.5.1a/grep-egrep-fgrep-Fix-LSB-NG-cases.patch b/meta/recipes-extended/grep/grep-2.5.1a/grep-egrep-fgrep-Fix-LSB-NG-cases.patch
new file mode 100644
index 0000000000..327ee56402
--- /dev/null
+++ b/meta/recipes-extended/grep/grep-2.5.1a/grep-egrep-fgrep-Fix-LSB-NG-cases.patch
@@ -0,0 +1,1342 @@
+From c884dd12ec062569335702848fc5f29f436c28fa Mon Sep 17 00:00:00 2001
+From: Li xin <lixin.fnst@cn.fujitsu.com>
+Date: Mon, 25 May 2015 10:15:57 +0900
+Subject: [PATCH] grep egrep fgrep: Fix LSB NG cases.
+
+The LSB core test requires grep egrep and fgrep can
+perform pattern matching in searches without regard
+to case if -i option is specified.
+
+Upstream-Status: backport.
+
+Signed-off-by: Li Xin <lixin.fnst@cn.fujitsu.com>
+---
+ lib/posix/regex.h |   4 +
+ src/dfa.c         |  22 +-
+ src/grep.c        |  96 ++++---
+ src/search.c      | 833 +++++++++++++++++++++++++++++++++++++++++++++---------
+ 4 files changed, 768 insertions(+), 187 deletions(-)
+
+diff --git a/lib/posix/regex.h b/lib/posix/regex.h
+index 63c2fef..7bb2b0e 100644
+--- a/lib/posix/regex.h
++++ b/lib/posix/regex.h
+@@ -109,6 +109,10 @@ typedef unsigned long int reg_syntax_t;
+    If not set, \{, \}, {, and } are literals.  */
+ #define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+ 
++/* If this bit is set, then ignore case when matching.
++   If not set, then case is significant.  */
++#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
++
+ /* If this bit is set, +, ? and | aren't recognized as operators.
+    If not set, they are.  */
+ #define RE_LIMITED_OPS (RE_INTERVALS << 1)
+diff --git a/src/dfa.c b/src/dfa.c
+index 590bfa7..27c876a 100644
+--- a/src/dfa.c
++++ b/src/dfa.c
+@@ -414,7 +414,7 @@ update_mb_len_index (unsigned char const *p, int len)
+ 
+ /* This function fetch a wide character, and update cur_mb_len,
+    used only if the current locale is a multibyte environment.  */
+-static wchar_t
++static wint_t
+ fetch_wc (char const *eoferr)
+ {
+   wchar_t wc;
+@@ -423,7 +423,7 @@ fetch_wc (char const *eoferr)
+       if (eoferr != 0)
+ 	dfaerror (eoferr);
+       else
+-	return -1;
++	return WEOF;
+     }
+ 
+   cur_mb_len = mbrtowc(&wc, lexptr, lexleft, &mbs);
+@@ -459,7 +459,7 @@ fetch_wc (char const *eoferr)
+ static void
+ parse_bracket_exp_mb ()
+ {
+-  wchar_t wc, wc1, wc2;
++  wint_t wc, wc1, wc2;
+ 
+   /* Work area to build a mb_char_classes.  */
+   struct mb_char_classes *work_mbc;
+@@ -496,7 +496,7 @@ parse_bracket_exp_mb ()
+     work_mbc->invert = 0;
+   do
+     {
+-      wc1 = -1; /* mark wc1 is not initialized".  */
++      wc1 = WEOF; /* mark wc1 is not initialized".  */
+ 
+       /* Note that if we're looking at some other [:...:] construct,
+ 	 we just treat it as a bunch of ordinary characters.  We can do
+@@ -586,7 +586,7 @@ parse_bracket_exp_mb ()
+ 		      work_mbc->coll_elems[work_mbc->ncoll_elems++] = elem;
+ 		    }
+  		}
+-	      wc = -1;
++	      wc1 = wc = WEOF;
+ 	    }
+ 	  else
+ 	    /* We treat '[' as a normal character here.  */
+@@ -600,7 +600,7 @@ parse_bracket_exp_mb ()
+ 	    wc = fetch_wc(("Unbalanced ["));
+ 	}
+ 
+-      if (wc1 == -1)
++      if (wc1 == WEOF)
+ 	wc1 = fetch_wc(_("Unbalanced ["));
+ 
+       if (wc1 == L'-')
+@@ -630,17 +630,17 @@ parse_bracket_exp_mb ()
+ 	    }
+ 	  REALLOC_IF_NECESSARY(work_mbc->range_sts, wchar_t,
+ 			       range_sts_al, work_mbc->nranges + 1);
+-	  work_mbc->range_sts[work_mbc->nranges] = wc;
++	  work_mbc->range_sts[work_mbc->nranges] = (wchar_t)wc;
+ 	  REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t,
+ 			       range_ends_al, work_mbc->nranges + 1);
+-	  work_mbc->range_ends[work_mbc->nranges++] = wc2;
++	  work_mbc->range_ends[work_mbc->nranges++] = (wchar_t)wc2;
+ 	}
+-      else if (wc != -1)
++      else if (wc != WEOF)
+ 	/* build normal characters.  */
+ 	{
+ 	  REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al,
+ 			       work_mbc->nchars + 1);
+-	  work_mbc->chars[work_mbc->nchars++] = wc;
++	  work_mbc->chars[work_mbc->nchars++] = (wchar_t)wc;
+ 	}
+     }
+   while ((wc = wc1) != L']');
+@@ -2552,6 +2552,8 @@ match_mb_charset (struct dfa *d, int s, position pos, int index)
+     }
+ 
+   /* match with a character?  */
++  if (case_fold)
++    wc = towlower (wc);
+   for (i = 0; i<work_mbc->nchars; i++)
+     {
+       if (wc == work_mbc->chars[i])
+diff --git a/src/grep.c b/src/grep.c
+index 2fb2fac..3fd4b47 100644
+--- a/src/grep.c
++++ b/src/grep.c
+@@ -30,6 +30,12 @@
+ # include <sys/time.h>
+ # include <sys/resource.h>
+ #endif
++#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
++/* We can handle multibyte string.  */
++# define MBS_SUPPORT
++# include <wchar.h>
++# include <wctype.h>
++#endif
+ #include <stdio.h>
+ #include "system.h"
+ #include "getopt.h"
+@@ -255,19 +261,6 @@ reset (int fd, char const *file, struct stats *stats)
+   bufbeg[-1] = eolbyte;
+   bufdesc = fd;
+ 
+-  if (fstat (fd, &stats->stat) != 0)
+-    {
+-      error (0, errno, "fstat");
+-      return 0;
+-    }
+-  if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode))
+-    return 0;
+-#ifndef DJGPP
+-  if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) || S_ISBLK(stats->stat.st_mode) || S_ISSOCK(stats->stat.st_mode)))
+-#else
+-  if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) || S_ISBLK(stats->stat.st_mode)))
+-#endif
+-    return 0;
+   if (S_ISREG (stats->stat.st_mode))
+     {
+       if (file)
+@@ -558,33 +551,6 @@ prline (char const *beg, char const *lim, int sep)
+     {
+       size_t match_size;
+       size_t match_offset;
+-      if(match_icase)
+-        {
+-	  /* Yuck, this is tricky */
+-          char *buf = (char*) xmalloc (lim - beg);
+-	  char *ibeg = buf;
+-	  char *ilim = ibeg + (lim - beg);
+-	  int i;
+-	  for (i = 0; i < lim - beg; i++)
+-	    ibeg[i] = tolower (beg[i]);
+-	  while ((match_offset = (*execute) (ibeg, ilim-ibeg, &match_size, 1))
+-		 != (size_t) -1)
+-	    {
+-	      char const *b = beg + match_offset;
+-	      if (b == lim)
+-		break;
+-	      fwrite (beg, sizeof (char), match_offset, stdout);
+-	      printf ("\33[%sm", grep_color);
+-	      fwrite (b, sizeof (char), match_size, stdout);
+-	      fputs ("\33[00m", stdout);
+-	      beg = b + match_size;
+-	      ibeg = ibeg + match_offset + match_size;
+-	    }
+-	  fwrite (beg, 1, lim - beg, stdout);
+-	  free (buf);
+-	  lastout = lim;
+-	  return;
+-	}
+       while (lim-beg && (match_offset = (*execute) (beg, lim - beg, &match_size, 1))
+ 	     != (size_t) -1)
+ 	{
+@@ -601,6 +567,7 @@ prline (char const *beg, char const *lim, int sep)
+ 	  fputs ("\33[00m", stdout);
+ 	  beg = b + match_size;
+ 	}
++      fputs ("\33[K", stdout);
+     }
+   fwrite (beg, 1, lim - beg, stdout);
+   if (ferror (stdout))
+@@ -623,7 +590,7 @@ prpending (char const *lim)
+       size_t match_size;
+       --pending;
+       if (outleft
+-	  || (((*execute) (lastout, nl - lastout, &match_size, 0) == (size_t) -1)
++	  || (((*execute) (lastout, nl + 1 - lastout, &match_size, 0) == (size_t) -1)
+ 	      == !out_invert))
+ 	prline (lastout, nl + 1, '-');
+       else
+@@ -895,6 +862,19 @@ grepfile (char const *file, struct stats *stats)
+     }
+   else
+     {
++      if (stat (file, &stats->stat) != 0)
++        {
++          suppressible_error (file, errno);
++          return 1;
++        }
++      if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode))
++        return 1;
++#ifndef DJGPP
++      if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) || S_ISBLK(stats->stat.st_mode) || S_ISSOCK(stats->stat.st_mode) || S_ISFIFO(stats->stat.st_mode)))
++#else
++      if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) || S_ISBLK(stats->stat.st_mode)))
++#endif
++        return 1;
+       while ((desc = open (file, O_RDONLY)) < 0 && errno == EINTR)
+ 	continue;
+ 
+@@ -1681,9 +1661,6 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"))
+ 	  out_invert ^= 1;
+ 	  match_lines = match_words = 0;
+ 	}
+-      else
+-	/* Strip trailing newline. */
+-        --keycc;
+     }
+   else
+     if (optind < argc)
+@@ -1697,6 +1674,37 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"))
+   if (!install_matcher (matcher) && !install_matcher ("default"))
+     abort ();
+ 
++#ifdef MBS_SUPPORT
++  if (MB_CUR_MAX != 1 && match_icase)
++    {
++      wchar_t wc;
++      mbstate_t cur_state, prev_state;
++      int i, len = strlen(keys);
++
++      memset(&cur_state, 0, sizeof(mbstate_t));
++      for (i = 0; i <= len ;)
++	{
++	  size_t mbclen;
++	  mbclen = mbrtowc(&wc, keys + i, len - i, &cur_state);
++	  if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
++	    {
++	      /* An invalid sequence, or a truncated multibyte character.
++		 We treat it as a singlebyte character.  */
++	      mbclen = 1;
++	    }
++	  else
++	    {
++	      if (iswupper((wint_t)wc))
++		{
++		  wc = towlower((wint_t)wc);
++		  wcrtomb(keys + i, wc, &cur_state);
++		}
++	    }
++	  i += mbclen;
++	}
++    }
++#endif /* MBS_SUPPORT */
++
+   (*compile)(keys, keycc);
+ 
+   if ((argc - optind > 1 && !no_filenames) || with_filenames)
+diff --git a/src/search.c b/src/search.c
+index 7bd233f..3c6a485 100644
+--- a/src/search.c
++++ b/src/search.c
+@@ -18,9 +18,13 @@
+ 
+ /* Written August 1992 by Mike Haertel. */
+ 
++#ifndef _GNU_SOURCE
++# define _GNU_SOURCE 1
++#endif
+ #ifdef HAVE_CONFIG_H
+ # include <config.h>
+ #endif
++#include <assert.h>
+ #include <sys/types.h>
+ #if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
+ /* We can handle multibyte string.  */
+@@ -31,7 +35,7 @@
+ 
+ #include "system.h"
+ #include "grep.h"
+-#include "regex.h"
++#include <regex.h>
+ #include "dfa.h"
+ #include "kwset.h"
+ #include "error.h"
+@@ -39,6 +43,9 @@
+ #ifdef HAVE_LIBPCRE
+ # include <pcre.h>
+ #endif
++#ifdef HAVE_LANGINFO_CODESET
++# include <langinfo.h>
++#endif
+ 
+ #define NCHAR (UCHAR_MAX + 1)
+ 
+@@ -70,9 +77,10 @@ static kwset_t kwset;
+    call the regexp matcher at all. */
+ static int kwset_exact_matches;
+ 
+-#if defined(MBS_SUPPORT)
+-static char* check_multibyte_string PARAMS ((char const *buf, size_t size));
+-#endif
++/* UTF-8 encoding allows some optimizations that we can't otherwise
++   assume in a multibyte encoding. */
++static int using_utf8;
++
+ static void kwsinit PARAMS ((void));
+ static void kwsmusts PARAMS ((void));
+ static void Gcompile PARAMS ((char const *, size_t));
+@@ -84,6 +92,15 @@ static void Pcompile PARAMS ((char const *, size_t ));
+ static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int));
+ 
+ void
++check_utf8 (void)
++{
++#ifdef HAVE_LANGINFO_CODESET
++  if (strcmp (nl_langinfo (CODESET), "UTF-8") == 0)
++    using_utf8 = 1;
++#endif
++}
++
++void
+ dfaerror (char const *mesg)
+ {
+   error (2, 0, mesg);
+@@ -141,38 +158,6 @@ kwsmusts (void)
+     }
+ }
+ 
+-#ifdef MBS_SUPPORT
+-/* This function allocate the array which correspond to "buf".
+-   Then this check multibyte string and mark on the positions which
+-   are not singlebyte character nor the first byte of a multibyte
+-   character.  Caller must free the array.  */
+-static char*
+-check_multibyte_string(char const *buf, size_t size)
+-{
+-  char *mb_properties = malloc(size);
+-  mbstate_t cur_state;
+-  int i;
+-  memset(&cur_state, 0, sizeof(mbstate_t));
+-  memset(mb_properties, 0, sizeof(char)*size);
+-  for (i = 0; i < size ;)
+-    {
+-      size_t mbclen;
+-      mbclen = mbrlen(buf + i, size - i, &cur_state);
+-
+-      if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
+-	{
+-	  /* An invalid sequence, or a truncated multibyte character.
+-	     We treat it as a singlebyte character.  */
+-	  mbclen = 1;
+-	}
+-      mb_properties[i] = mbclen;
+-      i += mbclen;
+-    }
+-
+-  return mb_properties;
+-}
+-#endif
+-
+ static void
+ Gcompile (char const *pattern, size_t size)
+ {
+@@ -181,7 +166,8 @@ Gcompile (char const *pattern, size_t size)
+   size_t total = size;
+   char const *motif = pattern;
+ 
+-  re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
++  check_utf8 ();
++  re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE | (match_icase ? RE_ICASE : 0));
+   dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte);
+ 
+   /* For GNU regex compiler we have to pass the patterns separately to detect
+@@ -218,6 +204,10 @@ Gcompile (char const *pattern, size_t size)
+       motif = sep;
+     } while (sep && total != 0);
+ 
++  /* Strip trailing newline. */
++  if (size && pattern[size - 1] == '\n')
++    size--;
++
+   /* In the match_words and match_lines cases, we use a different pattern
+      for the DFA matcher that will quickly throw out cases that won't work.
+      Then if DFA succeeds we do some hairy stuff using the regex matcher
+@@ -233,7 +223,7 @@ Gcompile (char const *pattern, size_t size)
+       static char const line_end[] = "\\)$";
+       static char const word_beg[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
+       static char const word_end[] = "\\)\\([^[:alnum:]_]\\|$\\)";
+-      char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
++      char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end);
+       size_t i;
+       strcpy (n, match_lines ? line_beg : word_beg);
+       i = strlen (n);
+@@ -257,14 +247,15 @@ Ecompile (char const *pattern, size_t size)
+   size_t total = size;
+   char const *motif = pattern;
+ 
++  check_utf8 ();
+   if (strcmp (matcher, "awk") == 0)
+     {
+-      re_set_syntax (RE_SYNTAX_AWK);
++      re_set_syntax (RE_SYNTAX_AWK | (match_icase ? RE_ICASE : 0));
+       dfasyntax (RE_SYNTAX_AWK, match_icase, eolbyte);
+     }
+   else
+     {
+-      re_set_syntax (RE_SYNTAX_POSIX_EGREP);
++      re_set_syntax (RE_SYNTAX_POSIX_EGREP | (match_icase ? RE_ICASE : 0));
+       dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte);
+     }
+ 
+@@ -301,6 +292,10 @@ Ecompile (char const *pattern, size_t size)
+       motif = sep;
+     } while (sep && total != 0);
+ 
++  /* Strip trailing newline. */
++  if (size && pattern[size - 1] == '\n')
++    size--;
++
+   /* In the match_words and match_lines cases, we use a different pattern
+      for the DFA matcher that will quickly throw out cases that won't work.
+      Then if DFA succeeds we do some hairy stuff using the regex matcher
+@@ -316,7 +311,7 @@ Ecompile (char const *pattern, size_t size)
+       static char const line_end[] = ")$";
+       static char const word_beg[] = "(^|[^[:alnum:]_])(";
+       static char const word_end[] = ")([^[:alnum:]_]|$)";
+-      char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
++      char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end);
+       size_t i;
+       strcpy (n, match_lines ? line_beg : word_beg);
+       i = strlen(n);
+@@ -339,15 +334,34 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
+   char eol = eolbyte;
+   int backref, start, len;
+   struct kwsmatch kwsm;
+-  size_t i;
++  size_t i, ret_val;
++  static int use_dfa;
++  static int use_dfa_checked = 0;
+ #ifdef MBS_SUPPORT
+-  char *mb_properties = NULL;
++  int mb_cur_max = MB_CUR_MAX;
++  mbstate_t mbs;
++  memset (&mbs, '\0', sizeof (mbstate_t));
+ #endif /* MBS_SUPPORT */
+ 
++  if (!use_dfa_checked)
++    {
++      char *grep_use_dfa = getenv ("GREP_USE_DFA");
++      if (!grep_use_dfa)
++	{
+ #ifdef MBS_SUPPORT
+-  if (MB_CUR_MAX > 1 && kwset)
+-    mb_properties = check_multibyte_string(buf, size);
++	  /* Turn off DFA when processing multibyte input. */
++	  use_dfa = (MB_CUR_MAX == 1);
++#else
++	  use_dfa = 1;
+ #endif /* MBS_SUPPORT */
++	}
++      else
++	{
++	  use_dfa = atoi (grep_use_dfa);
++	}
++
++      use_dfa_checked = 1;
++    }
+ 
+   buflim = buf + size;
+ 
+@@ -358,47 +372,120 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
+ 	  if (kwset)
+ 	    {
+ 	      /* Find a possible match using the KWset matcher. */
+-	      size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
++#ifdef MBS_SUPPORT
++	      size_t bytes_left = 0;
++#endif /* MBS_SUPPORT */
++	      size_t offset;
++#ifdef MBS_SUPPORT
++	      /* kwsexec doesn't work with match_icase and multibyte input. */
++	      if (match_icase && mb_cur_max > 1)
++		/* Avoid kwset */
++		offset = 0;
++	      else
++#endif /* MBS_SUPPORT */
++	      offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
+ 	      if (offset == (size_t) -1)
+-		{
++	        goto failure;
+ #ifdef MBS_SUPPORT
+-		  if (MB_CUR_MAX > 1)
+-		    free(mb_properties);
+-#endif
+-		  return (size_t)-1;
++	      if (mb_cur_max > 1 && !using_utf8)
++		{
++		  bytes_left = offset;
++		  while (bytes_left)
++		    {
++		      size_t mlen = mbrlen (beg, bytes_left, &mbs);
++		      if (mlen == (size_t) -1 || mlen == 0)
++			{
++			  /* Incomplete character: treat as single-byte. */
++			  memset (&mbs, '\0', sizeof (mbstate_t));
++			  beg++;
++			  bytes_left--;
++			  continue;
++			}
++
++		      if (mlen == (size_t) -2)
++			/* Offset points inside multibyte character:
++			 * no good. */
++			break;
++
++		      beg += mlen;
++		      bytes_left -= mlen;
++		    }
+ 		}
++	      else
++#endif /* MBS_SUPPORT */
+ 	      beg += offset;
+ 	      /* Narrow down to the line containing the candidate, and
+ 		 run it through DFA. */
+ 	      end = memchr(beg, eol, buflim - beg);
+ 	      end++;
+ #ifdef MBS_SUPPORT
+-	      if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
++	      if (mb_cur_max > 1 && bytes_left)
+ 		continue;
+-#endif
++#endif /* MBS_SUPPORT */
+ 	      while (beg > buf && beg[-1] != eol)
+ 		--beg;
+-	      if (kwsm.index < kwset_exact_matches)
+-		goto success;
+-	      if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
++	      if (
++#ifdef MBS_SUPPORT
++		  !(match_icase && mb_cur_max > 1) &&
++#endif /* MBS_SUPPORT */
++		  (kwsm.index < kwset_exact_matches))
++		goto success_in_beg_and_end;
++	      if (use_dfa &&
++		  dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
+ 		continue;
+ 	    }
+ 	  else
+ 	    {
+ 	      /* No good fixed strings; start with DFA. */
+-	      size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref);
++#ifdef MBS_SUPPORT
++	      size_t bytes_left = 0;
++#endif /* MBS_SUPPORT */
++	      size_t offset = 0;
++	      if (use_dfa)
++		offset = dfaexec (&dfa, beg, buflim - beg, &backref);
+ 	      if (offset == (size_t) -1)
+ 		break;
+ 	      /* Narrow down to the line we've found. */
++#ifdef MBS_SUPPORT
++	      if (mb_cur_max > 1 && !using_utf8)
++		{
++		  bytes_left = offset;
++		  while (bytes_left)
++		    {
++		      size_t mlen = mbrlen (beg, bytes_left, &mbs);
++		      if (mlen == (size_t) -1 || mlen == 0)
++			{
++			  /* Incomplete character: treat as single-byte. */
++			  memset (&mbs, '\0', sizeof (mbstate_t));
++			  beg++;
++			  bytes_left--;
++			  continue;
++			}
++
++		      if (mlen == (size_t) -2)
++			/* Offset points inside multibyte character:
++			 * no good. */
++			break;
++
++		      beg += mlen;
++		      bytes_left -= mlen;
++		    }
++		}
++	      else
++#endif /* MBS_SUPPORT */
+ 	      beg += offset;
+ 	      end = memchr (beg, eol, buflim - beg);
+ 	      end++;
++#ifdef MBS_SUPPORT
++	      if (mb_cur_max > 1 && bytes_left)
++		continue;
++#endif /* MBS_SUPPORT */
+ 	      while (beg > buf && beg[-1] != eol)
+ 		--beg;
+ 	    }
+ 	  /* Successful, no backreferences encountered! */
+-	  if (!backref)
+-	    goto success;
++	  if (use_dfa && !backref)
++	    goto success_in_beg_and_end;
+ 	}
+       else
+ 	end = beg + size;
+@@ -413,14 +500,11 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
+ 				       end - beg - 1, &(patterns[i].regs))))
+ 	    {
+ 	      len = patterns[i].regs.end[0] - start;
+-	      if (exact)
+-		{
+-		  *match_size = len;
+-		  return start;
+-		}
++	      if (exact && !match_words)
++	        goto success_in_start_and_len;
+ 	      if ((!match_lines && !match_words)
+ 		  || (match_lines && len == end - beg - 1))
+-		goto success;
++		goto success_in_beg_and_end;
+ 	      /* If -w, check if the match aligns with word boundaries.
+ 		 We do this iteratively because:
+ 		 (a) the line may contain more than one occurence of the
+@@ -431,10 +515,114 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
+ 	      if (match_words)
+ 		while (start >= 0)
+ 		  {
+-		    if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
+-			&& (len == end - beg - 1
+-			    || !WCHAR ((unsigned char) beg[start + len])))
+-		      goto success;
++		    int lword_match = 0;
++		    if (start == 0)
++		      lword_match = 1;
++		    else
++		      {
++			assert (start > 0);
++#ifdef MBS_SUPPORT
++			if (mb_cur_max > 1)
++			  {
++			    const char *s;
++			    size_t mr;
++			    wchar_t pwc;
++
++			    /* Locate the start of the multibyte character
++			       before the match position (== beg + start). */
++			    if (using_utf8)
++			      {
++				/* UTF-8 is a special case: scan backwards
++				   until we find a 7-bit character or a
++				   lead byte. */
++				s = beg + start - 1;
++				while (s > buf
++				       && (unsigned char) *s >= 0x80
++				       && (unsigned char) *s <= 0xbf)
++				  --s;
++			      }
++			    else
++			      {
++				/* Scan forwards to find the start of the
++				   last complete character before the
++				   match position.  */
++				size_t bytes_left = start - 1;
++				s = beg;
++				while (bytes_left > 0)
++				  {
++				    mr = mbrlen (s, bytes_left, &mbs);
++				    if (mr == (size_t) -1 || mr == 0)
++				      {
++					memset (&mbs, '\0', sizeof (mbs));
++					s++;
++					bytes_left--;
++					continue;
++				      }
++				    if (mr == (size_t) -2)
++				      {
++					memset (&mbs, '\0', sizeof (mbs));
++					break;
++				      }
++				    s += mr;
++				    bytes_left -= mr;
++				  }
++			      }
++			    mr = mbrtowc (&pwc, s, beg + start - s, &mbs);
++			    if (mr == (size_t) -2 || mr == (size_t) -1 ||
++				mr == 0)
++			      {
++				memset (&mbs, '\0', sizeof (mbstate_t));
++				lword_match = 1;
++			      }
++			    else if (!(iswalnum (pwc) || pwc == L'_')
++				     && mr == beg + start - s)
++			      lword_match = 1;
++			  }
++			else
++#endif /* MBS_SUPPORT */
++			if (!WCHAR ((unsigned char) beg[start - 1]))
++			  lword_match = 1;
++		      }
++
++		    if (lword_match)
++		      {
++			int rword_match = 0;
++			if (start + len == end - beg - 1)
++			  rword_match = 1;
++			else
++			  {
++#ifdef MBS_SUPPORT
++			    if (mb_cur_max > 1)
++			      {
++				wchar_t nwc;
++				int mr;
++
++				mr = mbtowc (&nwc, beg + start + len,
++					     end - beg - start - len - 1);
++				if (mr <= 0)
++				  {
++				    memset (&mbs, '\0', sizeof (mbstate_t));
++				    rword_match = 1;
++				  }
++				else if (!iswalnum (nwc) && nwc != L'_')
++				  rword_match = 1;
++			      }
++			    else
++#endif /* MBS_SUPPORT */
++			    if (!WCHAR ((unsigned char) beg[start + len]))
++			      rword_match = 1;
++			  }
++
++			if (rword_match)
++			  {
++			    if (!exact)
++			      /* Returns the whole line. */
++			      goto success_in_beg_and_end;
++			    else
++			      /* Returns just this word match. */
++			      goto success_in_start_and_len;
++			  }
++		      }
+ 		    if (len > 0)
+ 		      {
+ 			/* Try a shorter length anchored at the same place. */
+@@ -461,26 +649,154 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
+ 	    }
+ 	} /* for Regex patterns.  */
+     } /* for (beg = end ..) */
+-#ifdef MBS_SUPPORT
+-  if (MB_CUR_MAX > 1 && mb_properties)
+-    free (mb_properties);
+-#endif /* MBS_SUPPORT */
++
++ failure:
+   return (size_t) -1;
+ 
+- success:
+-#ifdef MBS_SUPPORT
+-  if (MB_CUR_MAX > 1 && mb_properties)
+-    free (mb_properties);
+-#endif /* MBS_SUPPORT */
+-  *match_size = end - beg;
+-  return beg - buf;
++ success_in_beg_and_end:
++  len = end - beg;
++  start = beg - buf;
++  /* FALLTHROUGH */
++
++ success_in_start_and_len:
++  *match_size = len;
++  return start;
+ }
+ 
++#ifdef MBS_SUPPORT
++static int f_i_multibyte; /* whether we're using the new -Fi MB method */
++static struct
++{
++  wchar_t **patterns;
++  size_t count, maxlen;
++  unsigned char *match;
++} Fimb;
++#endif
++
+ static void
+ Fcompile (char const *pattern, size_t size)
+ {
++  int mb_cur_max = MB_CUR_MAX;
+   char const *beg, *lim, *err;
+ 
++  check_utf8 ();
++#ifdef MBS_SUPPORT
++  /* Support -F -i for UTF-8 input. */
++  if (match_icase && mb_cur_max > 1)
++    {
++      mbstate_t mbs;
++      wchar_t *wcpattern = xmalloc ((size + 1) * sizeof (wchar_t));
++      const char *patternend = pattern;
++      size_t wcsize;
++      kwset_t fimb_kwset = NULL;
++      char *starts = NULL;
++      wchar_t *wcbeg, *wclim;
++      size_t allocated = 0;
++
++      memset (&mbs, '\0', sizeof (mbs));
++# ifdef __GNU_LIBRARY__
++      wcsize = mbsnrtowcs (wcpattern, &patternend, size, size, &mbs);
++      if (patternend != pattern + size)
++	wcsize = (size_t) -1;
++# else
++      {
++	char *patterncopy = xmalloc (size + 1);
++
++	memcpy (patterncopy, pattern, size);
++	patterncopy[size] = '\0';
++	patternend = patterncopy;
++	wcsize = mbsrtowcs (wcpattern, &patternend, size, &mbs);
++	if (patternend != patterncopy + size)
++	  wcsize = (size_t) -1;
++	free (patterncopy);
++      }
++# endif
++      if (wcsize + 2 <= 2)
++	{
++fimb_fail:
++	  free (wcpattern);
++	  free (starts);
++	  if (fimb_kwset)
++	    kwsfree (fimb_kwset);
++	  free (Fimb.patterns);
++	  Fimb.patterns = NULL;
++	}
++      else
++	{
++	  if (!(fimb_kwset = kwsalloc (NULL)))
++	    error (2, 0, _("memory exhausted"));
++
++	  starts = xmalloc (mb_cur_max * 3);
++	  wcbeg = wcpattern;
++	  do
++	    {
++	      int i;
++	      size_t wclen;
++
++	      if (Fimb.count >= allocated)
++		{
++		  if (allocated == 0)
++		    allocated = 128;
++		  else
++		    allocated *= 2;
++		  Fimb.patterns = xrealloc (Fimb.patterns,
++					    sizeof (wchar_t *) * allocated);
++		}
++	      Fimb.patterns[Fimb.count++] = wcbeg;
++	      for (wclim = wcbeg;
++		   wclim < wcpattern + wcsize && *wclim != L'\n'; ++wclim)
++		*wclim = towlower (*wclim);
++	      *wclim = L'\0';
++	      wclen = wclim - wcbeg;
++	      if (wclen > Fimb.maxlen)
++		Fimb.maxlen = wclen;
++	      if (wclen > 3)
++		wclen = 3;
++	      if (wclen == 0)
++		{
++		  if ((err = kwsincr (fimb_kwset, "", 0)) != 0)
++		    error (2, 0, err);
++		}
++	      else
++		for (i = 0; i < (1 << wclen); i++)
++		  {
++		    char *p = starts;
++		    int j, k;
++
++		    for (j = 0; j < wclen; ++j)
++		      {
++			wchar_t wc = wcbeg[j];
++			if (i & (1 << j))
++			  {
++			    wc = towupper (wc);
++			    if (wc == wcbeg[j])
++			      continue;
++			  }
++			k = wctomb (p, wc);
++			if (k <= 0)
++			  goto fimb_fail;
++			p += k;
++		      }
++		    if ((err = kwsincr (fimb_kwset, starts, p - starts)) != 0)
++		      error (2, 0, err);
++		  }
++	      if (wclim < wcpattern + wcsize)
++		++wclim;
++	      wcbeg = wclim;
++	    }
++	  while (wcbeg < wcpattern + wcsize);
++	  f_i_multibyte = 1;
++	  kwset = fimb_kwset;
++	  free (starts);
++	  Fimb.match = xmalloc (Fimb.count);
++	  if ((err = kwsprep (kwset)) != 0)
++	    error (2, 0, err);
++	  return;
++	}
++    }
++#endif /* MBS_SUPPORT */
++
++
+   kwsinit ();
+   beg = pattern;
+   do
+@@ -499,6 +815,76 @@ Fcompile (char const *pattern, size_t size)
+     error (2, 0, err);
+ }
+ 
++#ifdef MBS_SUPPORT
++static int
++Fimbexec (const char *buf, size_t size, size_t *plen, int exact)
++{
++  size_t len, letter, i;
++  int ret = -1;
++  mbstate_t mbs;
++  wchar_t wc;
++  int patterns_left;
++
++  assert (match_icase && f_i_multibyte == 1);
++  assert (MB_CUR_MAX > 1);
++
++  memset (&mbs, '\0', sizeof (mbs));
++  memset (Fimb.match, '\1', Fimb.count);
++  letter = len = 0;
++  patterns_left = 1;
++  while (patterns_left && len <= size)
++    {
++      size_t c;
++
++      patterns_left = 0;
++      if (len < size)
++	{
++	  c = mbrtowc (&wc, buf + len, size - len, &mbs);
++	  if (c + 2 <= 2)
++	    return ret;
++
++	  wc = towlower (wc);
++	}
++      else
++	{
++	  c = 1;
++	  wc = L'\0';
++	}
++
++      for (i = 0; i < Fimb.count; i++)
++	{
++	  if (Fimb.match[i])
++	    {
++	      if (Fimb.patterns[i][letter] == L'\0')
++		{
++		  /* Found a match. */
++		  *plen = len;
++		  if (!exact && !match_words)
++		    return 0;
++		  else
++		    {
++		      /* For -w or exact look for longest match.  */
++		      ret = 0;
++		      Fimb.match[i] = '\0';
++		      continue;
++		    }
++		}
++
++	      if (Fimb.patterns[i][letter] == wc)
++		patterns_left = 1;
++	      else
++		Fimb.match[i] = '\0';
++	    }
++	}
++
++      len += c;
++      letter++;
++    }
++
++  return ret;
++}
++#endif /* MBS_SUPPORT */
++
+ static size_t
+ Fexecute (char const *buf, size_t size, size_t *match_size, int exact)
+ {
+@@ -506,88 +892,268 @@ Fexecute (char const *buf, size_t size, size_t *match_size, int exact)
+   register size_t len;
+   char eol = eolbyte;
+   struct kwsmatch kwsmatch;
++  size_t ret_val;
+ #ifdef MBS_SUPPORT
+-  char *mb_properties;
+-  if (MB_CUR_MAX > 1)
+-    mb_properties = check_multibyte_string (buf, size);
++  int mb_cur_max = MB_CUR_MAX;
++  mbstate_t mbs;
++  memset (&mbs, '\0', sizeof (mbstate_t));
++  const char *last_char = NULL;
+ #endif /* MBS_SUPPORT */
+ 
+-  for (beg = buf; beg <= buf + size; ++beg)
++  for (beg = buf; beg < buf + size; ++beg)
+     {
+-      size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
++      size_t offset;
++      offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
++
+       if (offset == (size_t) -1)
+-	{
++	goto failure;
+ #ifdef MBS_SUPPORT
+-	  if (MB_CUR_MAX > 1)
+-	    free(mb_properties);
+-#endif /* MBS_SUPPORT */
+-	  return offset;
++      if (mb_cur_max > 1 && !using_utf8)
++	{
++	  size_t bytes_left = offset;
++	  while (bytes_left)
++	    {
++	      size_t mlen = mbrlen (beg, bytes_left, &mbs);
++
++	      last_char = beg;
++	      if (mlen == (size_t) -1 || mlen == 0)
++		{
++		  /* Incomplete character: treat as single-byte. */
++		  memset (&mbs, '\0', sizeof (mbstate_t));
++		  beg++;
++		  bytes_left--;
++		  continue;
++		}
++
++	      if (mlen == (size_t) -2)
++		/* Offset points inside multibyte character: no good. */
++		break;
++
++	      beg += mlen;
++	      bytes_left -= mlen;
++	    }
++
++	  if (bytes_left)
++	    continue;
+ 	}
+-#ifdef MBS_SUPPORT
+-      if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
+-	continue; /* It is a part of multibyte character.  */
++      else
+ #endif /* MBS_SUPPORT */
+       beg += offset;
+-      len = kwsmatch.size[0];
+-      if (exact)
+-	{
+-	  *match_size = len;
+ #ifdef MBS_SUPPORT
+-	  if (MB_CUR_MAX > 1)
+-	    free (mb_properties);
++      /* For f_i_multibyte, the string at beg now matches first 3 chars of
++	 one of the search strings (less if there are shorter search strings).
++	 See if this is a real match.  */
++      if (f_i_multibyte
++	  && Fimbexec (beg, buf + size - beg, &kwsmatch.size[0], exact))
++	goto next_char;
+ #endif /* MBS_SUPPORT */
+-	  return beg - buf;
+-	}
++      len = kwsmatch.size[0];
++      if (exact && !match_words)
++	goto success_in_beg_and_len;
+       if (match_lines)
+ 	{
+ 	  if (beg > buf && beg[-1] != eol)
+-	    continue;
++	    goto next_char;
+ 	  if (beg + len < buf + size && beg[len] != eol)
+-	    continue;
++	    goto next_char;
+ 	  goto success;
+ 	}
+       else if (match_words)</
author	Li xin <lixin.fnst@cn.fujitsu.com>	2015-05-25 10:18:43 +0800
committer	Richard Purdie <richard.purdie@linuxfoundation.org>	2015-05-25 08:54:48 +0100
commit	d3b6aa30b3ea30d4e6a6ca923693367f66957ab0 (patch)
tree	29b213308617c3bbd40d1722ca92f023767238b2 /meta
parent	4434977ccb95f8f366ba133366093b8c7ef1f718 (diff)
download	openembedded-core-d3b6aa30b3ea30d4e6a6ca923693367f66957ab0.tar.gz openembedded-core-d3b6aa30b3ea30d4e6a6ca923693367f66957ab0.tar.bz2 openembedded-core-d3b6aa30b3ea30d4e6a6ca923693367f66957ab0.zip