diff options
-rw-r--r-- | meta/recipes-core/glibc/glibc/strcoll-Remove-incorrect-STRDIFF-based-optimization-.patch | 323 | ||||
-rw-r--r-- | meta/recipes-core/glibc/glibc_2.22.bb | 1 |
2 files changed, 324 insertions, 0 deletions
diff --git a/meta/recipes-core/glibc/glibc/strcoll-Remove-incorrect-STRDIFF-based-optimization-.patch b/meta/recipes-core/glibc/glibc/strcoll-Remove-incorrect-STRDIFF-based-optimization-.patch new file mode 100644 index 0000000000..8ce255f110 --- /dev/null +++ b/meta/recipes-core/glibc/glibc/strcoll-Remove-incorrect-STRDIFF-based-optimization-.patch @@ -0,0 +1,323 @@ +Upstream-Status: Backport + +Signed-off-by: Li Xin <lixin.fnst@cn.fujitsu.com> + +From https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=6c84109cfa26f35c3dfed3acb97d347361bd5849 +Author: Carlos O'Donell <carlos@systemhalted.org> +Date: Thu Oct 8 16:34:53 2015 -0400 + + strcoll: Remove incorrect STRDIFF-based optimization (Bug 18589). + + The optimization introduced in commit + f13c2a8dff2329c6692a80176262ceaaf8a6f74e, causes regressions in + sorting for languages that have digraphs that change sort order, like + cs_CZ which sorts ch between h and i. + + My analysis shows the fast-forwarding optimization in STRCOLL advances + through a digraph while possibly stopping in the middle which results + in a subsequent skipping of the digraph and incorrect sorting. The + optimization is incorrect as implemented and because of that I'm + removing it for 2.23, and I will also commit this fix for 2.22 where + it was originally introduced. + + This patch reverts the optimization, introduces a new bug-strcoll2.c + regression test that tests both cs_CZ.UTF-8 and da_DK.ISO-8859-1 and + ensures they sort one digraph each correctly. The optimization can't be + applied without regressing this test. + + Checked on x86_64, bug-strcoll2.c fails without this patch and passes + after. This will also get a fix on 2.22 which has the same bug. + + (cherry picked from commit 87701a58e291bd7ac3b407d10a829dac52c9c16e) +--- + locale/C-collate.c | 4 +- + locale/categories.def | 1 - + locale/langinfo.h | 1 - + locale/localeinfo.h | 7 ---- + locale/programs/ld-collate.c | 9 ----- + string/bug-strcoll2.c | 95 ++++++++++++++++++++++++++++++++++++++++++++ + string/strcoll_l.c | 39 +----------------- + wcsmbs/wcscoll_l.c | 1 - + 8 files changed, 98 insertions(+), 59 deletions(-) + create mode 100644 string/bug-strcoll2.c + +diff --git a/locale/C-collate.c b/locale/C-collate.c +index d7f3c55..06dfdfa 100644 +--- a/locale/C-collate.c ++++ b/locale/C-collate.c +@@ -144,8 +144,6 @@ const struct __locale_data _nl_C_LC_COLLATE attribute_hidden = + /* _NL_COLLATE_COLLSEQWC */ + { .string = (const char *) collseqwc }, + /* _NL_COLLATE_CODESET */ +- { .string = _nl_C_codeset }, +- /* _NL_COLLATE_ENCODING_TYPE */ +- { .word = __cet_8bit } ++ { .string = _nl_C_codeset } + } + }; +diff --git a/locale/categories.def b/locale/categories.def +index 045489d..a8dda53 100644 +--- a/locale/categories.def ++++ b/locale/categories.def +@@ -58,7 +58,6 @@ DEFINE_CATEGORY + DEFINE_ELEMENT (_NL_COLLATE_COLLSEQMB, "collate-collseqmb", std, wstring) + DEFINE_ELEMENT (_NL_COLLATE_COLLSEQWC, "collate-collseqwc", std, wstring) + DEFINE_ELEMENT (_NL_COLLATE_CODESET, "collate-codeset", std, string) +- DEFINE_ELEMENT (_NL_COLLATE_ENCODING_TYPE, "collate-encoding-type", std, word) + ), NO_POSTLOAD) + + +diff --git a/locale/langinfo.h b/locale/langinfo.h +index ffc5c7f..a565d9d 100644 +--- a/locale/langinfo.h ++++ b/locale/langinfo.h +@@ -255,7 +255,6 @@ enum + _NL_COLLATE_COLLSEQMB, + _NL_COLLATE_COLLSEQWC, + _NL_COLLATE_CODESET, +- _NL_COLLATE_ENCODING_TYPE, + _NL_NUM_LC_COLLATE, + + /* LC_CTYPE category: character classification. +diff --git a/locale/localeinfo.h b/locale/localeinfo.h +index a7516c0..c076d8e 100644 +--- a/locale/localeinfo.h ++++ b/locale/localeinfo.h +@@ -110,13 +110,6 @@ enum coll_sort_rule + sort_mask + }; + +-/* Collation encoding type. */ +-enum collation_encoding_type +-{ +- __cet_other, +- __cet_8bit, +- __cet_utf8 +-}; + + /* We can map the types of the entries into a few categories. */ + enum value_type +diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c +index 16e9039..3c88c6d 100644 +--- a/locale/programs/ld-collate.c ++++ b/locale/programs/ld-collate.c +@@ -32,7 +32,6 @@ + #include "linereader.h" + #include "locfile.h" + #include "elem-hash.h" +-#include "../localeinfo.h" + + /* Uncomment the following line in the production version. */ + /* #define NDEBUG 1 */ +@@ -2130,8 +2129,6 @@ collate_output (struct localedef_t *locale, const struct charmap_t *charmap, + /* The words have to be handled specially. */ + if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB)) + add_locale_uint32 (&file, 0); +- else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_ENCODING_TYPE)) +- add_locale_uint32 (&file, __cet_other); + else + add_locale_empty (&file); + } +@@ -2495,12 +2492,6 @@ collate_output (struct localedef_t *locale, const struct charmap_t *charmap, + add_locale_raw_data (&file, collate->mbseqorder, 256); + add_locale_collseq_table (&file, &collate->wcseqorder); + add_locale_string (&file, charmap->code_set_name); +- if (strcmp (charmap->code_set_name, "UTF-8") == 0) +- add_locale_uint32 (&file, __cet_utf8); +- else if (charmap->mb_cur_max == 1) +- add_locale_uint32 (&file, __cet_8bit); +- else +- add_locale_uint32 (&file, __cet_other); + write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file); + + obstack_free (&weightpool, NULL); +diff --git a/string/bug-strcoll2.c b/string/bug-strcoll2.c +new file mode 100644 +index 0000000..950b090 +--- /dev/null ++++ b/string/bug-strcoll2.c +@@ -0,0 +1,95 @@ ++/* Bug 18589: sort-test.sh fails at random. ++ * Copyright (C) 1998-2015 Free Software Foundation, Inc. ++ * This file is part of the GNU C Library. ++ * Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. ++ * ++ * The GNU C Library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * The GNU C Library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with the GNU C Library; if not, see ++ * <http://www.gnu.org/licenses/>. */ ++ ++#include <stdio.h> ++#include <string.h> ++#include <locale.h> ++ ++/* An incorrect strcoll optimization resulted in incorrect ++ * results from strcoll for cs_CZ and da_DK. */ ++ ++int ++test_cs_CZ (void) ++{ ++ const char t1[] = "config"; ++ const char t2[] = "choose"; ++ if (setlocale (LC_ALL, "cs_CZ.UTF-8") == NULL) ++ { ++ perror ("setlocale"); ++ return 1; ++ } ++ /* In Czech the digraph ch sorts after c, therefore we expect ++ * config to sort before choose. */ ++ int a = strcoll (t1, t2); ++ int b = strcoll (t2, t1); ++ printf ("strcoll (\"%s\", \"%s\") = %d\n", t1, t2, a); ++ printf ("strcoll (\"%s\", \"%s\") = %d\n", t2, t1, b); ++ if (a < 0 && b > 0) ++ { ++ puts ("PASS: config < choose"); ++ return 0; ++ } ++ else ++ { ++ puts ("FAIL: Wrong sorting in cz_CZ.UTF-8."); ++ return 1; ++ } ++} ++ ++int ++test_da_DK (void) ++{ ++ const char t1[] = "AS"; ++ const char t2[] = "AA"; ++ if (setlocale (LC_ALL, "da_DK.ISO-8859-1") == NULL) ++ { ++ perror ("setlocale"); ++ return 1; ++ } ++ /* AA should be treated as the last letter of the Danish alphabet, ++ * hence sorting after AS. */ ++ int a = strcoll (t1, t2); ++ int b = strcoll (t2, t1); ++ printf ("strcoll (\"%s\", \"%s\") = %d\n", t1, t2, a); ++ printf ("strcoll (\"%s\", \"%s\") = %d\n", t2, t1, b); ++ if (a < 0 && b > 0) ++ { ++ puts ("PASS: AS < AA"); ++ return 0; ++ } ++ else ++ { ++ puts ("FAIL: Wrong sorting in da_DK.ISO-8859-1"); ++ return 1; ++ } ++} ++ ++static int ++do_test (void) ++{ ++ int err = 0; ++ err |= test_cs_CZ (); ++ err |= test_da_DK (); ++ return err; ++} ++ ++#define TEST_FUNCTION do_test () ++#include "../test-skeleton.c" ++ ++ +diff --git a/string/strcoll_l.c b/string/strcoll_l.c +index b36b18c..a18b65e 100644 +--- a/string/strcoll_l.c ++++ b/string/strcoll_l.c +@@ -30,7 +30,6 @@ + # define STRING_TYPE char + # define USTRING_TYPE unsigned char + # define STRCOLL __strcoll_l +-# define STRDIFF __strdiff + # define STRCMP strcmp + # define WEIGHT_H "../locale/weight.h" + # define SUFFIX MB +@@ -43,19 +42,6 @@ + #include "../locale/localeinfo.h" + #include WEIGHT_H + +-#define MASK_UTF8_7BIT (1 << 7) +-#define MASK_UTF8_START (3 << 6) +- +-size_t +-STRDIFF (const STRING_TYPE *s, const STRING_TYPE *t) +-{ +- size_t n; +- +- for (n = 0; *s != '\0' && *s++ == *t++; ++n) +- continue; +- +- return n; +-} + + /* Track status while looking for sequences in a string. */ + typedef struct +@@ -274,29 +260,9 @@ STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l) + const USTRING_TYPE *extra; + const int32_t *indirect; + +- /* In case there is no locale specific sort order (C / POSIX). */ + if (nrules == 0) + return STRCMP (s1, s2); + +- /* Fast forward to the position of the first difference. Needs to be +- encoding aware as the byte-by-byte comparison can stop in the middle +- of a char sequence for multibyte encodings like UTF-8. */ +- uint_fast32_t encoding = +- current->values[_NL_ITEM_INDEX (_NL_COLLATE_ENCODING_TYPE)].word; +- if (encoding != __cet_other) +- { +- size_t diff = STRDIFF (s1, s2); +- if (diff > 0) +- { +- if (encoding == __cet_utf8 && (*(s1 + diff) & MASK_UTF8_7BIT) != 0) +- do +- diff--; +- while (diff > 0 && (*(s1 + diff) & MASK_UTF8_START) != MASK_UTF8_START); +- s1 += diff; +- s2 += diff; +- } +- } +- + /* Catch empty strings. */ + if (__glibc_unlikely (*s1 == '\0') || __glibc_unlikely (*s2 == '\0')) + return (*s1 != '\0') - (*s2 != '\0'); +@@ -363,9 +329,8 @@ STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l) + byte-level comparison to ensure that we don't waste time + going through multiple passes for totally equal strings + before proceeding to subsequent passes. */ +- if (pass == 0 && encoding == __cet_other && +- STRCMP (s1, s2) == 0) +- return result; ++ if (pass == 0 && STRCMP (s1, s2) == 0) ++ return result; + else + break; + } +diff --git a/wcsmbs/wcscoll_l.c b/wcsmbs/wcscoll_l.c +index 6d9384a..87f240d 100644 +--- a/wcsmbs/wcscoll_l.c ++++ b/wcsmbs/wcscoll_l.c +@@ -23,7 +23,6 @@ + #define STRING_TYPE wchar_t + #define USTRING_TYPE wint_t + #define STRCOLL __wcscoll_l +-#define STRDIFF __wcsdiff + #define STRCMP __wcscmp + #define WEIGHT_H "../locale/weightwc.h" + #define SUFFIX WC +-- +1.8.4.2 + diff --git a/meta/recipes-core/glibc/glibc_2.22.bb b/meta/recipes-core/glibc/glibc_2.22.bb index 1b23597ef7..da755865ad 100644 --- a/meta/recipes-core/glibc/glibc_2.22.bb +++ b/meta/recipes-core/glibc/glibc_2.22.bb @@ -40,6 +40,7 @@ SRC_URI = "${GLIBC_GIT_URI};branch=${SRCBRANCH};name=glibc \ file://0027-eglibc-use-option-groups-Conditionally-exclude-c-tes.patch \ file://nscd-no-bash.patch \ file://0028-Clear-ELF_RTYPE_CLASS_EXTERN_PROTECTED_DATA-for-prel.patch \ + file://strcoll-Remove-incorrect-STRDIFF-based-optimization-.patch \ " SRC_URI += "\ |