Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Fix unnecessary overallocation due to incomplete character
When an incomplete character was found at the end of a string, the
code ran amok and allocated lots of memory.  Stricter limits
are now in place.
  • Loading branch information
Ulrich Drepper committed May 28, 2011
1 parent 4f03107 commit 8887a92
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 16 deletions.
12 changes: 12 additions & 0 deletions ChangeLog
@@ -1,5 +1,17 @@
2011-05-28 Ulrich Drepper <drepper@gmail.com>

[BZ #12811]
* posix/regex_internal.c (build_wcs_buffer): Don't signal we have to
grow the buffers more if it already has to be sufficient.
(build_wcs_upper_buffer): Likewise.
* posix/regexec.c (check_matching): Likewise.
(clean_state_log_if_needed): Likewise.
(extend_buffers): Don't enlarge buffers beyond size of the input
buffer.
Patches mostly by Emil Wojak <emil@wojak.eu>.
* posix/bug-regex32.c: New file.
* posix/Makefile (tests): Add bug-regex32.

* locale/findlocale.c (_nl_find_locale): Return right away if
_nl_explode_name failed.
* locale/programs/locarchive.c (add_locale_to_archive): Likewise.
Expand Down
2 changes: 1 addition & 1 deletion NEWS
Expand Up @@ -17,7 +17,7 @@ Version 2.14
12545, 12551, 12582, 12583, 12587, 12597, 12601, 12611, 12625, 12626,
12631, 12650, 12653, 12655, 12660, 12671, 12681, 12685, 12711, 12713,
12714, 12717, 12723, 12724, 12734, 12738, 12746, 12766, 12775, 12777,
12782, 12788, 12792, 12795, 12813, 12814
12782, 12788, 12792, 12795, 12811, 12813, 12814

* The RPC implementation in libc is obsoleted. Old programs keep working
but new programs cannot be linked with the routines in libc anymore.
Expand Down
2 changes: 1 addition & 1 deletion posix/Makefile
Expand Up @@ -82,7 +82,7 @@ tests := tstgetopt testfnm runtests runptests \
bug-regex17 bug-regex18 bug-regex19 bug-regex20 \
bug-regex21 bug-regex22 bug-regex23 bug-regex24 \
bug-regex25 bug-regex26 bug-regex27 bug-regex28 \
bug-regex29 bug-regex30 bug-regex31 \
bug-regex29 bug-regex30 bug-regex31 bug-regex32 \
tst-nice tst-nanosleep tst-regex2 \
transbug tst-rxspencer tst-pcre tst-boost \
bug-ga1 tst-vfork1 tst-vfork2 tst-vfork3 tst-waitid \
Expand Down
36 changes: 36 additions & 0 deletions posix/bug-regex32.c
@@ -0,0 +1,36 @@
// BZ 12811
#include <regex.h>
#include <stdio.h>
#include <locale.h>

/* Regression test for BZ 12811.

   Compiles the case-insensitive pattern ".*ab" under a UTF-8 locale and
   runs it against a subject string whose last byte (0xc4) begins a
   multibyte character that is never completed.

   Returns 0 when regexec reports REG_NOMATCH.  Returns 1 when the locale
   cannot be selected, when regcomp fails, or when regexec returns any
   other code.  */
static int
do_test (void)
{
  char buf[1000];
  regex_t preg;

  if (setlocale (LC_CTYPE, "de_DE.UTF-8") == NULL)
    {
      puts ("setlocale failed");
      return 1;
    }

  int e = regcomp (&preg, ".*ab", REG_ICASE);
  if (e != 0)
    {
      regerror (e, &preg, buf, sizeof (buf));
      printf ("regcomp = %d \"%s\"\n", e, buf);
      return 1;
    }

  // Incomplete character at the end of the buffer
  e = regexec (&preg, "aaaaaaaaaaaa\xc4", 0, NULL, 0);

  /* Build the message while PREG is still a valid compiled pattern, and
     only then release it: regerror must not be handed a regex_t that has
     already been passed to regfree.  */
  regerror (e, &preg, buf, sizeof (buf));
  regfree (&preg);
  printf ("regexec = %d \"%s\"\n", e, buf);

  return e != REG_NOMATCH;
}

#define TEST_FUNCTION do_test ()
#include "../test-skeleton.c"
24 changes: 14 additions & 10 deletions posix/regex_internal.c
Expand Up @@ -237,13 +237,8 @@ build_wcs_buffer (re_string_t *pstr)
else
p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
if (BE (mbclen == (size_t) -2, 0))
{
/* The buffer doesn't have enough space, finish to build. */
pstr->cur_state = prev_st;
break;
}
else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
if (BE (mbclen == (size_t) -1 || mbclen == 0
|| (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len), 0))
{
/* We treat these cases as a singlebyte character. */
mbclen = 1;
Expand All @@ -252,6 +247,12 @@ build_wcs_buffer (re_string_t *pstr)
wc = pstr->trans[wc];
pstr->cur_state = prev_st;
}
else if (BE (mbclen == (size_t) -2, 0))
{
/* The buffer doesn't have enough space, finish to build. */
pstr->cur_state = prev_st;
break;
}

/* Write wide character and padding. */
pstr->wcs[byte_idx++] = wc;
Expand Down Expand Up @@ -334,9 +335,11 @@ build_wcs_upper_buffer (re_string_t *pstr)
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
pstr->wcs[byte_idx++] = WEOF;
}
else if (mbclen == (size_t) -1 || mbclen == 0)
else if (mbclen == (size_t) -1 || mbclen == 0
|| (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
{
/* It is an invalid character or '\0'. Just use the byte. */
/* It is an invalid character, an incomplete character
at the end of the string, or '\0'. Just use the byte. */
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
pstr->mbs[byte_idx] = ch;
/* And also cast it to wide char. */
Expand Down Expand Up @@ -449,7 +452,8 @@ build_wcs_upper_buffer (re_string_t *pstr)
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
pstr->wcs[byte_idx++] = WEOF;
}
else if (mbclen == (size_t) -1 || mbclen == 0)
else if (mbclen == (size_t) -1 || mbclen == 0
|| (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
{
/* It is an invalid character or '\0'. Just use the byte. */
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
Expand Down
10 changes: 6 additions & 4 deletions posix/regexec.c
@@ -1,5 +1,5 @@
/* Extended regular expression matching and search library.
Copyright (C) 2002-2005, 2007, 2009, 2010 Free Software Foundation, Inc.
Copyright (C) 2002-2005,2007,2009,2010,2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
Expand Down Expand Up @@ -1156,7 +1156,8 @@ check_matching (re_match_context_t *mctx, int fl_longest_match,
re_dfastate_t *old_state = cur_state;
int next_char_idx = re_string_cur_idx (&mctx->input) + 1;

if (BE (next_char_idx >= mctx->input.bufs_len, 0)
if ((BE (next_char_idx >= mctx->input.bufs_len, 0)
&& mctx->input.bufs_len < mctx->input.len)
|| (BE (next_char_idx >= mctx->input.valid_len, 0)
&& mctx->input.valid_len < mctx->input.len))
{
Expand Down Expand Up @@ -1732,7 +1733,8 @@ clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
{
int top = mctx->state_log_top;

if (next_state_log_idx >= mctx->input.bufs_len
if ((next_state_log_idx >= mctx->input.bufs_len
&& mctx->input.bufs_len < mctx->input.len)
|| (next_state_log_idx >= mctx->input.valid_len
&& mctx->input.valid_len < mctx->input.len))
{
Expand Down Expand Up @@ -4111,7 +4113,7 @@ extend_buffers (re_match_context_t *mctx)
return REG_ESPACE;

/* Double the lengths of the buffers. */
ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
ret = re_string_realloc_buffers (pstr, MIN (pstr->len, pstr->bufs_len * 2));
if (BE (ret != REG_NOERROR, 0))
return ret;

Expand Down

0 comments on commit 8887a92

Please sign in to comment.