From 7fb90fb89bbdf273ab7ab96517fe1b156cd7aee1 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Sun, 8 May 2011 08:37:19 -0400 Subject: [PATCH] Fix Linux getcwd for long paths The getcwd syscall (so far?) can only handle paths up to one page in size. There is no limit on directory hierarchy depth, though, and the POSIX getcwd is supposed to handle arbitrarily deep hierarchies. When the syscall fails with ENAMETOOLONG, fall back to the generic getcwd. Additionally, optimize the generic getcwd to use openat when possible to change the asymptotic performance from O(n^2) to O(n). --- ChangeLog | 13 + NEWS | 6 +- dirent/rewinddir.c | 3 +- include/dirent.h | 2 + include/sys/stat.h | 2 + sysdeps/mach/hurd/rewinddir.c | 4 +- sysdeps/posix/getcwd.c | 356 ++++++++++++++++++---------- sysdeps/unix/rewinddir.c | 3 +- sysdeps/unix/sysv/linux/Makefile | 2 +- sysdeps/unix/sysv/linux/dl-getcwd.c | 1 + sysdeps/unix/sysv/linux/getcwd.c | 61 +++-- 11 files changed, 307 insertions(+), 146 deletions(-) create mode 100644 sysdeps/unix/sysv/linux/dl-getcwd.c diff --git a/ChangeLog b/ChangeLog index 132f0d0649..488ce8f187 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,18 @@ 2011-05-08 Ulrich Drepper + [BZ #12713] + * sysdeps/unix/sysv/linux/getcwd.c: If getcwd syscall report + ENAMETOOLONG use generic getcwd. + * sysdeps/posix/getcwd.c: Add support to use openat. + * sysdeps/unix/sysv/linux/Makefile [subdir=elf] (sysdep-rtld-routines): + Add dl-getcwd. + * sysdeps/unix/sysv/linux/dl-getcwd.c: New file. + * include/sys/stat.h: Define __fstatat macro. + * include/dirent.h: Add libc_hidden_proto for rewinddir. + * dirent/rewinddir.c: Add libc_hidden_def. + * sysdeps/mach/hurd/rewinddir.c: Likewise. + * sysdeps/unix/rewinddir.c: Likewise. + * include/dirent.h (__alloc_dir): Add flags parameter. * sysdeps/unix/fdopendir.c (__fdopendir): Pass flags to __alloc_dir. 
* sysdeps/unix/opendir.c (__opendir): Pass 0 in new parameter to diff --git a/NEWS b/NEWS index 8ce7e3bb94..933fa8fec1 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ -GNU C Library NEWS -- history of user-visible changes. 2011-5-7 +GNU C Library NEWS -- history of user-visible changes. 2011-5-8 Copyright (C) 1992-2009, 2010, 2011 Free Software Foundation, Inc. See the end for copying conditions. @@ -23,8 +23,8 @@ Version 2.14 * The following bugs are resolved with this release: 11724, 12393, 12420, 12445, 12454, 12460, 12469, 12489, 12509, 12510, - 12518, 12583, 12587, 12597, 12631, 12650, 12653, 12655, 12685, 12714, - 12717, 12723, 12734 + 12518, 12583, 12587, 12597, 12631, 12650, 12653, 12655, 12685, 12713, + 12714, 12717, 12723, 12734 Version 2.13 diff --git a/dirent/rewinddir.c b/dirent/rewinddir.c index e78d316880..85009b6509 100644 --- a/dirent/rewinddir.c +++ b/dirent/rewinddir.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1991, 1995, 1996, 1997 Free Software Foundation, Inc. +/* Copyright (C) 1991, 1995, 1996, 1997, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -29,6 +29,7 @@ rewinddir (dirp) __set_errno (ENOSYS); /* No way to indicate failure. 
*/ } +libc_hidden_def (rewinddir) stub_warning (rewinddir) diff --git a/include/dirent.h b/include/dirent.h index 4db63a626c..0f8f1cbe88 100644 --- a/include/dirent.h +++ b/include/dirent.h @@ -32,4 +32,6 @@ extern DIR *__alloc_dir (int fd, bool close_fd, int flags, const struct stat64 *statp) internal_function; +libc_hidden_proto (rewinddir) + #endif diff --git a/include/sys/stat.h b/include/sys/stat.h index 66898b1760..e00df5381e 100644 --- a/include/sys/stat.h +++ b/include/sys/stat.h @@ -48,4 +48,6 @@ libc_hidden_proto (__fxstatat64) #define fstat64(fd, buf) __fxstat64 (_STAT_VER, fd, buf) #define fstat(fd, buf) __fxstat (_STAT_VER, fd, buf) #define __fstat(fd, buf) __fxstat (_STAT_VER, fd, buf) +#define __fstatat(dfd, fname, buf, flag) \ + __fxstatat (_STAT_VER, dfd, fname, buf, flag) #endif diff --git a/sysdeps/mach/hurd/rewinddir.c b/sysdeps/mach/hurd/rewinddir.c index 69564536c9..322844e639 100644 --- a/sysdeps/mach/hurd/rewinddir.c +++ b/sysdeps/mach/hurd/rewinddir.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1994, 1997 Free Software Foundation, Inc. +/* Copyright (C) 1994, 1997, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,10 +22,10 @@ #include /* Rewind DIRP to the beginning of the directory. */ -/* XXX should be __rewinddir ? */ void rewinddir (dirp) DIR *dirp; { seekdir (dirp, (off_t) 0L); } +libc_hidden_def (rewinddir) diff --git a/sysdeps/posix/getcwd.c b/sysdeps/posix/getcwd.c index f793521340..f683158284 100644 --- a/sysdeps/posix/getcwd.c +++ b/sysdeps/posix/getcwd.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1991,92,93,94,95,96,97,98,99 Free Software Foundation, Inc. +/* Copyright (C) 1991,92,93,94,95,96,97,98,99,11 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -35,6 +35,7 @@ #endif #include +#include #include #include @@ -171,6 +172,13 @@ extern char *alloca (); # include #endif +#if defined _LIBC && !defined NOT_IN_libc +# include +#else +# define openat_not_cancel_3(dfd, name, mode) openat (dfd, name, mode) +# define close_not_cancel_no_status(fd) close (fd) +#endif + #ifndef PATH_MAX # ifdef MAXPATHLEN # define PATH_MAX MAXPATHLEN @@ -200,6 +208,12 @@ extern char *alloca (); # define GETCWD_RETURN_TYPE char * #endif +#ifdef __ASSUME_ATFCTS +# define have_openat 1 +#else +static int have_openat = 0; +#endif + /* Get the pathname of the current working directory, and put it in SIZE bytes of BUF. Returns NULL if the directory couldn't be determined or SIZE was too small. If successful, returns BUF. In GNU, if BUF is @@ -211,6 +225,7 @@ __getcwd (buf, size) char *buf; size_t size; { +#ifndef __ASSUME_ATFCTS static const char dots[] = "../../../../../../../../../../../../../../../../../../../../../../../\ ../../../../../../../../../../../../../../../../../../../../../../../../../../\ @@ -218,14 +233,15 @@ __getcwd (buf, size) const char *dotp = &dots[sizeof (dots)]; const char *dotlist = dots; size_t dotsize = sizeof (dots) - 1; - dev_t rootdev, thisdev; - ino_t rootino, thisino; - char *path; - register char *pathp; - struct stat st; +#endif int prev_errno = errno; - size_t allocated = size; + DIR *dirstream = NULL; + bool fd_needs_closing = false; + int fd = AT_FDCWD; + char *path; +#ifndef NO_ALLOCATION + size_t allocated = size; if (size == 0) { if (buf != NULL) @@ -237,189 +253,285 @@ __getcwd (buf, size) allocated = PATH_MAX + 1; } - if (buf != NULL) - path = buf; - else + if (buf == NULL) { path = malloc (allocated); if (path == NULL) return NULL; } + else +#else +# define allocated size +#endif + path = buf; - pathp = path + allocated; + char *pathp = path + allocated; *--pathp = '\0'; + struct stat st; if (__lstat (".", &st) < 0) - goto lose2; - 
thisdev = st.st_dev; - thisino = st.st_ino; + goto lose; + dev_t thisdev = st.st_dev; + ino_t thisino = st.st_ino; if (__lstat ("/", &st) < 0) - goto lose2; - rootdev = st.st_dev; - rootino = st.st_ino; + goto lose; + dev_t rootdev = st.st_dev; + ino_t rootino = st.st_ino; while (!(thisdev == rootdev && thisino == rootino)) { - register DIR *dirstream; - struct dirent *d; - dev_t dotdev; - ino_t dotino; - char mount_point; - - /* Look at the parent directory. */ - if (dotp == dotlist) + if (have_openat >= 0) { - /* My, what a deep directory tree you have, Grandma. */ - char *new; - if (dotlist == dots) - { - new = malloc (dotsize * 2 + 1); - if (new == NULL) - goto lose; -#ifdef HAVE_MEMPCPY - dotp = mempcpy (new, dots, dotsize); -#else - memcpy (new, dots, dotsize); - dotp = &new[dotsize]; + int mode = O_RDONLY; +#ifdef O_CLOEXEC + mode |= O_CLOEXEC; #endif - } - else + fd = openat_not_cancel_3 (fd, "..", mode); + } + else + fd = -1; + if (fd >= 0) + { + fd_needs_closing = true; + if (__fstat (fd, &st) < 0) + goto lose; + } +#ifndef __ASSUME_ATFCTS + else if (errno == ENOSYS) + { + have_openat = -1; + + /* Look at the parent directory. */ + if (dotp == dotlist) { - new = realloc ((__ptr_t) dotlist, dotsize * 2 + 1); - if (new == NULL) - goto lose; - dotp = &new[dotsize]; +# ifdef NO_ALLOCATION + __set_errno (ENOMEM); + goto lose; +# else + /* My, what a deep directory tree you have, Grandma. 
*/ + char *new; + if (dotlist == dots) + { + new = malloc (dotsize * 2 + 1); + if (new == NULL) + goto lose; +# ifdef HAVE_MEMPCPY + dotp = mempcpy (new, dots, dotsize); +# else + memcpy (new, dots, dotsize); + dotp = &new[dotsize]; +# endif + } + else + { + new = realloc ((__ptr_t) dotlist, dotsize * 2 + 1); + if (new == NULL) + goto lose; + dotp = &new[dotsize]; + } +# ifdef HAVE_MEMPCPY + *((char *) mempcpy ((char *) dotp, new, dotsize)) = '\0'; + dotsize *= 2; +# else + memcpy ((char *) dotp, new, dotsize); + dotsize *= 2; + new[dotsize] = '\0'; +# endif + dotlist = new; +# endif } -#ifdef HAVE_MEMPCPY - *((char *) mempcpy ((char *) dotp, new, dotsize)) = '\0'; - dotsize *= 2; -#else - memcpy ((char *) dotp, new, dotsize); - dotsize *= 2; - new[dotsize] = '\0'; -#endif - dotlist = new; - } - dotp -= 3; + dotp -= 3; - /* Figure out if this directory is a mount point. */ - if (__lstat (dotp, &st) < 0) + /* Figure out if this directory is a mount point. */ + if (__lstat (dotp, &st) < 0) + goto lose; + } +#endif + else goto lose; - dotdev = st.st_dev; - dotino = st.st_ino; - mount_point = dotdev != thisdev; + + if (dirstream && __closedir (dirstream) != 0) + { + dirstream = NULL; + goto lose; + } + + dev_t dotdev = st.st_dev; + ino_t dotino = st.st_ino; + bool mount_point = dotdev != thisdev; /* Search for the last directory. */ - dirstream = __opendir (dotp); + if (have_openat >= 0) + dirstream = __fdopendir (fd); +#ifndef __ASSUME_ATFCTS + else + dirstream = __opendir (dotp); +#endif if (dirstream == NULL) goto lose; - /* Clear errno to distinguish EOF from error if readdir returns - NULL. */ - __set_errno (0); - while ((d = __readdir (dirstream)) != NULL) + fd_needs_closing = false; + + struct dirent *d; + bool use_d_ino = true; + while (1) { - if (d->d_name[0] == '.' && - (d->d_name[1] == '\0' || - (d->d_name[1] == '.' && d->d_name[2] == '\0'))) + /* Clear errno to distinguish EOF from error if readdir returns + NULL. 
*/ + __set_errno (0); + d = __readdir (dirstream); + if (d == NULL) + { + if (errno == 0) + { + /* When we've iterated through all directory entries + without finding one with a matching d_ino, rewind the + stream and consider each name again, but this time, using + lstat. This is necessary in a chroot on at least one + system. */ + if (use_d_ino) + { + use_d_ino = false; + rewinddir (dirstream); + continue; + } + + /* EOF on dirstream, which means that the current directory + has been removed. */ + __set_errno (ENOENT); + } + goto lose; + } + + if (d->d_type != DT_DIR && d->d_type != DT_UNKNOWN) + continue; + if (d->d_name[0] == '.' + && (d->d_name[1] == '\0' + || (d->d_name[1] == '.' && d->d_name[2] == '\0'))) continue; - if (mount_point || (ino_t) d->d_ino == thisino) + if (use_d_ino && !mount_point && (ino_t) d->d_ino != thisino) + continue; + + if (have_openat >= 0) + { + /* We don't fail here if we cannot stat() a directory entry. + This can happen when (network) filesystems fail. If this + entry is in fact the one we are looking for we will find + out soon as we reach the end of the directory without + having found anything. */ + if (__fstatat (fd, d->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) + continue; + } +#ifndef __ASSUME_ATFCTS + else { char name[dotlist + dotsize - dotp + 1 + _D_ALLOC_NAMLEN (d)]; -#ifdef HAVE_MEMPCPY +# ifdef HAVE_MEMPCPY char *tmp = mempcpy (name, dotp, dotlist + dotsize - dotp); *tmp++ = '/'; strcpy (tmp, d->d_name); -#else +# else memcpy (name, dotp, dotlist + dotsize - dotp); name[dotlist + dotsize - dotp] = '/'; strcpy (&name[dotlist + dotsize - dotp + 1], d->d_name); -#endif +# endif /* We don't fail here if we cannot stat() a directory entry. This can happen when (network) filesystems fail. If this entry is in fact the one we are looking for we will find out soon as we reach the end of the directory without having found anything. 
*/ - if (__lstat (name, &st) >= 0 - && st.st_dev == thisdev && st.st_ino == thisino) - break; + if (__lstat (name, &st) < 0) + continue; } +#endif + if (S_ISDIR (st.st_mode) + && st.st_dev == thisdev && st.st_ino == thisino) + break; } - if (d == NULL) - { - int save = errno; - (void) __closedir (dirstream); - if (save == 0) - /* EOF on dirstream, which means that the current directory - has been removed. */ - save = ENOENT; - __set_errno (save); - goto lose; - } - else - { - size_t namlen = _D_EXACT_NAMLEN (d); - if ((size_t) (pathp - path) <= namlen) + size_t namlen = _D_EXACT_NAMLEN (d); + + if ((size_t) (pathp - path) <= namlen) + { +#ifndef NO_ALLOCATION + if (size == 0) { - if (size != 0) - { - (void) __closedir (dirstream); - __set_errno (ERANGE); - goto lose; - } - else - { - char *tmp; - size_t oldsize = allocated; + size_t oldsize = allocated; - allocated = 2 * MAX (allocated, namlen); - tmp = realloc (path, allocated); - if (tmp == NULL) - { - (void) __closedir (dirstream); - __set_errno (ENOMEM);/* closedir might have changed it.*/ - goto lose; - } + allocated = 2 * MAX (allocated, namlen); + char *tmp = realloc (path, allocated); + if (tmp == NULL) + goto lose; - /* Move current contents up to the end of the buffer. - This is guaranteed to be non-overlapping. */ - pathp = memcpy (tmp + allocated - (path + oldsize - pathp), - tmp + (pathp - path), - path + oldsize - pathp); - path = tmp; - } + /* Move current contents up to the end of the buffer. + This is guaranteed to be non-overlapping. 
*/ + pathp = memcpy (tmp + allocated - (path + oldsize - pathp), + tmp + (pathp - path), + path + oldsize - pathp); + path = tmp; + } + else +#endif + { + __set_errno (ERANGE); + goto lose; } - pathp -= namlen; - (void) memcpy (pathp, d->d_name, namlen); - *--pathp = '/'; - (void) __closedir (dirstream); } + pathp -= namlen; + (void) memcpy (pathp, d->d_name, namlen); + *--pathp = '/'; thisdev = dotdev; thisino = dotino; } + if (dirstream != NULL && __closedir (dirstream) != 0) + { + dirstream = NULL; + goto lose; + } + if (pathp == &path[allocated - 1]) *--pathp = '/'; +#ifndef __ASSUME_ATFCTS if (dotlist != dots) free ((__ptr_t) dotlist); +#endif - memmove (path, pathp, path + allocated - pathp); + size_t used = path + allocated - pathp; + memmove (path, pathp, used); + + if (size == 0) + /* Ensure that the buffer is only as large as necessary. */ + buf = realloc (path, used); + + if (buf == NULL) + /* Either buf was NULL all along, or `realloc' failed but + we still have the original string. */ + buf = path; /* Restore errno on successful return. */ __set_errno (prev_errno); - return path; + return buf; - lose: + lose:; + int save_errno = errno; +#ifndef __ASSUME_ATFCTS if (dotlist != dots) free ((__ptr_t) dotlist); - lose2: +#endif + if (dirstream != NULL) + __closedir (dirstream); + if (fd_needs_closing) + close_not_cancel_no_status (fd); +#ifndef NO_ALLOCATION if (buf == NULL) free (path); +#endif + __set_errno (save_errno); return NULL; } diff --git a/sysdeps/unix/rewinddir.c b/sysdeps/unix/rewinddir.c index 051e93595e..89b0e6d20d 100644 --- a/sysdeps/unix/rewinddir.c +++ b/sysdeps/unix/rewinddir.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1991, 1995-1998, 2005 Free Software Foundation, Inc. +/* Copyright (C) 1991, 1995-1998, 2005, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -35,3 +35,4 @@ rewinddir (dirp) dirp->size = 0; __libc_lock_unlock (dirp->lock); } +libc_hidden_def (rewinddir) diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile index 7066ffe6da..61fbfb4fc8 100644 --- a/sysdeps/unix/sysv/linux/Makefile +++ b/sysdeps/unix/sysv/linux/Makefile @@ -147,7 +147,7 @@ sysdep_routines += xstatconv internal_statvfs internal_statvfs64 \ endif ifeq ($(subdir),elf) -sysdep-rtld-routines += dl-brk dl-sbrk +sysdep-rtld-routines += dl-brk dl-sbrk dl-getcwd CPPFLAGS-lddlibc4 += -DNOT_IN_libc endif diff --git a/sysdeps/unix/sysv/linux/dl-getcwd.c b/sysdeps/unix/sysv/linux/dl-getcwd.c new file mode 100644 index 0000000000..4bd5657f1e --- /dev/null +++ b/sysdeps/unix/sysv/linux/dl-getcwd.c @@ -0,0 +1 @@ +#include "getcwd.c" diff --git a/sysdeps/unix/sysv/linux/getcwd.c b/sysdeps/unix/sysv/linux/getcwd.c index 911d85f43d..db3e292964 100644 --- a/sysdeps/unix/sysv/linux/getcwd.c +++ b/sysdeps/unix/sysv/linux/getcwd.c @@ -1,5 +1,5 @@ /* Determine current working directory. Linux version. - Copyright (C) 1997,1998,1999,2000,2002,2003,2006 + Copyright (C) 1997,1998,1999,2000,2002,2003,2006,2011 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1997. @@ -45,20 +45,13 @@ compiling under 2.1.92+ the libc still runs under older kernels. */ # define no_syscall_getcwd 0 # define have_new_dcache 1 -/* This is a trick since we don't define generic_getcwd. */ -# define generic_getcwd getcwd #else -/* The "proc" filesystem provides an easy method to retrieve the value. - For each process, the corresponding directory contains a symbolic link - named `cwd'. Reading the content of this link immediate gives us the - information. But we have to take care for systems which do not have - the proc filesystem mounted. Use the POSIX implementation in this case. 
*/ -static char *generic_getcwd (char *buf, size_t size) internal_function; - # if __NR_getcwd /* Kernel 2.1.92 introduced a third way to get the current working directory: a syscall. We've got to be careful that even when - compiling under 2.1.92+ the libc still runs under older kernels. */ + compiling under 2.1.92+ the libc still runs under older kernels. + An additional problem is that the system call does not return + the path of directories longer than one page. */ static int no_syscall_getcwd; static int have_new_dcache; # else @@ -67,6 +60,13 @@ static int have_new_dcache = 1; # endif #endif +/* The "proc" filesystem provides an easy method to retrieve the value. + For each process, the corresponding directory contains a symbolic link + named `cwd'. Reading the content of this link immediate gives us the + information. But we have to take care for systems which do not have + the proc filesystem mounted. Use the POSIX implementation in this case. */ +static char *generic_getcwd (char *buf, size_t size) internal_function; + char * __getcwd (char *buf, size_t size) { @@ -124,6 +124,33 @@ __getcwd (char *buf, size_t size) return buf; } + // XXX This should not be necessary but the full getcwd implementation + // drags in too much for the current build proces of ld.so to handle +#ifndef NOT_IN_libc + /* The system call cannot handle paths longer than a page. + Neither can the magic symlink in /proc/self. Just use the + generic implementation right away. 
*/ + if (errno == ENAMETOOLONG) + { +# ifndef NO_ALLOCATION + if (buf == NULL && size == 0) + { + free (path); + path = NULL; + } +# endif + + result = generic_getcwd (path, size); + +# ifndef NO_ALLOCATION + if (result == NULL && buf == NULL && size != 0) + free (path); +# endif + + return result; + } +#endif + # if __ASSUME_GETCWD_SYSCALL /* It should never happen that the `getcwd' syscall failed because the buffer is too small if we allocated the buffer ourselves @@ -196,7 +223,7 @@ __getcwd (char *buf, size_t size) #ifndef NO_ALLOCATION /* Don't put restrictions on the length of the path unless the user does. */ - if (size == 0) + if (buf == NULL && size == 0) { free (path); path = NULL; @@ -214,9 +241,11 @@ __getcwd (char *buf, size_t size) } weak_alias (__getcwd, getcwd) -#if __ASSUME_GETCWD_SYSCALL == 0 + // XXX This should not be necessary but the full getcwd implementation + // drags in too much for the current build proces of ld.so to handle +#ifndef NOT_IN_libc /* Get the code for the generic version. */ -# define GETCWD_RETURN_TYPE static char * internal_function -# define __getcwd generic_getcwd -# include +#define GETCWD_RETURN_TYPE static char * internal_function +#define __getcwd generic_getcwd +#include #endif