From 9a9592ff7c8a8f0e449515c158e4a5a4895c5c23 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 22 Mar 2015 06:00:01 -0400 Subject: [PATCH 1/7] wt-status: don't flush before running "submodule status" This is a holdover from the original implementation in ac8d5af (builtin-status: submodule summary support, 2008-04-12), which just had the sub-process output to our descriptor; we had to make sure we had flushed any data that we produced before it started writing. Since 3ba7407 (submodule summary: ignore --for-status option, 2013-09-06), however, we pipe the sub-process output back to ourselves. So there's no longer any need to flush (it does not hurt, but it may leave readers wondering why we do it). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- wt-status.c | 1 - 1 file changed, 1 deletion(-) diff --git a/wt-status.c b/wt-status.c index 29666d0db..08d40d202 100644 --- a/wt-status.c +++ b/wt-status.c @@ -745,7 +745,6 @@ static void wt_status_print_submodule_summary(struct wt_status *s, int uncommitt sm_summary.git_cmd = 1; sm_summary.no_stdin = 1; - fflush(s->fp); sm_summary.out = -1; run_command(&sm_summary); From d56d966b3b03d2849ef9e20cacd7965106e8fdf0 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 22 Mar 2015 06:00:32 -0400 Subject: [PATCH 2/7] wt_status: fix signedness mismatch in strbuf_read call We call strbuf_read(), and want to know whether we got any output. To do so, we assign the result to a size_t, and check whether it is non-zero. But strbuf_read returns a signed ssize_t. If it encounters an error, it will return -1, and we'll end up treating this the same as if we had gotten output. Instead, we can just check whether our buffer has anything in it (which is what we care about anyway, and is the same thing since we know the buffer was empty to begin with). Note that the "len" variable actually has two roles in this function. Now that we've eliminated the first, we can push the declaration closer to the point of use for the second one. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- wt-status.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wt-status.c b/wt-status.c index 08d40d202..05b69dc4d 100644 --- a/wt-status.c +++ b/wt-status.c @@ -729,7 +729,6 @@ static void wt_status_print_submodule_summary(struct wt_status *s, int uncommitt struct strbuf cmd_stdout = STRBUF_INIT; struct strbuf summary = STRBUF_INIT; char *summary_content; - size_t len; argv_array_pushf(&sm_summary.env_array, "GIT_INDEX_FILE=%s", s->index_file); @@ -749,10 +748,10 @@ static void wt_status_print_submodule_summary(struct wt_status *s, int uncommitt run_command(&sm_summary); - len = strbuf_read(&cmd_stdout, sm_summary.out, 1024); + strbuf_read(&cmd_stdout, sm_summary.out, 1024); /* prepend header, only if there's an actual output */ - if (len) { + if (cmd_stdout.len) { if (uncommitted) strbuf_addstr(&summary, _("Submodules changed but not updated:")); else @@ -763,6 +762,7 @@ static void wt_status_print_submodule_summary(struct wt_status *s, int uncommitt strbuf_release(&cmd_stdout); if (s->display_comment_prefix) { + size_t len; summary_content = strbuf_detach(&summary, &len); strbuf_add_commented_lines(&summary, summary_content, len); free(summary_content); From 911ec99b688fc4d5673a0fc8984b22ff2251e490 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 22 Mar 2015 23:53:43 -0400 Subject: [PATCH 3/7] run-command: introduce capture_command helper Something as simple as reading the stdout from a command turns out to be rather hard to do right. Doing: cmd.out = -1; run_command(&cmd); strbuf_read(&buf, cmd.out, 0); can result in deadlock if the child process produces a large amount of output. What happens is: 1. The parent spawns the child with its stdout connected to a pipe, of which the parent is the sole reader. 2. The parent calls wait(), blocking until the child exits. 3. The child writes to stdout. If it writes more data than the OS pipe buffer can hold, the write() call will block. This is a deadlock; the parent is waiting for the child to exit, and the child is waiting for the parent to call read(). So we might try instead: start_command(&cmd); strbuf_read(&buf, cmd.out, 0); finish_command(&cmd); But that is not quite right either. We are examining cmd.out and running finish_command whether start_command succeeded or not, which is wrong. Moreover, these snippets do not do any error handling. If our read() fails, we must make sure to still call finish_command (to reap the child process). And both snippets failed to close the cmd.out descriptor, which they must do (provided start_command succeeded). Let's introduce a run-command helper that can make this a bit simpler for callers to get right. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- run-command.c | 16 ++++++++++++++++ run-command.h | 13 +++++++++++++ 2 files changed, 29 insertions(+) diff --git a/run-command.c b/run-command.c index 0b432cc97..65ecbe31d 100644 --- a/run-command.c +++ b/run-command.c @@ -833,3 +833,19 @@ int run_hook_le(const char *const *env, const char *name, ...) return ret; } + +int capture_command(struct child_process *cmd, struct strbuf *buf, size_t hint) +{ + cmd->out = -1; + if (start_command(cmd) < 0) + return -1; + + if (strbuf_read(buf, cmd->out, hint) < 0) { + close(cmd->out); + finish_command(cmd); /* throw away exit code */ + return -1; + } + + close(cmd->out); + return finish_command(cmd); +} diff --git a/run-command.h b/run-command.h index d6868dc8c..263b9662a 100644 --- a/run-command.h +++ b/run-command.h @@ -71,6 +71,19 @@ int run_command_v_opt(const char **argv, int opt); */ int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env); +/** + * Execute the given command, capturing its stdout in the given strbuf. + * Returns -1 if starting the command fails or reading fails, and otherwise + * returns the exit code of the command. The output collected in the + * buffer is kept even if the command returns a non-zero exit. The hint field + * gives a starting size for the strbuf allocation. + * + * The fields of "cmd" should be set up as they would for a normal run_command + * invocation. But note that there is no need to set cmd->out; the function + * sets it up for the caller. + */ +int capture_command(struct child_process *cmd, struct strbuf *buf, size_t hint); + /* * The purpose of the following functions is to feed a pipe by running * a function asynchronously and providing output that the caller reads. From 5c950e9bf098b17bb37e06f7c9f50d24e9d2904f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 22 Mar 2015 23:53:52 -0400 Subject: [PATCH 4/7] wt-status: use capture_command When we spawn "git submodule status" to read its output, we use run_command() followed by strbuf_read() read from the pipe. This can deadlock if the subprocess output is larger than the system pipe buffer. Furthermore, if start_command() fails, we'll try to read from a bogus descriptor (probably "-1" or a descriptor we just closed, but it is a bad idea for us to make assumptions about how start_command implements its error handling). And if start_command succeeds, we leak the file descriptor for the pipe to the child. All of these can be solved by using the capture_command helper. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- wt-status.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/wt-status.c b/wt-status.c index 05b69dc4d..ef232a74b 100644 --- a/wt-status.c +++ b/wt-status.c @@ -744,11 +744,8 @@ static void wt_status_print_submodule_summary(struct wt_status *s, int uncommitt sm_summary.git_cmd = 1; sm_summary.no_stdin = 1; - sm_summary.out = -1; - run_command(&sm_summary); - - strbuf_read(&cmd_stdout, sm_summary.out, 1024); + capture_command(&sm_summary, &cmd_stdout, 1024); /* prepend header, only if there's an actual output */ if (cmd_stdout.len) { From 1d4974c9bcbe3c9c0611cb056730d49c6b0b6b5e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 22 Mar 2015 23:53:56 -0400 Subject: [PATCH 5/7] submodule: use capture_command In is_submodule_commit_present, we call run_command followed by a pipe read, which is prone to deadlock. It is unlikely to happen in this case, as rev-list should never produce more than a single line of output, but it does not hurt to avoid an anti-pattern (and using the helper simplifies the setup and cleanup). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- submodule.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/submodule.c b/submodule.c index d37d400b2..c0e6c81fc 100644 --- a/submodule.c +++ b/submodule.c @@ -576,12 +576,10 @@ static int is_submodule_commit_present(const char *path, unsigned char sha1[20]) cp.env = local_repo_env; cp.git_cmd = 1; cp.no_stdin = 1; - cp.out = -1; cp.dir = path; - if (!run_command(&cp) && !strbuf_read(&buf, cp.out, 1024)) + if (!capture_command(&cp, &buf, 1024) && !buf.len) is_present = 1; - close(cp.out); strbuf_release(&buf); } return is_present; From c5eadcaab1d3969a4fbc009c65be622271edddd9 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 22 Mar 2015 23:54:00 -0400 Subject: [PATCH 6/7] trailer: use capture_command When we read from a trailer.*.command sub-program, the current code uses run_command followed by a pipe read, which can result in deadlock (though in practice you would have to have a large trailer for this to be a problem). The current code also leaks the file descriptor for the pipe to the sub-command. Instead, let's use capture_command, which makes this simpler (and we can get rid of our custom helper). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- trailer.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/trailer.c b/trailer.c index 05b3859b4..4b14a567b 100644 --- a/trailer.c +++ b/trailer.c @@ -214,16 +214,6 @@ static struct trailer_item *remove_first(struct trailer_item **first) return item; } -static int read_from_command(struct child_process *cp, struct strbuf *buf) -{ - if (run_command(cp)) - return error("running trailer command '%s' failed", cp->argv[0]); - if (strbuf_read(buf, cp->out, 1024) < 1) - return error("reading from trailer command '%s' failed", cp->argv[0]); - strbuf_trim(buf); - return 0; -} - static const char *apply_command(const char *command, const char *arg) { struct strbuf cmd = STRBUF_INIT; @@ -240,14 +230,16 @@ static const char *apply_command(const char *command, const char *arg) cp.argv = argv; cp.env = local_repo_env; cp.no_stdin = 1; - cp.out = -1; cp.use_shell = 1; - if (read_from_command(&cp, &buf)) { + if (capture_command(&cp, &buf, 1024)) { + error("running trailer command '%s' failed", cmd.buf); strbuf_release(&buf); result = xstrdup(""); - } else + } else { + strbuf_trim(&buf); result = strbuf_detach(&buf, NULL); + } strbuf_release(&cmd); return result; From c29b3962af3df80a43fab4ead4875bd2ca275e4c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 22 Mar 2015 23:54:05 -0400 Subject: [PATCH 7/7] run-command: forbid using run_command with piped output Because run_command both spawns and wait()s for the command before returning control to the caller, any reads from the pipes we open must necessarily happen after wait() returns. This can lead to deadlock, as the child process may block on writing to us while we are blocked waiting for it to exit. Worse, it only happens when the child fills the pipe buffer, which means that the problem may come and go depending on the platform and the size of the output produced by the child. Let's detect and flag this dangerous construct so that we can catch potential bugs early in the test suite rather than having them happen in the field. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- run-command.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/run-command.c b/run-command.c index 65ecbe31d..4184e8d9f 100644 --- a/run-command.c +++ b/run-command.c @@ -561,7 +561,12 @@ int finish_command(struct child_process *cmd) int run_command(struct child_process *cmd) { - int code = start_command(cmd); + int code; + + if (cmd->out < 0 || cmd->err < 0) + die("BUG: run_command with a pipe can cause deadlock"); + + code = start_command(cmd); if (code) return code; return finish_command(cmd);