Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge branch 'sb/submodule-parallel-update'
A major part of "git submodule update" has been ported to C to take
advantage of the recently added framework to run download tasks in
parallel.

* sb/submodule-parallel-update:
  clone: allow an explicit argument for parallel submodule clones
  submodule update: expose parallelism to the user
  submodule helper: remove double 'fatal: ' prefix
  git submodule update: have a dedicated helper for cloning
  run_processes_parallel: rename parameters for the callbacks
  run_processes_parallel: treat output of children as byte array
  submodule update: direct error message to stderr
  fetching submodules: respect `submodule.fetchJobs` config option
  submodule-config: drop check against NULL
  submodule-config: keep update strategy around
  • Loading branch information
Junio C Hamano committed Apr 6, 2016
2 parents 77e0751 + 72290d6 commit bdebbeb
Show file tree
Hide file tree
Showing 18 changed files with 445 additions and 62 deletions.
6 changes: 6 additions & 0 deletions Documentation/config.txt
Expand Up @@ -2738,6 +2738,12 @@ submodule.<name>.ignore::
"--ignore-submodules" option. The 'git submodule' commands are not
affected by this setting.

submodule.fetchJobs::
Specifies how many submodules are fetched/cloned at the same time.
A positive integer allows up to that number of submodules to be fetched
in parallel. A value of 0 will give some reasonable default.
If unset, it defaults to 1.

tag.sort::
This variable controls the sort ordering of tags when displayed by
linkgit:git-tag[1]. Without the "--sort=<value>" option provided, the
Expand Down
6 changes: 5 additions & 1 deletion Documentation/git-clone.txt
Expand Up @@ -14,7 +14,7 @@ SYNOPSIS
[-o <name>] [-b <name>] [-u <upload-pack>] [--reference <repository>]
[--dissociate] [--separate-git-dir <git dir>]
[--depth <depth>] [--[no-]single-branch]
[--recursive | --recurse-submodules] [--] <repository>
[--recursive | --recurse-submodules] [--jobs <n>] [--] <repository>
[<directory>]

DESCRIPTION
Expand Down Expand Up @@ -219,6 +219,10 @@ objects from the source repository into a pack in the cloned repository.
The result is that the Git repository can be separated from the
working tree.

-j <n>::
--jobs <n>::
The number of submodules fetched at the same time.
Defaults to the `submodule.fetchJobs` option.

<repository>::
The (possibly remote) repository to clone from. See the
Expand Down
7 changes: 6 additions & 1 deletion Documentation/git-submodule.txt
Expand Up @@ -16,7 +16,7 @@ SYNOPSIS
'git submodule' [--quiet] deinit [-f|--force] [--] <path>...
'git submodule' [--quiet] update [--init] [--remote] [-N|--no-fetch]
[-f|--force] [--rebase|--merge] [--reference <repository>]
[--depth <depth>] [--recursive] [--] [<path>...]
[--depth <depth>] [--recursive] [--jobs <n>] [--] [<path>...]
'git submodule' [--quiet] summary [--cached|--files] [(-n|--summary-limit) <n>]
[commit] [--] [<path>...]
'git submodule' [--quiet] foreach [--recursive] <command>
Expand Down Expand Up @@ -377,6 +377,11 @@ for linkgit:git-clone[1]'s `--reference` and `--shared` options carefully.
clone with a history truncated to the specified number of revisions.
See linkgit:git-clone[1]

-j <n>::
--jobs <n>::
This option is only valid for the update command.
Clone new submodules in parallel, using up to <n> jobs.
Defaults to the `submodule.fetchJobs` option.

<path>...::
Paths to submodule(s). When specified this will restrict the command
Expand Down
19 changes: 13 additions & 6 deletions builtin/clone.c
Expand Up @@ -51,6 +51,7 @@ static enum transport_family family;
static struct string_list option_config;
static struct string_list option_reference;
static int option_dissociate;
static int max_jobs = -1;

static struct option builtin_clone_options[] = {
OPT__VERBOSITY(&option_verbosity),
Expand All @@ -73,6 +74,8 @@ static struct option builtin_clone_options[] = {
N_("initialize submodules in the clone")),
OPT_BOOL(0, "recurse-submodules", &option_recursive,
N_("initialize submodules in the clone")),
OPT_INTEGER('j', "jobs", &max_jobs,
N_("number of submodules cloned in parallel")),
OPT_STRING(0, "template", &option_template, N_("template-directory"),
N_("directory from which templates will be used")),
OPT_STRING_LIST(0, "reference", &option_reference, N_("repo"),
Expand Down Expand Up @@ -100,10 +103,6 @@ static struct option builtin_clone_options[] = {
OPT_END()
};

static const char *argv_submodule[] = {
"submodule", "update", "--init", "--recursive", NULL
};

static const char *get_repo_path_1(struct strbuf *path, int *is_bundle)
{
static char *suffix[] = { "/.git", "", ".git/.git", ".git" };
Expand Down Expand Up @@ -732,8 +731,16 @@ static int checkout(void)
err |= run_hook_le(NULL, "post-checkout", sha1_to_hex(null_sha1),
sha1_to_hex(sha1), "1", NULL);

if (!err && option_recursive)
err = run_command_v_opt(argv_submodule, RUN_GIT_CMD);
if (!err && option_recursive) {
struct argv_array args = ARGV_ARRAY_INIT;
argv_array_pushl(&args, "submodule", "update", "--init", "--recursive", NULL);

if (max_jobs != -1)
argv_array_pushf(&args, "--jobs=%d", max_jobs);

err = run_command_v_opt(args.argv, RUN_GIT_CMD);
argv_array_clear(&args);
}

return err;
}
Expand Down
2 changes: 1 addition & 1 deletion builtin/fetch.c
Expand Up @@ -37,7 +37,7 @@ static int prune = -1; /* unspecified */
static int all, append, dry_run, force, keep, multiple, update_head_ok, verbosity;
static int progress = -1, recurse_submodules = RECURSE_SUBMODULES_DEFAULT;
static int tags = TAGS_DEFAULT, unshallow, update_shallow;
static int max_children = 1;
static int max_children = -1;
static enum transport_family family;
static const char *depth;
static const char *upload_pack;
Expand Down
256 changes: 254 additions & 2 deletions builtin/submodule--helper.c
Expand Up @@ -249,6 +249,257 @@ static int module_clone(int argc, const char **argv, const char *prefix)
return 0;
}

/*
 * State shared between update_clone() and the callbacks it hands to
 * run_processes_parallel(); a pointer to this struct is what the
 * callbacks receive as their 'suc_cb' argument.
 */
struct submodule_update_clone {
/* index into 'list', the list of submodules to look into for cloning */
int current;
struct module_list list;
/*
 * Set by update_clone() when pathspecs were given; enables the
 * "not initialized" message for submodules without a configured url.
 */
unsigned warn_if_uninitialized : 1;

/* update parameter passed via commandline */
struct submodule_update_strategy update;

/* configuration parameters which are passed on to the children */
int quiet;
const char *reference;
const char *depth;
/* used when building the path shown to the user in messages */
const char *recursive_prefix;
const char *prefix;

/* Machine-readable status lines to be consumed by git-submodule.sh */
struct string_list projectlines;

/* If we want to stop as fast as possible and return an error */
unsigned quickstop : 1;
};
/*
 * Positional initializer: the values below must stay in the same order
 * as the members of struct submodule_update_clone.
 */
#define SUBMODULE_UPDATE_CLONE_INIT {0, MODULE_LIST_INIT, 0, \
SUBMODULE_UPDATE_STRATEGY_INIT, 0, NULL, NULL, NULL, NULL, \
STRING_LIST_INIT_DUP, 0}

/**
 * Determine whether 'ce' needs to be cloned. If so, prepare the 'child' to
 * run the clone. Returns 1 if 'ce' needs to be cloned, 0 otherwise.
 *
 * Messages meant for the user (skipped or uninitialized submodules) are
 * appended to 'out'. For every submodule that has a configured url, one
 * status line is appended to suc->projectlines, whether or not a clone is
 * required.
 */
static int prepare_to_clone_next_submodule(const struct cache_entry *ce,
					   struct child_process *child,
					   struct submodule_update_clone *suc,
					   struct strbuf *out)
{
	const struct submodule *sub = NULL;
	struct strbuf displaypath_sb = STRBUF_INIT;
	struct strbuf sb = STRBUF_INIT;
	const char *displaypath = NULL;
	char *url = NULL;
	int needs_cloning = 0;

	if (ce_stage(ce)) {
		if (suc->recursive_prefix)
			strbuf_addf(&sb, "%s/%s", suc->recursive_prefix, ce->name);
		else
			strbuf_addf(&sb, "%s", ce->name);
		strbuf_addf(out, _("Skipping unmerged submodule %s"), sb.buf);
		strbuf_addch(out, '\n');
		goto cleanup;
	}

	/* May be NULL when nothing is known about this path; checked below. */
	sub = submodule_from_path(null_sha1, ce->name);

	if (suc->recursive_prefix)
		displaypath = relative_path(suc->recursive_prefix,
					    ce->name, &displaypath_sb);
	else
		displaypath = ce->name;

	if (suc->update.type == SM_UPDATE_NONE
	    || (suc->update.type == SM_UPDATE_UNSPECIFIED
		&& sub && sub->update_strategy.type == SM_UPDATE_NONE)) {
		strbuf_addf(out, _("Skipping submodule '%s'"), displaypath);
		strbuf_addch(out, '\n');
		goto cleanup;
	}

	/*
	 * Looking up the url in .git/config.
	 * We must not fall back to .gitmodules as we only want
	 * to process configured submodules.
	 *
	 * Without a submodule entry there is no name to build the key
	 * from; 'url' then stays NULL and the path is handled like any
	 * other uninitialized submodule instead of dereferencing NULL.
	 */
	if (sub) {
		strbuf_reset(&sb);
		strbuf_addf(&sb, "submodule.%s.url", sub->name);
		git_config_get_string(sb.buf, &url);
	}
	if (!url) {
		/*
		 * Only mention uninitialized submodules when their
		 * paths have been specified
		 */
		if (suc->warn_if_uninitialized) {
			strbuf_addf(out,
				    _("Submodule path '%s' not initialized"),
				    displaypath);
			strbuf_addch(out, '\n');
			strbuf_addstr(out,
				      _("Maybe you want to use 'update --init'?"));
			strbuf_addch(out, '\n');
		}
		goto cleanup;
	}

	/* A submodule without <path>/.git has not been cloned yet. */
	strbuf_reset(&sb);
	strbuf_addf(&sb, "%s/.git", ce->name);
	needs_cloning = !file_exists(sb.buf);

	/* Status line: mode, object name, stage, needs-cloning flag, path. */
	strbuf_reset(&sb);
	strbuf_addf(&sb, "%06o %s %d %d\t%s\n", ce->ce_mode,
		    sha1_to_hex(ce->sha1), ce_stage(ce),
		    needs_cloning, ce->name);
	string_list_append(&suc->projectlines, sb.buf);

	if (!needs_cloning)
		goto cleanup;

	child->git_cmd = 1;
	child->no_stdin = 1;
	child->stdout_to_stderr = 1;
	child->err = -1;
	argv_array_push(&child->args, "submodule--helper");
	argv_array_push(&child->args, "clone");
	if (suc->quiet)
		argv_array_push(&child->args, "--quiet");
	if (suc->prefix)
		argv_array_pushl(&child->args, "--prefix", suc->prefix, NULL);
	argv_array_pushl(&child->args, "--path", sub->path, NULL);
	argv_array_pushl(&child->args, "--name", sub->name, NULL);
	argv_array_pushl(&child->args, "--url", url, NULL);
	/*
	 * NOTE(review): 'reference' and 'depth' are forwarded verbatim, with
	 * no option name in front; presumably the caller already hands them
	 * over in "--option=value" form -- confirm against git-submodule.sh.
	 */
	if (suc->reference)
		argv_array_push(&child->args, suc->reference);
	if (suc->depth)
		argv_array_push(&child->args, suc->depth);

cleanup:
	free(url);
	/* strbuf_reset() would only truncate; release frees the buffers. */
	strbuf_release(&displaypath_sb);
	strbuf_release(&sb);

	return needs_cloning;
}

/*
 * Handed to run_processes_parallel() as the "get next task" callback.
 *
 * Walks the submodule list from suc->current onwards until an entry is
 * found for which prepare_to_clone_next_submodule() set up 'child'.
 * Returns 1 in that case and 0 once the list is exhausted; suc->current
 * always ends up just past the last entry that was examined.
 */
static int update_clone_get_next_task(struct child_process *child,
				      struct strbuf *err,
				      void *suc_cb,
				      void **void_task_cb)
{
	struct submodule_update_clone *state = suc_cb;

	while (state->current < state->list.nr) {
		const struct cache_entry *candidate =
			state->list.entries[state->current];
		int prepared = prepare_to_clone_next_submodule(candidate, child,
							       state, err);

		state->current++;
		if (prepared)
			return 1;
	}

	return 0;
}

/*
 * Handed to run_processes_parallel() as the "start failure" callback.
 * Records in the shared state that the run should be abandoned and
 * always returns 1.
 */
static int update_clone_start_failure(struct strbuf *err,
				      void *suc_cb,
				      void *void_task_cb)
{
	((struct submodule_update_clone *)suc_cb)->quickstop = 1;

	return 1;
}

/*
 * Handed to run_processes_parallel() as the "task finished" callback.
 * A zero 'result' leaves the state untouched and returns 0; any other
 * value marks the run for a quick stop and returns 1.
 */
static int update_clone_task_finished(int result,
				      struct strbuf *err,
				      void *suc_cb,
				      void *void_task_cb)
{
	struct submodule_update_clone *state = suc_cb;

	if (result) {
		state->quickstop = 1;
		return 1;
	}

	return 0;
}

/*
 * Implementation of the "update-clone" subcommand.
 *
 * Parses the options, computes the list of submodules matching the
 * remaining arguments and clones the ones that need it via
 * run_processes_parallel(). When no clone failed, the collected status
 * lines are written to stdout.
 *
 * Returns 0 on success and 1 if the module list could not be computed
 * or a clone failed; dies on an invalid --update value.
 */
static int update_clone(int argc, const char **argv, const char *prefix)
{
	const char *update = NULL;
	int max_jobs = -1;
	struct string_list_item *item;
	struct pathspec pathspec;
	struct submodule_update_clone suc = SUBMODULE_UPDATE_CLONE_INIT;

	struct option module_update_clone_options[] = {
		OPT_STRING(0, "prefix", &prefix,
			   N_("path"),
			   N_("path into the working tree")),
		OPT_STRING(0, "recursive-prefix", &suc.recursive_prefix,
			   N_("path"),
			   N_("path into the working tree, across nested "
			      "submodule boundaries")),
		OPT_STRING(0, "update", &update,
			   N_("string"),
			   N_("rebase, merge, checkout or none")),
		OPT_STRING(0, "reference", &suc.reference, N_("repo"),
			   N_("reference repository")),
		OPT_STRING(0, "depth", &suc.depth, "<depth>",
			   N_("Create a shallow clone truncated to the "
			      "specified number of revisions")),
		OPT_INTEGER('j', "jobs", &max_jobs,
			    N_("parallel jobs")),
		OPT__QUIET(&suc.quiet, N_("don't print cloning progress")),
		OPT_END()
	};

	/* The subcommand is registered as "update-clone"; name it that way. */
	const char *const git_submodule_helper_usage[] = {
		N_("git submodule--helper update-clone [--prefix=<path>] [<path>...]"),
		NULL
	};

	/*
	 * NOTE(review): suc.prefix is captured before the options are
	 * parsed, so a --prefix given on the command line changes the local
	 * 'prefix' used below but not the value forwarded to the clone
	 * children -- confirm that this is intended.
	 */
	suc.prefix = prefix;

	argc = parse_options(argc, argv, prefix, module_update_clone_options,
			     git_submodule_helper_usage, 0);

	if (update && parse_submodule_update_strategy(update, &suc.update) < 0)
		die(_("bad value for update parameter"));

	if (module_list_compute(argc, argv, prefix, &pathspec, &suc.list) < 0)
		return 1;

	/* Only explicitly named paths get the "not initialized" message. */
	if (pathspec.nr)
		suc.warn_if_uninitialized = 1;

	/* Overlay the parsed .gitmodules file with .git/config */
	gitmodules_config();
	git_config(submodule_config, NULL);

	/* No (or a negative) --jobs value: use parallel_submodules(). */
	if (max_jobs < 0)
		max_jobs = parallel_submodules();

	run_processes_parallel(max_jobs,
			       update_clone_get_next_task,
			       update_clone_start_failure,
			       update_clone_task_finished,
			       &suc);

	/*
	 * We saved the output and put it out all at once now.
	 * That means:
	 * - the listener does not have to interleave their (checkout)
	 *   work with our fetching.  The writes involved in a
	 *   checkout involve more straightforward sequential I/O.
	 * - the listener can avoid doing any work if fetching failed.
	 */
	if (suc.quickstop)
		return 1;

	for_each_string_list_item(item, &suc.projectlines)
		utf8_fprintf(stdout, "%s", item->string);

	return 0;
}

struct cmd_struct {
const char *cmd;
int (*fn)(int, const char **, const char *);
Expand All @@ -258,19 +509,20 @@ static struct cmd_struct commands[] = {
{"list", module_list},
{"name", module_name},
{"clone", module_clone},
{"update-clone", update_clone}
};

int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
{
int i;
if (argc < 2)
die(_("fatal: submodule--helper subcommand must be "
die(_("submodule--helper subcommand must be "
"called with a subcommand"));

for (i = 0; i < ARRAY_SIZE(commands); i++)
if (!strcmp(argv[1], commands[i].cmd))
return commands[i].fn(argc - 1, argv + 1, prefix);

die(_("fatal: '%s' is not a valid submodule--helper "
die(_("'%s' is not a valid submodule--helper "
"subcommand"), argv[1]);
}

0 comments on commit bdebbeb

Please sign in to comment.