diff --git a/Documentation/git-ls-files.txt b/Documentation/git-ls-files.txt index e26f01fb1..0e08f567a 100644 --- a/Documentation/git-ls-files.txt +++ b/Documentation/git-ls-files.txt @@ -12,6 +12,7 @@ SYNOPSIS 'git ls-files' [-z] [-t] [-v] (--[cached|deleted|others|ignored|stage|unmerged|killed|modified])* (-[c|d|o|i|s|u|k|m])* + [--eol] [-x |--exclude=] [-X |--exclude-from=] [--exclude-per-directory=] @@ -147,6 +148,24 @@ a space) at the start of each line: possible for manual inspection; the exact format may change at any time. +--eol:: + Show and of files. + is the file content identification used by Git when + the "text" attribute is "auto" (or not set and core.autocrlf is not false). + is either "-text", "none", "lf", "crlf", "mixed" or "". ++ +"" means the file is not a regular file, it is not in the index or +not accessable in the working tree. ++ + is the attribute that is used when checking out or committing, +it is either "", "-text", "text", "text=auto", "text eol=lf", "text eol=crlf". +Note: Currently Git does not support "text=auto eol=lf" or "text=auto eol=crlf", +that may change in the future. ++ +Both the in the index ("i/") +and in the working tree ("w/") are shown for regular files, +followed by the ("attr/"). + \--:: Do not interpret any more arguments as options. @@ -161,6 +180,9 @@ which case it outputs: [ ] +'git ls-files --eol' will show + i/w/attr/ + 'git ls-files --unmerged' and 'git ls-files --stage' can be used to examine detailed information on unmerged paths. diff --git a/builtin/ls-files.c b/builtin/ls-files.c index b6a7cb0c7..dedf02dc7 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -27,6 +27,7 @@ static int show_killed; static int show_valid_bit; static int line_terminator = '\n'; static int debug_mode; +static int show_eol; static const char *prefix; static int max_prefix_len; @@ -47,6 +48,23 @@ static const char *tag_modified = ""; static const char *tag_skip_worktree = ""; static const char *tag_resolve_undo = ""; +static void write_eolinfo(const struct cache_entry *ce, const char *path) +{ + if (!show_eol) + return; + else { + struct stat st; + const char *i_txt = ""; + const char *w_txt = ""; + const char *a_txt = get_convert_attr_ascii(path); + if (ce && S_ISREG(ce->ce_mode)) + i_txt = get_cached_convert_stats_ascii(ce->name); + if (!lstat(path, &st) && S_ISREG(st.st_mode)) + w_txt = get_wt_convert_stats_ascii(path); + printf("i/%-5s w/%-5s attr/%-17s\t", i_txt, w_txt, a_txt); + } +} + static void write_name(const char *name) { /* @@ -68,6 +86,7 @@ static void show_dir_entry(const char *tag, struct dir_entry *ent) return; fputs(tag, stdout); + write_eolinfo(NULL, ent->name); write_name(ent->name); } @@ -170,6 +189,7 @@ static void show_ce_entry(const char *tag, const struct cache_entry *ce) find_unique_abbrev(ce->sha1,abbrev), ce_stage(ce)); } + write_eolinfo(ce, ce->name); write_name(ce->name); if (debug_mode) { const struct stat_data *sd = &ce->ce_stat_data; @@ -433,6 +453,7 @@ int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix) OPT_BIT(0, "directory", &dir.flags, N_("show 'other' directories' names only"), DIR_SHOW_OTHER_DIRECTORIES), + OPT_BOOL(0, "eol", &show_eol, N_("show line endings of files")), OPT_NEGBIT(0, "empty-directory", &dir.flags, N_("don't show empty directories"), DIR_HIDE_EMPTY_DIRECTORIES), diff --git a/convert.c b/convert.c index 814e81443..4bb4ec1d8 100644 --- a/convert.c +++ b/convert.c @@ -13,6 +13,11 @@ * translation when the "text" attribute or "auto_crlf" option is set. */ +/* Stat bits: When BIN is set, the txt bits are unset */ +#define CONVERT_STAT_BITS_TXT_LF 0x1 +#define CONVERT_STAT_BITS_TXT_CRLF 0x2 +#define CONVERT_STAT_BITS_BIN 0x4 + enum crlf_action { CRLF_GUESS = -1, CRLF_BINARY = 0, @@ -75,26 +80,75 @@ static void gather_stats(const char *buf, unsigned long size, struct text_stat * /* * The same heuristics as diff.c::mmfile_is_binary() + * We treat files with bare CR as binary */ -static int is_binary(unsigned long size, struct text_stat *stats) +static int convert_is_binary(unsigned long size, const struct text_stat *stats) { - + if (stats->cr != stats->crlf) + return 1; if (stats->nul) return 1; if ((stats->printable >> 7) < stats->nonprintable) return 1; - /* - * Other heuristics? Average line length might be relevant, - * as might LF vs CR vs CRLF counts.. - * - * NOTE! It might be normal to have a low ratio of CRLF to LF - * (somebody starts with a LF-only file and edits it with an editor - * that adds CRLF only to lines that are added..). But do we - * want to support CR-only? Probably not. - */ return 0; } +static unsigned int gather_convert_stats(const char *data, unsigned long size) +{ + struct text_stat stats; + if (!data || !size) + return 0; + gather_stats(data, size, &stats); + if (convert_is_binary(size, &stats)) + return CONVERT_STAT_BITS_BIN; + else if (stats.crlf && stats.crlf == stats.lf) + return CONVERT_STAT_BITS_TXT_CRLF; + else if (stats.crlf && stats.lf) + return CONVERT_STAT_BITS_TXT_CRLF | CONVERT_STAT_BITS_TXT_LF; + else if (stats.lf) + return CONVERT_STAT_BITS_TXT_LF; + else + return 0; +} + +static const char *gather_convert_stats_ascii(const char *data, unsigned long size) +{ + unsigned int convert_stats = gather_convert_stats(data, size); + + if (convert_stats & CONVERT_STAT_BITS_BIN) + return "-text"; + switch (convert_stats) { + case CONVERT_STAT_BITS_TXT_LF: + return "lf"; + case CONVERT_STAT_BITS_TXT_CRLF: + return "crlf"; + case CONVERT_STAT_BITS_TXT_LF | CONVERT_STAT_BITS_TXT_CRLF: + return "mixed"; + default: + return "none"; + } +} + +const char *get_cached_convert_stats_ascii(const char *path) +{ + const char *ret; + unsigned long sz; + void *data = read_blob_data_from_cache(path, &sz); + ret = gather_convert_stats_ascii(data, sz); + free(data); + return ret; +} + +const char *get_wt_convert_stats_ascii(const char *path) +{ + const char *ret = ""; + struct strbuf sb = STRBUF_INIT; + if (strbuf_read_file(&sb, path, 0) >= 0) + ret = gather_convert_stats_ascii(sb.buf, sb.len); + strbuf_release(&sb); + return ret; +} + static enum eol output_eol(enum crlf_action crlf_action) { switch (crlf_action) { @@ -187,18 +241,7 @@ static int crlf_to_git(const char *path, const char *src, size_t len, gather_stats(src, len, &stats); if (crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS) { - /* - * We're currently not going to even try to convert stuff - * that has bare CR characters. Does anybody do that crazy - * stuff? - */ - if (stats.cr != stats.crlf) - return 0; - - /* - * And add some heuristics for binary vs text, of course... - */ - if (is_binary(len, &stats)) + if (convert_is_binary(len, &stats)) return 0; if (crlf_action == CRLF_GUESS) { @@ -277,11 +320,7 @@ static int crlf_to_worktree(const char *path, const char *src, size_t len, return 0; } - /* If we have any bare CR characters, we're not going to touch it */ - if (stats.cr != stats.crlf) - return 0; - - if (is_binary(len, &stats)) + if (convert_is_binary(len, &stats)) return 0; } @@ -777,6 +816,30 @@ int would_convert_to_git_filter_fd(const char *path) return apply_filter(path, NULL, 0, -1, NULL, ca.drv->clean); } +const char *get_convert_attr_ascii(const char *path) +{ + struct conv_attrs ca; + enum crlf_action crlf_action; + + convert_attrs(&ca, path); + crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr); + switch (crlf_action) { + case CRLF_GUESS: + return ""; + case CRLF_BINARY: + return "-text"; + case CRLF_TEXT: + return "text"; + case CRLF_INPUT: + return "text eol=lf"; + case CRLF_CRLF: + return "text=auto eol=crlf"; + case CRLF_AUTO: + return "text=auto"; + } + return ""; +} + int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst, enum safe_crlf checksafe) { diff --git a/convert.h b/convert.h index d9d853cd3..ccf436bfb 100644 --- a/convert.h +++ b/convert.h @@ -32,6 +32,9 @@ enum eol { }; extern enum eol core_eol; +extern const char *get_cached_convert_stats_ascii(const char *path); +extern const char *get_wt_convert_stats_ascii(const char *path); +extern const char *get_convert_attr_ascii(const char *path); /* returns 1 if *dst was used */ extern int convert_to_git(const char *path, const char *src, size_t len, diff --git a/t/t0027-auto-crlf.sh b/t/t0027-auto-crlf.sh index b34365150..504e5a02a 100755 --- a/t/t0027-auto-crlf.sh +++ b/t/t0027-auto-crlf.sh @@ -56,21 +56,16 @@ create_gitattributes () { } create_NNO_files () { - lfname=$1 - crlfname=$2 - lfmixcrlf=$3 - lfmixcr=$4 - crlfnul=$5 for crlf in false true input do for attr in "" auto text -text lf crlf do pfx=NNO_${crlf}_attr_${attr} && - cp $lfname ${pfx}_LF.txt && - cp $crlfname ${pfx}_CRLF.txt && - cp $lfmixcrlf ${pfx}_CRLF_mix_LF.txt && - cp $lfmixcr ${pfx}_LF_mix_CR.txt && - cp $crlfnul ${pfx}_CRLF_nul.txt + cp CRLF_mix_LF ${pfx}_LF.txt && + cp CRLF_mix_LF ${pfx}_CRLF.txt && + cp CRLF_mix_LF ${pfx}_CRLF_mix_LF.txt && + cp CRLF_mix_LF ${pfx}_LF_mix_CR.txt && + cp CRLF_mix_LF ${pfx}_CRLF_nul.txt done done } @@ -96,7 +91,7 @@ commit_check_warn () { crlfnul=$7 pfx=crlf_${crlf}_attr_${attr} create_gitattributes "$attr" && - for f in LF CRLF repoMIX LF_mix_CR CRLF_mix_LF LF_nul CRLF_nul + for f in LF CRLF LF_mix_CR CRLF_mix_LF LF_nul CRLF_nul do fname=${pfx}_$f.txt && cp $f $fname && @@ -149,6 +144,27 @@ commit_chk_wrnNNO () { ' } +stats_ascii () { + case "$1" in + LF) + echo lf + ;; + CRLF) + echo crlf + ;; + CRLF_mix_LF) + echo mixed + ;; + LF_mix_CR|CRLF_nul|LF_nul|CRLF_mix_CR) + echo "-text" + ;; + *) + echo error_invalid $1 + ;; + esac + +} + check_files_in_repo () { crlf=$1 attr=$2 @@ -203,35 +219,83 @@ checkout_files () { create_gitattributes $attr && git config core.autocrlf $crlf && pfx=eol_${eol}_crlf_${crlf}_attr_${attr}_ && - src=crlf_false_attr__ && for f in LF CRLF LF_mix_CR CRLF_mix_LF LF_nul do - rm $src$f.txt && + rm crlf_false_attr__$f.txt && if test -z "$eol"; then - git checkout $src$f.txt + git checkout crlf_false_attr__$f.txt else - git -c core.eol=$eol checkout $src$f.txt + git -c core.eol=$eol checkout crlf_false_attr__$f.txt fi done + test_expect_success "ls-files --eol $lfname ${pfx}LF.txt" ' + test_when_finished "rm expect actual" && + sort <<-EOF >expect && + i/crlf w/$(stats_ascii $crlfname) crlf_false_attr__CRLF.txt + i/mixed w/$(stats_ascii $lfmixcrlf) crlf_false_attr__CRLF_mix_LF.txt + i/lf w/$(stats_ascii $lfname) crlf_false_attr__LF.txt + i/-text w/$(stats_ascii $lfmixcr) crlf_false_attr__LF_mix_CR.txt + i/-text w/$(stats_ascii $crlfnul) crlf_false_attr__CRLF_nul.txt + i/-text w/$(stats_ascii $crlfnul) crlf_false_attr__LF_nul.txt + EOF + git ls-files --eol crlf_false_attr__* | + sed -e "s!attr/[^ ]*!!g" -e "s/ / /g" -e "s/ */ /g" | + sort >actual && + test_cmp expect actual + ' test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=LF" " - compare_ws_file $pfx $lfname ${src}LF.txt + compare_ws_file $pfx $lfname crlf_false_attr__LF.txt " test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=CRLF" " - compare_ws_file $pfx $crlfname ${src}CRLF.txt + compare_ws_file $pfx $crlfname crlf_false_attr__CRLF.txt " test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=CRLF_mix_LF" " - compare_ws_file $pfx $lfmixcrlf ${src}CRLF_mix_LF.txt + compare_ws_file $pfx $lfmixcrlf crlf_false_attr__CRLF_mix_LF.txt " test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=LF_mix_CR" " - compare_ws_file $pfx $lfmixcr ${src}LF_mix_CR.txt + compare_ws_file $pfx $lfmixcr crlf_false_attr__LF_mix_CR.txt " test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=LF_nul" " - compare_ws_file $pfx $crlfnul ${src}LF_nul.txt + compare_ws_file $pfx $crlfnul crlf_false_attr__LF_nul.txt " } -####### +# Test control characters +# NUL SOH CR EOF==^Z +test_expect_success 'ls-files --eol -o Text/Binary' ' + test_when_finished "rm expect actual TeBi_*" && + STRT=AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA && + STR=$STRT$STRT$STRT$STRT && + printf "${STR}BBB\001" >TeBi_127_S && + printf "${STR}BBBB\001">TeBi_128_S && + printf "${STR}BBB\032" >TeBi_127_E && + printf "\032${STR}BBB" >TeBi_E_127 && + printf "${STR}BBBB\000">TeBi_128_N && + printf "${STR}BBB\012">TeBi_128_L && + printf "${STR}BBB\015">TeBi_127_C && + printf "${STR}BB\015\012" >TeBi_126_CL && + printf "${STR}BB\015\012\015" >TeBi_126_CLC && + sort <<-\EOF >expect && + i/ w/-text TeBi_127_S + i/ w/none TeBi_128_S + i/ w/none TeBi_127_E + i/ w/-text TeBi_E_127 + i/ w/-text TeBi_128_N + i/ w/lf TeBi_128_L + i/ w/-text TeBi_127_C + i/ w/crlf TeBi_126_CL + i/ w/-text TeBi_126_CLC + EOF + git ls-files --eol -o | + sed -n -e "/TeBi_/{s!attr/[ ]*!!g + s! ! !g + s! *! !g + p + }" | sort >actual && + test_cmp expect actual +' + test_expect_success 'setup master' ' echo >.gitattributes && git checkout -b master && @@ -480,4 +544,19 @@ checkout_files native true "lf" LF CRLF CRLF_mix_LF LF_mix_CR checkout_files native false "crlf" CRLF CRLF CRLF CRLF_mix_CR CRLF_nul checkout_files native true "crlf" CRLF CRLF CRLF CRLF_mix_CR CRLF_nul +# Should be the last test case: remove some files from the worktree +test_expect_success 'ls-files --eol -d -z' ' + rm crlf_false_attr__CRLF.txt crlf_false_attr__CRLF_mix_LF.txt crlf_false_attr__LF.txt .gitattributes && + cat >expect <<-\EOF && + i/crlf w/ crlf_false_attr__CRLF.txt + i/lf w/ .gitattributes + i/lf w/ crlf_false_attr__LF.txt + i/mixed w/ crlf_false_attr__CRLF_mix_LF.txt + EOF + git ls-files --eol -d | + sed -e "s!attr/[^ ]*!!g" -e "s/ / /g" -e "s/ */ /g" | + sort >actual && + test_cmp expect actual +' + test_done