Skip to content

Commit

Permalink
Merge branch 'tb/ls-files-eol'
Browse files Browse the repository at this point in the history
"git ls-files" learned a new "--eol" option to help diagnose
end-of-line problems.

* tb/ls-files-eol:
  ls-files: add eol diagnostics
  • Loading branch information
Junio C Hamano committed Feb 3, 2016
2 parents 1cb3ed3 + a7630bd commit 05f1539
Show file tree
Hide file tree
Showing 5 changed files with 237 additions and 49 deletions.
22 changes: 22 additions & 0 deletions Documentation/git-ls-files.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ SYNOPSIS
'git ls-files' [-z] [-t] [-v]
(--[cached|deleted|others|ignored|stage|unmerged|killed|modified])*
(-[c|d|o|i|s|u|k|m])*
[--eol]
[-x <pattern>|--exclude=<pattern>]
[-X <file>|--exclude-from=<file>]
[--exclude-per-directory=<file>]
Expand Down Expand Up @@ -147,6 +148,24 @@ a space) at the start of each line:
possible for manual inspection; the exact format may change at
any time.

--eol::
Show <eolinfo> and <eolattr> of files.
<eolinfo> is the file content identification used by Git when
the "text" attribute is "auto" (or not set and core.autocrlf is not false).
<eolinfo> is either "-text", "none", "lf", "crlf", "mixed" or "".
+
"" means the file is not a regular file, it is not in the index or
not accessible in the working tree.
+
<eolattr> is the attribute that is used when checking out or committing,
it is either "", "-text", "text", "text=auto", "text eol=lf", "text eol=crlf".
Note: Currently Git does not support "text=auto eol=lf" or "text=auto eol=crlf";
this may change in the future.
+
Both the <eolinfo> in the index ("i/<eolinfo>")
and in the working tree ("w/<eolinfo>") are shown for regular files,
followed by the ("attr/<eolattr>").

\--::
Do not interpret any more arguments as options.

Expand All @@ -161,6 +180,9 @@ which case it outputs:

[<tag> ]<mode> <object> <stage> <file>

'git ls-files --eol' will show
i/<eolinfo><SPACES>w/<eolinfo><SPACES>attr/<eolattr><SPACE*><TAB><file>

'git ls-files --unmerged' and 'git ls-files --stage' can be used to examine
detailed information on unmerged paths.

Expand Down
21 changes: 21 additions & 0 deletions builtin/ls-files.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ static int show_killed;
static int show_valid_bit;
static int line_terminator = '\n';
static int debug_mode;
static int show_eol;

static const char *prefix;
static int max_prefix_len;
Expand All @@ -47,6 +48,23 @@ static const char *tag_modified = "";
static const char *tag_skip_worktree = "";
static const char *tag_resolve_undo = "";

/*
 * Emit the end-of-line diagnostics column for one path, but only when
 * show_eol is set; otherwise nothing is printed.
 *
 * ce   - index entry for the path, or NULL when the caller has none
 * path - path used for the attribute lookup and the working-tree lstat()
 *
 * The output has the form "i/<eolinfo> w/<eolinfo> attr/<eolattr>\t".
 * The index <eolinfo> stays "" unless ce is a regular-file entry; the
 * working-tree <eolinfo> stays "" unless lstat() succeeds on a regular file.
 */
static void write_eolinfo(const struct cache_entry *ce, const char *path)
{
	struct stat st;
	const char *index_eol = "";
	const char *worktree_eol = "";
	const char *attr_txt;

	if (!show_eol)
		return;

	attr_txt = get_convert_attr_ascii(path);
	if (ce && S_ISREG(ce->ce_mode))
		index_eol = get_cached_convert_stats_ascii(ce->name);
	if (lstat(path, &st) == 0 && S_ISREG(st.st_mode))
		worktree_eol = get_wt_convert_stats_ascii(path);
	printf("i/%-5s w/%-5s attr/%-17s\t", index_eol, worktree_eol, attr_txt);
}

static void write_name(const char *name)
{
/*
Expand All @@ -68,6 +86,7 @@ static void show_dir_entry(const char *tag, struct dir_entry *ent)
return;

fputs(tag, stdout);
write_eolinfo(NULL, ent->name);
write_name(ent->name);
}

Expand Down Expand Up @@ -170,6 +189,7 @@ static void show_ce_entry(const char *tag, const struct cache_entry *ce)
find_unique_abbrev(ce->sha1,abbrev),
ce_stage(ce));
}
write_eolinfo(ce, ce->name);
write_name(ce->name);
if (debug_mode) {
const struct stat_data *sd = &ce->ce_stat_data;
Expand Down Expand Up @@ -433,6 +453,7 @@ int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix)
OPT_BIT(0, "directory", &dir.flags,
N_("show 'other' directories' names only"),
DIR_SHOW_OTHER_DIRECTORIES),
OPT_BOOL(0, "eol", &show_eol, N_("show line endings of files")),
OPT_NEGBIT(0, "empty-directory", &dir.flags,
N_("don't show empty directories"),
DIR_HIDE_EMPTY_DIRECTORIES),
Expand Down
119 changes: 91 additions & 28 deletions convert.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
* translation when the "text" attribute or "auto_crlf" option is set.
*/

/*
 * Stat bits: When BIN is set, the txt bits are unset.
 * TXT_LF and TXT_CRLF may be combined; gather_convert_stats_ascii()
 * reports both being set as "mixed" and none being set as "none".
 */
#define CONVERT_STAT_BITS_TXT_LF 0x1
#define CONVERT_STAT_BITS_TXT_CRLF 0x2
#define CONVERT_STAT_BITS_BIN 0x4

enum crlf_action {
CRLF_GUESS = -1,
CRLF_BINARY = 0,
Expand Down Expand Up @@ -75,26 +80,75 @@ static void gather_stats(const char *buf, unsigned long size, struct text_stat *

/*
* The same heuristics as diff.c::mmfile_is_binary()
* We treat files with bare CR as binary
*/
static int is_binary(unsigned long size, struct text_stat *stats)
static int convert_is_binary(unsigned long size, const struct text_stat *stats)
{
	/* A CR count that differs from the CRLF count means bare CRs exist. */
	const int bare_cr = stats->cr != stats->crlf;
	/* Non-printable bytes exceed 1/128th of the printable ones. */
	const int too_many_nonprintable =
		(stats->printable >> 7) < stats->nonprintable;

	/*
	 * Other heuristics? Average line length might be relevant,
	 * as might LF vs CR vs CRLF counts..
	 *
	 * NOTE! A low ratio of CRLF to LF can be perfectly normal
	 * (a LF-only file edited with an editor that adds CRLF only
	 * to the lines it touches). CR-only files, however, are
	 * probably not something we want to support.
	 */
	return bare_cr || stats->nul || too_many_nonprintable;
}

/*
 * Classify a buffer into CONVERT_STAT_BITS_* flags.
 *
 * Returns 0 for a NULL or empty buffer, CONVERT_STAT_BITS_BIN when
 * convert_is_binary() says so, and otherwise a combination of the
 * TXT_LF / TXT_CRLF bits derived from the counters that gather_stats()
 * fills in (0 when no LF was counted).
 */
static unsigned int gather_convert_stats(const char *data, unsigned long size)
{
	struct text_stat stats;

	if (!data || !size)
		return 0;

	gather_stats(data, size, &stats);
	if (convert_is_binary(size, &stats))
		return CONVERT_STAT_BITS_BIN;

	/* Without any counted LF there is nothing to report. */
	if (!stats.lf)
		return 0;
	if (!stats.crlf)
		return CONVERT_STAT_BITS_TXT_LF;
	/* Equal counts: presumably every LF belongs to a CRLF pair. */
	if (stats.crlf == stats.lf)
		return CONVERT_STAT_BITS_TXT_CRLF;
	return CONVERT_STAT_BITS_TXT_CRLF | CONVERT_STAT_BITS_TXT_LF;
}

/*
 * Translate the CONVERT_STAT_BITS_* classification of a buffer into the
 * <eolinfo> string: "-text", "lf", "crlf", "mixed" or "none".
 * The returned pointer refers to a string literal and must not be freed.
 */
static const char *gather_convert_stats_ascii(const char *data, unsigned long size)
{
	const unsigned int bits = gather_convert_stats(data, size);

	/* The binary flag wins regardless of any other bit. */
	if (bits & CONVERT_STAT_BITS_BIN)
		return "-text";
	if (bits == CONVERT_STAT_BITS_TXT_LF)
		return "lf";
	if (bits == CONVERT_STAT_BITS_TXT_CRLF)
		return "crlf";
	if (bits == (CONVERT_STAT_BITS_TXT_LF | CONVERT_STAT_BITS_TXT_CRLF))
		return "mixed";
	return "none";
}

/*
 * Return the <eolinfo> string for the blob that the index holds for path.
 *
 * The blob is read with read_blob_data_from_cache(), classified, and
 * freed again before returning. The result is one of the string literals
 * produced by gather_convert_stats_ascii() ("none" when no data could be
 * read), so the caller must not free it.
 */
const char *get_cached_convert_stats_ascii(const char *path)
{
	const char *ret;
	/*
	 * Start from 0 so that no indeterminate value is passed along in
	 * case the reader returns NULL without storing a size.
	 */
	unsigned long sz = 0;
	void *data = read_blob_data_from_cache(path, &sz);

	ret = gather_convert_stats_ascii(data, sz);
	free(data);
	return ret;
}

const char *get_wt_convert_stats_ascii(const char *path)
{
const char *ret = "";
struct strbuf sb = STRBUF_INIT;
if (strbuf_read_file(&sb, path, 0) >= 0)
ret = gather_convert_stats_ascii(sb.buf, sb.len);
strbuf_release(&sb);
return ret;
}

static enum eol output_eol(enum crlf_action crlf_action)
{
switch (crlf_action) {
Expand Down Expand Up @@ -187,18 +241,7 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
gather_stats(src, len, &stats);

if (crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS) {
/*
* We're currently not going to even try to convert stuff
* that has bare CR characters. Does anybody do that crazy
* stuff?
*/
if (stats.cr != stats.crlf)
return 0;

/*
* And add some heuristics for binary vs text, of course...
*/
if (is_binary(len, &stats))
if (convert_is_binary(len, &stats))
return 0;

if (crlf_action == CRLF_GUESS) {
Expand Down Expand Up @@ -277,11 +320,7 @@ static int crlf_to_worktree(const char *path, const char *src, size_t len,
return 0;
}

/* If we have any bare CR characters, we're not going to touch it */
if (stats.cr != stats.crlf)
return 0;

if (is_binary(len, &stats))
if (convert_is_binary(len, &stats))
return 0;
}

Expand Down Expand Up @@ -777,6 +816,30 @@ int would_convert_to_git_filter_fd(const char *path)
return apply_filter(path, NULL, 0, -1, NULL, ca.drv->clean);
}

/*
 * Return the <eolattr> string describing the conversion attributes that
 * apply to path, as shown by "git ls-files --eol".
 *
 * The attributes are looked up with convert_attrs() and combined with
 * input_crlf_action(); the resulting action is mapped to one of
 * "", "-text", "text", "text=auto", "text eol=lf" or "text eol=crlf".
 * The result is a string literal; the caller must not free it.
 */
const char *get_convert_attr_ascii(const char *path)
{
	struct conv_attrs ca;
	enum crlf_action crlf_action;

	convert_attrs(&ca, path);
	crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr);
	switch (crlf_action) {
	case CRLF_GUESS:
		return "";
	case CRLF_BINARY:
		return "-text";
	case CRLF_TEXT:
		return "text";
	case CRLF_INPUT:
		return "text eol=lf";
	case CRLF_CRLF:
		/*
		 * Not "text=auto eol=crlf": that combination is not
		 * supported, and this must mirror the eol=lf case above.
		 */
		return "text eol=crlf";
	case CRLF_AUTO:
		return "text=auto";
	}
	/* Reached only for a value outside the enumerators handled above. */
	return "";
}

int convert_to_git(const char *path, const char *src, size_t len,
struct strbuf *dst, enum safe_crlf checksafe)
{
Expand Down
3 changes: 3 additions & 0 deletions convert.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ enum eol {
};

extern enum eol core_eol;
extern const char *get_cached_convert_stats_ascii(const char *path);
extern const char *get_wt_convert_stats_ascii(const char *path);
extern const char *get_convert_attr_ascii(const char *path);

/* returns 1 if *dst was used */
extern int convert_to_git(const char *path, const char *src, size_t len,
Expand Down
Loading

0 comments on commit 05f1539

Please sign in to comment.