Skip to content
Navigation Menu
Toggle navigation
Sign in
In this repository
All GitHub Enterprise
↵
Jump to
↵
No suggested jump to results
In this repository
All GitHub Enterprise
↵
Jump to
↵
In this organization
All GitHub Enterprise
↵
Jump to
↵
In this repository
All GitHub Enterprise
↵
Jump to
↵
Sign in
Reseting focus
You signed in with another tab or window.
Reload
to refresh your session.
You signed out in another tab or window.
Reload
to refresh your session.
You switched accounts on another tab or window.
Reload
to refresh your session.
Dismiss alert
{{ message }}
git-mirror
/
git
Public
Notifications
You must be signed in to change notification settings
Fork
0
Star
0
Code
Issues
0
Pull requests
0
Actions
Projects
0
Security
Insights
Additional navigation options
Code
Issues
Pull requests
Actions
Projects
Security
Insights
Files
abcb865
Documentation
block-sha1
builtin
compat
contrib
ewah
git-gui
gitk-git
gitweb
mergetools
perl
po
ppc
t
templates
vcs-svn
xdiff
.gitattributes
.gitignore
.mailmap
COPYING
GIT-VERSION-GEN
INSTALL
LGPL-2.1
Makefile
README
RelNotes
abspath.c
aclocal.m4
advice.c
advice.h
alias.c
alloc.c
archive-tar.c
archive-zip.c
archive.c
archive.h
argv-array.c
argv-array.h
attr.c
attr.h
base85.c
bisect.c
bisect.h
blob.c
blob.h
branch.c
branch.h
builtin.h
bulk-checkin.c
bulk-checkin.h
bundle.c
bundle.h
cache-tree.c
cache-tree.h
cache.h
check-builtins.sh
check-racy.c
check_bindir
color.c
color.h
column.c
column.h
combine-diff.c
command-list.txt
commit-slab.h
commit.c
commit.h
config.c
config.mak.in
config.mak.uname
configure.ac
connect.c
connect.h
connected.c
connected.h
convert.c
convert.h
copy.c
credential-cache--daemon.c
credential-cache.c
credential-store.c
credential.c
credential.h
csum-file.c
csum-file.h
ctype.c
daemon.c
date.c
decorate.c
decorate.h
delta.h
diff-delta.c
diff-lib.c
diff-no-index.c
diff.c
diff.h
diffcore-break.c
diffcore-delta.c
diffcore-order.c
diffcore-pickaxe.c
diffcore-rename.c
diffcore.h
dir.c
dir.h
editor.c
entry.c
environment.c
exec_cmd.c
exec_cmd.h
fast-import.c
fetch-pack.c
fetch-pack.h
fmt-merge-msg.h
fsck.c
fsck.h
generate-cmdlist.sh
gettext.c
gettext.h
git-add--interactive.perl
git-am.sh
git-archimport.perl
git-bisect.sh
git-compat-util.h
git-cvsexportcommit.perl
git-cvsimport.perl
git-cvsserver.perl
git-difftool--helper.sh
git-difftool.perl
git-filter-branch.sh
git-instaweb.sh
git-merge-octopus.sh
git-merge-one-file.sh
git-merge-resolve.sh
git-mergetool--lib.sh
git-mergetool.sh
git-p4.py
git-parse-remote.sh
git-pull.sh
git-quiltimport.sh
git-rebase--am.sh
git-rebase--interactive.sh
git-rebase--merge.sh
git-rebase.sh
git-relink.perl
git-remote-testgit.sh
git-request-pull.sh
git-send-email.perl
git-sh-i18n.sh
git-sh-setup.sh
git-stash.sh
git-submodule.sh
git-svn.perl
git-web--browse.sh
git.c
git.rc
git.spec.in
gpg-interface.c
gpg-interface.h
graph.c
graph.h
grep.c
grep.h
hashmap.c
hashmap.h
help.c
help.h
hex.c
http-backend.c
http-fetch.c
http-push.c
http-walker.c
http.c
http.h
ident.c
imap-send.c
khash.h
kwset.c
kwset.h
levenshtein.c
levenshtein.h
line-log.c
line-log.h
line-range.c
line-range.h
list-objects.c
list-objects.h
ll-merge.c
ll-merge.h
lockfile.c
log-tree.c
log-tree.h
mailmap.c
mailmap.h
match-trees.c
merge-blobs.c
merge-blobs.h
merge-recursive.c
merge-recursive.h
merge.c
mergesort.c
mergesort.h
name-hash.c
notes-cache.c
notes-cache.h
notes-merge.c
notes-merge.h
notes-utils.c
notes-utils.h
notes.c
notes.h
object.c
object.h
pack-bitmap-write.c
pack-bitmap.c
pack-bitmap.h
pack-check.c
pack-objects.c
pack-objects.h
pack-revindex.c
pack-revindex.h
pack-write.c
pack.h
pager.c
parse-options-cb.c
parse-options.c
parse-options.h
patch-delta.c
patch-ids.c
patch-ids.h
path.c
pathspec.c
pathspec.h
pkt-line.c
pkt-line.h
preload-index.c
pretty.c
prio-queue.c
prio-queue.h
progress.c
progress.h
prompt.c
prompt.h
quote.c
quote.h
reachable.c
reachable.h
read-cache.c
reflog-walk.c
reflog-walk.h
refs.c
refs.h
remote-curl.c
remote-testsvn.c
remote.c
remote.h
replace_object.c
rerere.c
rerere.h
resolve-undo.c
resolve-undo.h
revision.c
revision.h
run-command.c
run-command.h
send-pack.c
send-pack.h
sequencer.c
sequencer.h
server-info.c
setup.c
sh-i18n--envsubst.c
sha1-array.c
sha1-array.h
sha1-lookup.c
sha1-lookup.h
sha1_file.c
sha1_name.c
shallow.c
shell.c
shortlog.h
show-index.c
sideband.c
sideband.h
sigchain.c
sigchain.h
split-index.c
split-index.h
strbuf.c
strbuf.h
streaming.c
streaming.h
string-list.c
string-list.h
submodule.c
submodule.h
symlinks.c
tag.c
tag.h
tar.h
test-chmtime.c
test-config.c
test-ctype.c
test-date.c
test-delta.c
test-dump-cache-tree.c
test-dump-split-index.c
test-genrandom.c
test-hashmap.c
test-index-version.c
test-line-buffer.c
test-match-trees.c
test-mergesort.c
test-mktemp.c
test-parse-options.c
test-path-utils.c
test-prio-queue.c
test-read-cache.c
test-regex.c
test-revision-walking.c
test-run-command.c
test-scrap-cache-tree.c
test-sha1.c
test-sha1.sh
test-sigchain.c
test-string-list.c
test-subprocess.c
test-svn-fe.c
test-urlmatch-normalization.c
test-wildmatch.c
thread-utils.c
thread-utils.h
trace.c
trace.h
transport-helper.c
transport.c
transport.h
tree-diff.c
tree-walk.c
tree-walk.h
tree.c
tree.h
unicode_width.h
unimplemented.sh
unix-socket.c
unix-socket.h
unpack-trees.c
unpack-trees.h
update_unicode.sh
upload-pack.c
url.c
url.h
urlmatch.c
urlmatch.h
usage.c
userdiff.c
userdiff.h
utf8.c
utf8.h
varint.c
varint.h
version.c
version.h
versioncmp.c
walker.c
walker.h
wildmatch.c
wildmatch.h
wrap-for-bin.sh
wrapper.c
write_or_die.c
ws.c
wt-status.c
wt-status.h
xdiff-interface.c
xdiff-interface.h
zlib.c
Breadcrumbs
git
/
diffcore-delta.c
Blame
Blame
Latest commit
History
History
233 lines (213 loc) · 5.36 KB
Breadcrumbs
git
/
diffcore-delta.c
Top
File metadata and controls
Code
Blame
233 lines (213 loc) · 5.36 KB
Raw
#include "cache.h" #include "diff.h" #include "diffcore.h" /* * Idea here is very simple. * * Almost all data we are interested in are text, but sometimes we have * to deal with binary data. So we cut them into chunks delimited by * LF byte, or 64-byte sequence, whichever comes first, and hash them. * * For those chunks, if the source buffer has more instances of it * than the destination buffer, that means the difference are the * number of bytes not copied from source to destination. If the * counts are the same, everything was copied from source to * destination. If the destination has more, everything was copied, * and destination added more. * * We are doing an approximation so we do not really have to waste * memory by actually storing the sequence. We just hash them into * somewhere around 2^16 hashbuckets and count the occurrences. */ /* Wild guess at the initial hash size */ #define INITIAL_HASH_SIZE 9 /* We leave more room in smaller hash but do not let it * grow to have unused hole too much. */ #define INITIAL_FREE(sz_log2) ((1<<(sz_log2))*(sz_log2-3)/(sz_log2)) /* A prime rather carefully chosen between 2^16..2^17, so that * HASHBASE < INITIAL_FREE(17). We want to keep the maximum hashtable * size under the current 2<<17 maximum, which can hold this many * different values before overflowing to hashtable of size 2<<18. */ #define HASHBASE 107927 struct spanhash { unsigned int hashval; unsigned int cnt; }; struct spanhash_top { int alloc_log2; int free; struct spanhash data[FLEX_ARRAY]; }; static struct spanhash_top *spanhash_rehash(struct spanhash_top *orig) { struct spanhash_top *new; int i; int osz = 1 << orig->alloc_log2; int sz = osz << 1; new = xmalloc(sizeof(*orig) + sizeof(struct spanhash) * sz); new->alloc_log2 = orig->alloc_log2 + 1; new->free = INITIAL_FREE(new->alloc_log2); memset(new->data, 0, sizeof(struct spanhash) * sz); for (i = 0; i < osz; i++) { struct spanhash *o = &(orig->data[i]); int bucket; if (!o->cnt) continue; bucket = o->hashval & (sz - 1); while (1) { struct spanhash *h = &(new->data[bucket++]); if (!h->cnt) { h->hashval = o->hashval; h->cnt = o->cnt; new->free--; break; } if (sz <= bucket) bucket = 0; } } free(orig); return new; } static struct spanhash_top *add_spanhash(struct spanhash_top *top, unsigned int hashval, int cnt) { int bucket, lim; struct spanhash *h; lim = (1 << top->alloc_log2); bucket = hashval & (lim - 1); while (1) { h = &(top->data[bucket++]); if (!h->cnt) { h->hashval = hashval; h->cnt = cnt; top->free--; if (top->free < 0) return spanhash_rehash(top); return top; } if (h->hashval == hashval) { h->cnt += cnt; return top; } if (lim <= bucket) bucket = 0; } } static int spanhash_cmp(const void *a_, const void *b_) { const struct spanhash *a = a_; const struct spanhash *b = b_; /* A count of zero compares at the end.. */ if (!a->cnt) return !b->cnt ? 0 : 1; if (!b->cnt) return -1; return a->hashval < b->hashval ? -1 : a->hashval > b->hashval ? 1 : 0; } static struct spanhash_top *hash_chars(struct diff_filespec *one) { int i, n; unsigned int accum1, accum2, hashval; struct spanhash_top *hash; unsigned char *buf = one->data; unsigned int sz = one->size; int is_text = !diff_filespec_is_binary(one); i = INITIAL_HASH_SIZE; hash = xmalloc(sizeof(*hash) + sizeof(struct spanhash) * (1<<i)); hash->alloc_log2 = i; hash->free = INITIAL_FREE(i); memset(hash->data, 0, sizeof(struct spanhash) * (1<<i)); n = 0; accum1 = accum2 = 0; while (sz) { unsigned int c = *buf++; unsigned int old_1 = accum1; sz--; /* Ignore CR in CRLF sequence if text */ if (is_text && c == '\r' && sz && *buf == '\n') continue; accum1 = (accum1 << 7) ^ (accum2 >> 25); accum2 = (accum2 << 7) ^ (old_1 >> 25); accum1 += c; if (++n < 64 && c != '\n') continue; hashval = (accum1 + accum2 * 0x61) % HASHBASE; hash = add_spanhash(hash, hashval, n); n = 0; accum1 = accum2 = 0; } qsort(hash->data, 1ul << hash->alloc_log2, sizeof(hash->data[0]), spanhash_cmp); return hash; } int diffcore_count_changes(struct diff_filespec *src, struct diff_filespec *dst, void **src_count_p, void **dst_count_p, unsigned long delta_limit, unsigned long *src_copied, unsigned long *literal_added) { struct spanhash *s, *d; struct spanhash_top *src_count, *dst_count; unsigned long sc, la; src_count = dst_count = NULL; if (src_count_p) src_count = *src_count_p; if (!src_count) { src_count = hash_chars(src); if (src_count_p) *src_count_p = src_count; } if (dst_count_p) dst_count = *dst_count_p; if (!dst_count) { dst_count = hash_chars(dst); if (dst_count_p) *dst_count_p = dst_count; } sc = la = 0; s = src_count->data; d = dst_count->data; for (;;) { unsigned dst_cnt, src_cnt; if (!s->cnt) break; /* we checked all in src */ while (d->cnt) { if (d->hashval >= s->hashval) break; la += d->cnt; d++; } src_cnt = s->cnt; dst_cnt = 0; if (d->cnt && d->hashval == s->hashval) { dst_cnt = d->cnt; d++; } if (src_cnt < dst_cnt) { la += dst_cnt - src_cnt; sc += src_cnt; } else sc += dst_cnt; s++; } while (d->cnt) { la += d->cnt; d++; } if (!src_count_p) free(src_count); if (!dst_count_p) free(dst_count); *src_copied = sc; *literal_added = la; return 0; }
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
You can’t perform that action at this time.