Skip to content

Commit

Permalink
diffcore-delta: stop using deltifier for packing.
Browse files Browse the repository at this point in the history
This switches the change estimation logic used by break, rename
and copy detection from delta packing code to a more line
oriented one.  This way, thee performance-density tradeoff by
delta packing code can be made without worrying about breaking
the rename detection.

Signed-off-by: Junio C Hamano <junkio@cox.net>
  • Loading branch information
Junio C Hamano committed Mar 1, 2006
1 parent 6541675 commit e29e114
Showing 1 changed file with 113 additions and 28 deletions.
141 changes: 113 additions & 28 deletions diffcore-delta.c
Original file line number Diff line number Diff line change
@@ -1,43 +1,128 @@
#include "cache.h"
#include "diff.h"
#include "diffcore.h"
#include "delta.h"
#include "count-delta.h"

static int diffcore_count_changes_1(void *src, unsigned long src_size,
void *dst, unsigned long dst_size,
unsigned long delta_limit,
unsigned long *src_copied,
unsigned long *literal_added)
{
void *delta;
unsigned long delta_size;

delta = diff_delta(src, src_size,
dst, dst_size,
&delta_size, delta_limit);
if (!delta)
/* If delta_limit is exceeded, we have too much differences */
return -1;

/* Estimate the edit size by interpreting delta. */
if (count_delta(delta, delta_size, src_copied, literal_added)) {
free(delta);
return -1;
struct linehash {
unsigned long bytes;
unsigned long hash;
};

static unsigned long hash_extended_line(const unsigned char **buf_p,
unsigned long left)
{
/* An extended line is zero or more whitespace letters (including LF)
* followed by one non whitespace letter followed by zero or more
* non LF, and terminated with by a LF (or EOF).
*/
const unsigned char *bol = *buf_p;
const unsigned char *buf = bol;
unsigned long hashval = 0;
while (left) {
unsigned c = *buf++;
if (!c)
goto binary;
left--;
if (' ' < c) {
hashval = c;
break;
}
}
while (left) {
unsigned c = *buf++;
if (!c)
goto binary;
left--;
if (c == '\n')
break;
if (' ' < c)
hashval = hashval * 11 + c;
}
free(delta);
*buf_p = buf;
return hashval;

binary:
*buf_p = NULL;
return 0;
}

static int linehash_compare(const void *a_, const void *b_)
{
struct linehash *a = (struct linehash *) a_;
struct linehash *b = (struct linehash *) b_;
if (a->hash < b->hash) return -1;
if (a->hash > b->hash) return 1;
return 0;
}

static struct linehash *hash_lines(const unsigned char *buf,
unsigned long size)
{
const unsigned char *eobuf = buf + size;
struct linehash *line = NULL;
int alloc = 0, used = 0;

while (buf < eobuf) {
const unsigned char *ptr = buf;
unsigned long hash = hash_extended_line(&buf, eobuf-ptr);
if (!buf) {
free(line);
return NULL;
}
if (alloc <= used) {
alloc = alloc_nr(alloc);
line = xrealloc(line, sizeof(*line) * alloc);
}
line[used].bytes = buf - ptr;
line[used].hash = hash;
used++;
}
qsort(line, used, sizeof(*line), linehash_compare);

/* Terminate the list */
if (alloc <= used)
line = xrealloc(line, sizeof(*line) * (used+1));
line[used].bytes = line[used].hash = 0;
return line;
}

int diffcore_count_changes(void *src, unsigned long src_size,
void *dst, unsigned long dst_size,
unsigned long delta_limit,
unsigned long *src_copied,
unsigned long *literal_added)
{
return diffcore_count_changes_1(src, src_size,
dst, dst_size,
delta_limit,
src_copied,
literal_added);
struct linehash *src_lines, *dst_lines;
unsigned long sc, la;

src_lines = hash_lines(src, src_size);
if (!src_lines)
return -1;
dst_lines = hash_lines(dst, dst_size);
if (!dst_lines) {
free(src_lines);
return -1;
}
sc = la = 0;
while (src_lines->bytes && dst_lines->bytes) {
int cmp = linehash_compare(src_lines, dst_lines);
if (!cmp) {
sc += src_lines->bytes;
src_lines++;
dst_lines++;
continue;
}
if (cmp < 0) {
src_lines++;
continue;
}
la += dst_lines->bytes;
dst_lines++;
}
while (dst_lines->bytes) {
la += dst_lines->bytes;
dst_lines++;
}
*src_copied = sc;
*literal_added = la;
return 0;
}

0 comments on commit e29e114

Please sign in to comment.