diff --git a/diff-delta.c b/diff-delta.c
index 9f998d0a7..3a737da68 100644
--- a/diff-delta.c
+++ b/diff-delta.c
@@ -246,7 +246,7 @@ create_delta(const struct delta_index *index,
 	     const void *trg_buf, unsigned long trg_size,
 	     unsigned long *delta_size, unsigned long max_size)
 {
-	unsigned int i, outpos, outsize, val;
+	unsigned int i, outpos, outsize, moff, msize, val;
 	int inscnt;
 	const unsigned char *ref_data, *ref_top, *data, *top;
 	unsigned char *out;
@@ -291,30 +291,33 @@ create_delta(const struct delta_index *index,
 	}
 	inscnt = i;
 
+	moff = 0;
+	msize = 0;
 	while (data < top) {
-		unsigned int moff = 0, msize = 0;
-		struct index_entry *entry;
-		val ^= U[data[-RABIN_WINDOW]];
-		val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
-		i = val & index->hash_mask;
-		for (entry = index->hash[i]; entry; entry = entry->next) {
-			const unsigned char *ref = entry->ptr;
-			const unsigned char *src = data;
-			unsigned int ref_size = ref_top - ref;
-			if (entry->val != val)
-				continue;
-			if (ref_size > top - src)
-				ref_size = top - src;
-			if (ref_size > 0x10000)
-				ref_size = 0x10000;
-			if (ref_size <= msize)
-				break;
-			while (ref_size-- && *src++ == *ref)
-				ref++;
-			if (msize < ref - entry->ptr) {
-				/* this is our best match so far */
-				msize = ref - entry->ptr;
-				moff = entry->ptr - ref_data;
+		if (msize < 4096) {
+			struct index_entry *entry;
+			val ^= U[data[-RABIN_WINDOW]];
+			val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
+			i = val & index->hash_mask;
+			for (entry = index->hash[i]; entry; entry = entry->next) {
+				const unsigned char *ref = entry->ptr;
+				const unsigned char *src = data;
+				unsigned int ref_size = ref_top - ref;
+				if (entry->val != val)
+					continue;
+				if (ref_size > top - src)
+					ref_size = top - src;
+				if (ref_size <= msize)
+					break;
+				while (ref_size-- && *src++ == *ref)
+					ref++;
+				if (msize < ref - entry->ptr) {
+					/* this is our best match so far */
+					msize = ref - entry->ptr;
+					moff = entry->ptr - ref_data;
+					if (msize >= 4096) /* good enough */
+						break;
+				}
 			}
 		}
 
@@ -327,27 +330,13 @@ create_delta(const struct delta_index *index,
 				out[outpos - inscnt - 1] = inscnt;
 				inscnt = 0;
 			}
+			msize = 0;
 		} else {
+			unsigned int left;
 			unsigned char *op;
 
-			if (msize >= RABIN_WINDOW) {
-				const unsigned char *sk;
-				sk = data + msize - RABIN_WINDOW;
-				val = 0;
-				for (i = 0; i < RABIN_WINDOW; i++)
-					val = ((val << 8) | *sk++) ^ T[val >> RABIN_SHIFT];
-			} else {
-				const unsigned char *sk = data + 1;
-				for (i = 1; i < msize; i++) {
-					val ^= U[sk[-RABIN_WINDOW]];
-					val = ((val << 8) | *sk++) ^ T[val >> RABIN_SHIFT];
-				}
-			}
-
 			if (inscnt) {
 				while (moff && ref_data[moff-1] == data[-1]) {
-					if (msize == 0x10000)
-						break;
 					/* we can match one byte back */
 					msize++;
 					moff--;
@@ -363,23 +352,40 @@ create_delta(const struct delta_index *index,
 				inscnt = 0;
 			}
 
-			data += msize;
+			/* A copy op is currently limited to 64KB (pack v2) */
+			left = (msize < 0x10000) ? 0 : (msize - 0x10000);
+			msize -= left;
+
 			op = out + outpos++;
 			i = 0x80;
 
-			if (moff & 0xff) { out[outpos++] = moff; i |= 0x01; }
-			moff >>= 8;
-			if (moff & 0xff) { out[outpos++] = moff; i |= 0x02; }
-			moff >>= 8;
-			if (moff & 0xff) { out[outpos++] = moff; i |= 0x04; }
-			moff >>= 8;
-			if (moff & 0xff) { out[outpos++] = moff; i |= 0x08; }
+			if (moff & 0x000000ff)
+				out[outpos++] = moff >> 0,  i |= 0x01;
+			if (moff & 0x0000ff00)
+				out[outpos++] = moff >> 8,  i |= 0x02;
+			if (moff & 0x00ff0000)
+				out[outpos++] = moff >> 16, i |= 0x04;
+			if (moff & 0xff000000)
+				out[outpos++] = moff >> 24, i |= 0x08;
 
-			if (msize & 0xff) { out[outpos++] = msize; i |= 0x10; }
-			msize >>= 8;
-			if (msize & 0xff) { out[outpos++] = msize; i |= 0x20; }
+			if (msize & 0x00ff)
+				out[outpos++] = msize >> 0, i |= 0x10;
+			if (msize & 0xff00)
+				out[outpos++] = msize >> 8, i |= 0x20;
 
 			*op = i;
+
+			data += msize;
+			moff += msize;
+			msize = left;
+
+			if (msize < 4096) {
+				int j;
+				val = 0;
+				for (j = -RABIN_WINDOW; j < 0; j++)
+					val = ((val << 8) | data[j])
+					      ^ T[val >> RABIN_SHIFT];
+			}
 		}
 
 		if (outpos >= outsize - MAX_OP_SIZE) {