diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c index 8fe9574db9d8b..93a3d408339c4 100644 --- a/tools/testing/selftests/resctrl/fill_buf.c +++ b/tools/testing/selftests/resctrl/fill_buf.c @@ -51,16 +51,38 @@ static void mem_flush(unsigned char *buf, size_t buf_size) sb(); } +/* + * Buffer index step advance to workaround HW prefetching interfering with + * the measurements. + * + * Must be a prime to step through all indexes of the buffer. + * + * Some primes work better than others on some architectures (from MBA/MBM + * result stability point of view). + */ +#define FILL_IDX_MULT 23 + static int fill_one_span_read(unsigned char *buf, size_t buf_size) { - unsigned char *end_ptr = buf + buf_size; - unsigned char sum, *p; - - sum = 0; - p = buf; - while (p < end_ptr) { - sum += *p; - p += (CL_SIZE / 2); + unsigned int size = buf_size / (CL_SIZE / 2); + unsigned int i, idx = 0; + unsigned char sum = 0; + + /* + * Read the buffer in an order that is unexpected by HW prefetching + * optimizations to prevent them interfering with the caching pattern. + * + * The read order is (in terms of halves of cachelines): + * i * FILL_IDX_MULT % size + * The formula is open-coded below to avoiding modulo inside the loop + * as it improves MBA/MBM result stability on some architectures. + */ + for (i = 0; i < size; i++) { + sum += buf[idx * (CL_SIZE / 2)]; + + idx += FILL_IDX_MULT; + while (idx >= size) + idx -= size; } return sum;