#include <err.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "dedup.h"

/*
 * State of a buffered reader that splits the data read from a file
 * descriptor into chunks.  Bytes in buf[rpos .. wpos) have been read
 * from fd but not yet handed out by get_chunk().
 */
struct chunker {
	/* backing buffer, allocated with malloc(cap) in alloc_chunker() */
	uint8_t *buf;
	/* size of buf in bytes */
	size_t cap;
	/* offset in buf of the next byte to be returned by get_chunk() */
	size_t rpos;
	/* offset in buf just past the last byte stored by fill_chunker() */
	size_t wpos;
	/* modulus used by match_pattern() for the chunk boundary test */
	size_t discr;
	/* descriptor that fill_chunker() reads from */
	int fd;
};

/*
 * Derive the modulus used for the chunk boundary test from an average
 * block size: avg is divided by a linear function of avg.  The
 * arithmetic is carried out in double precision and the quotient is
 * truncated when converted back to size_t.
 */
static size_t
calc_discr(size_t avg)
{
	const double slope = -1.42888852e-7;
	const double offset = 1.33237515;
	const double divisor = slope * avg + offset;

	return avg / divisor;
}

/*
 * Decide whether a chunk of chunk_size bytes, whose trailing window
 * hashes to fp, should end here.  Returns 1 to cut and 0 to keep going.
 */
static int
match_pattern(struct chunker *chunker, size_t chunk_size, uint32_t fp)
{
	/* Hard upper bound: a chunk this large is always cut. */
	if (chunk_size >= BLKSIZE_MAX)
		return 1;

	/* Large enough to cut: do so when the hash hits the target residue. */
	if (chunk_size >= BLKSIZE_MIN) {
		size_t modulus = chunker->discr;

		return (fp % modulus) == modulus - 1;
	}

	/* Below the minimum size the hash is ignored. */
	return 0;
}

/*
 * Work out how many of the unread bytes in the buffer make up the next
 * chunk.
 *
 * To achieve better deduplication, chunk boundaries are derived from a
 * recurring pattern occurring in the data stream rather than from fixed
 * offsets.  A window of WINSIZE bytes is slid over the unread data one
 * byte at a time and a rolling hash is maintained for it.  When
 * match_pattern() accepts the hash, the chunk ends at the end of that
 * window.
 *
 * If fewer than WINSIZE bytes are unread, or no window is accepted, all
 * unread bytes are returned as a single chunk.
 */
static size_t
get_chunk_size(struct chunker *chunker)
{
	uint8_t *data;
	uint32_t hash;
	size_t avail, end;

	avail = chunker->wpos - chunker->rpos;
	if (avail < WINSIZE)
		return avail;

	data = chunker->buf + chunker->rpos;
	hash = buzh_init(data, WINSIZE);

	/* end is the candidate chunk length, i.e. one past the window's last byte. */
	for (end = WINSIZE; end < avail; end++) {
		/* The first window is already hashed; later ones roll by one byte. */
		if (end > WINSIZE)
			hash = buzh_update(hash, data[end - WINSIZE - 1],
			                   data[end - 1], WINSIZE);
		if (match_pattern(chunker, end, hash))
			return end;
	}
	return avail;
}

/*
 * Create a chunker that reads from fd through a buffer of cap bytes.
 * The read and write positions start at zero and the boundary modulus
 * is derived from BLKSIZE_AVG.  Exits through err() if either
 * allocation fails.  Release the result with free_chunker().
 */
struct chunker *
alloc_chunker(size_t cap, int fd)
{
	struct chunker *c;
	uint8_t *data;

	if ((c = malloc(sizeof(*c))) == NULL)
		err(1, "malloc");
	if ((data = malloc(cap)) == NULL)
		err(1, "malloc");

	c->fd = fd;
	c->buf = data;
	c->cap = cap;
	c->rpos = c->wpos = 0;
	c->discr = calc_discr(BLKSIZE_AVG);
	return c;
}

/*
 * Release a chunker created by alloc_chunker() together with its
 * buffer.  Passing NULL is a no-op, mirroring free().  The file
 * descriptor is not closed here.
 */
void
free_chunker(struct chunker *chunker)
{
	if (chunker == NULL)
		return;
	free(chunker->buf);
	free(chunker);
}

/*
 * Read from the chunker's descriptor into the free space at the end of
 * its buffer (from wpos up to cap) and advance wpos by the number of
 * bytes obtained.
 *
 * Returns the new write position, i.e. the number of bytes now held in
 * the buffer counted from its start.  If xread() returns a negative
 * value, that value is returned and the write position is left
 * untouched.
 */
ssize_t
fill_chunker(struct chunker *chunker)
{
	uint8_t *bp;
	ssize_t n;

	bp = &chunker->buf[chunker->wpos];
	n = xread(chunker->fd, bp, chunker->cap - chunker->wpos);
	/*
	 * NOTE(review): xread() is defined elsewhere; it may never return a
	 * negative value.  Guard anyway, since adding a negative count to
	 * the unsigned wpos would corrupt the write position.
	 */
	if (n < 0)
		return n;
	chunker->wpos += (size_t)n;
	return (ssize_t)chunker->wpos;
}

/*
 * Hand out the next chunk of buffered data.
 *
 * On success *chunk_size receives the chunk length, the read position
 * is advanced past the chunk, and the returned pointer refers to the
 * chunk's first byte inside the chunker's own buffer.  When no unread
 * data is buffered, *chunk_size is set to 0 and NULL is returned.
 */
uint8_t *
get_chunk(struct chunker *chunker, size_t *chunk_size)
{
	if (chunker->rpos != chunker->wpos) {
		uint8_t *start = chunker->buf + chunker->rpos;
		size_t len = get_chunk_size(chunker);

		*chunk_size = len;
		chunker->rpos += len;
		return start;
	}

	*chunk_size = 0;
	return NULL;
}

/*
 * Compact the buffer: slide the unread bytes buf[rpos .. wpos) down to
 * the start of the buffer and rebase both positions so that rpos is 0.
 */
void
drain_chunker(struct chunker *chunker)
{
	size_t pending = chunker->wpos - chunker->rpos;

	/* Source and destination may overlap, hence memmove. */
	memmove(chunker->buf, chunker->buf + chunker->rpos, pending);
	chunker->wpos = pending;
	chunker->rpos = 0;
}
