#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Bits 32..63 of n; n is first converted to uint64_t, so wider values are truncated. */
#define HI(n) ((uint64_t)(n) >> 32)
/* Bits 0..31 of n; n is first converted to uint64_t. */
#define LO(n) ((uint64_t)(n) & 0xffffffff)
/*
 * Build a 128-bit value from two 64-bit halves.
 * Both arguments and the shift are fully parenthesized so that argument
 * expressions containing low-precedence operators (|, ?:, ...) expand as the
 * caller wrote them.
 */
#define U128(high, low) (((__uint128_t)(uint64_t)(high) << 64) | (uint64_t)(low))

/*
 * Per-word additive constants used by hash_chunk: for the i-th 16-byte group
 * of a chunk, entry 2*i is added to the first 64-bit word and entry 2*i+1 to
 * the second before the two are multiplied. The table is only ever read, so
 * it is declared const.
 */
static const uint64_t magic_table[16] = {
  0x2dd7caaefcf073eb, 0xa9209937349cfe9c,
  0xb84bfc934b0e60ef, 0xff709c157b26e477,
  0x3936fd8735455112, 0xca141bf22338d331,
  0xdd40e749cb64fd02, 0x5e268f564b0deb26,
  0x658239596bdea9ec, 0x31cedf33ac38c624,
  0x12f56816481b0cfd, 0x94e9de155f40f095,
  0x5089c907844c6325, 0xdf887e97d73c50e3,
  0xae8870787ce3c11d, 0xa6767d18c58d2117,
};
/*
 * 128-bit constant used by Hash: added to the state right after the first
 * chunk is hashed, and passed as the multiplier to hash_muladd for every
 * subsequent chunk.
 */
#define ROUND_MAGIC U128(0xe3f0d44988bcdfab, 0x081570afdd535ec3)
/*
 * 64-bit constants added to the intermediate words X (FINAL_MAGIC0) and
 * Y (FINAL_MAGIC1) during finalization in Hash.
 */
#define FINAL_MAGIC0 0xce7c4801d683e824
#define FINAL_MAGIC1 0x6823775b1daad522

/* Forward declarations of the file-local helpers defined elsewhere in this file. */
/* hash_muladd(hash, mul, add): limb-wise multiply-add used to fold a chunk into the state. */
static __uint128_t hash_muladd(__uint128_t, __uint128_t, __uint128_t);
/* hash_chunk(chunk, size): hashes up to 128 bytes of chunk in 16-byte groups. */
static __uint128_t hash_chunk(const uint8_t *, int64_t);
/* read_int64(p): reads 8 bytes at p as a 64-bit integer, p[0] least significant. */
static uint64_t read_int64(const uint8_t *);

/*
 * Initialization entry point. There is nothing to set up in this file, so the
 * function does no work.
 *
 * Declared with an explicit (void) parameter list so that the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 *
 * Returns: always 0.
 */
int POGO_init(void)
{
  return 0;
}

/*
 * Read eight bytes starting at p as a 64-bit unsigned integer, with p[0] as
 * the least significant byte. The value is assembled with shifts, so the
 * result does not depend on the host byte order or on the alignment of p.
 *
 * p: pointer to at least 8 readable bytes.
 * Returns: the assembled 64-bit value.
 */
uint64_t read_int64(const uint8_t *p)
{
  uint64_t value = 0;
  unsigned idx;

  for (idx = 0; idx < 8; idx++) {
    /* Byte idx contributes bits [8*idx, 8*idx + 7]. */
    value |= (uint64_t) p[idx] << (8 * idx);
  }
  return value;
}

/*
 * Compute the 64-bit hash of the len bytes starting at in.
 *
 * The input is processed in 128-byte chunks. Any remainder (len % 128 bytes)
 * is copied into a zero-padded 128-byte buffer and hashed as a final partial
 * chunk. The first chunk (or the tail, when the input is shorter than 128
 * bytes) seeds the 128-bit state together with ROUND_MAGIC; every further
 * chunk is folded in with hash_muladd(state, ROUND_MAGIC, chunk_hash). The
 * state is then reduced to 64 bits by the finalization arithmetic below.
 *
 * in:  input bytes; only read. Because the tail copy is skipped when there is
 *      no tail, in is not touched at all when len is 0.
 * len: number of input bytes.
 * Returns: the 64-bit hash value.
 *
 * NOTE(review): the finalization constants (2^127 - 1, 0x101) look like
 * modular reductions modulo 2^127 - 1 and 2^64 - 257 — confirm against the
 * reference this was derived from before changing anything here.
 */
uint64_t Hash(uint8_t *in, uint32_t len)
{
  uint32_t num_chunks = len / 128;

  // Copy the tail into a zero-padded buffer. The copy is skipped when the
  // tail is empty: memcpy requires valid pointers even for a zero length, and
  // `in` may legitimately be NULL when len == 0.
  uint8_t tail[128] = {0};
  int tail_size = len % 128;
  if (tail_size) {
    memcpy(tail, in + len - tail_size, tail_size);
  }

  __uint128_t hash;
  if (num_chunks) {
    // Hash the first 128 bytes
    hash = hash_chunk(in, 128);

  } else {
    // Hash the tail
    hash = hash_chunk(tail, tail_size);
  }

  hash += ROUND_MAGIC;

  if (num_chunks) {
    // The first chunk is already in the state, hence the pre-decrement:
    // this loop folds in chunks 2..num_chunks.
    while (--num_chunks) {
      in += 128;
      hash = hash_muladd(hash, ROUND_MAGIC, hash_chunk(in, 128));
    }

    // Fold in the partial chunk, if any (when the input was shorter than
    // 128 bytes the tail has already been used as the seed above).
    if (tail_size) {
      hash = hash_muladd(hash, ROUND_MAGIC, hash_chunk(tail, tail_size));
    }
  }

  // Finalize the hash: add the tail length in bits to the upper 64-bit half,
  // then bump values at or above 2^127 - 1 by one and clear the top bit.
  hash += U128(tail_size * 8, 0);
  if (hash >= U128(0x7fffffffffffffff,0xffffffffffffffff)) {
    hash++;
  }
  hash = hash << 1 >> 1;

  // Mix the two 64-bit halves into the intermediate words X and Y.
  uint64_t hash_high = hash >> 64;
  uint64_t hash_low = hash;
  uint64_t X = hash_high + HI(hash_low);
  X = HI(X + HI(X) + 1) + hash_high;
  uint64_t Y = (X << 32) + hash_low;

  // Add the final constants; `A < X` / `B < Y` detect unsigned wraparound of
  // the addition, in which case 0x101 is added as a correction.
  uint64_t A = X + FINAL_MAGIC0;
  if (A < X) {
    A += 0x101;
  }

  uint64_t B = Y + FINAL_MAGIC1;
  if (B < Y) {
    B += 0x101;
  }

  // 128-bit product, then fold the upper 64 bits back into the lower 64 bits
  // (multiplied by 0x101) twice, with two final 0x101 corrections.
  __uint128_t H = (__uint128_t) A * B;
  H = 0x101 * (H >> 64) + (uint64_t) H;
  H = 0x101 * (H >> 64) + (uint64_t) H;
  if (H >> 64) {
    H += 0x101;
  }
  if ((uint64_t) H > 0xFFFFFFFFFFFFFEFE) {
    H += 0x101;
  }
  return (uint64_t) H;
}

/*
 * Hash up to 128 bytes of chunk.
 *
 * The data is walked in 16-byte groups, at most eight of them. A group is
 * processed when its starting offset is below size, and is always read in
 * full (two 64-bit words via read_int64), so the caller must supply readable
 * padding when size is not a multiple of 16. Each word has its magic_table
 * entry added (wrapping at 64 bits), the two sums are multiplied as 128-bit
 * values, and the products are accumulated.
 *
 * chunk: start of the data.
 * size:  number of meaningful bytes; values <= 0 yield 0.
 * Returns: the accumulated sum with its two most significant bits cleared.
 */
__uint128_t hash_chunk(const uint8_t *chunk, int64_t size)
{
  __uint128_t sum = 0;
  int group = 0;

  while (group < 8 && (int64_t) group * 16 < size) {
    const uint8_t *words = chunk + group * 16;
    uint64_t first = read_int64(words) + magic_table[2 * group];
    uint64_t second = read_int64(words + 8) + magic_table[2 * group + 1];

    sum += (__uint128_t) first * (__uint128_t) second;
    group++;
  }

  /* Keep only the low 126 bits. */
  return sum & (~(__uint128_t) 0 >> 2);
}

/*
 * Combine the running state with a multiplier and an addend, working on
 * 32-bit limbs held in 64-bit variables.
 *
 * hash: current 128-bit state, split into limbs h0..h3 (h0 least significant).
 * mul:  128-bit multiplier, split into limbs m0..m3.
 * add:  128-bit addend, split into a0 (bits 0..31), a1 (bits 32..63) and
 *       a23 (bits 64..127).
 * Returns: a 128-bit value reassembled from the carried column sums.
 *
 * The carry handling is deliberately kept as-is (see "bugs included" below):
 * intermediate sums live in uint64_t and may wrap, and that wraparound is part
 * of the result. Do not "fix" or reorder this arithmetic.
 *
 * NOTE(review): folding columns 4..6 into columns 0..2 with a left shift by
 * one looks like a reduction modulo 2^127 - 1 — confirm.
 */
__uint128_t hash_muladd(__uint128_t hash, __uint128_t mul, __uint128_t add)
{
  /* HI/LO convert to uint64_t first, so a1 is bits 32..63 of add. */
  uint64_t a0 = LO(add), a1 = HI(add), a23 = add >> 64;
  uint64_t m0 = LO(mul),        m1 = HI(mul);
  uint64_t m2 = LO(mul >> 64),  m3 = HI(mul >> 64);
  uint64_t h0 = LO(hash),       h1 = HI(hash);
  uint64_t h2 = LO(hash >> 64), h3 = HI(hash >> 64);

  /* Column sums, before carry: cN adds every product h_i * m_j with
   * i + j == N. Each sum is a plain uint64_t, so no carry-out is captured. */
  uint64_t c0 = (h0 * m0);
  uint64_t c1 = (h0 * m1) + (h1 * m0);
  uint64_t c2 = (h0 * m2) + (h1 * m1) + (h2 * m0);
  uint64_t c3 = (h0 * m3) + (h1 * m2) + (h2 * m1) + (h3 * m0);
  uint64_t c4 = (h1 * m3) + (h2 * m2) + (h3 * m1);
  uint64_t c5 = (h2 * m3) + (h3 * m2);
  uint64_t c6 = (h3 * m3);

  /* Combine, add, and carry (bugs included): columns 6, 4 and 5 are doubled
   * and folded into columns 2, 0 and 1; the addend limbs are added in; the
   * upper half of r2 carries into r3, r3 >> 31 carries into r0, and the upper
   * half of r0 carries into r1. */
  uint64_t r2 = c2 + (c6 << 1) + a23;
  uint64_t r3 = c3                   + HI(r2);
  uint64_t r0 = c0 + (c4 << 1) + a0  + (r3 >> 31);
  uint64_t r1 = c1 + (c5 << 1) + a1  + HI(r0);

  /* Return as uint128_t: the upper half is the low 31 bits of r3 (placed at
   * bits 32..62) joined with the low 32 bits of r2, plus the carry out of r1;
   * the lower half is the low 32 bits of r1 above the low 32 bits of r0. */
  __uint128_t result = ((r3 << 33 >> 1) | LO(r2)) + HI(r1);
  return (result << 64) | (r1 << 32) | LO(r0);
}

/*
 * Shutdown entry point. This file holds no resources, so the function
 * intentionally does nothing.
 *
 * Declared with an explicit (void) parameter list so that the definition is a
 * real prototype and calls with stray arguments are diagnosed.
 */
void POGO_quit(void)
{
}

