package soac.java.bloom;

import java.util.Arrays;
import java.util.Random;

//This does a simple 2-layer array-of-arrays layout, in lieu of a full tree
//like the PersistentVector implementation.  The alternative is basing on the
//vector-of infrastructure, but that isn't as performant with multiple updates
//per insertion like we have here, and requires a lot more pointer traversal.
public class PersistentBloomFilter {
	public final long[][] data;
	public final long m; //number of bits
	public final int k; //number of hashes
	public final long n; //number of members
	public final int leafSize; //Size of each sub-array
	
	//Does a monte carlo simulation of the cost of an insert.
	public static double cost(long m, int k, int f){
		final Random rng = new Random();
		final int trials = 250;
		//Each long holds 64 bits
		final int n = (int)Math.ceil(m / 64.0);
		double cost=0;
		//Simulate from top down, averaging 100 iterations
		for(int trial=0; trial<trials; trial++){
			//Smallest power of f greater than n
			for(int width=f; width < n*f; width *= f){
				final int[] hit = new int[width];
				for(int i=0; i<k; i++){
					hit[rng.nextInt(hit.length)] = 1;
				}
				int localCost = 0;
				for(int i=0; i<hit.length; i++){
					localCost += hit[i];
				}
				//We may want to add a fudge factor to account for array header,
				//pointer access & function call overhead.
				cost += localCost * f;
			}
		}		
		cost /= trials;
		return cost;
	}
	
	//Given m and k, create a 2-stage layout that minimizes the total number
	//of pointer / data manipulations per insert.
	public static long[][] optimalDataLayout(long m, int k){
		//Length in longs
		final long totLength = (int)Math.ceil(m / 64.0);
		
		//cost = leafSize * k + (totalLength / leafSize);
		//d(cost)/d(leafSize) = 0 = k - (totLength / (leafSize * leafSize))
		//k = totLength / (leafSize * leafSize)
		//leafSize = sqrt(totLength / k)
		
		int leafSize = (int)Math.round(Math.sqrt((double)totLength / (k)));
		int nLeaves = (int)Math.ceil(totLength / (double)leafSize);
		
		return new long[nLeaves][leafSize];
	}
	
	public double pFalsePositive(){
		return Math.pow((1-Math.exp(-k*n/m)),k);
	}
	
	public static int optimalM(int expectedN, double pFalsePos){
		return (int)Math.ceil(-expectedN * Math.log(pFalsePos) / (Math.log(2) * Math.log(2)));
	}
	
	public static int optimalK(long m, int expectedN){
		return (int)Math.ceil(m / expectedN * Math.log(2));
	}
	
	public PersistentBloomFilter(long m, int k){
		this.n=0;
		this.m=m;
		this.k=k;
		this.data = optimalDataLayout(this.m, this.k);
		this.leafSize = this.data[0].length;
	}
	
	//Constructor derives optimal parameters
	public PersistentBloomFilter(int expectedN, double pFalsePos){
		this.n=0;
		this.m = optimalM(expectedN, pFalsePos);
		this.k = optimalK(m, expectedN);
		this.data = optimalDataLayout(this.m, this.k);
		this.leafSize = this.data[0].length;
	}
	
	private PersistentBloomFilter(long[][] data, long m, int k, long n, int leafSize){
		this.data = data;
		this.m=m;
		this.k=k;
		this.n=n;
		this.leafSize=leafSize;
	}
	
	//For this and the corresponding contains method, we operate directly on
	//the hash.  In normal cases one can just use Object.hashCode(), but this
	//way it supports primitives with less munging.
	public PersistentBloomFilter add(int oHash){
		final int[] leafIdxs = new int[k];
		final int[] elemIdxs = new int[k];
		final int[] bitIdxs = new int[k];
		
		for(int i=0;i<k;i++){
			final int hash = Hasher.murmur3(oHash, i+1);
			final int bitPos = hash % (64 * data.length * leafSize);
			leafIdxs[i] = bitPos / (leafSize*64);
			elemIdxs[i] = (bitPos % leafIdxs[i]) / 64;
			bitIdxs[i] = (bitPos % 64);
		}
		final long[][] out = new long[data.length][];
		
		//Initialize
		for(int i=0; i<out.length; i++){
			out[i] = data[i];
		}
		//Replace appropriate leaves - doing at beginning prevents error when leaves overlap
		for(int i=0; i<k; i++){
			final int leafIdx = leafIdxs[i];
			out[leafIdx] = Arrays.copyOf(data[leafIdx], leafSize);
		}
		
		for(int i=0; i<k; i++){
			final int leafIdx = leafIdxs[i];
			final int elemIdx = elemIdxs[i];
			final int bitIdx = bitIdxs[i];
			
			out[leafIdx][elemIdx] |= 1 << bitIdx;
		}
		
		return new PersistentBloomFilter(out, m, k, n+1, leafSize);
	}
	
	public boolean contains(int oHash){
		for(int i=0;i<k;i++){
			final int hash = Hasher.murmur3(oHash, i+1);
			final int bitPos = hash % (64 * data.length * leafSize);
			final int leafIdx = bitPos / (leafSize*64);
			final int elemIdx = (bitPos % leafIdx) / 64;
			final int bitIdx = (bitPos % 64);
			
			if (0 == (data[leafIdx][elemIdx] & (1 << bitIdx))) {
				return false;
			}
		}
		return true;		
	}
}
