import numpy as np
import pandas as pd

data = pd.read_csv('filter_sizes2.csv', index_col=0)
data.head()

import math
import scipy.optimize

def expected_data_size(c, N, B, S):
    """
    The expected size of data downloaded in bits if a client does not match a block.
    
    c is the number of filter elements the client is watching
    N is the number of filter elements in a block
    S is the size of the full block
    P is the Golomb-Rice parameter
    
    Assuming filter sizes that are N * P bits, this returns the filter size plus block size times false
    positive match probability.
    """
    P = 2.0 ** B
    M = 1.497137 * P
    bits = B + 1.0 / (1 - (1 - 1.0 / M) ** P)
    fp = (1 - (1 - 1.0 / (N * P))**(c*N))
    return N * bits + 8 * S * fp

def generate_func(N, S):
    """
    Generate a numpy-optimized function to compute expected data sizes. Takes numpy arrays with
    N and S and returns a lambda accepting scalar c value.
    """
    B = np.arange(1, 49)
    P = 2.0 ** B
    M = 1.497137 * P
    bits = B + 1.0 / (1 - (1 - 1.0 / M) ** P)
    Y = np.power(1 - 1.0 / np.outer(M, np.maximum(1, N)), N)
    return lambda c: np.outer(bits, N) + 8 * S * (1 - Y ** c)

N = data['output_n'].values
S = data['size'].values
func = generate_func(N, S)

y10 = np.sum(func(10), 1)
y100 = np.sum(func(100), 1)
y1000 = np.sum(func(1000), 1)
y10000 = np.sum(func(10000), 1)

P = pd.RangeIndex(1, 49)
y10 = pd.Series(y10, index=P)
y100 = pd.Series(y100, index=P)
y1000 = pd.Series(y1000, index=P)
y10000 = pd.Series(y10000, index=P)

{
    'M=10': y10.idxmin(),
    'M=100': y100.idxmin(),
    'M=1,000': y1000.idxmin(),
    'M=10,000': y10000.idxmin(),
}

{'M=10': 13, 'M=100': 16, 'M=1,000': 20, 'M=10,000': 23}

import matplotlib.pyplot as plt
%matplotlib inline

plt.figure(figsize=(10,5))

plt.plot(y10, label='M=10')
plt.plot(y100, label='M=100')
plt.plot(y1000, label='M=1,000')
plt.plot(y10000, label='M=10,000')
plt.title('Expected data size (bits) vs P')
plt.legend()

<matplotlib.legend.Legend at 0x7f810be8a1d0>

	size	basic_size	output_size	prev_size	output_and_prev_size	input_size	txid_size	basic_n	output_n	prev_n	output_and_prev_n	input_n	txid_n
height
1	215	7	4	1	4	1	4	2	1	0	1	0	1
2	215	7	4	1	4	1	4	2	1	0	1	0	1
3	215	7	4	1	4	1	4	2	1	0	1	0	1
4	215	7	4	1	4	1	4	2	1	0	1	0	1
5	215	7	4	1	4	1	4	2	1	0	1	0	1