## Randomization (permutation) test for a difference in mean 'Range'
## between the 'Low' and 'High' mileage groups.

## The data:
import pandas as pd
import numpy as np

df = pd.DataFrame({
    'Mileage': ['Low'] * 5 + ['High'] * 12,
    'Range': [240, 241, 236, 240, 240,
              230, 227, 232, 255.4381603, 267.2467371, 225, 238,
              223, 225, 218, 227, 225],
})

## STEP 1: Calculate the sample statistic
# Observed statistic: mean(Low) - mean(High).
means = df.groupby('Mileage').mean()
sampdiff = means.loc['Low', 'Range'] - means.loc['High', 'Range']
print("Low Mileage Mean = ", means.loc['Low', 'Range'])
print("High Mileage Mean = ", means.loc['High', 'Range'])
print("Sample difference of means = ", sampdiff)

## STEP 2: Randomization distribution
# Work on a deep copy so the observed data in `df` is never modified.
sim = df.copy(deep=True)
N = 1000           # number of simulated re-randomizations
n = sim.shape[0]   # num. rows
xbardiff = np.empty(N)
for i in range(N):
    # Shuffle the responses across the fixed group labels; this breaks any
    # association between 'Mileage' and 'Range' while keeping the group
    # sizes (5 Low, 12 High) unchanged.
    sim['Range'] = np.random.permutation(sim['Range'].to_numpy())
    # Same statistic as STEP 1, computed the same way, on the shuffled data.
    means = sim.groupby('Mileage').mean()
    xbardiff[i] = means.loc['Low', 'Range'] - means.loc['High', 'Range']

## STEP 3: Calculate p-value. Direction of inequality agrees with Ha above!!!
# NOTE(review): Ha itself is not stated in this snippet; the `>=` below is a
# right-tailed test, i.e. presumably Ha: mean(Low) > mean(High) -- confirm.
# The p-value is the fraction of simulated differences at least as large as
# the observed one.
print("p-value =", np.count_nonzero(xbardiff >= sampdiff) / N)