Source code for epios.sampler

from epios import DataProcess
import numpy as np
import pandas as pd


class Sampler(DataProcess):
    '''
    The base sampling class.

    This class performs a totally random sampling for a single time.

    Parameters:
    -----------

    data
        New data to be pre-processed. It is forwarded unchanged to
        ``DataProcess.__init__`` when `pre_process` is True.
        NOTE(review): the expected type/schema is defined by
        ``DataProcess`` and is not visible from this class - confirm there.
    data_store_path : str
        Prefix of the location holding the processed files. ``'data.csv'``
        is appended to it by plain string concatenation, so it should end
        with a path separator (the default is ``'./input/'``). It is also
        passed to ``DataProcess`` as ``path`` when pre-processing.
    pre_process : bool
        If you want to input new data, pass it through the `data` argument
        and set this to True. If you want to use previously processed data,
        point `data_store_path` at the data files and set this to False.
    num_age_group : int
        This will be used when age stratification is enabled, indicating
        how many age groups there are.
        *The last group includes age >= some threshold*
    age_group_width : int
        This will be used when age stratification is enabled, indicating
        the width of each age group (except for the last group).
    mode : str
        This indicates the specific mode to process the data.
        This should be the name of one of the modes that can be identified.
        **If you want this class to sample as originally designed,
        do not change this value**

    Raises:
    -------

    ValueError
        If `pre_process` is True but no `data` is given.

    '''

    def __init__(self, data=None, data_store_path='./input/',
                 pre_process=True, num_age_group=None,
                 age_group_width=None, mode='Base'):
        self.mode = mode
        if pre_process:
            # Pre-processing needs raw input; fail before touching the disk.
            if data is None:
                raise ValueError('You have to input data to continue')
            super().__init__(data=data, path=data_store_path,
                             num_age_group=num_age_group, mode=self.mode,
                             age_group_width=age_group_width)
        # Whether freshly pre-processed or prepared by an earlier run, the
        # table that is sampled from is always loaded from data.csv.
        self.data = pd.read_csv(data_store_path + 'data.csv')

    def sample(self, sample_size: int):
        '''
        Sample IDs uniformly at random, without replacement.

        Parameters:
        -----------

        sample_size : int
            The size of sample

        Output:
        -------

        res : list
            A list of IDs (taken from the ``'id'`` column of ``self.data``)
            of the people who are sampled

        Raises:
        -------

        ValueError
            If `sample_size` is larger than the number of rows in
            ``self.data``.

        '''
        population_size = len(self.data)
        if sample_size > population_size:
            raise ValueError('Sample size should <= population size')
        # Draw distinct row positions, then translate them into IDs.
        choice = np.random.choice(np.arange(population_size),
                                  size=sample_size, replace=False)
        ids = list(self.data['id'])
        return [ids[i] for i in choice]

    def person_allowed(self, sample: list, choice: str, threshold: int = 3):
        '''
        Check whether a sampled person may be added to the generic sample.

        A person's household is identified by their ID with the last
        dot-separated component removed (e.g. ``"0.0.0"`` or ``"0.2.1"``).

        Parameters:
        -----------

        sample : list
            List of string IDs of people who have already been chosen
        choice : str
            String ID of the person being sampled
        threshold : int
            The cap on the number of people sampled per household

        Output:
        -------

        allowed : bool
            False if `sample` already holds `threshold` or more people from
            the household of `choice`, True otherwise

        '''
        # rpartition('.')[0] is everything before the last dot, i.e. the
        # household part of an ID (an empty string if there is no dot).
        choice_household = choice.rpartition('.')[0]
        sample_count = sum(
            1 for s in sample if s.rpartition('.')[0] == choice_household
        )
        # Adding this person is only allowed while the household is still
        # below the cap.
        return sample_count < threshold