Source code for demos.models.birth

import orca
import numpy as np
import pandas as pd
from templates.utils.models import columns_in_formula
from templates import estimated_models, modelmanager as mm
import time
from logging_logic import log_execution_time
from config import DEMOSConfig, get_config

# Name under which the birth step is registered with orca (used by the
# `@orca.step(STEP_NAME)` decorator on `birth`).
STEP_NAME = "birth"
# Empty and not referenced in the visible part of this module.
# NOTE(review): presumably a hook for declaring input columns this step
# needs; confirm against whatever consumes it.
REQUIRED_COLUMNS = []


@orca.injectable(autocall=False)
def get_new_person_id(n):
    """
    Generate new unique person IDs for newborns.

    The new IDs continue from the largest person ID found in the index of
    the ``persons``, ``graveyard`` and ``rebalanced_persons`` tables, so
    they cannot collide with any ID present in those three tables.

    Parameters
    ----------
    n : int
        Number of new person IDs to generate.

    Returns
    -------
    np.ndarray
        Array of ``n`` consecutive IDs starting at ``current_max + 1``.
        If all three tables are empty, numbering starts at 1.
    """
    table_names = ("persons", "graveyard", "rebalanced_persons")
    index_maxima = [
        orca.get_table(table_name).local.index.max() for table_name in table_names
    ]

    # An empty index reports NaN as its maximum. Python's built-in `max` is
    # order-dependent when NaN is present (a leading NaN wins every
    # comparison), so discard NaN values before comparing.
    known_maxima = [value for value in index_maxima if not pd.isna(value)]
    current_max = max(known_maxima, default=0)

    # [0, 1, 2, ...] shifted to [current_max + 1, current_max + 2, ...]
    return np.arange(n) + current_max + 1


@orca.step(STEP_NAME)
def birth(persons, households, get_new_person_id):
    """
    Simulate household-level births and add new persons to the population.

    This step runs the birth model on eligible households, selects the
    households whose predicted outcome is 1, and appends one newborn per
    such household to the persons table.

    Parameters
    ----------
    persons : orca.Table
        The persons table containing individual-level attributes.
    households : orca.Table
        The households table containing household-level attributes.
    get_new_person_id : callable
        Function to generate new unique person IDs as needed.

    Returns
    -------
    None

    Notes
    -----
    - Adds new rows to the persons table for each birth event.
    - Babies are assigned default values for most attributes; ``sex`` is
      drawn independently for each baby from the codes ``{1, 2}``.
    - ``race_id`` is copied from ``hh_head_race_id`` when all household
      members share a single ``race_id``; otherwise it is set to 9.
    - ``race`` is derived from ``race_id``: 1 -> "white", 2 -> "black",
      anything else -> "other".
    - Some attributes may be duplicated or missing if not set in input data.
    """
    start_time = time.time()
    birth_list = run_and_calibrate_birth_model(persons, households)

    # Households with a predicted birth event (model outcome == 1).
    house_indices = list(birth_list[birth_list == 1].index)

    # One newborn row per household with a birth, indexed by fresh person IDs.
    babies = pd.DataFrame(house_indices, columns=["household_id"])
    babies.index = get_new_person_id(len(babies))
    babies.index.name = "person_id"

    # Set default values
    babies["age"] = 0
    babies["edu"] = 0
    babies["earning"] = 0
    babies["relate"] = 2
    babies["MAR"] = 5
    # Draw one value per baby. Without `size`, a single scalar is drawn and
    # broadcast, giving every newborn in this step the same sex.
    babies["sex"] = np.random.choice([1, 2], size=len(babies))
    babies["student"] = 0
    babies["worker"] = 0
    babies["work_at_home"] = 0

    # Set race of babies
    # TODO: There is duplication of information between `race_id` and `race`
    hh_races = (
        persons.local.groupby("household_id")
        .agg(num_races=("race_id", "nunique"))
        .reset_index()
        .merge(
            households.to_frame(
                ["hh_head_race_str", "hh_head_race_id", "household_id"]
            ).reset_index(),
            on="household_id",
        )
    ).set_index("household_id")

    # Positional boolean mask over `babies`: True where the baby's household
    # contains exactly one distinct race_id.
    one_race_hh_filter = (
        hh_races.loc[babies.household_id]["num_races"] == 1
    ).values
    babies["race_id"] = 9
    babies.loc[one_race_hh_filter, "race_id"] = hh_races.loc[
        babies.loc[one_race_hh_filter, "household_id"], "hh_head_race_id"
    ].values

    # Assign the result directly instead of a chained
    # `babies["race"].fillna(..., inplace=True)`, which acts on a temporary
    # object and does not update `babies` under pandas Copy-on-Write.
    babies["race"] = babies["race_id"].map({1: "white", 2: "black"}).fillna("other")

    # Finally add babies to persons table
    persons.local = pd.concat([persons.local, babies])
    log_execution_time(start_time, orca.get_injectable("year"), "birth")
def run_and_calibrate_birth_model(persons, households):
    """
    Run the birth model on eligible households, calibrating if configured.

    A household is eligible when it contains at least one person with
    ``sex == 2`` whose age is between 14 and 45 (both bounds inclusive).

    Parameters
    ----------
    persons : orca.Table
        The persons table; ``sex``, ``age`` and ``household_id`` are read.
    households : orca.Table
        The households table supplying the model's explanatory variables.

    Returns
    -------
    Result of ``calibration_procedure.calibrate_and_run_model`` when a
    calibration procedure is configured, otherwise of
    ``birth_model.predict``. The caller treats it as a household-indexed
    series in which the value 1 marks a birth.
    """
    eligibility_cond = (persons["sex"] == 2) & (persons["age"].between(14, 45))
    eligible_hh = persons.local.loc[eligibility_cond, "household_id"].unique()

    # Load calibration config
    demos_config: DEMOSConfig = get_config()
    calibration_procedure = demos_config.birth_module_config.calibration_procedure

    # Get model data: only the columns the model formula references, and
    # only the rows for eligible households.
    birth_model = mm.get_step("birth")
    birth_model_variables = columns_in_formula(birth_model.model_expression)
    birth_model_data = households.to_frame(birth_model_variables).loc[eligible_hh]

    # Calibrate if needed
    if calibration_procedure is not None:
        return calibration_procedure.calibrate_and_run_model(
            birth_model, birth_model_data
        )

    return birth_model.predict(birth_model_data)


# -----------------------------------------------------------------------------------------
# BIRTH MODEL COLUMNS (moved from variables.py)
# -----------------------------------------------------------------------------------------
@orca.column("households")
def hh_n_persons(households, persons):
    """
    Number of persons in each household.

    Households with no rows in the persons table get NaN, because the
    counts are left-joined onto the households index.
    """
    counts = persons.local.groupby("household_id").size()
    return households.local.join(counts.rename("hh_n_persons"))["hh_n_persons"]


@orca.column("households")
def hh_fsize_bin23(households):
    """Indicator (1/0): household has exactly 2 or 3 persons."""
    df = households.to_frame(columns=["hh_n_persons"])
    return df["hh_n_persons"].isin([2, 3]) * 1


@orca.column("households")
def hh_fsize_bingt3(households):
    """Indicator (1/0): household has more than 3 persons."""
    df = households.to_frame(columns=["hh_n_persons"])
    # Select the column so a Series is returned, matching `hh_fsize_bin23`;
    # comparing the whole frame would yield a one-column DataFrame.
    return df["hh_n_persons"].gt(3) * 1


def _hh_birth_reference_age(persons):
    """
    Return the per-household reference age used by the birth age-bin columns.

    For households with two or more head/spouse members (``relate`` in
    {0, 1, 13}), the reference age is the summed age of the head/spouse
    members with ``sex == 2``; otherwise it is the summed age of members
    with ``relate == 0``.

    NOTE(review): the result is 0 when such a household has no head/spouse
    with ``sex == 2``, and is a sum of ages when it has more than one;
    confirm that this is intended.

    Returns
    -------
    pd.Series
        Series named ``age_final`` indexed by ``household_id``; it only
        contains households that have at least one person.
    """
    df = persons.to_frame(columns=["household_id", "relate", "sex", "age"])

    is_head = (df["relate"] == 0).astype(int)
    is_spouse = df["relate"].isin([1, 13]).astype(int)
    is_female = (df["sex"] == 2).astype(int)
    is_head_or_spouse = df["relate"].isin([0, 1, 13]).astype(int)

    # Zero out ages of persons outside each role so that a per-household
    # sum picks out the ages of the relevant members only.
    per_person = pd.DataFrame(
        {
            "household_id": df["household_id"],
            "age_head": df["age"] * is_head,
            "age_female": df["age"] * is_female * is_head_or_spouse,
            "head_spouse": is_head + is_spouse,
        }
    )
    per_household = per_person.groupby("household_id").agg(
        age_head=("age_head", "sum"),
        age_female=("age_female", "sum"),
        head_spouse=("head_spouse", "sum"),
    )

    age_final = np.where(
        per_household["head_spouse"] >= 2,
        per_household["age_female"],
        per_household["age_head"],
    )
    return pd.Series(age_final, index=per_household.index, name="age_final")


@orca.column("households")
def hh_birth_age_lt27(persons, households):
    """Indicator (1/0): household reference age is 27 or below."""
    return (_hh_birth_reference_age(persons) <= 27).astype(int)


@orca.column("households")
def hh_birth_age_27_35(persons, households):
    """Indicator (1/0): household reference age is above 27 and at most 35."""
    age_final = _hh_birth_reference_age(persons)
    return (age_final.between(27, 35, inclusive="right")).astype(int)