Source code for demos.models.birth
import orca
import numpy as np
import pandas as pd
from templates.utils.models import columns_in_formula
from templates import estimated_models, modelmanager as mm
import time
from logging_logic import log_execution_time
from config import DEMOSConfig, get_config
# Name under which the `birth` function below is registered as an orca step
# (it is passed to the `@orca.step(...)` decorator).
STEP_NAME = "birth"
# Columns this step declares as required; currently none.
# NOTE(review): not referenced anywhere in the visible code -- presumably read
# by external tooling; confirm before removing.
REQUIRED_COLUMNS = []
@orca.injectable(autocall=False)
def get_new_person_id(n):
    """
    Generate new unique person IDs for newborns.

    The returned IDs are consecutive and strictly greater than every index
    value currently present in the ``persons``, ``graveyard`` and
    ``rebalanced_persons`` tables.

    Parameters
    ----------
    n : int
        Number of new person IDs to generate.

    Returns
    -------
    np.ndarray
        Array of ``n`` new unique person IDs. If all three tables are empty
        the IDs start at 0.
    """
    table_names = ("persons", "graveyard", "rebalanced_persons")
    index_maxima = [
        orca.get_table(table_name).local.index.max() for table_name in table_names
    ]

    # An empty table reports NaN as its index maximum. The builtin `max` gives
    # order-dependent results when NaN is present (a leading NaN would turn
    # every generated ID into NaN), so discard those entries explicitly.
    valid_maxima = [value for value in index_maxima if pd.notna(value)]
    current_max = max(valid_maxima) if valid_maxima else -1

    # [0, 1, 2, ...] shifted so that the first new ID is current_max + 1.
    return np.arange(n) + current_max + 1
[docs]
@orca.step(STEP_NAME)
def birth(persons, households, get_new_person_id):
    """
    Simulate household-level births and add new persons to the population.

    This step applies the birth model to eligible households, determines which
    have a birth event, and appends one new baby per such household to the
    persons table with default and inferred attributes.

    Parameters
    ----------
    persons : orca.Table
        The persons table containing individual-level attributes.
    households : orca.Table
        The households table containing household-level attributes.
    get_new_person_id : callable
        Function to generate new unique person IDs as needed.

    Returns
    -------
    None

    Notes
    -----
    - Adds new rows to the persons table for each birth event.
    - Babies are assigned default values for most attributes; sex is drawn
      independently and uniformly from ``{1, 2}`` for every baby.
    - Race is taken from the household head if all members share the same
      ``race_id``; otherwise ``race_id`` is 9 and ``race`` is "other".
    - Columns of the persons table that are not set here are left missing
      (NaN) for the newly added rows.
    """
    start_time = time.time()
    birth_list = run_and_calibrate_birth_model(persons, households)

    # Households whose outcome equals 1 have a birth this step.
    house_indices = list(birth_list[birth_list == 1].index)

    # One baby row per birthing household, indexed by a fresh person_id.
    babies = pd.DataFrame(house_indices, columns=["household_id"])
    babies.index = get_new_person_id(len(babies))
    babies.index.name = "person_id"

    # Default attribute values for newborns.
    # NOTE(review): the meaning of the coded values (relate=2, MAR=5) is not
    # visible in this module -- confirm against the data dictionary.
    babies["age"] = 0
    babies["edu"] = 0
    babies["earning"] = 0
    babies["relate"] = 2
    babies["MAR"] = 5
    # Draw one value per baby. A single scalar draw would be broadcast to the
    # whole column, giving every baby born in this step the same sex.
    babies["sex"] = np.random.choice([1, 2], size=len(babies))
    babies["student"] = 0
    babies["worker"] = 0
    babies["work_at_home"] = 0

    # Set race of babies
    # TODO: There is duplication of information between `race_id` and `race`
    hh_races = (
        persons.local.groupby("household_id")
        .agg(num_races=("race_id", "nunique"))
        .reset_index()
        .merge(
            households.to_frame(
                ["hh_race_of_head", "hh_race_id_of_head", "household_id"]
            ).reset_index(),
            on="household_id",
        )
    ).set_index("household_id")

    # Boolean mask (aligned positionally with `babies`) of babies born into
    # households whose members all share a single race_id.
    one_race_hh_filter = (
        hh_races.loc[babies["household_id"], "num_races"] == 1
    ).values

    # Default to 9, then overwrite with the head's race_id for single-race
    # households.
    babies["race_id"] = 9
    babies.loc[one_race_hh_filter, "race_id"] = hh_races.loc[
        babies.loc[one_race_hh_filter, "household_id"], "hh_race_id_of_head"
    ].values

    # Any race_id other than 1 or 2 maps to NaN and is labelled "other".
    # Assign the result back instead of calling fillna(inplace=True) on the
    # column selection: the in-place chained form is deprecated and does not
    # modify `babies` under pandas Copy-on-Write.
    babies["race"] = babies["race_id"].map({1: "white", 2: "black"}).fillna("other")

    # Finally add babies to persons table
    persons.local = pd.concat([persons.local, babies])
    log_execution_time(start_time, orca.get_injectable("year"), "birth")
def run_and_calibrate_birth_model(persons, households):
    """
    Run the birth model on eligible households, calibrating if configured.

    A household is eligible when it contains at least one person with
    ``sex == 2`` whose age is between 14 and 45 (inclusive on both ends).

    Parameters
    ----------
    persons : orca.Table
        The persons table; the ``sex``, ``age`` and ``household_id`` columns
        are read.
    households : orca.Table
        The households table; only the columns referenced by the birth
        model's expression are loaded.

    Returns
    -------
    object
        The output of the calibration procedure when one is configured,
        otherwise the output of ``birth_model.predict``. The ``birth`` step
        treats it as a household-indexed series in which 1 marks a birth.
    """
    # Person-level eligibility mask, then the distinct households it touches.
    sex_mask = persons["sex"] == 2
    age_mask = persons["age"].between(14, 45)
    eligible_households = persons.local.loc[
        sex_mask & age_mask, "household_id"
    ].unique()

    # Calibration settings come from the global DEMOS configuration.
    demos_config: DEMOSConfig = get_config()
    calibration_procedure = demos_config.birth_module_config.calibration_procedure

    # Load only the household columns the model expression needs, restricted
    # to the eligible households.
    birth_model = mm.get_step("birth")
    model_columns = columns_in_formula(birth_model.model_expression)
    household_frame = households.to_frame(model_columns)
    model_input = household_frame.loc[eligible_households]

    # Without a calibration procedure, predict directly.
    if calibration_procedure is None:
        return birth_model.predict(model_input)

    return calibration_procedure.calibrate_and_run_model(birth_model, model_input)