Source code for demos.models.kids_moving

import orca
from templates import estimated_models, modelmanager as mm
import time
import numpy as np
from logging_logic import log_execution_time
from templates.utils.models import columns_in_formula
from config import DEMOSConfig, KidsMovingModuleConfig, get_config

# Name under which the simulation step defined in this module is registered
# with orca (it is passed to the `@orca.step` decorator).
STEP_NAME = "kids_moving"
# Table-qualified columns that this module reads (`persons.age` and
# `persons.relate` are both used when selecting candidates for the model).
# NOTE(review): this list is not referenced anywhere else in this file;
# presumably it is consumed by an external validation routine -- confirm.
REQUIRED_COLUMNS = [
    "persons.age",
    "persons.relate",
]


@orca.step(STEP_NAME)
def kids_moving(persons, households, get_new_households):
    """
    Orca step: move dependents out of their parental household.

    The step runs in two phases. First, the calibrated `kids_move` model is
    evaluated (see `run_and_calibrate_model`) to flag which candidates leave
    home. Second, the flagged persons are placed in newly created households
    (see `update_households_after_kids`). Finally, the elapsed wall-clock
    time of the step is logged for the current simulation year.

    Parameters
    ----------
    persons : orca.Table
        Persons table; read for the model and updated in place.
    households : orca.Table
        Households table; rows for the new households are written in place.
    get_new_households : callable
        Called with the number of movers; must return that many new
        household IDs.

    Returns
    -------
    None

    Notes
    -----
    - Candidates are persons aged 16 or more whose `relate` code is one of
      [2, 3, 4, 7, 9, 14].
    - For movers, `persons.household_id` is replaced by a new ID and
      `persons.relate` is set to 0.
    - Each new household receives the configured geoid column and
      `lcm_county_id` of the household the mover left.
    - A flagged person stays put when they live alone or when every member
      of their household is flagged.
    """
    step_started_at = time.time()

    # Phase 1: who moves out according to the (calibrated) model.
    movers = run_and_calibrate_model(persons)

    # Phase 2: create the new households and re-home the movers.
    update_households_after_kids(persons, households, movers, get_new_households)

    simulation_year = orca.get_injectable("year")
    log_execution_time(step_started_at, simulation_year, "kids_moving")
def update_households_after_kids(persons, households, kids_moving, get_new_households):
    """
    Re-home the persons flagged by the kids-moving model.

    A flagged person is actually moved only when both of the following hold
    for the household they currently live in:

    * it has more than one member, and
    * not every member of it is flagged (someone stays behind).

    Each person that passes those checks gets a brand-new household ID and
    becomes `relate == 0` (head of household). The new household rows copy
    the configured geoid column and `lcm_county_id` from the household the
    person left.

    Parameters
    ----------
    persons : orca.Table
        Persons table; `household_id` and `relate` are updated in place.
    households : orca.Table
        Households table; the geoid column and `lcm_county_id` of the new
        household IDs are written in place.
    kids_moving : pandas.Series
        Move flags (1/True = moves) indexed by person ID. Persons absent
        from this Series are treated as not moving.
    get_new_households : callable
        Called with the number of movers; returns that many new household
        IDs.

    Returns
    -------
    None

    Notes
    -----
    - The geoid column name is taken from
      `kids_moving_module_config.geoid_col`.
    - NOTE(review): the household rows for the new IDs are written with
      `.loc[new_ids, col] = ...`; this presumably relies on
      `get_new_households` having already registered those IDs in the
      households table -- confirm.
    """
    # Module settings
    config: DEMOSConfig = get_config()
    settings: KidsMovingModuleConfig = config.kids_moving_module_config

    # Household of every person, in persons-table order.
    person_hh_ids = persons["household_id"]

    # Household sizes, and a per-person flag "my household has > 1 member".
    hh_sizes = persons.local.groupby("household_id").size()
    in_multi_person_hh = (hh_sizes.loc[person_hh_ids] > 1).values

    # Number of flagged movers per household. Reindexing onto every known
    # household up front makes the later label lookups much faster.
    mover_hh_ids = persons.local.loc[kids_moving.index, "household_id"]
    movers_per_hh = (
        kids_moving.groupby(mover_hh_ids)
        .sum()
        .reindex(person_hh_ids.unique())
        .fillna(0)
    )

    # Households where every member is flagged, broadcast back to persons.
    hh_fully_moving = movers_per_hh.loc[hh_sizes.index] == hh_sizes
    person_in_fully_moving_hh = hh_fully_moving.loc[person_hh_ids]

    # Household-level eligibility, expressed per person as a plain array.
    eligible_mask = (in_multi_person_hh & ~person_in_fully_moving_hh).values

    # Final per-person mask: flagged by the model AND in an eligible household.
    flagged = kids_moving.reindex(persons.local.index).fillna(0).astype(bool)
    mover_mask = flagged & eligible_mask

    # Remember where each mover came from so the new household can inherit
    # its geography.
    origin_hh_ids = persons.local.loc[mover_mask, "household_id"].values
    geoid_values = households.local.loc[origin_hh_ids, settings.geoid_col].values
    county_values = households.local.loc[origin_hh_ids, "lcm_county_id"].values

    new_hh_ids = get_new_households(mover_mask.sum())

    persons.local.loc[mover_mask, "household_id"] = new_hh_ids
    persons.local.loc[mover_mask, "relate"] = 0
    households.local.loc[new_hh_ids, settings.geoid_col] = geoid_values
    households.local.loc[new_hh_ids, "lcm_county_id"] = county_values


def _share_of_adults_staying(persons, is_adult, child_relate, predicted_moves):
    """Return the share of all adults (18+) who are dependents and did not move.

    The numerator is the count of adults whose `relate` code is in
    `child_relate`, minus the adults predicted to move; the denominator is
    the count of all adults.
    """
    ages_of_movers = persons.age.loc[predicted_moves[predicted_moves == 1].index]
    dependent_adults = (is_adult & persons.relate.isin(child_relate)).sum()
    adults_moving = (ages_of_movers >= 18).sum()
    return (dependent_adults - adults_moving) / is_adult.sum()


def run_and_calibrate_model(persons):
    """
    Predict who moves out, calibrating the model to a target share.

    The `kids_move` model is evaluated on persons aged 16 or more whose
    `relate` code marks them as a dependent. The share of adults (18+) still
    living as dependents after the predicted moves is then compared with the
    configured target. While the absolute difference exceeds the configured
    tolerance (and the iteration cap is not reached), the first fitted
    parameter is shifted by `log(observed_share / target_share)` and the
    model is re-evaluated.

    Parameters
    ----------
    persons : orca.Table
        Persons table; `age`, `relate` and the model's formula columns are
        read.

    Returns
    -------
    kids_moving
        Integer-cast output of `model.predict` from the last evaluation
        (1 = moves), for the filtered candidates only.

    Notes
    -----
    - `model.fitted_parameters[0]` is modified in place on the object
      returned by `mm.get_step`. NOTE(review): presumably this is the
      intercept and the adjustment persists for later calls -- confirm.
    - NOTE(review): if `observed_share` or `target_share` is 0 the log
      adjustment is infinite or NaN; no guard exists for that case.
    - Progress is reported with `print`.
    """
    # Module settings
    config: DEMOSConfig = get_config()
    settings: KidsMovingModuleConfig = config.kids_moving_module_config

    # These codes really mean `dependent`; `child` is determined by age.
    child_relate = [2, 3, 4, 7, 9, 14]

    target_share = settings.calibration_target_share
    max_iter = settings.max_iter

    # Assemble the estimation data for the candidates.
    model = mm.get_step("kids_move")
    formula_columns = columns_in_formula(model.model_expression)
    is_candidate = (persons.relate.isin(child_relate)) & (persons.age >= 16)
    candidates = persons.to_frame(formula_columns)[is_candidate]

    kids_moving = model.predict(candidates).astype(int)

    # NOTE: the calibration target is defined on adults (18+) whereas the
    # model is applied from age 16, so a separate adult mask is required.
    is_adult = persons.age >= 18
    observed_share = _share_of_adults_staying(
        persons, is_adult, child_relate, kids_moving
    )
    error = observed_share - target_share

    print("Calibrating Kids moving model")
    calibrate_iteration = 0
    while (
        abs(error) > settings.calibration_tolerance
        and calibrate_iteration < max_iter
    ):
        print(f"{calibrate_iteration} iteration error: {error}")

        # Shift the first fitted parameter towards the target share.
        model.fitted_parameters[0] += np.log(observed_share / target_share)

        kids_moving = model.predict(candidates).astype(int)
        observed_share = _share_of_adults_staying(
            persons, is_adult, child_relate, kids_moving
        )
        error = observed_share - target_share
        calibrate_iteration += 1

    print(f"{calibrate_iteration} iteration error: {error}")
    return kids_moving