# Source code for demos.models.education

import orca
import numpy as np
import pandas as pd
from templates import estimated_models, modelmanager as mm
import time
from logging_logic import log_execution_time
from templates.utils.models import columns_in_formula

# Name under which the education step is registered.
STEP_NAME = "education"

# "table.column" identifiers associated with this step; the step below
# writes both of these columns on the persons table.
REQUIRED_COLUMNS = ["persons.edu", "persons.student"]


@orca.step(STEP_NAME)
def education(
    persons, edu_highschool_proportion, edu_highschool_grads_proportion, year
):
    """
    Simulate educational attainment and student status transitions.

    This step applies the education model to eligible persons (age > 15 and
    currently students) to determine who drops out. Students who stay are
    advanced by exactly one step along the ``edu`` code ladder, and the
    persons table is updated in place.

    Parameters
    ----------
    persons : orca.Table
        The persons table containing individual-level attributes. The columns
        ``age``, ``student`` and ``edu`` are read; ``edu`` and ``student`` are
        written.
    edu_highschool_proportion : pandas.Series
        Weights indexed by ``edu`` code. Entries ``15`` and ``16`` are used as
        the probabilities of moving a staying student from code 14 to code 15
        or 16.
    edu_highschool_grads_proportion : pandas.Series
        Weights indexed by ``edu`` code. Entries ``16`` and ``17`` are used as
        the probabilities of moving a staying student from code 15 to code 16
        or 17.
    year : int
        The current simulation year (used for execution-time logging).

    Returns
    -------
    None

    Notes
    -----
    - Modifies `persons.edu` and `persons.student` in place.
    - Only persons older than 15 and currently students are considered for
      dropout modeling; everyone else is left untouched by the
      stay/drop-out logic.
    - All transition masks are derived from a single snapshot of ``edu``
      taken before any student is advanced, so nobody can be promoted twice
      in the same simulation year.
    - The 14 -> {15, 16} and 15 -> {16, 17} transitions are drawn at random
      with ``numpy.random.choice`` using the supplied proportions.
    """
    start_time = time.time()

    # Run education model
    model = mm.get_step("education")
    model_variables = columns_in_formula(model.model_expression)
    model_filters = (persons.age > 15) & (persons.student == 1)
    model_data = persons.to_frame(model_variables)[model_filters]
    stop_student_list = model.predict(model_data).astype(int)
    # Persons outside the model universe get the sentinel -99, so they match
    # neither the "dropped out" (1) nor the "stayed" (0) mask below.
    reindexed_stop_student = stop_student_list.reindex(
        persons.local.index
    ).fillna(-99)

    # Update education years
    ## Kids
    persons.local.loc[persons["age"] == 3, "edu"] = 2
    persons.local.loc[persons["age"].isin([4, 5]), "edu"] = 4

    ## Dropping out
    persons.local.loc[reindexed_stop_student == 1, "student"] = 0

    ## Update those that stayed in school
    stayed_index = reindexed_stop_student == 0

    # Snapshot the education codes BEFORE advancing anybody. Building the
    # masks from the live column let a student promoted from 13 to 14 be
    # picked up again by the ``edu == 14`` mask further down and advanced a
    # second time within the same year.
    edu_before = persons["edu"].copy()

    tenth_grade_or_below_index = stayed_index & edu_before.between(
        4, 13, inclusive="both"
    )
    eleventh_grade_index = stayed_index & (edu_before == 14)
    twelveth_grade_index = stayed_index & (edu_before == 15)
    ged_or_hs_index = stayed_index & edu_before.isin([16, 17])
    college_index = stayed_index & (edu_before == 18)

    ### Codes 4 through 13 advance by one
    persons.local.loc[tenth_grade_or_below_index, "edu"] += 1

    ### Students with one year of college move to the next
    persons.local.loc[college_index, "edu"] = 19

    ### Students with GED or HS Degree move to college
    persons.local.loc[ged_or_hs_index, "edu"] = 18

    ### Students in grade 12 move to either 16 or 17 based on weights
    twelveth_grade_transition = np.random.choice(
        [16, 17],
        size=twelveth_grade_index.sum(),
        p=[
            edu_highschool_grads_proportion[16],
            edu_highschool_grads_proportion[17],
        ],
    )
    persons.local.loc[twelveth_grade_index, "edu"] = twelveth_grade_transition

    ### Students in grade 11 move to either 15 or 16 based on weights
    ### Proportion of 12th grade students to diploma highschool students is
    ### roughly maintained
    eleventh_grade_transition = np.random.choice(
        [15, 16],
        size=eleventh_grade_index.sum(),
        p=[edu_highschool_proportion[15], edu_highschool_proportion[16]],
    )
    persons.local.loc[eleventh_grade_index, "edu"] = eleventh_grade_transition

    # ``year`` is already injected as an argument; no need to look it up again.
    log_execution_time(start_time, year, "education")
@orca.injectable(name="edu_highschool_proportion", cache_scope="forever", cache=True)
def edu_highschool_proportion(data="persons.edu"):
    """
    Calculate the relative shares of education codes 15 and 16.

    Parameters
    ----------
    data : pandas.Series
        The `edu` column from the persons table.

    Returns
    -------
    pandas.Series
        Indexed by the codes ``15`` and ``16``; values are each code's share
        among persons holding either code. A code with no observations gets
        ``0.0`` instead of being absent, so label lookups never raise
        ``KeyError``.

    Notes
    -----
    Cached with ``cache_scope="forever"``, so the value is computed once.
    """
    codes = [15, 16]
    shares = data[data.isin(codes)].value_counts(normalize=True)
    # value_counts drops unobserved codes; make both labels always present.
    return shares.reindex(codes, fill_value=0.0)


# NOTE(review): unlike edu_highschool_proportion above, this injectable is not
# cached, so it is recomputed from the current persons table whenever it is
# injected. Confirm whether the asymmetry is intentional.
@orca.injectable(name="edu_highschool_grads_proportion")
def edu_highschool_grads_proportion(data="persons.edu"):
    """
    Calculate the relative shares of education codes 16 and 17.

    Parameters
    ----------
    data : pandas.Series
        The `edu` column from the persons table.

    Returns
    -------
    pandas.Series
        Indexed by the codes ``16`` and ``17``; values are each code's share
        among persons holding either code. A code with no observations gets
        ``0.0`` instead of being absent, so label lookups never raise
        ``KeyError``.
    """
    codes = [16, 17]
    shares = data[data.isin(codes)].value_counts(normalize=True)
    # value_counts drops unobserved codes; make both labels always present.
    return shares.reindex(codes, fill_value=0.0)


@orca.column(table_name="persons")
def education_group(data="persons.edu"):
    """
    Assign each person to an education group.

    Bins the `edu` column into three labelled intervals and returns the
    labels as strings.

    Parameters
    ----------
    data : pandas.Series
        The `edu` column from the persons table.

    Returns
    -------
    pandas.Series
        Education group labels as strings: ``"lte17"`` for values in
        [0, 18], ``"18-21"`` for (18, 22] and ``"gte22"`` for (22, 200].
        Values outside [0, 200] (or missing) become the string ``"nan"``.
    """
    education_intervals = [0, 18, 22, 200]
    education_labels = ["lte17", "18-21", "gte22"]
    # NOTE(review): pd.cut is right-closed by default, so edu == 18 is
    # labelled "lte17" and edu == 22 is labelled "18-21". The labels suggest
    # left-closed bins (right=False) may have been intended; left unchanged
    # because estimated models may depend on the current grouping.
    return pd.cut(
        data,
        bins=education_intervals,
        labels=education_labels,
        include_lowest=True,
    ).astype(str)