# Source code for compass.utilities.costs

"""COMPASS cost computation utilities"""

LLM_COST_REGISTRY = {
    # Base OpenAI model pricing
    "o1": {"prompt": 15, "response": 60},
    "o3-mini": {"prompt": 1.1, "response": 4.4},
    "gpt-4.5": {"prompt": 75, "response": 150},
    "gpt-4o": {"prompt": 2.5, "response": 10},
    "gpt-4o-mini": {"prompt": 0.15, "response": 0.6},
    "gpt-4.1": {"prompt": 2, "response": 8},
    "gpt-4.1-mini": {"prompt": 0.4, "response": 1.6},
    "gpt-4.1-nano": {"prompt": 0.1, "response": 0.4},
    "gpt-5": {"prompt": 1.25, "response": 10},
    "gpt-5-mini": {"prompt": 0.25, "response": 2},
    "gpt-5-nano": {"prompt": 0.05, "response": 0.4},
    "gpt-5-chat-latest": {"prompt": 1.25, "response": 10},
    # "compassop-" prefixed deployment aliases (same rates as the base models)
    "compassop-gpt-4o": {"prompt": 2.5, "response": 10},
    "compassop-gpt-4o-mini": {"prompt": 0.15, "response": 0.6},
    "compassop-gpt-4.1": {"prompt": 2, "response": 8},
    "compassop-gpt-4.1-mini": {"prompt": 0.4, "response": 1.6},
    "compassop-gpt-4.1-nano": {"prompt": 0.1, "response": 0.4},
    "compassop-gpt-5": {"prompt": 1.25, "response": 10},
    "compassop-gpt-5-mini": {"prompt": 0.25, "response": 2},
    "compassop-gpt-5-nano": {"prompt": 0.05, "response": 0.4},
    "compassop-gpt-5-chat-latest": {"prompt": 1.25, "response": 10},
    # Other project-specific deployment aliases
    "egswaterord-gpt4.1-mini": {"prompt": 0.4, "response": 1.6},
    "wetosa-gpt-4o": {"prompt": 2.5, "response": 10},
    "wetosa-gpt-4o-mini": {"prompt": 0.15, "response": 0.6},
    "wetosa-gpt-4.1": {"prompt": 2, "response": 8},
    "wetosa-gpt-4.1-mini": {"prompt": 0.4, "response": 1.6},
    "wetosa-gpt-4.1-nano": {"prompt": 0.1, "response": 0.4},
    "wetosa-gpt-5": {"prompt": 1.25, "response": 10},
    "wetosa-gpt-5-mini": {"prompt": 0.25, "response": 2},
    "wetosa-gpt-5-nano": {"prompt": 0.05, "response": 0.4},
    "wetosa-gpt-5-chat-latest": {"prompt": 1.25, "response": 10},
    # Embedding models bill prompt tokens only (no "response" key)
    "text-embedding-ada-002": {"prompt": 0.10},
}
"""LLM Costs registry

Maps a model name to a dictionary of per-token-type rates, expressed in
$/million tokens. Rates may be given for "prompt" and/or "response"
tokens; a missing key is treated as a rate of zero.
"""


def cost_for_model(model_name, prompt_tokens, completion_tokens):
    """Compute the API costs for a model given the token usage

    Parameters
    ----------
    model_name : str
        Name of the model. Needs to be registered as a key in
        :obj:`LLM_COST_REGISTRY` for this method to return a non-zero
        value.
    prompt_tokens, completion_tokens : int
        Number of prompt and completion tokens used, respectively.

    Returns
    -------
    float
        Total cost based on the token usage.
    """
    # Unregistered models fall back to an empty rate table, i.e. $0
    rates = LLM_COST_REGISTRY.get(model_name, {})
    token_counts = {"prompt": prompt_tokens, "response": completion_tokens}
    # Rates are in $/million tokens, so scale counts down by 1e6
    return sum(
        count / 1e6 * rates.get(token_type, 0)
        for token_type, count in token_counts.items()
    )
def compute_cost_from_totals(totals):
    """Compute total cost from total tracked usage

    Parameters
    ----------
    totals : dict
        Dictionary where keys are model names and their corresponding
        usage statistics are values. Each usage statistics dictionary
        should contain "prompt_tokens" and "response_tokens" keys
        indicating the number of tokens used for prompts and responses,
        respectively. This dictionary is typically obtained from the
        `tracker_totals` property of a
        :class:`compass.services.usage.UsageTracker` instance.

    Returns
    -------
    float
        Total cost based on the tracked usage.
    """
    running_total = 0
    for model_name, stats in totals.items():
        # Missing token counts default to zero rather than raising
        running_total += cost_for_model(
            model_name,
            prompt_tokens=stats.get("prompt_tokens", 0),
            completion_tokens=stats.get("response_tokens", 0),
        )
    return running_total
def compute_total_cost_from_usage(tracked_usage):
    """Compute total cost from total tracked usage

    Parameters
    ----------
    tracked_usage : compass.services.usage.UsageTracker or dict
        Dictionary where keys are usage categories (typically
        jurisdiction names) and values are dictionaries containing usage
        details. The usage details dictionaries should have a
        "tracker_totals" key, which maps to another dictionary. This
        innermost dictionary should have model names as keys and their
        corresponding usage statistics as values. Each usage statistics
        dictionary should contain "prompt_tokens" and "response_tokens"
        keys indicating the number of tokens used for prompts and
        responses, respectively.

    Returns
    -------
    float
        Total LLM cost based on the tracked usage.
    """
    # One sub-total per usage category; categories without tracker
    # totals contribute $0
    category_costs = [
        compute_cost_from_totals(details.get("tracker_totals", {}))
        for details in tracked_usage.values()
    ]
    return sum(category_costs)