Source code for scCS.scores

"""
scores.py — Core commitment score math engine for scCS.

Implements the generalized k-furcation commitment score framework,
extending the 2-state (homeostatic/activated) formulation from:

    Kriukov et al. (2025) "Single-cell transcriptome of myeloid cells in
    response to transplantation of human retinal neurons reveals reversibility
    of microglial activation"

Mathematical framework
----------------------
Given per-cell RNA velocity vectors (vx_i, vy_i) in the scCS radial embedding:

1.  magnitude_i  = sqrt(vx_i^2 + vy_i^2)                          [Eq. 1]
2.  theta_i      = atan2(vy_i, vx_i) -> [0, 360)                  [Eq. 2-3]
3.  Bin angles into N bins of width 360/N degrees                  [Eq. 4-5]
4.  M_bin(b)     = sum of magnitude_i for all cells in bin b       [Eq. 6]
5.  M_sector(j)  = sum of M_bin(b) for b in sector j              [Eq. 7]
6.  unCS(i,j)    = M_sector(i) / M_sector(j)                      [Eq. 8]
7.  nCS(i,j)     = unCS(i,j) * n_cells(j) / n_cells(i)           [Eq. 9]

Generalization to k fates:
-  CS_vec        = [M_sector(1), ..., M_sector(k)]  (raw)
-  p_vec         = CS_vec / sum(CS_vec)              (normalized)
-  H_pop         = -sum(p_k * log(p_k)) / log(k)    (population entropy)
-  H_cell_j      = mean_i[ h_bin(s_ij) ]            (per-fate cell entropy, k values)
-  H_nn_i        = H( mean_{n in NN(i)}(cell_scores[n]) )  (NN-smoothed per-cell entropy)
-  cell_scores   = dot(unit_velocity_i, unit_direction_to_fate_j)  (per-cell)

Entropy notes
-------------
Three complementary entropy metrics are provided:

``compute_population_entropy(p_vec)``  →  float
    Entropy of the aggregate commitment vector (M_sector / sum(M_sector)).
    Single scalar.  Measures how evenly total velocity mass is distributed.
    **Limitation**: high for any balanced split, even if every cell is decisive.

``compute_per_fate_cell_entropy(cell_scores)``  →  ndarray shape (k,)
    For each fate j: binary entropy of each cell's affinity toward j,
    averaged over all cells.  h_j = mean_i[ H_bin(s_ij, 1-s_ij) ].
    Tells you per-fate how individually decisive cells are toward that fate.
    Low h_j = cells are sharply committed (or sharply not committed) to fate j.
    High h_j = cells are ambiguous about fate j.

``compute_nn_cell_entropy(cell_scores, coords, k_nn)``  →  ndarray shape (n_cells,)
    For each cell: average cell_scores over its k_nn nearest neighbors in
    the scCS embedding (X_sccs), then compute full k-way entropy on the
    smoothed scores.  Spatially local smoothing removes single-cell noise.
    Use the elbow plots (plot_nn_entropy_elbow) to choose k_nn.
"""

from __future__ import annotations

import warnings
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Sequence, Tuple

import numpy as np
import pandas as pd


# ---------------------------------------------------------------------------
# Low-level vector math
# ---------------------------------------------------------------------------


[docs]
def compute_magnitudes(vx: np.ndarray, vy: np.ndarray) -> np.ndarray:
    """Euclidean norm of 2D velocity vectors (Eq. 1).

    Parameters
    ----------
    vx, vy : array-like, shape (n_cells,)
        x and y components of velocity vectors.

    Returns
    -------
    magnitudes : np.ndarray, shape (n_cells,)
        Non-negative magnitudes; NaN inputs yield NaN.
    """
    vx = np.asarray(vx, dtype=float)
    vy = np.asarray(vy, dtype=float)
    return np.sqrt(vx ** 2 + vy ** 2)




[docs]
def compute_angles(vx: np.ndarray, vy: np.ndarray) -> np.ndarray:
    """Angle of each velocity vector in [0, 360) degrees (Eq. 2-3).

    Parameters
    ----------
    vx, vy : array-like, shape (n_cells,)

    Returns
    -------
    angles_deg : np.ndarray, shape (n_cells,)
        Angles in degrees, range [0, 360).  NaN for zero-magnitude vectors.
    """
    vx = np.asarray(vx, dtype=float)
    vy = np.asarray(vy, dtype=float)
    angles_rad = np.arctan2(vy, vx)
    angles_deg = np.degrees(angles_rad) % 360.0
    zero_mask = (vx == 0) & (vy == 0)
    angles_deg[zero_mask] = np.nan
    return angles_deg




[docs]
def bin_angles(
    angles_deg: np.ndarray,
    magnitudes: np.ndarray,
    n_bins: int = 36,
) -> Tuple[np.ndarray, np.ndarray]:
    """Discretize angles and accumulate magnitudes per bin (Eq. 4-6).

    Parameters
    ----------
    angles_deg : np.ndarray, shape (n_cells,)
        Angles in [0, 360).  NaN values are ignored.
    magnitudes : np.ndarray, shape (n_cells,)
        Per-cell magnitudes.  NaN values are ignored.
    n_bins : int
        Number of angular bins.  Default 36 (10° each, as in manuscript).

    Returns
    -------
    bin_edges : np.ndarray, shape (n_bins + 1,)
        Bin edge angles in degrees.
    M_bin : np.ndarray, shape (n_bins,)
        Cumulative magnitude per bin.
    """
    bin_edges = np.linspace(0.0, 360.0, n_bins + 1)
    M_bin = np.zeros(n_bins, dtype=float)

    valid = ~(np.isnan(angles_deg) | np.isnan(magnitudes))
    a = angles_deg[valid]
    m = magnitudes[valid]

    bin_indices = np.searchsorted(bin_edges[1:], a, side="right")
    bin_indices = np.clip(bin_indices, 0, n_bins - 1)
    np.add.at(M_bin, bin_indices, m)

    return bin_edges, M_bin



# ---------------------------------------------------------------------------
# Sector definition helpers
# ---------------------------------------------------------------------------


[docs]
def equal_sectors(k: int, n_bins: int = 36) -> List[List[int]]:
    """Divide n_bins into k equal contiguous sectors.

    Parameters
    ----------
    k : int
        Number of fates / sectors.
    n_bins : int
        Total number of angular bins.

    Returns
    -------
    sectors : list of lists
        Each inner list contains the bin indices belonging to that sector.
    """
    if n_bins % k != 0:
        warnings.warn(
            f"n_bins={n_bins} is not evenly divisible by k={k}. "
            "Sectors will have unequal sizes.",
            stacklevel=2,
        )
    bins_per_sector = n_bins / k
    sectors = []
    for j in range(k):
        start = int(round(j * bins_per_sector))
        end = int(round((j + 1) * bins_per_sector))
        sectors.append(list(range(start, end)))
    return sectors




[docs]
def centroid_sectors(
    fate_centroids: np.ndarray,
    root_centroid: np.ndarray,
    n_bins: int = 36,
) -> Tuple[List[List[int]], np.ndarray]:
    """Define sectors anchored to fate centroid directions from the root.

    Each sector is centered on the angle from the root to the corresponding
    fate centroid.  Sector boundaries are placed at the midpoints between
    adjacent fate angles.

    Parameters
    ----------
    fate_centroids : np.ndarray, shape (k, 2)
        2D embedding coordinates of each fate centroid.
    root_centroid : np.ndarray, shape (2,)
        2D embedding coordinate of the root / progenitor centroid.
    n_bins : int
        Number of angular bins.

    Returns
    -------
    sectors : list of k lists of bin indices
    fate_angles : np.ndarray, shape (k,)
        Central angle (degrees) for each fate.
    """
    fate_centroids = np.asarray(fate_centroids, dtype=float)
    root_centroid = np.asarray(root_centroid, dtype=float)

    deltas = fate_centroids - root_centroid  # (k, 2)
    fate_angles = np.degrees(np.arctan2(deltas[:, 1], deltas[:, 0])) % 360.0

    k = len(fate_angles)
    sort_idx = np.argsort(fate_angles)
    sorted_angles = fate_angles[sort_idx]

    bin_width = 360.0 / n_bins
    bin_centers = np.arange(n_bins) * bin_width + bin_width / 2.0

    sectors: List[List[int]] = [[] for _ in range(k)]
    for b, center in enumerate(bin_centers):
        dists = np.array([
            min(abs(center - fa), 360.0 - abs(center - fa))
            for fa in sorted_angles
        ])
        nearest = int(np.argmin(dists))
        sectors[nearest].append(b)

    inv_sort = np.argsort(sort_idx)
    sectors = [sectors[inv_sort[j]] for j in range(k)]

    return sectors, fate_angles



# ---------------------------------------------------------------------------
# Sector magnitude accumulation
# ---------------------------------------------------------------------------


[docs]
def compute_sector_magnitudes(
    M_bin: np.ndarray,
    sectors: List[List[int]],
) -> np.ndarray:
    """Sum M_bin values within each sector (Eq. 7).

    Parameters
    ----------
    M_bin : np.ndarray, shape (n_bins,)
    sectors : list of k lists of bin indices

    Returns
    -------
    M_sector : np.ndarray, shape (k,)
    """
    return np.array([M_bin[s].sum() for s in sectors], dtype=float)



# ---------------------------------------------------------------------------
# Commitment score computations
# ---------------------------------------------------------------------------


[docs]
def compute_unCS(M_sector_i: float, M_sector_j: float) -> float:
    """Unnormalized commitment score of fate i relative to fate j (Eq. 8).

    unCS > 1  =>  population is more committed to fate i than fate j.

    Parameters
    ----------
    M_sector_i, M_sector_j : float
        Cumulative magnitudes for fates i and j.

    Returns
    -------
    float  (inf if M_sector_j == 0)
    """
    if M_sector_j == 0:
        return np.inf
    return float(M_sector_i / M_sector_j)




[docs]
def compute_nCS(
    M_sector_i: float,
    M_sector_j: float,
    n_cells_i: int,
    n_cells_j: int,
) -> float:
    """Cell-number-normalized commitment score (Eq. 9).

    nCS = (M_sector_i / M_sector_j) * (n_cells_j / n_cells_i)

    Parameters
    ----------
    M_sector_i, M_sector_j : float
    n_cells_i, n_cells_j : int
        Number of cells in each population / trajectory arm.

    Returns
    -------
    float
    """
    if M_sector_j == 0 or n_cells_i == 0:
        return np.inf
    return float((M_sector_i / M_sector_j) * (n_cells_j / n_cells_i))




[docs]
def compute_commitment_vector(M_sector: np.ndarray) -> np.ndarray:
    """Normalize sector magnitudes to a probability-like commitment vector.

    Parameters
    ----------
    M_sector : np.ndarray, shape (k,)

    Returns
    -------
    p_vec : np.ndarray, shape (k,)
        Sums to 1.  All-zero input returns uniform distribution.
    """
    total = M_sector.sum()
    if total == 0:
        return np.ones(len(M_sector)) / len(M_sector)
    return M_sector / total




[docs]
def compute_population_entropy(p_vec: np.ndarray) -> float:
    """Shannon entropy of the aggregate commitment vector, normalized to [0, 1].

    Operates on the *population-level* commitment vector
    ``p_vec = M_sector / sum(M_sector)``, which reflects how total velocity
    mass is distributed across fate sectors.

    H_pop = 0  =>  all velocity mass concentrated in one sector.
    H_pop = 1  =>  velocity mass uniformly spread across all sectors.

    .. warning::
        This metric can be misleading when cells are split between fates.
        A population where 50 % of cells strongly commit to fate A and 50 %
        strongly commit to fate B will yield H_pop ≈ 1 (maximum uncertainty),
        even though every individual cell is decisive.  Use
        :func:`compute_mean_cell_entropy` as the primary commitment metric.

    Parameters
    ----------
    p_vec : np.ndarray, shape (k,)
        Normalized commitment vector (sums to 1).

    Returns
    -------
    float in [0, 1]
    """
    k = len(p_vec)
    if k <= 1:
        return 0.0
    p = p_vec[p_vec > 0]
    H_raw = -np.sum(p * np.log(p))
    H_max = np.log(k)
    return float(H_raw / H_max)



# Backward-compatible alias — emits no warning at function level;
# the DeprecationWarning is raised by CommitmentScoreResult.commitment_entropy

[docs]
compute_commitment_entropy = compute_population_entropy




[docs]
def compute_mean_cell_entropy(cell_scores: np.ndarray) -> float:
    """Mean per-cell Shannon entropy of fate-affinity scores, normalized to [0, 1].

    For each cell *i*, computes the normalized Shannon entropy of its
    row-normalized fate-affinity vector ``s_i = cell_scores[i, :]``::

        h_i = -sum_j( s_ij * log(s_ij) ) / log(k)

    and returns the mean over all cells::

        H_cell = mean_i( h_i )

    This is the **recommended primary entropy metric** because it measures
    individual cell commitment uncertainty rather than population-level
    velocity-mass balance.

    Interpretation
    --------------
    H_cell ≈ 0  =>  cells are individually decisive (each cell strongly
                    favors one fate).  Occurs in committed populations
                    regardless of whether cells split between fates.
    H_cell ≈ 1  =>  cells are individually undecided (each cell's velocity
                    points equally toward all fates).  Occurs in genuinely
                    uncommitted / progenitor-like populations.

    Contrast with :func:`compute_population_entropy`
    -------------------------------------------------
    A population split 50/50 between two strongly committed sub-groups gives:
    - H_pop ≈ 1.0  (misleadingly high — velocity mass is balanced)
    - H_cell ≈ 0.0 (correctly low — each cell is individually committed)

    Parameters
    ----------
    cell_scores : np.ndarray, shape (n_cells, k)
        Per-cell fate-affinity matrix, row-normalized to sum to 1.
        Typically the output of :func:`compute_cell_scores`.

    Returns
    -------
    float in [0, 1]
        Mean normalized per-cell entropy.  Returns 0.0 for a single cell
        or single fate.
    """
    cell_scores = np.asarray(cell_scores, dtype=float)
    n_cells, k = cell_scores.shape
    if k <= 1 or n_cells == 0:
        return 0.0
    H_max = np.log(k)
    with np.errstate(divide="ignore", invalid="ignore"):
        log_s = np.where(cell_scores > 0, np.log(cell_scores), 0.0)
    per_cell_H = -np.sum(cell_scores * log_s, axis=1) / H_max
    return float(np.mean(per_cell_H))




[docs]
def compute_per_fate_cell_entropy(cell_scores: np.ndarray) -> np.ndarray:
    """Per-fate mean binary cell entropy of fate-affinity scores.

    For each fate *j*, treats each cell's affinity score ``s_ij`` as a
    binary distribution ``[s_ij, 1 - s_ij]`` and computes the normalized
    binary Shannon entropy, then averages over all cells::

        h_j = mean_i[ H_bin(s_ij) ]
            = mean_i[ -(s_ij * log(s_ij) + (1-s_ij) * log(1-s_ij)) / log(2) ]

    Interpretation
    --------------
    h_j ≈ 0  =>  cells are sharply decisive about fate j (either strongly
                 committed or strongly not committed).
    h_j ≈ 1  =>  cells are ambiguous about fate j (scores cluster near 0.5).

    This is the per-fate analogue of :func:`compute_mean_cell_entropy`.

    Parameters
    ----------
    cell_scores : np.ndarray, shape (n_cells, k)
        Per-cell fate-affinity matrix, row-normalized to sum to 1.
        Typically the output of :func:`compute_cell_scores`.

    Returns
    -------
    per_fate_entropy : np.ndarray, shape (k,)
        Mean binary entropy for each fate.  Returns zeros for k=0 or n=0.
    """
    cell_scores = np.asarray(cell_scores, dtype=float)
    if cell_scores.ndim != 2 or cell_scores.size == 0:
        return np.zeros(cell_scores.shape[1] if cell_scores.ndim == 2 else 0)
    n_cells, k = cell_scores.shape
    if n_cells == 0 or k == 0:
        return np.zeros(k)

    s = np.clip(cell_scores, 1e-15, 1 - 1e-15)   # avoid log(0)
    one_minus_s = 1.0 - s
    # Binary entropy per cell per fate, normalized by log(2)
    h = -(s * np.log(s) + one_minus_s * np.log(one_minus_s)) / np.log(2)
    return h.mean(axis=0)   # shape (k,)




[docs]
def compute_nn_cell_entropy(
    cell_scores: np.ndarray,
    coords: np.ndarray,
    k_nn: int,
) -> np.ndarray:
    """NN-smoothed per-cell commitment entropy in the scCS embedding.

    For each cell *i*:
    1. Find its ``k_nn`` nearest neighbors in ``coords`` (X_sccs, 2D).
    2. Average ``cell_scores`` over those neighbors (including cell *i* itself).
    3. Compute normalized k-way Shannon entropy on the smoothed scores.

    This removes single-cell velocity noise while preserving local commitment
    structure.  Use :func:`scCS.plot.plot_nn_entropy_elbow` to choose k_nn.

    Parameters
    ----------
    cell_scores : np.ndarray, shape (n_cells, k)
        Per-cell fate-affinity matrix from :func:`compute_cell_scores`.
    coords : np.ndarray, shape (n_cells, 2)
        2D scCS embedding coordinates (``adata_sub.obsm['X_sccs']``).
    k_nn : int
        Number of nearest neighbors to average over (excluding self).
        Self is always included, so the effective window is k_nn + 1 cells.

    Returns
    -------
    nn_entropy : np.ndarray, shape (n_cells,)
        Normalized Shannon entropy of the NN-smoothed fate scores per cell,
        in [0, 1].
    """
    from sklearn.neighbors import NearestNeighbors

    cell_scores = np.asarray(cell_scores, dtype=float)
    coords = np.asarray(coords, dtype=float)
    n_cells, k = cell_scores.shape

    if n_cells == 0 or k <= 1:
        return np.zeros(n_cells)

    # k_nn neighbors + self; cap at n_cells
    n_query = min(k_nn + 1, n_cells)
    nbrs = NearestNeighbors(n_neighbors=n_query, algorithm="ball_tree").fit(coords)
    _, indices = nbrs.kneighbors(coords)   # (n_cells, n_query), first col = self

    # Average cell_scores over neighborhood (including self)
    smoothed = cell_scores[indices].mean(axis=1)   # (n_cells, k)

    # Normalize rows to sum to 1 (averaging may break normalization slightly)
    row_sums = smoothed.sum(axis=1, keepdims=True)
    with np.errstate(invalid="ignore", divide="ignore"):
        smoothed = np.where(row_sums > 0, smoothed / row_sums, 1.0 / k)

    # Normalized k-way Shannon entropy per cell
    H_max = np.log(k)
    with np.errstate(divide="ignore", invalid="ignore"):
        log_s = np.where(smoothed > 0, np.log(smoothed), 0.0)
    nn_entropy = -np.sum(smoothed * log_s, axis=1) / H_max   # (n_cells,)

    return nn_entropy




[docs]
def compute_pairwise_cs_matrix(
    M_sector: np.ndarray,
    n_cells_per_fate: Optional[np.ndarray] = None,
    normalized: bool = True,
) -> np.ndarray:
    """Compute full k x k pairwise commitment score matrix.

    Entry [i, j] = CS(i relative to j).
    Diagonal is 1.0.

    Parameters
    ----------
    M_sector : np.ndarray, shape (k,)
    n_cells_per_fate : np.ndarray, shape (k,), optional
        If provided and normalized=True, computes nCS; else unCS.
    normalized : bool

    Returns
    -------
    cs_matrix : np.ndarray, shape (k, k)
    """
    k = len(M_sector)
    cs_matrix = np.ones((k, k), dtype=float)
    for i in range(k):
        for j in range(k):
            if i == j:
                continue
            if normalized and n_cells_per_fate is not None:
                cs_matrix[i, j] = compute_nCS(
                    M_sector[i], M_sector[j],
                    int(n_cells_per_fate[i]), int(n_cells_per_fate[j]),
                )
            else:
                cs_matrix[i, j] = compute_unCS(M_sector[i], M_sector[j])
    return cs_matrix



# ---------------------------------------------------------------------------
# Per-cell fate affinity scores
# ---------------------------------------------------------------------------


[docs]
def compute_cell_scores(
    vx: np.ndarray,
    vy: np.ndarray,
    fate_centroids: np.ndarray,
    root_centroid: np.ndarray,
    mag_weight: bool = True,
    mag_threshold_pct: float = 5.0,
) -> np.ndarray:
    """Per-cell fate affinity: magnitude-weighted cosine similarity to fate direction.

    For each cell i and fate j, computes:
        raw_score(i, j) = dot(unit_v_i, unit_d_j)
    where unit_d_j is the unit vector from root_centroid to fate_centroid_j.

    Scores are shifted to [0, 1] via (score + 1) / 2, then optionally
    weighted by the cell's velocity magnitude (normalized to [0, 1]).
    Cells with velocity magnitude below ``mag_threshold_pct`` percentile
    are down-weighted toward the uniform distribution (1/k), reducing noise
    from near-zero-velocity cells (typically progenitors at the origin).

    Parameters
    ----------
    vx, vy : np.ndarray, shape (n_cells,)
    fate_centroids : np.ndarray, shape (k, 2)
    root_centroid : np.ndarray, shape (2,)
    mag_weight : bool
        If True (default), weight each cell's score by its normalized velocity
        magnitude.  Low-magnitude cells are pulled toward the uniform
        distribution (1/k), reducing noise from near-stationary cells.
        If False, all cells contribute equally (original behavior).
    mag_threshold_pct : float
        Percentile of velocity magnitudes below which cells are considered
        near-stationary and receive zero weight (replaced by 1/k).
        Default: 5th percentile.  Only used when mag_weight=True.

    Returns
    -------
    cell_scores : np.ndarray, shape (n_cells, k)
        Per-cell affinity for each fate, row-normalized to sum to 1.
        Low-magnitude cells have scores close to 1/k (uniform).
    """
    vx = np.asarray(vx, dtype=float)
    vy = np.asarray(vy, dtype=float)
    fate_centroids = np.asarray(fate_centroids, dtype=float)
    root_centroid = np.asarray(root_centroid, dtype=float)
    k = len(fate_centroids)

    mag = compute_magnitudes(vx, vy)
    with np.errstate(invalid="ignore", divide="ignore"):
        uvx = np.where(mag > 0, vx / mag, 0.0)
        uvy = np.where(mag > 0, vy / mag, 0.0)

    deltas = fate_centroids - root_centroid  # (k, 2)
    delta_mag = np.linalg.norm(deltas, axis=1, keepdims=True)
    with np.errstate(invalid="ignore", divide="ignore"):
        unit_dirs = np.where(delta_mag > 0, deltas / delta_mag, 0.0)  # (k, 2)

    V = np.stack([uvx, uvy], axis=1)  # (n_cells, 2)
    scores = V @ unit_dirs.T          # (n_cells, k), cosine similarities in [-1, 1]

    # Shift to [0, 1]
    scores = (scores + 1.0) / 2.0

    if mag_weight:
        # Normalize magnitudes to [0, 1] for weighting
        mag_threshold = np.percentile(mag[mag > 0], mag_threshold_pct) if (mag > 0).any() else 0.0
        mag_clipped = np.where(mag >= mag_threshold, mag, 0.0)
        mag_max = mag_clipped.max()
        if mag_max > 0:
            w = mag_clipped / mag_max  # (n_cells,) in [0, 1]
        else:
            w = np.ones(len(mag))
        # Blend: w * directional_score + (1-w) * uniform
        uniform = np.full((len(vx), k), 1.0 / k)
        scores = w[:, None] * scores + (1.0 - w[:, None]) * uniform

    # Row-normalize to sum to 1
    row_sums = scores.sum(axis=1, keepdims=True)
    with np.errstate(invalid="ignore", divide="ignore"):
        scores = np.where(row_sums > 0, scores / row_sums, 1.0 / k)

    return scores



# ---------------------------------------------------------------------------
# Bootstrap confidence intervals for CS values
# ---------------------------------------------------------------------------


[docs]
def bootstrap_cs(
    vx: np.ndarray,
    vy: np.ndarray,
    sectors: List[List[int]],
    n_cells_per_fate: np.ndarray,
    n_bins: int = 36,
    n_bootstrap: int = 500,
    ci: float = 0.95,
    seed: int = 42,
    normalized: bool = True,
    stratified: bool = False,
    fate_cell_indices: Optional[Sequence] = None,
) -> Dict:
    """Bootstrap confidence intervals for pairwise commitment scores.

    Resamples cells with replacement ``n_bootstrap`` times, recomputes
    unCS and nCS for each bootstrap replicate, and returns the empirical
    CI bounds.

    Parameters
    ----------
    vx, vy : np.ndarray, shape (n_cells,)
        Velocity components in scCS space.
    sectors : list of k lists of bin indices
        Sector definition from centroid_sectors() or equal_sectors().
    n_cells_per_fate : np.ndarray, shape (k,)
        Number of cells per fate arm (used for nCS normalization).
    n_bins : int
        Number of angular bins.
    n_bootstrap : int
        Number of bootstrap replicates.  Default 500.
    ci : float
        Confidence interval level.  Default 0.95 (95% CI).
    seed : int
    normalized : bool
        If True, return CI for nCS; if False, for unCS.
    stratified : bool
        If True, resample cells within each fate arm separately (preserving
        arm cell counts), then concatenate.  Prevents bootstrap replicates
        with very few cells in one arm.  Requires ``fate_cell_indices``.
        Default False (uniform resampling, original behavior).
    fate_cell_indices : sequence of array-like, optional
        List of k arrays, each containing the integer indices of cells
        belonging to that fate arm.  Required when ``stratified=True``.
        Typically ``fate_map.fate_cell_indices``.

    Returns
    -------
    dict with keys:
        'mean'   : np.ndarray (k, k) — mean CS across replicates
        'ci_low' : np.ndarray (k, k) — lower CI bound
        'ci_high': np.ndarray (k, k) — upper CI bound
        'std'    : np.ndarray (k, k) — standard deviation across replicates
        'n_bootstrap': int
        'ci_level': float
    """
    rng = np.random.default_rng(seed)
    n_cells = len(vx)
    k = len(sectors)
    alpha = (1.0 - ci) / 2.0

    boot_matrices = np.zeros((n_bootstrap, k, k), dtype=float)

    if stratified and fate_cell_indices is not None:
        # Validate: all indices must be within [0, n_cells)
        for arm_idx in fate_cell_indices:
            arm_arr = np.asarray(arm_idx)
            if len(arm_arr) == 0:
                raise ValueError(
                    "stratified=True requires non-empty fate_cell_indices for each arm."
                )
    elif stratified:
        warnings.warn(
            "stratified=True requires fate_cell_indices. "
            "Falling back to uniform resampling.",
            stacklevel=2,
        )

    for b in range(n_bootstrap):
        if stratified and fate_cell_indices is not None:
            # Resample within each fate arm separately, then concatenate
            boot_idx_parts = []
            for arm_idx in fate_cell_indices:
                arm_arr = np.asarray(arm_idx)
                boot_idx_parts.append(
                    rng.choice(arm_arr, size=len(arm_arr), replace=True)
                )
            idx = np.concatenate(boot_idx_parts)
        else:
            idx = rng.integers(0, n_cells, size=n_cells)

        vx_b, vy_b = vx[idx], vy[idx]

        mag_b = compute_magnitudes(vx_b, vy_b)
        ang_b = compute_angles(vx_b, vy_b)
        _, M_bin_b = bin_angles(ang_b, mag_b, n_bins=n_bins)
        M_sector_b = compute_sector_magnitudes(M_bin_b, sectors)

        boot_matrices[b] = compute_pairwise_cs_matrix(
            M_sector_b,
            n_cells_per_fate=n_cells_per_fate if normalized else None,
            normalized=normalized,
        )

    # Replace inf with nan for statistics
    boot_matrices = np.where(np.isinf(boot_matrices), np.nan, boot_matrices)

    return {
        "mean":        np.nanmean(boot_matrices, axis=0),
        "ci_low":      np.nanpercentile(boot_matrices, alpha * 100, axis=0),
        "ci_high":     np.nanpercentile(boot_matrices, (1 - alpha) * 100, axis=0),
        "std":         np.nanstd(boot_matrices, axis=0),
        "n_bootstrap": n_bootstrap,
        "ci_level":    ci,
    }



# ---------------------------------------------------------------------------
# Result container
# ---------------------------------------------------------------------------

@dataclass

[docs]
class CommitmentScoreResult:
    """Container for all commitment score outputs.

    Attributes
    ----------
    fate_names : list of str
    M_bin : np.ndarray, shape (n_bins,)
        Cumulative magnitude per angular bin.
    bin_edges : np.ndarray, shape (n_bins + 1,)
    sectors : list of k lists of bin indices
    M_sector : np.ndarray, shape (k,)
        Cumulative magnitude per fate sector.
    n_cells_per_fate : np.ndarray, shape (k,)
    commitment_vector : np.ndarray, shape (k,)
        Normalized (sums to 1).
    population_entropy : float
        Normalized Shannon entropy of the aggregate commitment vector in [0, 1].
        Single scalar.  See :func:`compute_population_entropy`.
    mean_cell_entropy : float
        Mean normalized per-cell Shannon entropy in [0, 1].
        See :func:`compute_mean_cell_entropy`.
        NaN when cell_level=False.
    per_fate_entropy : np.ndarray, shape (k,)
        Mean binary cell entropy for each fate individually.
        per_fate_entropy[j] = mean over cells of H_bin(s_ij, 1-s_ij).
        See :func:`compute_per_fate_cell_entropy`.
        All-NaN array when cell_level=False.
    pairwise_unCS : np.ndarray, shape (k, k)
    pairwise_nCS : np.ndarray, shape (k, k)
    cell_scores : np.ndarray, shape (n_cells, k), optional
    fate_angles : np.ndarray, shape (k,), optional
        Angle (degrees) of each fate axis in the radial embedding.
    nn_cell_entropy : np.ndarray, shape (n_cells,), optional
        NN-smoothed per-cell entropy.  Set when k_nn > 0 in score().
        Also written to adata_sub.obs['cs_nn_entropy'].
    nn_k : int, optional
        The k_nn value used to compute nn_cell_entropy.
    dominant_fate : str
        Fate with highest M_sector.
    """

[docs]
    fate_names: List[str]


[docs]
    M_bin: np.ndarray


[docs]
    bin_edges: np.ndarray


[docs]
    sectors: List[List[int]]


[docs]
    M_sector: np.ndarray


[docs]
    n_cells_per_fate: np.ndarray


[docs]
    commitment_vector: np.ndarray


[docs]
    population_entropy: float


[docs]
    mean_cell_entropy: float


[docs]
    per_fate_entropy: np.ndarray          # shape (k,) — binary entropy per fate


[docs]
    pairwise_unCS: np.ndarray


[docs]
    pairwise_nCS: np.ndarray


[docs]
    cell_scores: Optional[np.ndarray] = None


[docs]
    fate_angles: Optional[np.ndarray] = None


[docs]
    cell_obs_names: Optional[np.ndarray] = None


[docs]
    nn_cell_entropy: Optional[np.ndarray] = None   # shape (n_cells,), set when k_nn>0


[docs]
    nn_k: Optional[int] = None                     # k_nn used to compute nn_cell_entropy


[docs]
    bootstrap_ci: Optional[Dict[str, Any]] = None  # output of bootstrap_cs(), set when n_bootstrap>0


    # ------------------------------------------------------------------
    # Backward-compatibility alias
    # ------------------------------------------------------------------

    @property

[docs]
    def commitment_entropy(self) -> float:
        """Alias for ``population_entropy`` (deprecated, use ``mean_cell_entropy``)."""
        warnings.warn(
            "CommitmentScoreResult.commitment_entropy is deprecated. "
            "Use .mean_cell_entropy (recommended) or .population_entropy instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.population_entropy


    @property

[docs]
    def k(self) -> int:
        return len(self.fate_names)


    @property

[docs]
    def dominant_fate(self) -> str:
        return self.fate_names[int(np.argmax(self.M_sector))]



[docs]
    def to_dataframe(self) -> pd.DataFrame:
        """Summary DataFrame with one row per fate."""
        rows = []
        for j, name in enumerate(self.fate_names):
            rows.append({
                "fate": name,
                "M_sector": self.M_sector[j],
                "n_cells": self.n_cells_per_fate[j],
                "commitment_fraction": self.commitment_vector[j],
            })
        return pd.DataFrame(rows)



[docs]
    def pairwise_to_dataframe(self, normalized: bool = True) -> pd.DataFrame:
        """Pairwise CS matrix as a labeled DataFrame."""
        mat = self.pairwise_nCS if normalized else self.pairwise_unCS
        return pd.DataFrame(mat, index=self.fate_names, columns=self.fate_names)



[docs]
    def summary(self) -> str:
        lines = [
            "=== CommitmentScoreResult ===",
            f"  Fates ({len(self.fate_names)}): {', '.join(self.fate_names)}",
            f"  Dominant fate: {self.dominant_fate}",
            "",
            "  Entropy metrics:",
            f"    Population entropy:           {self.population_entropy:.4f}"
            + "  [aggregate velocity-mass balance]",
        ]
        if not np.isnan(self.mean_cell_entropy):
            lines.append(
                f"    Mean cell entropy:            {self.mean_cell_entropy:.4f}"
                + "  [per-cell average, k-way]"
            )
            lines.append("    Per-fate cell entropy:")
            for name, h in zip(self.fate_names, self.per_fate_entropy):
                lines.append(f"      {name}: {h:.4f}")
        else:
            lines.append(
                "    Mean cell entropy:            [not computed — set cell_level=True]"
            )
        if self.nn_cell_entropy is not None:
            lines.append(
                f"    NN-smoothed entropy (k={self.nn_k}):  "
                f"mean={self.nn_cell_entropy.mean():.4f}  "
                f"[per-cell, stored in adata_sub.obs['cs_nn_entropy']]"
            )
        lines += [
            "",
            "  Commitment vector (normalized):",
        ]
        for name, p in zip(self.fate_names, self.commitment_vector):
            lines.append(f"    {name}: {p:.4f}")
        lines += ["", "  Pairwise nCS matrix:"]
        df = self.pairwise_to_dataframe(normalized=True)
        lines.append(df.to_string())
        # Footnote when any off-diagonal entry is inf
        k = len(self.fate_names)
        off_diag_vals = [self.pairwise_nCS[i, j] for i in range(k) for j in range(k) if i != j]
        if off_diag_vals and any(np.isinf(v) for v in off_diag_vals):
            lines.append(
                "  (inf = fate arm has 0 cells in this subset; "
                "expected for progenitor-only subsets)"
            )
        if self.bootstrap_ci is not None:
            ci_lvl = int(self.bootstrap_ci.get("ci_level", 0.95) * 100)
            n_boot = self.bootstrap_ci.get("n_bootstrap", "?")
            lines += [
                "",
                f"  Bootstrap {ci_lvl}% CI on nCS (n={n_boot}):",
                f"    CI low:\n{pd.DataFrame(self.bootstrap_ci['ci_low'], index=self.fate_names, columns=self.fate_names).round(3).to_string()}",
                f"    CI high:\n{pd.DataFrame(self.bootstrap_ci['ci_high'], index=self.fate_names, columns=self.fate_names).round(3).to_string()}",
            ]
        return "\n".join(lines)