Main API
Reference-based imputation
expTransImp(
df_ref: pd.DataFrame,
df_tgt: pd.DataFrame,
train_gene: list,
test_gene: list,
classes: list=None,
ct_list: list=None,
autocorr_method: str='moranI',
signature_mode: str='cluster',
mapping_mode: str='full',
mapping_lowdim: int=256,
spa_adj: sparse.coo_array=None,
lr: float=1e-2,
weight_decay: float=1e-2,
n_epochs: int=2000,
clip_max: int=10,
wt_spa: float=1.0,
locations: np.array=None,
n_simulation: int=None,
convert_uncertainty_score: bool=True,
device: torch.device=None,
seed: int=None)
"""Main function for transimp
Args:
df_ref (pd.DataFrame): Dataframe of reference single cell
df_tgt (pd.DataFrame): Dataframe of ST for training
train_gene (list): Training genes
test_gene (list): Genes for ST prediction, should be in df_ref
classes (list, optional): Single-cell type annotations. Defaults to None.
ct_list (list, optional): List of cell type labels. Defaults to None.
autocorr_method (str, optional): Autocorrelation method. Defaults to 'moranI'.
signature_mode (str, optional): Mode for creating compressed signature. Defaults to 'cluster'.
mapping_mode (str, optional): 'lowrank' or 'full' mapping matrix. Defaults to 'full'.
mapping_lowdim (int, optional): Defaults to 256.
spa_adj (sparse.coo_array, optional): Spatial adjacency matrix. Defaults to None.
lr (float, optional): Defaults to 1e-2.
weight_decay (float, optional): Defaults to 1e-2.
n_epochs (int, optional): Defaults to 2000.
clip_max (int, optional): Defaults to 10.
wt_spa (float, optional): Defaults to 1.0.
locations (np.array, optional): Spatial coordinates of the ST dataset. Defaults to None.
n_simulation (int, optional): If set, enables performance uncertainty estimation and gives the number of local bootstraps to run. Defaults to None.
convert_uncertainty_score (bool, optional): Whether to convert the uncertainty score to a certainty score with $sigmoid(-pred.var.)$. Defaults to True.
device (torch.device, optional): Defaults to None.
seed (int, optional): Defaults to None.
Returns:
list: results
"""
TransImpLR:
expTransImp(
df_ref=raw_scrna_df,
df_tgt=raw_spatial_df,
train_gene=train_gene,
test_gene=test_gene,
signature_mode='cell',
mapping_mode='lowrank',
n_epochs=2000,
seed=seed,
device=device
)
TransImpLR (with Uncertainty Score):
expTransImp(
df_ref=raw_scrna_df,
df_tgt=raw_spatial_df,
train_gene=train_gene,
test_gene=test_gene,
signature_mode='cell',
mapping_mode='lowrank',
n_epochs=2000,
n_simulation=200,
classes=classes,
seed=seed,
device=device
)
TransImpCls:
expTransImp(
df_ref=raw_scrna_df,
df_tgt=raw_spatial_df,
train_gene=train_gene,
test_gene=test_gene,
ct_list=ct_list,
classes=classes,
n_epochs=2000,
signature_mode='cluster',
mapping_mode='full',
seed=seed,
device=device
)
TransImpSpa:
expTransImp(
df_ref=raw_scrna_df,
df_tgt=raw_spatial_df,
train_gene=train_gene,
test_gene=test_gene,
signature_mode='cell',
mapping_mode='lowrank',
n_epochs=2000,
spa_adj=spa_adata.obsp['spatial_connectivities'].tocoo(),
seed=seed,
device=device
)
TransImpClsSpa:
expTransImp(
df_ref=raw_scrna_df,
df_tgt=raw_spatial_df,
train_gene=train_gene,
test_gene=test_gene,
ct_list=ct_list,
classes=classes,
spa_adj=spa_adata.obsp['spatial_connectivities'].tocoo(),
signature_mode='cluster',
mapping_mode='full',
wt_spa=0.1,
n_epochs=2000,
seed=seed,
device=device
)
Cell-type deconvolution
expDeconv(adata_ref: sc.AnnData=None,
adata_tgt: sc.AnnData=None,
label_key: str='Class',
df_ref: pd.DataFrame=None,
df_tgt: pd.DataFrame=None,
classes: np.array=None,
ct_list: np.array=None,
lr: float=1e-2,
weight_decay: float=1e-3,
tau: float=None,
n_epochs: int=8000,
n_top_genes: int=2000,
topk: int=50,
wt_spa: float=1.0,
wt_l1: float=5.0,
wt_abd: float=0.5,
wt_l2_G: float=2.0,
wt_l2_S: float=2.0,
wt_js: float=2.0,
autocorr_method: str='moranI',
spa_adj: sparse.coo_array=None,
spa_adata: sc.AnnData=None,
calibrate: float=0.0,
gene_mask: pd.DataFrame=None,
normalize_sig: bool=False,
raw_counts: bool=None,
smart_markers: bool=False,
spatial_markers: bool=False,
score_init: bool=True,
cluster_mapping: bool=True,
cosine_lr: bool=True,
device: torch.device=None,
seed: int=None)
"""Cell type deconvolution.
Fits a linear translation model from reference cell-type gene signatures
to spatial gene profiles, and returns predicted per-spot cell-type weights.
Data can be provided in two ways:
- AnnData mode: pass adata_ref and adata_tgt. The function extracts
df_ref, df_tgt, classes, and ct_list automatically using label_key.
- DataFrame mode (legacy): pass df_ref, df_tgt, classes, and ct_list
explicitly.
Args:
adata_ref (sc.AnnData, optional): Reference scRNA-seq AnnData.
adata_tgt (sc.AnnData, optional): Spatial transcriptomics AnnData.
label_key (str): Column in adata_ref.obs for cell-type labels.
df_ref, df_tgt, classes, ct_list: Legacy DataFrame-mode inputs.
lr, weight_decay: Optimiser hyperparameters.
tau (float, optional): Softmax temperature.
n_epochs (int): Training epochs.
n_top_genes (int): Variable genes (ignored when topk is set).
topk (int): Marker genes per cell type via DE test.
wt_spa (float): Spatial regularization weight.
wt_l1 (float): L1 regularization weight on translation matrix.
wt_abd (float): Abundance signature loss weight.
wt_l2_G (float): L2 regularization weight on gene scalers.
wt_l2_S (float): L2 regularization weight on spot scalers.
wt_js (float): Jensen-Shannon divergence loss weight.
autocorr_method (str): 'moranI' or 'gearyC'.
spa_adj: Spatial adjacency matrix.
spa_adata: Spatial AnnData for Leiden clustering.
calibrate (float): Post-hoc calibration strength.
gene_mask: Boolean mask [n_types x n_genes].
normalize_sig (bool): L2-normalize cell-type signatures.
raw_counts (bool, optional): Auto-detected if None.
smart_markers (bool): Abundance-aware marker selection.
spatial_markers (bool): Augment with spatial-derived markers.
score_init (bool): sc.tl.score_genes warm-start.
cluster_mapping (bool): Cluster-to-celltype mapping regularization.
cosine_lr (bool): Use cosine annealing learning rate schedule.
device: Torch device.
seed: Random seed.
Returns:
np.array, np.ndarray: predicted ST expression, weight matrix
"""
Recommended configuration (TransDeconvV2):
expDeconv(
adata_ref=adata_ref,
adata_tgt=adata_tgt,
label_key='Class', # change to the cell type key in reference adata.obs
score_init=True,
cluster_mapping=True,
topk=50,
cosine_lr=True,
weight_decay=1e-3,
wt_abd=0.5,
wt_js=2.0,
n_epochs=8000,
seed=seed,
device=device
)
Legacy DataFrame mode:
expDeconv(
df_ref=df_ref,
df_tgt=df_tgt,
classes=classes,
ct_list=ct_list,
n_epochs=8000,
seed=seed,
device=device
)
ST Velocity estimation
expVeloImp(df_ref: pd.DataFrame,
df_tgt: pd.DataFrame,
S: np.array,
U: np.array,
V: np.array,
train_gene: list,
test_gene: list,
classes: list=None,
ct_list: list=None,
autocorr_method: str='moranI',
signature_mode: str='cell',
mapping_mode: str='lowrank',
mapping_lowdim: int=256,
spa_adj: sparse.coo_array=None,
lr: float=1e-2,
weight_decay: float=1e-2,
n_epochs: int=1000,
clip_max: int=10,
wt_spa: float=1.0,
locations: np.array=None,
n_simulation: int=None,
device: torch.device=None,
seed: int=None)
"""ST Velocity estimation
Args:
df_ref (pd.DataFrame): Dataframe of reference single cell
df_tgt (pd.DataFrame): Dataframe of ST for training
S (np.array): Spliced expression matrix
U (np.array): Unspliced expression matrix
V (np.array): SC velocity matrix
train_gene (list): Training genes
test_gene (list): Genes for ST prediction, should be in df_ref
classes (list, optional): Single-cell type annotations. Defaults to None.
ct_list (list, optional): List of cell type labels. Defaults to None.
autocorr_method (str, optional): Autocorrelation method. Defaults to 'moranI'.
signature_mode (str, optional): Mode for creating compressed signature. Defaults to 'cell'.
mapping_mode (str, optional): 'lowrank' or 'full' mapping matrix. Defaults to 'lowrank'.
mapping_lowdim (int, optional): Defaults to 256.
spa_adj (sparse.coo_array, optional): Spatial adjacency matrix. Defaults to None.
lr (float, optional): Defaults to 1e-2.
weight_decay (float, optional): Defaults to 1e-2.
n_epochs (int, optional): Defaults to 1000.
clip_max (int, optional): Defaults to 10.
wt_spa (float, optional): Defaults to 1.0.
locations (np.array, optional): Spatial coordinates of the ST dataset. Defaults to None.
n_simulation (int, optional): If set, enables performance uncertainty estimation and gives the number of local bootstraps to run. Defaults to None.
device (torch.device, optional): Defaults to None.
seed (int, optional): Defaults to None.
Returns:
tuple(np.array): ST results
"""
Example:
expVeloImp(
df_ref=raw_scrna_df,
df_tgt=raw_spatial_df,
S=RNA.layers['spliced'],
U=RNA.layers['unspliced'],
V=RNA.layers['spliced'],
train_gene=raw_shared_gene,
test_gene=RNA.var_names,
signature_mode='cell',
mapping_mode='lowrank',
classes='celltype_prediction',
n_epochs=1000,
seed=seed,
device=device
)