Loading...
Loading...
Guide Claude through omicverse's single-cell clustering workflow, covering preprocessing, QC, multimethod clustering, topic modeling, cNMF, and cross-batch integration as demonstrated in t_cluster.ipynb and t_single_batch.ipynb.
npx skill4agent add starlitnightly/omicverse single-cell-clustering-and-batch-correction-with-omicverse
t_cluster.ipynb
t_single_batch.ipynb
AnnData
omicverse as ov
scanpy as sc
scvelo as scv
ov.plot_set()
ov.utils.ov_plot_set()
scv.datasets.dentategyrus()
.h5ad
ov.read()
adata.obs['batch']
adata.X = adata.X.astype(np.int64)
ov.pp.qc(adata, tresh={'mito_perc': 0.2, 'nUMIs': 500, 'detected_genes': 250}, batch_key='batch')
adata.write_h5ad(...)
ov.pp.preprocess(adata, mode='shiftlog|pearson', n_HVGs=3000, batch_key=None)
adata.raw = adata
adata.var.highly_variable_features
ov.pp.scale(adata)
ov.pp.pca(adata, layer='scaled', n_pcs=50)
ov.utils.plot_pca_variance_ratio(adata)
sc.pp.neighbors(adata, n_neighbors=15, n_pcs=50, use_rep='scaled|original|X_pca')
ov.pp.neighbors(...)
ov.utils.cluster(adata, method='leiden'|'louvain', resolution=1)
ov.single.leiden(adata, resolution=1.0)
ov.pp.leiden(adata, resolution=1)

# Before clustering: check neighbors graph exists
if 'neighbors' not in adata.uns:
    if 'X_pca' in adata.obsm:
        ov.pp.neighbors(adata, n_neighbors=15, use_rep='X_pca')
    else:
        raise ValueError("PCA must be computed before neighbors graph")
# Before plotting by cluster: check clustering was performed
if 'leiden' not in adata.obs:
    ov.single.leiden(adata, resolution=1.0)

ov.pl.embedding(adata, basis='X_umap', color=['clusters','leiden'], frameon='small', wspace=0.5)
color=
adata.obs
model = ov.utils.cluster(adata, method='scICE', use_rep='scaled|original|X_pca', resolution_range=(4,20), n_boot=50, n_steps=11)
model.plot_ic(figsize=(6,4))
model.best_k
ov.utils.cluster(..., method='GMM', n_components=21, covariance_type='full', tol=1e-9, max_iter=1000)
LDA_obj = ov.utils.LDA_topic(...)
LDA_obj.plot_topic_contributions(6)
LDA_obj.predicted(k)
LDA_obj.get_results_rfc(...)
cnmf_obj = ov.single.cNMF(... components=np.arange(5,11), n_iter=20, num_highvar_genes=2000, output_dir=...)
factorize
combine
k_selection_plot
cnmf_obj.get_results(...)
cnmf_obj.get_results_rfc(...)
adjusted_rand_score(adata.obs['clusters'], adata.obs['leiden'])
ov.utils.mde(...)
adata.obsm["scaled|original|X_pca"]
ov.utils.embedding(..., color=['batch','cell_type'])
ov.pl.embedding
ov.single.batch_correction(adata, batch_key='batch', methods='harmony'|'combat'|'scanorama'|'scVI'|'CellANOVA', n_pcs=50, ...)
adata.obsm
X_harmony
X_combat
X_scanorama
X_scVI
X_cellanova
scVI
n_latent=30
gene_likelihood="nb"
control_dict
ov.utils.mde
batch
cell_type
adata.write_h5ad('neurips2021_batch_all.h5ad', compression='gzip')
scib_metrics.benchmark.Benchmarker
["X_pca", "X_combat", "X_harmony", "X_cellanova", "X_scanorama", "X_mira_topic", "X_mira_feature", "X_scVI"]
bm.benchmark()
bm.plot_results_table(min_max_scale=False)
adata.raw
use_rep='scaled|original|X_pca'
adata.obsm
n_iter
components
'neighbors' in adata.uns
adata.obs
clusters
batch
cell_type
t_cluster.ipynb
t_single_batch.ipynb
reference.md