RNA速率scVelo
import scvelo as scv
adata = scv.read("E:/data_exercise/day1_27/scV/data/Pancreas/adata_dynamics.h5ad", cache=True)
adata
AnnData object with n_obs × n_vars = 3696 × 2000
obs: 'clusters_coarse', 'clusters', 'S_score', 'G2M_score', 'initial_size_spliced', 'initial_size_unspliced', 'initial_size', 'n_counts'
var: 'highly_variable_genes', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'fit_r2', 'fit_alpha', 'fit_beta', 'fit_gamma', 'fit_t_', 'fit_scaling', 'fit_std_u', 'fit_std_s', 'fit_likelihood', 'fit_u0', 'fit_s0', 'fit_pval_steady', 'fit_steady_u', 'fit_steady_s', 'fit_variance', 'fit_alignment_scaling'
uns: 'clusters_coarse_colors', 'clusters_colors', 'day_colors', 'neighbors', 'pca', 'recover_dynamics'
obsm: 'X_pca', 'X_umap'
varm: 'loss'
layers: 'Ms', 'Mu', 'fit_t', 'fit_tau', 'fit_tau_', 'spliced', 'unspliced'
obsp: 'connectivities', 'distances'
# 计算速度矢量
scv.tl.velocity(adata, mode='dynamical')
scv.tl.velocity_graph(adata)
computing velocities
finished (0:00:06) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
computing velocity graph
finished (0:00:11) --> added
'velocity_graph', sparse matrix with cosine correlations (adata.uns)
#汇出嵌入模型
scv.pl.velocity_embedding_grid(adata, basis='umap')
computing velocity embedding
finished (0:00:01) --> added
'velocity_umap', embedded velocity vectors (adata.obsm)

#汇出嵌入模型
scv.pl.velocity_embedding_stream(adata, basis='umap')

#计算cell cycle score并可视化
scv.tl.score_genes_cell_cycle(adata)
scv.pl.scatter(adata, color_gradients=['S_score', 'G2M_score'], basis="umap", smooth=True, perc=[5, 95])
calculating cell cycle phase
--> 'S_score' and 'G2M_score', scores of cell cycle phases (adata.obs)

#3.5 计算细胞状态并可视化
scv.tl.terminal_states(adata)
scv.pl.scatter(adata, color=[ 'root_cells', 'end_points'], basis='umap')
#'root_cells', root cells of Markov diffusion process (adata.obs)
#'end_points', end points of Markov diffusion process (adata.obs)
computing terminal states
WARNING: Uncertain or fuzzy root cell identification. Please verify.
identified 2 regions of root cells and 1 region of end points .
finished (0:00:00) --> added
'root_cells', root cells of Markov diffusion process (adata.obs)
'end_points', end points of Markov diffusion process (adata.obs)

adata
AnnData object with n_obs × n_vars = 3696 × 2000
obs: 'clusters_coarse', 'clusters', 'S_score', 'G2M_score', 'initial_size_spliced', 'initial_size_unspliced', 'initial_size', 'n_counts', 'velocity_self_transition', 'phase', 'clusters_gradients', 'root_cells', 'end_points'
var: 'highly_variable_genes', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'fit_r2', 'fit_alpha', 'fit_beta', 'fit_gamma', 'fit_t_', 'fit_scaling', 'fit_std_u', 'fit_std_s', 'fit_likelihood', 'fit_u0', 'fit_s0', 'fit_pval_steady', 'fit_steady_u', 'fit_steady_s', 'fit_variance', 'fit_alignment_scaling', 'velocity_genes'
uns: 'clusters_coarse_colors', 'clusters_colors', 'day_colors', 'neighbors', 'pca', 'recover_dynamics', 'velocity_params', 'velocity_graph', 'velocity_graph_neg', 'clusters_gradients_colors'
obsm: 'X_pca', 'X_umap', 'velocity_umap'
varm: 'loss'
layers: 'Ms', 'Mu', 'fit_t', 'fit_tau', 'fit_tau_', 'spliced', 'unspliced', 'velocity', 'velocity_u'
obsp: 'connectivities', 'distances'
#计算pseudotime,可视化,与导出
scv.tl.velocity_pseudotime(adata)
scv.pl.scatter(adata=adata, color='velocity_pseudotime', cmap='gnuplot', basis="umap")
adata.obs["velocity_pseudotime"].to_csv('velocyto_pseudotime.csv')

#查看特定基因的velocity
var_names = ['Snhg6', 'Sbspon', 'Eif2s3y', 'Sntg1'] #adata.var 下的index就是基因名
scv.pl.velocity(adata, var_names=var_names, colorbar=True, ncols=2)

adata
AnnData object with n_obs × n_vars = 3696 × 2000
obs: 'clusters_coarse', 'clusters', 'S_score', 'G2M_score', 'initial_size_spliced', 'initial_size_unspliced', 'initial_size', 'n_counts', 'velocity_self_transition', 'phase', 'clusters_gradients', 'root_cells', 'end_points', 'velocity_pseudotime'
var: 'highly_variable_genes', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'fit_r2', 'fit_alpha', 'fit_beta', 'fit_gamma', 'fit_t_', 'fit_scaling', 'fit_std_u', 'fit_std_s', 'fit_likelihood', 'fit_u0', 'fit_s0', 'fit_pval_steady', 'fit_steady_u', 'fit_steady_s', 'fit_variance', 'fit_alignment_scaling', 'velocity_genes'
uns: 'clusters_coarse_colors', 'clusters_colors', 'day_colors', 'neighbors', 'pca', 'recover_dynamics', 'velocity_params', 'velocity_graph', 'velocity_graph_neg', 'clusters_gradients_colors'
obsm: 'X_pca', 'X_umap', 'velocity_umap'
varm: 'loss'
layers: 'Ms', 'Mu', 'fit_t', 'fit_tau', 'fit_tau_', 'spliced', 'unspliced', 'velocity', 'velocity_u'
obsp: 'connectivities', 'distances'
df = adata.var
df
| highly_variable_genes | gene_count_corr | means | dispersions | dispersions_norm | highly_variable | fit_r2 | fit_alpha | fit_beta | fit_gamma | ... | fit_std_s | fit_likelihood | fit_u0 | fit_s0 | fit_pval_steady | fit_steady_u | fit_steady_s | fit_variance | fit_alignment_scaling | velocity_genes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| index | |||||||||||||||||||||
| Sntg1 | True | NaN | 0.005065 | 0.131135 | 0.214469 | True | 0.401981 | 0.015726 | 0.005592 | 0.088792 | ... | 0.030838 | 0.406523 | 0.0 | 0.0 | 0.159472 | 2.470675 | 0.094304 | 0.149138 | 5.355590 | False |
| Snhg6 | False | NaN | 0.375835 | 0.158585 | 0.040765 | True | 0.125072 | 0.389126 | 2.981982 | 0.260322 | ... | 0.245248 | 0.243441 | 0.0 | 0.0 | 0.403409 | 0.106128 | 0.596630 | 0.762252 | 2.037296 | True |
| Ncoa2 | False | NaN | 0.106945 | 0.145879 | 0.298358 | True | -2.313923 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | False |
| Sbspon | True | NaN | 0.143032 | 0.277016 | 1.044508 | True | 0.624803 | 0.464865 | 2.437113 | 0.379645 | ... | 0.178859 | 0.252135 | 0.0 | 0.0 | 0.182088 | 0.164805 | 0.430623 | 0.674312 | 1.193015 | True |
| Ube2w | False | NaN | 0.018522 | 0.109459 | 0.091136 | True | -5.324582 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | False |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| Tmem27 | True | NaN | 1.297619 | 2.475960 | 3.254982 | True | 0.527855 | 3.954723 | 21.314334 | 0.253763 | ... | 5.014219 | 0.292956 | 0.0 | 0.0 | 0.496411 | 0.168456 | 12.425889 | 0.389388 | 3.163113 | False |
| Uty | False | NaN | 0.018673 | 0.182256 | 0.505338 | True | -0.555835 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | False |
| Ddx3y | True | NaN | 0.165256 | 0.350978 | 1.465338 | True | -2.538731 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | False |
| Eif2s3y | True | NaN | 0.182643 | 0.318686 | 1.281603 | True | 0.352190 | 0.193458 | 0.746410 | 0.295085 | ... | 0.158051 | 0.262140 | 0.0 | 0.0 | 0.390710 | 0.219716 | 0.424033 | 0.748614 | 2.312621 | True |
| Erdr1 | True | NaN | 0.344720 | 0.215833 | 0.288351 | True | -0.193263 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | False |
2000 rows × 23 columns
df.to_csv("adata.var.csv",index=True, header=True )
# 数据框筛选那些基因
df = df[(df['fit_likelihood'] > .1) & df['velocity_genes'] == True]
kwargs = dict(xscale='log', fontsize=16)
df
| highly_variable_genes | gene_count_corr | means | dispersions | dispersions_norm | highly_variable | fit_r2 | fit_alpha | fit_beta | fit_gamma | ... | fit_std_s | fit_likelihood | fit_u0 | fit_s0 | fit_pval_steady | fit_steady_u | fit_steady_s | fit_variance | fit_alignment_scaling | velocity_genes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| index | |||||||||||||||||||||
| Snhg6 | False | NaN | 0.375835 | 0.158585 | 0.040765 | True | 0.125072 | 0.389126 | 2.981982 | 0.260322 | ... | 0.245248 | 0.243441 | 0.0 | 0.0 | 0.403409 | 0.106128 | 0.596630 | 0.762252 | 2.037296 | True |
| Sbspon | True | NaN | 0.143032 | 0.277016 | 1.044508 | True | 0.624803 | 0.464865 | 2.437113 | 0.379645 | ... | 0.178859 | 0.252135 | 0.0 | 0.0 | 0.182088 | 0.164805 | 0.430623 | 0.674312 | 1.193015 | True |
| Fam135a | True | NaN | 0.257955 | 0.171546 | 0.096820 | True | 0.384662 | 0.172335 | 0.118088 | 0.204538 | ... | 0.149868 | 0.283343 | 0.0 | 0.0 | 0.387921 | 1.345830 | 0.393197 | 0.671096 | 3.390194 | True |
| Tbc1d8 | True | NaN | 0.070612 | 0.123167 | 0.169130 | True | 0.613194 | 0.083656 | 0.118139 | 0.227401 | ... | 0.081595 | 0.246594 | 0.0 | 0.0 | 0.392402 | 0.659434 | 0.240100 | 0.829478 | 2.914215 | True |
| Fhl2 | True | NaN | 0.232482 | 0.818012 | 2.892669 | True | 0.325223 | 0.224027 | 0.330934 | 0.121456 | ... | 0.317963 | 0.262849 | 0.0 | 0.0 | 0.476793 | 0.539878 | 1.073687 | 0.695997 | 4.785407 | True |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| Map3k15 | True | NaN | 0.180941 | 0.427601 | 1.901315 | True | 0.437069 | 0.252467 | 0.348783 | 0.275921 | ... | 0.225956 | 0.289070 | 0.0 | 0.0 | 0.434454 | 0.611380 | 0.560048 | 0.510572 | 2.208756 | True |
| Rai2 | True | NaN | 0.231965 | 0.651755 | 2.173640 | True | 0.769033 | 0.792172 | 2.885604 | 0.647345 | ... | 0.276047 | 0.271263 | 0.0 | 0.0 | 0.311969 | 0.243741 | 0.716996 | 0.707595 | 1.063908 | True |
| Rbbp7 | False | NaN | 1.017536 | 0.422314 | 0.153713 | True | 0.344213 | 1.446806 | 5.332362 | 0.326539 | ... | 0.796754 | 0.281498 | 0.0 | 0.0 | 0.477365 | 0.208398 | 2.985787 | 0.704522 | 2.100311 | True |
| Ap1s2 | True | NaN | 0.127856 | 0.319473 | 1.286081 | True | 0.716659 | 0.202643 | 0.596662 | 0.392769 | ... | 0.140455 | 0.286893 | 0.0 | 0.0 | 0.140955 | 0.315635 | 0.366964 | 0.671211 | 1.775751 | True |
| Eif2s3y | True | NaN | 0.182643 | 0.318686 | 1.281603 | True | 0.352190 | 0.193458 | 0.746410 | 0.295085 | ... | 0.158051 | 0.262140 | 0.0 | 0.0 | 0.390710 | 0.219716 | 0.424033 | 0.748614 | 2.312621 | True |
1006 rows × 23 columns
#RNA转录,剪接和降解的速率。
with scv.GridSpec(ncols=3) as pl:
pl.hist(df['fit_alpha'], xlabel='transcription rate', **kwargs)
pl.hist(df['fit_beta'] * df['fit_scaling'], xlabel='splicing rate', xticks=[.1, .4, 1], **kwargs)
pl.hist(df['fit_gamma'], xlabel='degradation rate', xticks=[.1, .4, 1], **kwargs)

adata
AnnData object with n_obs × n_vars = 3696 × 2000
obs: 'clusters_coarse', 'clusters', 'S_score', 'G2M_score', 'initial_size_spliced', 'initial_size_unspliced', 'initial_size', 'n_counts', 'velocity_self_transition', 'phase', 'clusters_gradients', 'root_cells', 'end_points', 'velocity_pseudotime'
var: 'highly_variable_genes', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'fit_r2', 'fit_alpha', 'fit_beta', 'fit_gamma', 'fit_t_', 'fit_scaling', 'fit_std_u', 'fit_std_s', 'fit_likelihood', 'fit_u0', 'fit_s0', 'fit_pval_steady', 'fit_steady_u', 'fit_steady_s', 'fit_variance', 'fit_alignment_scaling', 'velocity_genes'
uns: 'clusters_coarse_colors', 'clusters_colors', 'day_colors', 'neighbors', 'pca', 'recover_dynamics', 'velocity_params', 'velocity_graph', 'velocity_graph_neg', 'clusters_gradients_colors'
obsm: 'X_pca', 'X_umap', 'velocity_umap'
varm: 'loss'
layers: 'Ms', 'Mu', 'fit_t', 'fit_tau', 'fit_tau_', 'spliced', 'unspliced', 'velocity', 'velocity_u'
obsp: 'connectivities', 'distances'
#latent_time
scv.tl.latent_time(adata)
computing latent time using root_cells as prior
finished (0:00:02) --> added
'latent_time', shared time (adata.obs)
scv.pl.scatter(adata, color='latent_time', color_map='gnuplot', size=80)

top_genes = adata.var['fit_likelihood'].sort_values(ascending=False).index[:300]
top_genes
Index(['Pcsk2', 'Dcdc2a', 'Ank', 'Gng12', 'Top2a', 'Pak3', 'Tmem163', 'Nfib',
'Pim2', 'Smoc1',
...
'Cyr61', 'Ptprn2', 'Rpa2', 'Nit2', 'Cdc20', 'Hmmr', 'Ankrd12', 'Itpr1',
'Auts2', 'Mdfi'],
dtype='object', name='index', length=300)
scv.pl.heatmap(adata, var_names=top_genes, sortby='latent_time', col_color='clusters', n_convolve=100)

top = df['fit_likelihood'].sort_values(ascending=False).index[:300]
top
Index(['Pcsk2', 'Dcdc2a', 'Ank', 'Gng12', 'Top2a', 'Pak3', 'Tmem163', 'Nfib',
'Pim2', 'Smoc1',
...
'Trp53inp2', 'Ccnb1', 'Atp2a2', 'Dtl', 'Kmt2a', 'Emb', 'Rcan2',
'Gm28172', 'Rab3b', 'Gk'],
dtype='object', name='index', length=300)
scv.pl.heatmap(adata, var_names=top, sortby='latent_time', col_color='clusters', n_convolve=100)

# 驱动基因
top_genes = adata.var['fit_likelihood'].sort_values(ascending=False).index
scv.pl.scatter(adata, basis=top_genes[:15], ncols=5, frameon=False)

var_names = ['Actn4', 'Ppp3ca', 'Cpe', 'Nnat']
scv.pl.scatter(adata, var_names, frameon=False)
scv.pl.scatter(adata, x='latent_time', y=var_names, frameon=False)


#特定于簇的最高似然基因
scv.tl.rank_dynamical_genes(adata, groupby='clusters')
df = scv.get_df(adata, 'rank_dynamical_genes/names')
df.head(5)
ranking genes by cluster-specific likelihoods
finished (0:00:05) --> added
'rank_dynamical_genes', sorted scores by group ids (adata.uns)
| Ductal | Ngn3 low EP | Ngn3 high EP | Pre-endocrine | Beta | Alpha | Delta | Epsilon | |
|---|---|---|---|---|---|---|---|---|
| 0 | Dcdc2a | Dcdc2a | Rbfox3 | Abcc8 | Pcsk2 | Cpe | Pcsk2 | Tox3 |
| 1 | Top2a | Adk | Mapre3 | Tmem163 | Ank | Gnao1 | Rap1b | Rnf130 |
| 2 | Nfib | Mki67 | Btbd17 | Gnao1 | Tmem163 | Pak3 | Pak3 | Meis2 |
| 3 | Wfdc15b | Rap1gap2 | Sulf2 | Ank | Tspan7 | Pim2 | Abcc8 | Adk |
| 4 | Cdk1 | Top2a | Tcp11 | Tspan7 | Map1b | Map1b | Slc7a14 | Rap1gap2 |
for cluster in ['Ductal', 'Ngn3 high EP', 'Pre-endocrine', 'Beta']:
scv.pl.scatter(adata, df[cluster][:5], ylabel=cluster, frameon=False)




df = scv.get_df(adata, 'rank_dynamical_genes/scores')
df.head(5)
| Ductal | Ngn3 low EP | Ngn3 high EP | Pre-endocrine | Beta | Alpha | Delta | Epsilon | |
|---|---|---|---|---|---|---|---|---|
| 0 | 0.59 | 0.59 | 0.53 | 0.73 | 0.90 | 0.61 | 0.96 | 0.61 |
| 1 | 0.56 | 0.53 | 0.50 | 0.54 | 0.70 | 0.57 | 0.70 | 0.61 |
| 2 | 0.56 | 0.51 | 0.47 | 0.53 | 0.59 | 0.54 | 0.67 | 0.59 |
| 3 | 0.50 | 0.50 | 0.46 | 0.53 | 0.59 | 0.54 | 0.62 | 0.57 |
| 4 | 0.49 | 0.49 | 0.45 | 0.51 | 0.57 | 0.51 | 0.60 | 0.55 |
[参考链接1](https://scvelo.readthedocs.io/DynamicalModeling.html)
[参考链接2](https://www.bilibili.com/read/cv7764833/)
浙公网安备 33010602011771号