example notebook

In [ ]:

Copied!

from grnndata import GRNAnnData
from grnndata import utils
from grnndata import GRNAnnData
from grnndata import utils

In [ ]:

Copied!

subdata
subdata

View of AnnData object with n_obs × n_vars = 142 × 2000
    obs: 'n_genes', 'n_counts', 'percent_mito', 'donor_id', 'assay_ontology_term_id', 'cell_type_ontology_term_id', 'development_stage_ontology_term_id', 'disease_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'is_primary_data', 'organism_ontology_term_id', 'sex_ontology_term_id', 'tissue_ontology_term_id', 'author_cell_type', 'suspension_type', 'cell_type', 'assay', 'disease', 'organism', 'sex', 'tissue', 'self_reported_ethnicity', 'development_stage', 'batch_id'
    var: 'chromosome', 'featureend', 'featurestart', 'n_cells', 'percent_cells', 'robust', 'highly_variable_features', 'mean', 'var', 'hvf_loess', 'hvf_rank', 'gene_symbols', 'feature_is_filtered', 'feature_name', 'feature_reference', 'feature_biotype', 'id_in_vocab', 'gene_ids', 'n_counts', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm'
    uns: 'cell_type_ontology_term_id_colors', 'default_embedding', 'schema_version', 'title', 'log1p', 'hvg'
    obsm: 'X_diffmap', 'X_diffmap_pca', 'X_fitsne', 'X_fle', 'X_pca', 'X_phi', 'X_umap', 'bin_edges'
    layers: 'X_normed', 'X_log1p', 'X_binned'

In [ ]:

Copied!

grn
grn

array([[0.        , 0.        , 0.00184461, ..., 0.00069608, 0.        ,
        0.00147827],
       [0.        , 0.        , 0.00195379, ..., 0.00608598, 0.        ,
        0.00054822],
       [0.        , 0.        , 0.00406754, ..., 0.01028988, 0.        ,
        0.00044465],
       ...,
       [0.        , 0.        , 0.00234615, ..., 0.00053798, 0.        ,
        0.        ],
       [0.        , 0.        , 0.00029704, ..., 0.00185636, 0.00043659,
        0.00055524],
       [0.        , 0.        , 0.00134493, ..., 0.01271029, 0.        ,
        0.        ]], dtype=float32)

In [ ]:

Copied!

# if create a view of an AnnData, need to call copy()
# the first value in this array is the cell embedding, not a gene.
grn = GRNAnnData(subdata.copy(), grn=grn[1:,1:])
# if create a view of an AnnData, need to call copy()
# the first value in this array is the cell embedding, not a gene.
grn = GRNAnnData(subdata.copy(), grn=grn[1:,1:])

ask basic questions to the GRN¶

In [ ]:

Copied!

# microglia
top_central_genes = utils.get_centrality(grn)
grn.var.loc[[i[0] for i in top_central_genes],'feature_name']
# microglia
top_central_genes = utils.get_centrality(grn)
grn.var.loc[[i[0] for i in top_central_genes],'feature_name']

Top central genes: [('ENSG00000002933', 0.02610535850416804), ('ENSG00000007168', 0.02610535850416804), ('ENSG00000010278', 0.02610535850416804), ('ENSG00000014641', 0.02610535850416804), ('ENSG00000018280', 0.02610535850416804), ('ENSG00000026025', 0.02610535850416804), ('ENSG00000026297', 0.02610535850416804), ('ENSG00000046653', 0.02610535850416804), ('ENSG00000051523', 0.02610535850416804), ('ENSG00000059804', 0.02610535850416804), ('ENSG00000065135', 0.02610535850416804), ('ENSG00000067064', 0.02610535850416804), ('ENSG00000067225', 0.02610535850416804), ('ENSG00000067560', 0.02610535850416804), ('ENSG00000068697', 0.02610535850416804), ('ENSG00000074800', 0.02610535850416804), ('ENSG00000075142', 0.02610535850416804), ('ENSG00000075415', 0.02610535850416804), ('ENSG00000075624', 0.02610535850416804), ('ENSG00000078668', 0.02610535850416804), ('ENSG00000081237', 0.02610535850416804), ('ENSG00000082074', 0.02610535850416804), ('ENSG00000085063', 0.02610535850416804), ('ENSG00000086300', 0.02610535850416804), ('ENSG00000086730', 0.02610535850416804), ('ENSG00000087460', 0.02610535850416804), ('ENSG00000089220', 0.02610535850416804), ('ENSG00000089327', 0.02610535850416804), ('ENSG00000090104', 0.02610535850416804), ('ENSG00000090238', 0.02610535850416804)]

gene_ids
ENSG00000002933    TMEM176A
ENSG00000007168    PAFAH1B1
ENSG00000010278         CD9
ENSG00000014641        MDH1
ENSG00000018280     SLC11A1
ENSG00000026025         VIM
ENSG00000026297     RNASET2
ENSG00000046653       GPM6B
ENSG00000051523        CYBA
ENSG00000059804      SLC2A3
ENSG00000065135       GNAI3
ENSG00000067064        IDI1
ENSG00000067225         PKM
ENSG00000067560        RHOA
ENSG00000068697     LAPTM4A
ENSG00000074800        ENO1
ENSG00000075142         SRI
ENSG00000075415     SLC25A3
ENSG00000075624        ACTB
ENSG00000078668       VDAC3
ENSG00000081237       PTPRC
ENSG00000082074        FYB1
ENSG00000085063        CD59
ENSG00000086300       SNX10
ENSG00000086730        LAT2
ENSG00000087460        GNAS
ENSG00000089220       PEBP1
ENSG00000089327       FXYD5
ENSG00000090104        RGS1
ENSG00000090238       YPEL3
Name: feature_name, dtype: category
Categories (2000, object): ['A2M', 'AACS', 'AANAT', 'ABCB1', ..., 'ZMAT4', 'ZNF804A', 'ZNF812P', 'ZNRF1']

In [ ]:

Copied!

top_central_genes = utils.get_centrality(grn)
grn.var.loc[[i[0] for i in top_central_genes],'feature_name']
top_central_genes = utils.get_centrality(grn)
grn.var.loc[[i[0] for i in top_central_genes],'feature_name']

Top central genes: [('ENSG00000001461', 0.030758883171933152), ('ENSG00000010404', 0.030758883171933152), ('ENSG00000014641', 0.030758883171933152), ('ENSG00000022267', 0.030758883171933152), ('ENSG00000042753', 0.030758883171933152), ('ENSG00000046653', 0.030758883171933152), ('ENSG00000047849', 0.030758883171933152), ('ENSG00000051620', 0.030758883171933152), ('ENSG00000052802', 0.030758883171933152), ('ENSG00000057757', 0.030758883171933152), ('ENSG00000059804', 0.030758883171933152), ('ENSG00000060138', 0.030758883171933152), ('ENSG00000067064', 0.030758883171933152), ('ENSG00000067225', 0.030758883171933152), ('ENSG00000067560', 0.030758883171933152), ('ENSG00000067606', 0.030758883171933152), ('ENSG00000068697', 0.030758883171933152), ('ENSG00000068971', 0.030758883171933152), ('ENSG00000069849', 0.030758883171933152), ('ENSG00000074317', 0.030758883171933152), ('ENSG00000074800', 0.030758883171933152), ('ENSG00000075142', 0.030758883171933152), ('ENSG00000075415', 0.030758883171933152), ('ENSG00000075624', 0.030758883171933152), ('ENSG00000075785', 0.030758883171933152), ('ENSG00000075945', 0.030758883171933152), ('ENSG00000076043', 0.030758883171933152), ('ENSG00000078668', 0.030758883171933152), ('ENSG00000078902', 0.030758883171933152), ('ENSG00000079459', 0.030758883171933152)]

gene_ids
ENSG00000001461     NIPAL3
ENSG00000010404        IDS
ENSG00000014641       MDH1
ENSG00000022267       FHL1
ENSG00000042753      AP2S1
ENSG00000046653      GPM6B
ENSG00000047849       MAP4
ENSG00000051620      HEBP2
ENSG00000052802      MSMO1
ENSG00000057757     PITHD1
ENSG00000059804     SLC2A3
ENSG00000060138       YBX3
ENSG00000067064       IDI1
ENSG00000067225        PKM
ENSG00000067560       RHOA
ENSG00000067606      PRKCZ
ENSG00000068697    LAPTM4A
ENSG00000068971    PPP2R5B
ENSG00000069849     ATP1B3
ENSG00000074317       SNCB
ENSG00000074800       ENO1
ENSG00000075142        SRI
ENSG00000075415    SLC25A3
ENSG00000075624       ACTB
ENSG00000075785      RAB7A
ENSG00000075945     KIFAP3
ENSG00000076043      REXO2
ENSG00000078668      VDAC3
ENSG00000078902     TOLLIP
ENSG00000079459      FDFT1
Name: feature_name, dtype: category
Categories (2000, object): ['A2M', 'AACS', 'AANAT', 'ABCB1', ..., 'ZMAT4', 'ZNF804A', 'ZNF812P', 'ZNRF1']

In [ ]:

Copied!

grn.var_names = grn.var['feature_name']
grn.var['TFs'] = [True if i in utils.TF else False for i in grn.var_names]
grn.var_names = grn.var['feature_name']
grn.var['TFs'] = [True if i in utils.TF else False for i in grn.var_names]

/home/ml4ig1/miniconda3/envs/training-gpt/lib/python3.10/site-packages/anndata/_core/anndata.py:949: UserWarning: 
AnnData expects .var.index to contain strings, but got values like:
    ['CFH', 'NIPAL3', 'WNT16', 'MAD1L1', 'TMEM176A']

    Inferred to be: categorical

  names = self._prep_dim_index(names, "var")

In [ ]:

Copied!

#microglia
grn.grn.sum(1).sort_values(ascending=False).head(20)
#microglia
grn.grn.sum(1).sort_values(ascending=False).head(20)

feature_name
MT-CO1      3.971675
IFI27       3.719180
MT-CO3      3.642034
CYP17A1     3.640886
MT-RNR2     3.578026
SERPING1    3.559206
CD74        3.540354
IFIT2       3.535392
TF          3.524321
NR4A3       3.513894
B2M         3.493739
HLA-DPB1    3.457864
TYROBP      3.434500
TMSB4X      3.430675
HLA-DRA     3.387647
HLA-DRB1    3.360535
CLU         3.353451
S100A9      3.321588
C1QTNF3     3.308875
QPCT        3.290879
dtype: float32

In [ ]:

Copied!

#cones
grn.grn.sum(1).sort_values(ascending=False).head(20)
#cones
grn.grn.sum(1).sort_values(ascending=False).head(20)

feature_name
CHST8       4.046652
RSPO3       3.987358
CA14        3.946459
CCNA1       3.943601
COL21A1     3.925835
LYVE1       3.911230
NOS1        3.910461
SCD         3.900852
QPCT        3.894938
CSRP1       3.882580
OR7E115P    3.877537
KCNK1       3.863492
NGFR        3.852417
NEUROD2     3.843404
ZCCHC12     3.827054
PARM1       3.822857
CDH7        3.822110
SLC15A3     3.821029
ANXA8L1     3.811610
KCNH5       3.808449
dtype: float32

In [ ]:

Copied!

grn.write('grn.h5ad')
grn.write('grn.h5ad')

... storing 'batch_id' as categorical

In [ ]:

Copied!

grn
grn

AnnData object with n_obs × n_vars = 12239 × 2000
    obs: 'n_genes', 'n_counts', 'percent_mito', 'donor_id', 'assay_ontology_term_id', 'cell_type_ontology_term_id', 'development_stage_ontology_term_id', 'disease_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'is_primary_data', 'organism_ontology_term_id', 'sex_ontology_term_id', 'tissue_ontology_term_id', 'author_cell_type', 'suspension_type', 'cell_type', 'assay', 'disease', 'organism', 'sex', 'tissue', 'self_reported_ethnicity', 'development_stage', 'batch_id'
    var: 'chromosome', 'featureend', 'featurestart', 'n_cells', 'percent_cells', 'robust', 'highly_variable_features', 'mean', 'var', 'hvf_loess', 'hvf_rank', 'gene_symbols', 'feature_is_filtered', 'feature_name', 'feature_reference', 'feature_biotype', 'id_in_vocab', 'gene_ids', 'n_counts', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'centrality', 'TFs'
    uns: 'cell_type_ontology_term_id_colors', 'default_embedding', 'schema_version', 'title', 'log1p', 'hvg'
    obsm: 'X_diffmap', 'X_diffmap_pca', 'X_fitsne', 'X_fle', 'X_pca', 'X_phi', 'X_umap', 'bin_edges'
    layers: 'X_normed', 'X_log1p', 'X_binned'
    varp: 'GRN'

In [ ]:

Copied!

utils.enrichment(grn, of='Regulators')
utils.enrichment(grn, of='Regulators')

2024-01-11 14:36:36,661 [WARNING] Duplicated values found in preranked stats: 5.50% of genes
The order of those genes will be arbitrary, which may produce unexpected results.
2024-01-11 14:36:36,663 [INFO] Parsing data files for GSEA.............................
2024-01-11 14:36:36,665 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:36:36,676 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:36:37,185 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:36:37,238 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:36:37,241 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:36:37,251 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:36:37,876 [ERROR] No supported gene_sets: GTEx_Tissue_Sample_Gene_Expression_Profiles_up
2024-01-11 14:36:37,879 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:36:37,905 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:36:37,929 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:36:38,560 [INFO] 1941 gene_sets have been filtered out when max_size=1000 and min_size=5
2024-01-11 14:36:38,563 [INFO] 2148 gene_sets used for further statistical testing.....
2024-01-11 14:36:38,565 [INFO] Start to run GSEA...Might take a while..................
2024-01-11 14:36:49,816 [INFO] Congratulations. GSEApy runs successfully................

[]

---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

/home/ml4ig1/Documents code/scGPT/mytests/cleanup_run.ipynb Cell 37 line 1

----> <a href='vscode-notebook-cell://ssh-remote%2Bperso/home/ml4ig1/Documents%20code/scGPT/mytests/cleanup_run.ipynb#Y151sdnNjb2RlLXJlbW90ZQ%3D%3D?line=0'>1</a> utils.enrichment(grn, of='Regulators')



File ~/Documents code/GRnnData/grnndata/utils.py:92, in enrichment(grn, of, doplot, top_k, **kwargs)

     90 # plot results

     91 if doplot:

---> 92     ax = dotplot(

     93         pre_res.res2d[

     94             (pre_res.res2d["FDR q-val"] < 0.1) & (pre_res.res2d["NES"] > 1)

     95         ].sort_values(by=["NES"], ascending=False),

     96         column="FDR q-val",

     97         title="enrichment of " + of + " in the grn",

     98         size=6,  # adjust dot size

     99         figsize=(4, 5),

    100         cutoff=0.25,

    101         show_ring=False,

    102     )

    104 return val



File ~/miniconda3/envs/training-gpt/lib/python3.10/site-packages/gseapy/plot.py:1150, in dotplot(df, column, x, y, x_order, y_order, title, cutoff, top_term, size, figsize, cmap, ofname, xticklabels_rot, yticklabels_rot, marker, show_ring, **kwargs)

   1147     warnings.warn("group is deprecated; use x instead", DeprecationWarning, 2)

   1148     return

-> 1150 dot = DotPlot(

   1151     df=df,

   1152     x=x,

   1153     y=y,

   1154     x_order=x_order,

   1155     y_order=y_order,

   1156     hue=column,

   1157     title=title,

   1158     thresh=cutoff,

   1159     n_terms=int(top_term),

   1160     dot_scale=size,

   1161     figsize=figsize,

   1162     cmap=cmap,

   1163     ofname=ofname,

   1164     marker=marker,

   1165 )

   1166 ax = dot.scatter(outer_ring=show_ring)

   1168 if xticklabels_rot:



File ~/miniconda3/envs/training-gpt/lib/python3.10/site-packages/gseapy/plot.py:649, in DotPlot.__init__(self, df, x, y, hue, dot_scale, x_order, y_order, thresh, n_terms, title, figsize, cmap, ofname, **kwargs)

    647 self.n_terms = n_terms

    648 self.thresh = thresh

--> 649 self.data = self.process(df)

    650 plt.rcParams.update({"pdf.fonttype": 42, "ps.fonttype": 42})



File ~/miniconda3/envs/training-gpt/lib/python3.10/site-packages/gseapy/plot.py:674, in DotPlot.process(self, df)

    672 if len(df) < 1:

    673     msg = "Warning: No enrich terms when cutoff = %s" % self.thresh

--> 674     raise ValueError(msg)

    675 self.cbar_title = self.colname

    676 # clip GSEA lower bounds

    677 # if self.colname in ["NOM p-val", "FDR q-val"]:

    678 #     df[self.colname].clip(1e-5, 1.0, inplace=True)

    679 # sorting the dataframe for better visualization



ValueError: Warning: No enrich terms when cutoff = 0.25

In [ ]:

Copied!

utils.enrichment(grn, of='Targets')
utils.enrichment(grn, of='Targets')

2024-01-11 14:37:43,492 [INFO] Parsing data files for GSEA.............................
2024-01-11 14:37:43,495 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:37:43,504 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:37:44,017 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:37:44,074 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:37:44,077 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:37:44,086 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:37:44,711 [ERROR] No supported gene_sets: GTEx_Tissue_Sample_Gene_Expression_Profiles_up
2024-01-11 14:37:44,714 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:37:44,741 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:37:44,764 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:37:45,383 [INFO] 1941 gene_sets have been filtered out when max_size=1000 and min_size=5
2024-01-11 14:37:45,386 [INFO] 2148 gene_sets used for further statistical testing.....
2024-01-11 14:37:45,388 [INFO] Start to run GSEA...Might take a while..................
2024-01-11 14:37:55,976 [INFO] Congratulations. GSEApy runs successfully................

['KEGG_2016__Neuroactive ligand-receptor interaction Homo sapiens hsa04080', 'KEGG_2016__Staphylococcus aureus infection Homo sapiens hsa05150', 'GO_Cellular_Component_2015__integral component of plasma membrane (GO:0005887)', 'GO_Molecular_Function_2015__extracellular ligand-gated ion channel activity (GO:0005230)', 'Chromosome_Location__chr5p15', 'GO_Molecular_Function_2015__ligand-gated channel activity (GO:0022834)', 'GO_Molecular_Function_2015__ligand-gated ion channel activity (GO:0015276)', 'KEGG_2016__Systemic lupus erythematosus Homo sapiens hsa05322', 'GO_Cellular_Component_2015__chloride channel complex (GO:0034707)', 'GO_Cellular_Component_2015__extracellular region (GO:0005576)']

	Term	ES	NES	NOM p-val	FDR q-val	FWER p-val	Tag %	Gene %	Lead_genes
264	KEGG_2016__Neuroactive ligand-receptor interac...	0.390721	2.965037	0.0	0.000531	0.001	39/57	34.25%	P2RX5;P2RY1;S1PR3;HTR5A;CHRNA2;GRIN2B;OPRK1;CY...
315	KEGG_2016__Staphylococcus aureus infection Hom...	0.487666	2.538607	0.0	0.011416	0.039	19/23	36.40%	FCGR1A;CFI;HLA-DPB1;HLA-DMB;FPR1;C1QC;FCGR2B;C...
341	GO_Cellular_Component_2015__integral component...	0.213427	2.412604	0.0	0.029735	0.137	117/192	45.30%	LYVE1;NGFR;KCNH5;TPBG;P2RX5;P2RY1;GPR37;S1PR3;...
354	GO_Molecular_Function_2015__extracellular liga...	0.441347	2.29744	0.001427	0.069558	0.351	16/23	29.25%	P2RX5;CHRNA2;GRIN2B;GABRG1;GABRR1;GRIA4;GABRA1...
361	Chromosome_Location__chr5p15	0.774824	2.274557	0.0	0.06754	0.41	6/6	22.75%	IRX2;MYO10;IRX4;IRX1;SEMA5A;CTNND2
365	GO_Molecular_Function_2015__ligand-gated chann...	0.356361	2.266412	0.0	0.051884	0.431	23/37	31.20%	KCNK1;P2RX5;KCNJ10;KCNJ6;CHRNA2;GRIN2B;GABRG1;...
366	GO_Molecular_Function_2015__ligand-gated ion c...	0.356361	2.266412	0.0	0.051884	0.431	23/37	31.20%	KCNK1;P2RX5;KCNJ10;KCNJ6;CHRNA2;GRIN2B;GABRG1;...
372	KEGG_2016__Systemic lupus erythematosus Homo s...	0.418975	2.225912	0.001387	0.062125	0.526	18/24	36.40%	FCGR1A;GRIN2B;HLA-DPB1;HLA-DMB;C1QC;HLA-DPA1;H...
376	GO_Cellular_Component_2015__chloride channel c...	0.53178	2.214056	0.001541	0.060709	0.554	10/13	26.90%	GABRG1;GABRR1;GABRA1;GABRA2;GLRA3;GABRG3;FXYD3...
387	GO_Cellular_Component_2015__extracellular regi...	0.183872	2.160125	0.0	0.085328	0.704	130/219	46.95%	RSPO3;COL21A1;NGFR;EFEMP1;TENM1;LY96;LPL;LUZP2...

No description has been provided for this image

In [ ]:

Copied!

res = utils.enrichment(grn, of='Central')
res = utils.enrichment(grn, of='Central')

Top central genes: []

2024-01-11 14:52:38,615 [WARNING] Duplicated values found in preranked stats: 13.55% of genes
The order of those genes will be arbitrary, which may produce unexpected results.
2024-01-11 14:52:38,617 [INFO] Parsing data files for GSEA.............................
2024-01-11 14:52:38,619 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:52:38,626 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:52:38,960 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:52:38,977 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:52:38,979 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:52:38,985 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:52:39,601 [ERROR] No supported gene_sets: GTEx_Tissue_Sample_Gene_Expression_Profiles_up
2024-01-11 14:52:39,604 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:52:39,628 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:52:39,651 [INFO] Enrichr library gene sets already downloaded in: /home/ml4ig1/.cache/gseapy, use local file
2024-01-11 14:52:40,318 [INFO] 1941 gene_sets have been filtered out when max_size=1000 and min_size=5
2024-01-11 14:52:40,321 [INFO] 2148 gene_sets used for further statistical testing.....
2024-01-11 14:52:40,322 [INFO] Start to run GSEA...Might take a while..................
2024-01-11 14:52:51,020 [INFO] Congratulations. GSEApy runs successfully................

['ENCODE_TF_ChIP-seq_2014__NRF1 HELA-S3', 'ENCODE_TF_ChIP-seq_2014__ETS1 A549', 'PPI_Hub_Proteins__SLC2A4', 'ENCODE_TF_ChIP-seq_2014__SIX5 A549', 'ENCODE_TF_ChIP-seq_2014__NRF1 K562', 'ENCODE_TF_ChIP-seq_2014__PML K562', 'ENCODE_TF_ChIP-seq_2014__GABP H3LA3', 'ENCODE_TF_ChIP-seq_2014__ZBTB33 A549', 'ENCODE_TF_ChIP-seq_2014__ELF1 A549', 'ENCODE_TF_ChIP-seq_2014__TRIM28 K562', 'ENCODE_TF_ChIP-seq_2014__GTF2B K562', 'ENCODE_TF_ChIP-seq_2014__MYBL2 HEPG2', 'ENCODE_TF_ChIP-seq_2014__TAF1 GM12892', 'ENCODE_TF_ChIP-seq_2014__GABP HEPG2', 'ENCODE_TF_ChIP-seq_2014__SP2 K562', 'ENCODE_TF_ChIP-seq_2014__NRF1 HEPG2', 'ENCODE_TF_ChIP-seq_2014__POL2 GM12891', 'ENCODE_TF_ChIP-seq_2014__TAF1 PFSK1', 'ENCODE_TF_ChIP-seq_2014__GABP GM12878', 'PPI_Hub_Proteins__IKBKE', 'ENCODE_TF_ChIP-seq_2014__SP1 K562', 'ENCODE_TF_ChIP-seq_2014__CREB1 K562', 'ENCODE_TF_ChIP-seq_2014__GABP HELA-S3', 'ENCODE_TF_ChIP-seq_2014__BCLAF1 K562', 'ENCODE_TF_ChIP-seq_2014__PML GM12878', 'ENCODE_TF_ChIP-seq_2014__SIX5 GM12878', 'ENCODE_TF_ChIP-seq_2014__BRCA1 GM12878', 'ENCODE_TF_ChIP-seq_2014__ELK1 K562', 'ENCODE_TF_ChIP-seq_2014__MBD4 HEPG2', 'ENCODE_TF_ChIP-seq_2014__NFIC HEPG2']