160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257 | def enrichment(
grn: GRNAnnData,
of: str = "Targets",
doplot: bool = True,
top_k: int = 30,
gene_sets: list = [
"KEGG_2016",
"ENCODE_TF_ChIP-seq_2014",
"GO_Molecular_Function_2015",
{"TFs": TF},
file_dir + "/celltype.gmt",
"OMIM_Disease",
"WikiPathways_2016",
"GO_Cellular_Component_2015",
"GTEx_Tissue_Sample_Gene_Expression_Profiles_up",
"TargetScan_microRNA",
"Chromosome_Location",
"PPI_Hub_Proteins",
],
min_size: int = 3,
max_size: int = 4000,
permutation_num: int = 1000, # reduce number to speed up testing
**kwargs,
):
"""
This function performs enrichment analysis on a given gene regulatory network (grn).
Args:
grn (GRNAnnData): The gene regulatory network to analyze.
of (str, optional): The specific component of the grn to focus on.
top_k (int, optional): The number of top results to return. Defaults to 10.
doplot (bool, optional): Whether to generate a plot of the results. Defaults to False.
gene_sets (list, optional): The gene sets to use for the enrichment analysis. Defaults to the gene sets provided in the function.
min_size (int, optional): The minimum size of the gene sets to consider. Defaults to 3.
max_size (int, optional): The maximum size of the gene sets to consider. Defaults to 4000.
permutation_num (int, optional): The number of permutations to use for the enrichment analysis. Defaults to 1000.
**kwargs: Additional keyword arguments to be passed to the gseapy.prerank function.
Returns:
pandas.DataFrame: A DataFrame containing the results of the enrichment analysis.
"""
if of == "Targets":
rnk = grn.grn.sum(1).sort_values(ascending=False)
elif of == "Regulators":
rnk = grn.grn.sum(0).sort_values(ascending=False)
elif of == "Central":
try:
get_centrality(grn, top_k_to_disp=0)
except nx.PowerIterationFailedConvergence:
print("PowerIterationFailedConvergence")
return None
rnk = grn.var["centrality"].sort_values(ascending=False)
else:
raise ValueError("of must be one of 'Targets', 'Regulators', or 'Central'")
rnk.name = None
rnk = rnk[rnk != 0]
if rnk.nunique() == 1:
print("The DataFrame contains only the same values.")
return None
# run enrichment analysis
previous_level = logging.root.manager.disable
logging.disable(logging.WARNING)
try:
pre_res = gp.prerank(
rnk=rnk, # or rnk = rnk,
gene_sets=gene_sets,
min_size=min_size,
max_size=max_size,
background=grn.var.index.tolist(), # or "hsapiens_gene_ensembl", or int, or text file or a list of genes
permutation_num=permutation_num,
**kwargs,
)
logging.disable(previous_level)
except LookupError:
print("raised a lookup error")
return None
val = (
pre_res.res2d[(pre_res.res2d["FDR q-val"] < 0.1) & (pre_res.res2d["NES"] > 1)]
.sort_values(by=["NES"], ascending=False)
.drop(columns=["Name"])
)
# plot results
if doplot:
print(val.Term.tolist()[:top_k])
_ = dotplot(
pre_res.res2d[
(pre_res.res2d["FDR q-val"] < 0.1) & (pre_res.res2d["NES"] > 1)
].sort_values(by=["NES"], ascending=False),
column="FDR q-val",
title="enrichment of " + of + " in the grn",
size=6, # adjust dot size
figsize=(4, 5),
cutoff=0.25,
show_ring=False,
)
return pre_res
|