Source code for emmaa.priors.prior_stmts

import logging
from typing import List

from indra.databases import hgnc_client
from indra.statements import Statement, stmts_from_json
from indra.tools import assemble_corpus as ac
from indra_db.client.principal import get_raw_stmt_jsons_from_agents

__all__ = [
    "get_stmts_for_gene",
    "get_stmts_for_gene_list",
]

logger = logging.getLogger(__name__)


[docs]def get_stmts_for_gene(gene: str, max_stmts: int = 100000) -> List[Statement]: """Return all existing Statements for a given gene from the DB. Parameters ---------- gene : The HGNC symbol of a gene to query. max_stmts: The maximum number of statements to return Returns ------- : A list of INDRA Statements in which the given gene is involved. """ hgnc_id = hgnc_client.get_current_hgnc_id(gene) if hgnc_id is None: return [] agents = [ (None, hgnc_id, "HGNC"), ] res = get_raw_stmt_jsons_from_agents(agents=agents, max_stmts=max_stmts) return stmts_from_json(res.values())
[docs]def get_stmts_for_gene_list(gene_list, other_entities): """Return all Statements between genes in a given list. Parameters ---------- gene_list : list[str] A list of HGNC symbols for genes to query. other_entities : list[str] A list of other entities to keep as part of the set of Statements. Returns ------- list[indra.statements.Statement] A list of INDRA Statements between the given list of genes and other entities specified. """ stmts = [] for gene in gene_list: logger.info(f'Querying {gene}') st = get_stmts_for_gene(gene) logger.info(f'Got {len(st)} statements for {gene}') stmts += st stmts = ac.filter_gene_list(stmts, gene_list + other_entities, policy='all') return stmts