Source code for emmaa.priors.gene_list_prior

from indra.sources import tas
from indra.statements import Agent
from indra.databases import hgnc_client
from . import SearchTerm, get_drugs_for_gene
from . prior_stmts import get_stmts_for_gene_list
import datetime
from emmaa.statements import EmmaaStatement
from emmaa.model import EmmaaModel, save_config_to_s3


class GeneListPrior(object):
    """Class to manage the construction of a model from a list of genes.

    Parameters
    ----------
    gene_list : list[str]
        A list of HGNC gene symbols
    name : str
        The name of the model (all lower case, no spaces or special
        characters)
    human_readable_name : str
        The human readable name (display name) of the model

    Attributes
    ----------
    stmts : list
        The statements produced by `make_gene_statements` (initially
        empty).
    search_terms : list
        The search terms produced by `make_search_terms` (initially
        empty).
    """
    def __init__(self, gene_list, name, human_readable_name):
        self.name = name
        self.gene_list = gene_list
        self.human_readable_name = human_readable_name
        self.stmts = []
        self.search_terms = []

    def make_search_terms(self, drug_gene_stmts=None):
        """Generate search terms from the gene list.

        For every gene in the gene list one gene search term is created,
        followed by the drug search terms that `get_drugs_for_gene` returns
        for that gene. A drug term is only added the first time its name is
        seen, so a drug shared by several genes appears once.

        Parameters
        ----------
        drug_gene_stmts : Optional[list]
            Drug-gene statements handed to `get_drugs_for_gene` (presumably
            INDRA Statements from TAS -- confirm against callers). If None,
            they are obtained from `tas.process_from_web()`. An explicitly
            passed empty list is used as is and does not trigger the web
            request.

        Returns
        -------
        list
            The generated search terms, which are also stored in
            `self.search_terms`.
        """
        # Only fall back to the (expensive) web request when the caller did
        # not supply anything; an empty list is a valid "no drugs" input.
        if drug_gene_stmts is None:
            drug_gene_stmts = tas.process_from_web().statements
        already_added = set()
        terms = []
        for gene in self.gene_list:
            # Gene search term
            agent = agent_from_gene_name(gene)
            term = SearchTerm(type='gene', name=agent.name,
                              search_term=f'"{agent.name}"',
                              db_refs={'HGNC': agent.db_refs['HGNC'],
                                       'UP': agent.db_refs['UP']})
            terms.append(term)

            # Drug search term
            drug_terms = get_drugs_for_gene(drug_gene_stmts,
                                            agent.db_refs['HGNC'])
            for drug_term in drug_terms:
                # De-duplicate drugs by name across all genes
                if drug_term.name not in already_added:
                    terms.append(drug_term)
                    already_added.add(drug_term.name)
        self.search_terms = terms
        return terms

    def make_gene_statements(self):
        """Generate Statements from the gene list.

        The names of all search terms of type 'drug' currently in
        `self.search_terms` are passed together with the gene list to
        `get_stmts_for_gene_list`. Each returned statement is wrapped in an
        `EmmaaStatement` stamped with the current time and an empty list as
        its third argument (presumably its search terms -- confirm), and
        the result is stored in `self.stmts`.

        Note that this method does not generate search terms itself; if
        `self.search_terms` is empty, no drug names are passed on.
        """
        drug_names = [st.name for st in self.search_terms
                      if st.type == 'drug']
        indra_stmts = get_stmts_for_gene_list(self.gene_list, drug_names)
        estmts = [EmmaaStatement(stmt, datetime.datetime.now(), [])
                  for stmt in indra_stmts]
        self.stmts = estmts

    def make_config(self):
        """Generate a configuration based on attributes.

        Search terms and statements are generated first if they are still
        empty.

        Returns
        -------
        dict
            A configuration with the keys 'name', 'human_readable_name',
            'search_terms' (the JSON form of each search term) and
            'assembly' (fixed belief cutoff and ungrounded filter settings).
        """
        if not self.search_terms:
            self.make_search_terms()
        if not self.stmts:
            self.make_gene_statements()
        config = dict()
        config['name'] = self.name
        config['human_readable_name'] = self.human_readable_name
        config['search_terms'] = [st.to_json() for st in self.search_terms]
        config['assembly'] = {
            'belief_cutoff': 0.8,
            'filter_ungrounded': True
        }
        return config

    def make_model(self):
        """Make an EmmaaModel and upload it along with the config to S3.

        The model is uploaded to NDEx first so that the returned network
        UUID can be recorded under ``config['ndex']['network']`` before the
        config is saved.
        """
        config = self.make_config()
        em = EmmaaModel(self.name, config)
        em.stmts = self.stmts
        ndex_uuid = em.upload_to_ndex()
        config['ndex'] = {'network': ndex_uuid}
        save_config_to_s3(self.name, config)
        em.save_to_s3()
def agent_from_gene_name(gene_name):
    """Return an Agent based on a gene name.

    Parameters
    ----------
    gene_name : str
        The gene name to look up with `hgnc_client.get_hgnc_id`
        (presumably an HGNC gene symbol -- confirm against callers).

    Returns
    -------
    Agent
        An Agent named `gene_name` whose `db_refs` hold the looked-up
        identifiers under the keys 'HGNC' and 'UP'. Both keys are always
        present; their values are whatever the HGNC client returned.
    """
    # The UniProt lookup is keyed on the HGNC ID, so resolve that first.
    hgnc_id = hgnc_client.get_hgnc_id(gene_name)
    groundings = {
        'HGNC': hgnc_id,
        'UP': hgnc_client.get_uniprot_id(hgnc_id),
    }
    return Agent(gene_name, db_refs=groundings)