# Source code for metapub.medgenconcept

"""metapub.medgenconcept -- MedGenConcept class instantiated by supplying ESummary XML string."""

import logging
from lxml import etree

from .base import MetaPubObject
from .exceptions import MetaPubError

# Module-scoped logger: a library module should not log through the root
# logger, so applications can filter or configure this module by name.
logger = logging.getLogger(__name__)


class MedGenConcept(MetaPubObject):
    """A single MedGen concept built from an ESummary XML string.

    After construction the following attributes are populated from the
    ``DocumentSummary`` element and its ``ConceptMeta`` child:

    ``CUI``, ``title``, ``definition``, ``semantic_id``, ``semantic_type``,
    ``uid`` (also exposed as ``medgen_uid``), ``names``, ``OMIM``,
    ``modes_of_inheritance``, ``associated_genes``, ``cytogenic``,
    ``chromosome`` and ``definitions``.
    """

    def __init__(self, xmlstr, *args, **kwargs):
        """Parse ``xmlstr`` and extract every supported concept property.

        :param xmlstr: ESummary XML response text describing one concept.
        :raises MetaPubError: if the XML carries an explicit ``error`` value.
        """
        # NOTE(review): ``args`` and ``kwargs`` are handed to the base class
        # as two plain positional values (not unpacked), exactly as before.
        # Confirm against MetaPubObject.__init__ before changing this.
        super(MedGenConcept, self).__init__(xmlstr, 'DocumentSummarySet/DocumentSummary', args, kwargs)

        error = self._get('error')
        if error:
            raise MetaPubError('Supplied XML for MedGenConcept contained explicit error: %s' % error)

        # Sometimes ConceptMeta is an XML document embedded (as text) within
        # the XML response; other times it is a regular element with children.
        concept_meta = self.content.find('ConceptMeta')
        if concept_meta is not None and len(concept_meta):
            # Real child elements are present: use the element directly, even
            # if it also carries (whitespace) text from pretty-printing.
            self.meta = concept_meta
        else:
            try:
                self.meta = etree.fromstring('<ConceptMeta>' + concept_meta.text + '</ConceptMeta>')
            except TypeError:
                # .text was None, so there is no embedded document to parse.
                self.meta = concept_meta

        self.modes_of_inheritance = self._get_modes_of_inheritance()
        self.OMIM = self._get_OMIM()    # is a list, since sometimes there are more than one.
        self.names = self._get_names()
        self.CUI = self._get_CUI()
        self.title = self._get_title()
        self.definition = self._get_definition()
        self.semantic_id = self._get_semantic_id()
        self.semantic_type = self._get_semantic_type()
        self.associated_genes = self._get_associated_genes()
        self.cytogenic = self._get_cytogenic()
        self.chromosome = self._get_chromosome()
        self.definitions = self._get_definitions()
        self.uid = self._get_medgen_uid()

    def to_dict(self):
        """ returns a dictionary composed of all extractable properties of this concept. """
        return {
            'CUI': self.CUI,
            'title': self.title,
            'definition': self.definition,
            'semantic_id': self.semantic_id,
            'semantic_type': self.semantic_type,
            'modes_of_inheritance': self.modes_of_inheritance,
            'associated_genes': self.associated_genes,
            'medgen_uid': self.medgen_uid,
            'names': self.names,
            'OMIM': self.OMIM,
            'cytogenic': self.cytogenic,
            'chromosome': self.chromosome,
        }

    @property
    def synonyms(self):
        """Returns a list of the 'name' values from self.names."""
        return [named['name'] for named in self.names]

    @property
    def medgen_uid(self):
        """Synonym for "uid". Sometimes when juggling concepts from multiple places, this helps."""
        return self.uid

    # -- helpers over the ConceptMeta tree ---------------------------------

    def _meta_children(self, tag):
        """Return the child elements of ``self.meta/<tag>``, or [] if ``<tag>`` is absent."""
        container = self.meta.find(tag)
        if container is None:
            return []
        return list(container)

    def _meta_text(self, tag):
        """Return the text of ``self.meta/<tag>``, or None if ``<tag>`` is absent."""
        node = self.meta.find(tag)
        if node is None:
            return None
        return node.text

    # -- simple DocumentSummary fields -------------------------------------

    def _get_CUI(self):
        return self._get('ConceptId')

    def _get_title(self):
        return self._get('Title')

    def _get_definition(self):
        return self._get('Definition')

    def _get_semantic_id(self):
        return self._get('SemanticId')

    def _get_semantic_type(self):
        return self._get('SemanticType')

    def _get_medgen_uid(self):
        return self.content.get('uid')

    # -- ConceptMeta fields ------------------------------------------------

    def _get_modes_of_inheritance(self):
        """ returns a list of all known ModesOfInheritance, in format:

        [ { 'CUI': 'CNxxxx', 'name': 'some name', 'medgen_uid': 'xxxxxx', 'TUI': 'A000' }, ... ]

        'semantic_type' and 'definition' keys are added only when the
        corresponding child element exists. Returns [] when the concept has
        no ModesOfInheritance element.
        """
        # Output key -> XML attribute it is read from.
        # NOTE(review): 'medgen_uid' is read from the ``uid`` attribute, the
        # attribute name used by the sample ClinicalFeature elements quoted at
        # the bottom of this class -- confirm for ModesOfInheritance children.
        attribute_for_key = (('CUI', 'CUI'), ('TUI', 'TUI'), ('medgen_uid', 'uid'))
        optional_children = (('semantic_type', 'SemanticType'), ('definition', 'Definition'))

        output_list = []
        for mode in self._meta_children('ModesOfInheritance'):
            # Previously the attribute values were fetched and thrown away,
            # leaving these keys permanently None.
            mode_dict = dict((key, mode.get(attr)) for key, attr in attribute_for_key)

            for key, child_tag in optional_children:
                child = mode.find(child_tag)
                if child is not None:
                    mode_dict[key] = child.text

            name_node = mode.find('Name')
            mode_dict['name'] = name_node.text if name_node is not None else None

            output_list.append(mode_dict)
        return output_list

    def _get_associated_genes(self):
        """ returns a list of AssociatedGenes, in format:

        [ { 'gene_id': 'xxx', 'chromosome': 'X', 'cytogen_loc': 'X9234235', 'hgnc': 'GENE' }, ]

        if not available, returns None.
        """
        container = self.meta.find('AssociatedGenes')
        if container is None:
            return None
        return [{'gene_id': gene.get('gene_id'),
                 'hgnc': gene.text,
                 'chromosome': gene.get('chromosome'),
                 'cytogen_loc': gene.get('cytogen_loc')}
                for gene in container]

    def _get_names(self):
        """ Returns a list of this concept's equivalent Names in various dictionaries, in format:

        {'SDUI': '300555', 'SCUI': 'xxx', 'CODE': '300555', 'SAB': 'OMIM',
         'TTY': 'PT', 'type': 'syn', 'name': 'DENT DISEASE 2'}

        Attributes missing from a given Name come back as None.
        Returns [] when the concept has no Names element.
        """
        # not every ID is present in each Name (e.g. SCUI only appears sometimes).
        # NOTE(review): 'PT' looks like a TTY *value* rather than an attribute
        # name; it is kept so the shape of the returned dicts is unchanged.
        possible_keys = ['SDUI', 'SCUI', 'CODE', 'SAB', 'TTY', 'PT', 'type']

        names = []
        for name in self._meta_children('Names'):
            outd = {'name': name.text}
            for key in possible_keys:
                outd[key] = name.get(key)
            names.append(outd)
        return names

    def _get_OMIM(self):
        """ Returns this concept's OMIM ids (list of strings), when available, else returns []. """
        # <OMIM><MIM>600376</MIM></OMIM>
        return [item.text for item in self._meta_children('OMIM')]

    def _get_chromosome(self):
        """returns this concept's affected chromosome, if applicable/available"""
        return self._meta_text('Chromosome')

    def _get_cytogenic(self):
        """returns this concept's cytogenic property, if applicable/available"""
        return self._meta_text('Cytogenic')

    def _get_definitions(self):
        """returns this concept's definitions as LIST of strings ([] if none are present)."""
        return [item.text for item in self._meta_children('Definitions')]

    # TODO: ClinicalFeatures / ClinicalFeature, e.g.
    #   <ClinicalFeatures>
    #     <ClinicalFeature uid="9232" CUI="C0019322" TUI="T190" SDUI="HP:0001537">
    #       <Name>Umbilical hernia</Name>
    #       <SemanticType>Anatomical Abnormality</SemanticType>
    #     </ClinicalFeature>
    #     <ClinicalFeature uid="504348" CUI="CN000117" TUI="T033" SDUI="HP:0000121">
    #       <Name>Nephrocalcinosis</Name>
    #       <SemanticType>Finding</SemanticType>
    #       <Definition>Nephrocalcinosis is the deposition of calcium salts in renal parenchyma.</Definition>
    #     </ClinicalFeature>
    #     ... (further ClinicalFeature elements of the same shape)
    #   </ClinicalFeatures>
    #
    # TODO: PhenotypicAbnormalities, e.g.
    #   <PhenotypicAbnormalities>
    #     <Category CUI="CN000115" name="Abnormality of the genitourinary system">
    #       <ClinicalFeature uid="504348" CUI="CN000117" TUI="T033" SDUI="HP:0000121">
    #         <SemanticType>Finding</SemanticType>
    #         <Name>Nephrocalcinosis</Name>
    #         <Definition>Nephrocalcinosis is the deposition of calcium salts in renal parenchyma.</Definition>
    #       </ClinicalFeature>
    #       ...
    #     </Category>
    #     <Category CUI="CN000664" name="Abnormality of the nervous system"> ... </Category>
    #     <Category CUI="CN001754" name="Abnormality of metabolism/homeostasis"> ... </Category>
    #   </PhenotypicAbnormalities>
    #
    # TODO: <RelatedDisorders></RelatedDisorders>
    #
    # TODO: <SNOMEDCT></SNOMEDCT>
    #
    # known others not planned for inclusion:
    #   <PharmacologicResponse></PharmacologicResponse>