gene.schemas#

Contains data models for representing VICC normalized gene records.

class gene.schemas.Annotation(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Define string constraints for annotations when gene location is absent.

ALT_LOC = 'alternate reference locus'[source]#

NOT_FOUND_ON_REFERENCE = 'not on reference assembly'[source]#

RESERVED = 'reserved'[source]#

UNPLACED = 'unplaced'[source]#

class gene.schemas.BaseGene(**data)[source]#

Base gene model. Provide shared resources for records produced by /search and /normalize_unmerged.

aliases: List[Annotated[str]][source]#

associated_with: List[Annotated[str]][source]#

concept_id: Annotated[str][source]#

gene_type: Optional[Annotated[str]][source]#

label: Optional[Annotated[str]][source]#

location_annotations: List[Annotated[str]][source]#

locations: Union[List[SequenceLocation], List[GeneSequenceLocation]][source]#

model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}[source]#: A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

previous_symbols: List[Annotated[str]][source]#

strand: Optional[Strand][source]#

symbol: Annotated[str][source]#

symbol_status: Optional[SymbolStatus][source]#

xrefs: List[Annotated[str]][source]#

class gene.schemas.BaseNormalizationService(**data)[source]#

Base model providing shared attributes to Normalization service classes.

match_type: MatchType[source]#

model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}[source]#: A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

query: Annotated[str][source]#

service_meta_: ServiceMeta[source]#

warnings: List[Dict][source]#

class gene.schemas.Chromosome(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Define string constraints for chromosomes.

MITOCHONDRIA = 'MT'[source]#

class gene.schemas.DataLicenseAttributes(**data)[source]#

Define constraints for data license attributes.

attribution: Annotated[bool][source]#

model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}[source]#: A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

non_commercial: Annotated[bool][source]#

share_alike: Annotated[bool][source]#

class gene.schemas.Gene(**data)[source]#

match_type: MatchType[source]#

model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}[source]#: A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

class gene.schemas.GeneGroup(**data)[source]#

A grouping of genes based on common attributes.

description: Annotated[str][source]#

genes: List[Gene][source]#

model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}[source]#: A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

type_identifier: Annotated[str][source]#

class gene.schemas.GeneSequenceLocation(**data)[source]#

Sequence Location model when storing in DynamoDB.

end: Annotated[int][source]#

model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}[source]#: A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

sequence_id: Annotated[str][source]#

start: Annotated[int][source]#

type: Literal['SequenceLocation'][source]#

class gene.schemas.GeneTypeFieldName(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Designate source-specific gene type field names for Extensions and internal records.

ENSEMBL = 'ensembl_biotype'[source]#

HGNC = 'hgnc_locus_type'[source]#

NCBI = 'ncbi_gene_type'[source]#

class gene.schemas.MatchType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Define integer constraints for use in Match Type attributes.

ALIAS = 60[source]#

ASSOCIATED_WITH = 60[source]#

CONCEPT_ID = 100[source]#

FUZZY_MATCH = 20[source]#

NO_MATCH = 0[source]#

PREV_SYMBOL = 80[source]#

SYMBOL = 100[source]#

XREF = 60[source]#

class gene.schemas.MatchesNormalized(**data)[source]#

Matches associated with normalized concept from a single source.

model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}[source]#: A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

records: List[BaseGene][source]#

source_meta_: SourceMeta[source]#

class gene.schemas.NamespacePrefix(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Define string constraints for namespace prefixes on concept IDs.

CCDS = 'ccds'[source]#

CD = 'hcdmdb'[source]#

COSMIC = 'cosmic'[source]#

ENA = 'ena.embl'[source]#

ENSEMBL = 'ensembl'[source]#

ENTREZ = 'ncbigene'[source]#

HGNC = 'hgnc'[source]#

HOMEODB = 'homeodb'[source]#

HORDE = 'hordedb'[source]#

IMGT = 'imgt'[source]#

IMGT_GENE_DB = 'imgt/gene-db'[source]#

IUPHAR = 'iuphar'[source]#

KZNF = 'knzfgc'[source]#

LNCRNADB = 'lncrnadb'[source]#

MAMIT = 'mamittrnadb'[source]#

MEROPS = 'merops'[source]#

MIRBASE = 'mirbase'[source]#

NCBI = 'ncbigene'[source]#

OMIM = 'omim'[source]#

ORPHANET = 'orphanet'[source]#

PSEUDOGENE = 'pseudogene.org'[source]#

PUBMED = 'pubmed'[source]#

REFSEQ = 'refseq'[source]#

RFAM = 'rfam'[source]#

SNORNABASE = 'snornabase'[source]#

UCSC = 'ucsc'[source]#

UNIPROT = 'uniprot'[source]#

VEGA = 'vega'[source]#

class gene.schemas.NormalizeService(**data)[source]#

Define model for returning normalized concept.

gene: Optional[Gene][source]#

model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}[source]#: A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

normalized_id: Optional[str][source]#

source_meta_: Dict[SourceName, SourceMeta][source]#

class gene.schemas.RecordType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Record item types.

IDENTITY = 'identity'[source]#

MERGER = 'merger'[source]#

class gene.schemas.RefType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Reference item types.

ALIASES = 'alias'[source]#

ASSOCIATED_WITH = 'associated_with'[source]#

PREVIOUS_SYMBOLS = 'prev_symbol'[source]#

SYMBOL = 'symbol'[source]#

XREFS = 'xref'[source]#

class gene.schemas.SearchService(**data)[source]#

Define model for returning the concepts with the highest match type from sources.

model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}[source]#: A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

query: Annotated[str][source]#

service_meta_: ServiceMeta[source]#

source_matches: Dict[SourceName, SourceSearchMatches][source]#

warnings: List[Dict][source]#

class gene.schemas.ServiceMeta(**data)[source]#

Metadata regarding the gene-normalization service.

model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}[source]#: A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

name: Literal['gene-normalizer'][source]#

response_datetime: Annotated[str][source]#

url: Literal['https://github.com/cancervariants/gene-normalization'][source]#

version: Annotated[str][source]#

class gene.schemas.SourceIDAfterNamespace(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Define string constraints for the portion of a source ID that follows the namespace.

ENSEMBL = 'ENSG'[source]#

HGNC = ''[source]#

NCBI = ''[source]#

class gene.schemas.SourceMeta(**data)[source]#

Metadata for a given source to return in response object.

data_license: Annotated[str][source]#

data_license_attributes: Dict[Annotated[str], Annotated[bool]][source]#

data_license_url: Annotated[str][source]#

data_url: Dict[Annotated[str], Annotated[str]][source]#

genome_assemblies: List[Annotated[str]][source]#

model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}[source]#: A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

rdp_url: Optional[Annotated[str]][source]#

version: Annotated[str][source]#

class gene.schemas.SourceName(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Define string constraints for source names to ensure consistent capitalization.

ENSEMBL = 'Ensembl'[source]#

HGNC = 'HGNC'[source]#

NCBI = 'NCBI'[source]#

class gene.schemas.SourcePriority(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Define priorities for sources when building merged concepts.

ENSEMBL = 2[source]#

HGNC = 1[source]#

NCBI = 3[source]#

class gene.schemas.SourceSearchMatches(**data)[source]#

Container for matching information from an individual source.

model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}[source]#: A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

records: List[Gene][source]#

source_meta_: SourceMeta[source]#

class gene.schemas.Strand(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Define string constraints for strand attribute.

FORWARD = '+'[source]#

REVERSE = '-'[source]#

class gene.schemas.SymbolStatus(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Define string constraints for symbol status attribute.

APPROVED = 'approved'[source]#

DISCONTINUED = 'discontinued'[source]#

WITHDRAWN = 'withdrawn'[source]#

class gene.schemas.UnmergedNormalizationService(**data)[source]#

Response providing source records corresponding to normalization of user query. Enables retrieval of normalized concept while retaining sourcing for accompanying attributes.

model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}[source]#: A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

normalized_concept_id: Optional[Annotated[str]][source]#

source_matches: Dict[SourceName, MatchesNormalized][source]#