gene.schemas#

Contains data models for representing VICC normalized gene records.

class gene.schemas.Annotation(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Define string constraints for annotations when gene location is absent.

ALT_LOC = 'alternate reference locus'[source]#
NOT_FOUND_ON_REFERENCE = 'not on reference assembly'[source]#
RESERVED = 'reserved'[source]#
UNPLACED = 'unplaced'[source]#
class gene.schemas.BaseGene(**data)[source]#

Base gene model. Provide shared resources for records produced by /search and /normalize_unmerged.

aliases: List[Annotated[str]][source]#
associated_with: List[Annotated[str]][source]#
concept_id: Annotated[str][source]#
gene_type: Optional[Annotated[str]][source]#
label: Optional[Annotated[str]][source]#
location_annotations: List[Annotated[str]][source]#
locations: Union[List[SequenceLocation], List[GeneSequenceLocation]][source]#
model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}[source]#

A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

previous_symbols: List[Annotated[str]][source]#
strand: Optional[Strand][source]#
symbol: Annotated[str][source]#
symbol_status: Optional[SymbolStatus][source]#
xrefs: List[Annotated[str]][source]#
class gene.schemas.BaseNormalizationService(**data)[source]#

Base model providing shared attributes to normalization service classes.

match_type: MatchType[source]#
model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}[source]#

A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

query: Annotated[str][source]#
service_meta_: ServiceMeta[source]#
warnings: List[Dict][source]#
class gene.schemas.Chromosome(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Define string constraints for chromosomes.

MITOCHONDRIA = 'MT'[source]#
class gene.schemas.DataLicenseAttributes(**data)[source]#

Define constraints for data license attributes.

attribution: Annotated[bool][source]#
model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}[source]#

A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

non_commercial: Annotated[bool][source]#
share_alike: Annotated[bool][source]#
class gene.schemas.Gene(**data)[source]#
match_type: MatchType[source]#
model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}[source]#

A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

class gene.schemas.GeneGroup(**data)[source]#

A grouping of genes based on common attributes.

description: Annotated[str][source]#
genes: List[Gene][source]#
model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}[source]#

A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

type_identifier: Annotated[str][source]#
class gene.schemas.GeneSequenceLocation(**data)[source]#

Sequence location model used when storing records in DynamoDB.

end: Annotated[int][source]#
model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}[source]#

A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

sequence_id: Annotated[str][source]#
start: Annotated[int][source]#
type: Literal['SequenceLocation'][source]#
class gene.schemas.GeneTypeFieldName(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Designate source-specific gene type field names for Extensions and internal records.

ENSEMBL = 'ensembl_biotype'[source]#
HGNC = 'hgnc_locus_type'[source]#
NCBI = 'ncbi_gene_type'[source]#
class gene.schemas.MatchType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Define integer constraints for use in match type attributes.

ALIAS = 60[source]#
ASSOCIATED_WITH = 60[source]#
CONCEPT_ID = 100[source]#
FUZZY_MATCH = 20[source]#
NO_MATCH = 0[source]#
PREV_SYMBOL = 80[source]#
SYMBOL = 100[source]#
XREF = 60[source]#
class gene.schemas.MatchesNormalized(**data)[source]#

Matches associated with normalized concept from a single source.

model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}[source]#

A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

records: List[BaseGene][source]#
source_meta_: SourceMeta[source]#
class gene.schemas.NamespacePrefix(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Define string constraints for namespace prefixes on concept IDs.

CCDS = 'ccds'[source]#
CD = 'hcdmdb'[source]#
COSMIC = 'cosmic'[source]#
ENA = 'ena.embl'[source]#
ENSEMBL = 'ensembl'[source]#
ENTREZ = 'ncbigene'[source]#
HGNC = 'hgnc'[source]#
HOMEODB = 'homeodb'[source]#
HORDE = 'hordedb'[source]#
IMGT = 'imgt'[source]#
IMGT_GENE_DB = 'imgt/gene-db'[source]#
IUPHAR = 'iuphar'[source]#
KZNF = 'knzfgc'[source]#
LNCRNADB = 'lncrnadb'[source]#
MAMIT = 'mamittrnadb'[source]#
MEROPS = 'merops'[source]#
MIRBASE = 'mirbase'[source]#
NCBI = 'ncbigene'[source]#
OMIM = 'omim'[source]#
ORPHANET = 'orphanet'[source]#
PSEUDOGENE = 'pseudogene.org'[source]#
PUBMED = 'pubmed'[source]#
REFSEQ = 'refseq'[source]#
RFAM = 'rfam'[source]#
SNORNABASE = 'snornabase'[source]#
UCSC = 'ucsc'[source]#
UNIPROT = 'uniprot'[source]#
VEGA = 'vega'[source]#
class gene.schemas.NormalizeService(**data)[source]#

Define model for returning a normalized concept.

gene: Optional[Gene][source]#
model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}[source]#

A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

normalized_id: Optional[str][source]#
source_meta_: Dict[SourceName, SourceMeta][source]#
class gene.schemas.RecordType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Record item types.

IDENTITY = 'identity'[source]#
MERGER = 'merger'[source]#
class gene.schemas.RefType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Reference item types.

ALIASES = 'alias'[source]#
ASSOCIATED_WITH = 'associated_with'[source]#
PREVIOUS_SYMBOLS = 'prev_symbol'[source]#
SYMBOL = 'symbol'[source]#
XREFS = 'xref'[source]#
class gene.schemas.SearchService(**data)[source]#

Define model for returning the concepts with the highest match type from each source.

model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}[source]#

A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

query: Annotated[str][source]#
service_meta_: ServiceMeta[source]#
source_matches: Dict[SourceName, SourceSearchMatches][source]#
warnings: List[Dict][source]#
class gene.schemas.ServiceMeta(**data)[source]#

Metadata regarding the gene-normalization service.

model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}[source]#

A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

name: Literal['gene-normalizer'][source]#
response_datetime: Annotated[str][source]#
url: Literal['https://github.com/cancervariants/gene-normalization'][source]#
version: Annotated[str][source]#
class gene.schemas.SourceIDAfterNamespace(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Define string constraints for the portion of a source identifier that follows the namespace prefix.

ENSEMBL = 'ENSG'[source]#
HGNC = ''[source]#
NCBI = ''[source]#
class gene.schemas.SourceMeta(**data)[source]#

Metadata for a given source to return in response object.

data_license: Annotated[str][source]#
data_license_attributes: Dict[Annotated[str], Annotated[bool]][source]#
data_license_url: Annotated[str][source]#
data_url: Dict[Annotated[str], Annotated[str]][source]#
genome_assemblies: List[Annotated[str]][source]#
model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}[source]#

A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

rdp_url: Optional[Annotated[str]][source]#
version: Annotated[str][source]#
class gene.schemas.SourceName(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Define string constraints to ensure consistent capitalization.

ENSEMBL = 'Ensembl'[source]#
HGNC = 'HGNC'[source]#
NCBI = 'NCBI'[source]#
class gene.schemas.SourcePriority(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Define priorities for sources when building merged concepts.

ENSEMBL = 2[source]#
HGNC = 1[source]#
NCBI = 3[source]#
class gene.schemas.SourceSearchMatches(**data)[source]#

Container for matching information from an individual source.

model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}[source]#

A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

records: List[Gene][source]#
source_meta_: SourceMeta[source]#
class gene.schemas.Strand(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Define string constraints for strand attribute.

FORWARD = '+'[source]#
REVERSE = '-'[source]#
class gene.schemas.SymbolStatus(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Define string constraints for symbol status attribute.

APPROVED = 'approved'[source]#
DISCONTINUED = 'discontinued'[source]#
WITHDRAWN = 'withdrawn'[source]#
class gene.schemas.UnmergedNormalizationService(**data)[source]#

Response providing source records corresponding to normalization of user query. Enables retrieval of normalized concept while retaining sourcing for accompanying attributes.

model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}[source]#

A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

normalized_concept_id: Optional[Annotated[str]][source]#
source_matches: Dict[SourceName, MatchesNormalized][source]#