import os
[docs]class Probe:
"""
This class holds probe info.
USAGE:
.. code:: python
my_probe = Annotation.Probe()
"""
def __init__(self):
self.id = None
self.seq = None
self.name = None
self.chr = None
self.cord = None
self.strand = None
self.gene = None
self.refseq = None
self.beta = None
self.tour = None
self.loc = None
[docs]class SNP:
"""
This class defines the SNPs in probes. Can be used to filter probes.
"""
def __init__(self):
self.probeid = None
self.snpid = None
[docs]class ChrLoc:
"""
defines a chromosomal interval.
USAGE:
.. code:: python
my_probe = Annotation.ChrLoc("X", 122333232, 123334444)
"""
def __init__(self, chromosome, start, end):
self.chr = chromosome
self.start = start
self.end = end
[docs]class Location:
"""
Probe location is defined here.
"""
BODY = "Body"
TSS200 = "TSS200"
TSS1500 = "TSS1500"
UTR5 = "5'UTR"
UTR3 = "3'UTR"
EXON = "Exon"
[docs]class CpG_location:
"""
CpG location is defined here.
"""
ISLAND = "Island"
NSHORE = "N_Shore"
SSHORE = "S_Shore"
NSHELF = "N_Shelf"
SSHELF = "S_Shelf"
[docs]class Feature:
"""
This class parse features associated with probes.
"""
def __init__(self, feature):
self.feature_title = None
self.feature = None
if feature in ["Body", "TSS200", "TSS1500", "5'UTR", "3'UTR", "Exon"]:
self.feature_title = "Location"
self.feature = feature
elif feature in ["Island", "N_Shore", "S_Shore", "N_Shelf", "S_Shelf"]:
self.feature_title = "cpg_loc"
self.feature = feature
else:
self.feature_title = "gene"
self.feature = feature
[docs]class Annotator:
"""
This class parse all information about Illumina probes.
USAGE:
.. code:: python
annotations = Annotation.Annotator()
"""
def __init__(self):
ann_file = os.path.abspath("Data/config.ini")
for i in open(ann_file, mode="r"):
self.ann = os.path.join("Data/", i.strip("\n").strip("\r"))
self.probe = {}
self.__run__()
def __run__(self):
"""
Run the annotation initial setup.
:return: nothing. A static function.
"""
for i in open(self.ann, mode="r"):
if i.startswith("cg"):
data = i.split(",")
# Assigning probe information.
new_probe = Probe()
new_probe.id = data[0]
new_probe.name = data[1]
new_probe.seq = data[13]
new_probe.chr = str(data[11])
new_probe.cord = int(data[12])
new_probe.strand = data[16]
new_probe.gene = data[21].split(";")
new_probe.refseq = data[22]
locs = data[23].split(";")
list_locs = []
for i in locs:
if i not in list_locs:
list_locs.append(i)
new_probe.loc = list_locs
new_probe.tour = data[25]
newcpg = {new_probe.id: new_probe}
self.probe.update(newcpg)
[docs] def remove_snp_probes(self):
"""
This function will removes all SNPs associated with probes.
:return: returns a new probe listing.
"""
snp_list = []
snp_file = open("Data/humanmethylation450_dbsnp137.snpupdate.table.v2.sorted.txt", "r")
for line in snp_file:
if line.startswith("cg"):
line = line.strip("\n").strip("\r").split("\t")
new_snp = SNP()
new_snp.probeid = line[0]
new_snp.snpid = line[1]
snp_list.append(new_snp)
for snp in snp_list:
self.probe.pop(snp.probeid)
[docs] def get_all_probe_ids(self):
"""
Get all probe ids.
:return: a list of probe ids.
"""
return self.probe
[docs] def get_probes_id_from_gene(self, gene_name):
"""
Get all probes ids associated with a gene.
:param gene_name:
:return: a lst of probe ids.
"""
probes = {k: self.probe[k] for k in self.probe if gene_name in self.probe[k].gene}
return self.get_keys(probes.keys())
[docs] def get_probes_id_from_loc(self, probe_loc):
"""
Get all probes ids associated with genomic locations.
:param probe_loc:
:return: a list of probe ids.
"""
probes = {k: self.probe[k] for k in self.probe if probe_loc in self.probe[k].loc}
return self.get_keys(probes.keys())
[docs] def get_probes_id_from_cpg(self, cpg_loc):
"""
Get all probes ids associated with CpG sites.
:param cpg_loc:
:return: a list of probe ids.
"""
probes = {k: self.probe[k] for k in self.probe if cpg_loc in self.probe[k].tour}
return self.get_keys(probes.keys())
[docs] def get_probes_id_from_probe(self, probe_list):
"""
Get all probes ids from a list of probe objects.
:param probe_list: A list of probe ids.
:return: a list of probe ids.
"""
return self.get_keys(probe_list.keys())
[docs] def get_keys(self, dic_keys):
"""
Get Probe id from probe dictionaries
:param dic_keys: Probe dict.
:return: returns a list of probe id.
"""
l = []
for i in dic_keys:
l.append(i)
return l
[docs] def get_probe(self, probe_id):
"""
This function returns the info associated with an id.
:param probe_id: ILLUMINA ID
:return: all info
"""
try:
probe = self.probe[probe_id]
except Exception as ex:
probe = None
print("WARNING: No probe with id of %s found." % probe_id)
return probe
[docs] def get_probes(self, list_of_ids):
"""
This function returns a list of probe object from a list of ids.
:param list_of_ids:
:return: A list of probe objects.
"""
out_list = []
for probe_id in list_of_ids:
out_list.append(self.get_probe(probe_id))
return out_list
[docs] def get_all_probes(self):
"""
Get a list of all probes.
:return: A list of all probes in the Annotation object.
"""
probe_list = []
for probe in self.probe.iterkeys():
probe_list.append(self.get_probe(probe))
return probe_list
[docs] def get_probes_from_gene(self, gene_name):
"""
Get a list probe objects from an associated gene name.
:param gene_name: Gene name in string format
:return: A probe.
"""
return self.get_probes(self.get_probes_id_from_gene(gene_name))
[docs] def get_probes_from_loc(self, loc):
"""
Get a list probe objects from genomic location.
:param loc: from Location object.
:return: probes.
"""
return self.get_probes(self.get_probes_id_from_loc(loc))
[docs] def get_probes_from_cpg(self, cpg_loc):
"""
Get a list probe objects from cpg location.
:param cpg_loc: from CpG object
:return: probes.
"""
return self.get_probes(self.get_probes_from_cpg(cpg_loc))
[docs] def get_probes_from_chr_loc(self, chr_loc):
"""
Get a list of probes that are within a genomic region
:param chr_loc: Genomic location interval
:return: A list of probes.
"""
chrom = chr_loc.chr
start = int(chr_loc.start)
end = int(chr_loc.end)
probes = {k: self.probe[k] for k in self.probe if
self.probe[k].chr == chrom and start < self.probe[k].cord < end}
return probes
[docs] def get_probes_id_from_chr_loc(self, chr_loc):
"""
Get a list of probe ids that are witihn a genomic region
:param chr_loc: Genomic location interval
:return: A list of probe ids.
"""
probes = self.get_probes_from_chr_loc(chr_loc)
return self.get_keys(probes)
[docs] def get_number(self):
"""
Get numbers of probes
:return: An integer representing the number of probes.
"""
number = 0
for probe_id in self.probe.iterkeys():
number += 1
return number
[docs] def get_coord(self, probe):
"""
Get genomic coordinate of a probe.
:param probe: A probe object
:return: An integer indicator of probe numbers
"""
return probe.cord
[docs] def sort_coord_probe(self, probes):
"""
This function sorts probes based on the probe genomic location. Best used in combination with plotting module.
:param probes: Input probe list.
:return: A sorted probe list.
"""
soreted_probes = sorted(probes, key=self.get_coord)
return soreted_probes
[docs]def get_probes(annotations, probes_ids):
"""
Get a list of probes from probe ids
:param annotations: Annotation object that has been initiated properly.
:param probes_ids: A list of probe ids.
:return: A list of probes.
"""
return annotations.get_probes(probes_ids)
[docs]def get_probes_from_feature(probes_ids, filter_val):
"""
This function returns filters probes based on a feature.
:param probes_ids: The probe ids that you would like to filter.
:param filter_val: Feature to be filtered.
:return: Returns a list of probes.
"""
probes = probes_ids
out_probes = []
if filter_val.feature_title == "Location":
out_probes = [probe for probe in probes if filter_val.feature in probe.loc]
print("Location")
return out_probes
elif filter_val.feature_title == "cpg_loc":
print("CPG")
out_probes = [probe for probe in probes if filter_val.feature in probe.tour]
return out_probes
elif filter_val.feature_title == "gene":
out_probes = [probe for probe in probes if filter_val.feature in probe.gene]
return out_probes
elif filter_val is None:
return None