import os
class ParseConfig:
    """
    Container for parser settings.

    The only setting defined here is ``cutoff``, which starts out as ``None``.
    """

    def __init__(self):
        # No cutoff is configured until a caller assigns one.
        self.cutoff = None
class Group:
    """
    A named-less bundle of samples.

    :param list_of_samples: A list of sample objects.
    :return: A Group object whose ``samples`` attribute is the given list.
    """

    def __init__(self, list_of_samples):
        # The list is stored by reference; it is not copied.
        self.samples = list_of_samples
class Sample:
    """
    Sample data object.

    A *sample* pairs a name (a string) with its probe methylation data
    (a dictionary).

    :param name: Name of the sample [default = None].
    :param probes: Methylation data of the sample, as a dict [default = None].
    :return: A Sample object.
    """

    def __init__(self, name=None, probes=None):
        # Both values are stored exactly as passed in.
        self.probes = probes
        self.name = name
class ParseBatch:
    """
    Parse every ``.txt`` data file found directly inside a folder.

    Each matching file is handed to ``ParseFile`` and the samples it yields
    are collected in ``self.samples``. A summary (sample count followed by
    each sample name) is printed once all files have been processed.

    :param folder: A string that represents a folder.
    :param delim: Delimiter character used in the data files [default = tab].
    :param avg_beta_header: Column-name suffix that marks average beta
        columns [default = .AVG_Beta].
    :return: A ParseBatch object. Use get_samples() to retrieve the samples.
    """

    def __init__(self, folder, delim="\t", avg_beta_header=".AVG_Beta"):
        self.folder = folder
        self.samples = []
        self.delim = delim
        self.avg_header = avg_beta_header

        # os.listdir() returns entries in arbitrary order; sorting makes the
        # resulting sample order reproducible between runs.
        for file_name in sorted(os.listdir(os.path.abspath(self.folder))):
            if not file_name.endswith(".txt"):
                continue
            parsed_file = ParseFile(
                os.path.abspath(os.path.join(self.folder, file_name)),
                delim=self.delim,
                avg_beta_header=self.avg_header,
            ).get_samples()
            # ParseFile.get_samples() returns None when a file produced no
            # samples; extending with None would raise TypeError.
            if parsed_file:
                self.samples.extend(parsed_file)

        print("%d groups processed." % len(self.samples))
        for sample in self.samples:
            print(sample.name)

    def get_samples(self):
        """
        Return all sample objects created from all files.

        :return: The list of collected samples (empty if none were found).
        """
        return self.samples
class ParseFile:
    """
    Parse a single data file into Sample objects.

    The line starting with ``TargetID`` is treated as the header: every
    column whose name ends with *avg_beta_header* becomes one sample, named
    after the column with that suffix removed. Every line starting with
    ``cg`` contributes one probe value (keyed by the first column) to each
    sample.

    :param filename: A string that represents a data file name.
    :param delim: Delimiter character used in the data file [default = tab].
    :param avg_beta_header: Column-name suffix that marks average beta
        columns [default = .AVG_Beta].
    :return: A ParseFile object. Use get_samples() to retrieve the samples.
    """

    def __init__(self, filename, delim="\t", avg_beta_header=".AVG_Beta"):
        self.delim = delim
        self.samples = []
        name_cols = []
        avg_cols = []
        beta_vals = []

        # Check whether the file exists.
        if not self.check_file(filename):
            print("Critical Error: File not found.")
            return

        # The context manager guarantees the handle is closed even if a
        # malformed value makes float() raise part-way through the file.
        with open(filename, mode="r") as handle:
            for line in handle:
                if line.startswith("TargetID"):
                    cols = line.strip("\n").strip("\r").split(self.delim)
                    for index, col in enumerate(cols):
                        if col.endswith(avg_beta_header):
                            name_cols.append(
                                self._strip_suffix(col, avg_beta_header))
                            avg_cols.append(index)
                    # One probe dictionary per detected average-beta column.
                    beta_vals = [{} for _ in avg_cols]
                elif line.startswith("cg"):
                    cols = line.strip("\n").strip("\r").split(self.delim)
                    probe_id = cols[0]
                    for index, avg_col in enumerate(avg_cols):
                        average = cols[avg_col].strip()
                        # Empty cells are kept as "" rather than converted.
                        if average != "":
                            average = float(average)
                        beta_vals[index][probe_id] = average

        # Create Samples.
        for index, betas in enumerate(beta_vals):
            self.samples.append(Sample(name=name_cols[index], probes=betas))

    @staticmethod
    def _strip_suffix(column, suffix):
        """
        Return *column* without its trailing *suffix*.

        ``str.strip(suffix)`` would instead remove any of the suffix's
        characters from both ends, mangling names such as ``Sample_A``.
        The caller guarantees that *column* ends with *suffix*.
        """
        return column[:len(column) - len(suffix)]

    def get_samples(self):
        """
        Return all samples found in this file.

        :return: The list of Sample objects, or None when there are none.
        """
        if len(self.samples) != 0:
            return self.samples
        else:
            return None

    def check_file(self, filename):
        """
        Check input filename.

        :param filename: A string that represents a data file.
        :return: True if the path refers to an existing regular file.
        """
        return os.path.isfile(os.path.abspath(filename))
def get_id_beta(sample):
    """
    Return the probe data stored on a sample.

    :param sample: An object exposing a ``probes`` attribute.
    :return: The sample's ``probes`` attribute, as-is (not copied).
    """
    probe_data = sample.probes
    return probe_data
def get_all_beta(sample):
    """
    Collect every beta value of a sample.

    :param sample: An object whose ``probes`` attribute is a dictionary.
    :return: A new list holding the values of ``sample.probes`` in the
        dictionary's iteration order.
    """
    return list(sample.probes.values())
def get_probe_avg(probe_id, samples, verbose=False):
    """
    Get the average beta value of one probe across several samples.

    :param probe_id: The id of the probe to look up.
    :param samples: An iterable of sample objects (each with ``name`` and a
        ``probes`` dictionary).
    :param verbose: When True, print the probe id followed by one
        "name / value" line per sample [default = False].
    :return: A list of beta values, one per sample, in sample order.
    :raises KeyError: If a sample has no entry for *probe_id*.
    """
    beta_val = []
    if verbose:
        print("Probe id: %s" % probe_id)
        print("Sample", "\t", "Beta Avg")
    for sample in samples:
        value = sample.probes[probe_id]
        beta_val.append(value)
        if verbose:
            # Report this sample's own value, not the list accumulated so far.
            print(sample.name, "\t", value)
    return beta_val
def get_probes_avg(probe_id_list, sample):
    """
    Get the average beta values of several probes from one sample.

    Probe ids that the sample has no entry for are skipped.

    :param probe_id_list: A list of probe ids.
    :param sample: A sample object with a ``probes`` dictionary.
    :return: A list of beta values for the probe ids that were found, in
        the order they appear in *probe_id_list*.
    """
    out = []
    for probe_id in probe_id_list:
        try:
            # Index the dictionary; calling it (``probes(i)``) always raised
            # TypeError, which left the result permanently empty.
            out.append(sample.probes[probe_id])
        except (KeyError, TypeError):
            # Missing (or unhashable) ids are skipped on purpose.
            continue
    return out
def samples_to_bed(base_filename, probes, samples):
    """
    Write one BED file per sample for the provided probes.

    Each file is named ``<base_filename>-<sample name>.bed`` and is produced
    by ``probes_to_bed``.

    :param base_filename: A base name for the output files.
    :param probes: A list of probe objects.
    :param samples: A list of samples to extract data from.
    :return: None - the function only stores files.
    """
    for current in samples:
        target = "%s-%s.bed" % (base_filename, current.name)
        probes_to_bed(target, probes, current)
def probes_to_bed(filename, probes, sample):
    """
    Write a BED file containing the probe beta info of one sample.

    The file starts with a ``track`` line named after the sample, followed
    by one line per probe. Probes whose beta value is missing from the
    sample, or cannot be converted to a float, are reported on stdout and
    skipped.

    :param filename: Path of the file to be written.
    :param probes: A list of probe objects (``id``, ``chr``, ``cord`` and
        ``strand`` attributes are read).
    :param sample: The sample (``name`` and ``probes``) to export.
    :return: None - the function only stores a file.
    """
    # The context manager closes the file even if a write or a malformed
    # probe raises part-way through.
    with open(filename, mode="w") as out:
        out.write('''track name="%s" description="Methylation" visibility=2 itemRgb="On" useScore=1\n''' % sample.name)
        for probe in probes:
            try:
                beta_val = float(sample.probes[probe.id])
            except (KeyError, TypeError, ValueError) as ex:
                # KeyError: probe absent; TypeError/ValueError: value such
                # as None or "" that is not a number.
                print(ex.args)
                print("%s not found in %s." % (probe.id, sample.name))
                continue
            sign = "+" if probe.strand == "F" else "-"
            # The beta value drives the red channel; green and blue stay 0.
            red = int(beta_val * 255)
            out.write("chr%s\t%d\t%d\t%s\t%f\t%s\t%d\t%d\t%d,%d,%d\n" % (
                probe.chr, probe.cord - 1, probe.cord + 1, probe.id,
                beta_val, sign, 0, 0, red, 0, 0))
    print("%s successfully processed. " % filename)
def get_sample_by_no(samples, sample_no):
    """
    Look a sample up by its position [zero based].

    :param samples: An indexable collection of samples.
    :param sample_no: Sample number, a zero based integer.
    :return: The sample stored at that position.
    """
    chosen = samples[sample_no]
    return chosen
def get_sample_by_name(samples, sample_name):
    """
    Look a sample up by its name.

    :param samples: An iterable of sample objects.
    :param sample_name: Sample name, a string.
    :return: The first sample whose ``name`` equals *sample_name*, or None
        when no sample matches.
    """
    for candidate in samples:
        if candidate.name == sample_name:
            return candidate
    return None
def get_all_sample_name(samples):
    """
    Get the names of all samples.

    :param samples: An iterable of sample objects.
    :return: A list that contains each sample's ``name``, in input order.
    """
    # Collect the name of each sample; appending the sample object itself
    # would contradict the documented return value.
    return [sample.name for sample in samples]
def get_genes_from_probes(probe_list):
    """
    Count how many probes are associated with each gene.

    :param probe_list: A list of probes (each exposing a ``gene`` attribute).
    :return: A dictionary mapping each gene name to its number of probes.
    """
    counts = {}
    for item in probe_list:
        gene = item.gene
        # Start from zero the first time a gene is seen.
        counts[gene] = counts.get(gene, 0) + 1
    return counts
def write_data(file_name, samples, probes):
    """
    Export probe values to a tab-separated data table.

    The first row is ``Probe id`` followed by every sample name; each
    following row is a probe id followed by that probe's value in every
    sample. Every cell, including the last of a row, is followed by a tab.

    :param file_name: Path of the file to be written.
    :param samples: A list of samples (``name`` and ``probes`` are read).
    :param probes: A list of probes (``id`` is read).
    :return: None - the function only writes a data file.
    :raises KeyError: If a sample has no entry for one of the probes.
    """
    # The context manager closes the file even when a missing probe raises
    # KeyError half-way through the table.
    with open(file_name, mode="w") as output_file:
        # Header information
        output_file.write("Probe id\t")
        for sample in samples:
            output_file.write("%s\t" % sample.name)
        output_file.write("\n")
        # Probe methylation info
        for probe in probes:
            output_file.write("%s\t" % probe.id)
            for sample in samples:
                output_file.write("%s\t" % sample.probes[probe.id])
            output_file.write("\n")