Skip to content

Commit

Permalink
Minor
Browse files Browse the repository at this point in the history
  • Loading branch information
miriabernardino committed Oct 12, 2022
1 parent 3269a97 commit b73e6c1
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 22 deletions.
2 changes: 1 addition & 1 deletion my_pseknc.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def encode_into_kmers(self, seq):

# loop over the sequence grabbing all 3-mers
for l in range(len(seq) - 2):
kmer = seq[l: l + 3]
kmer = seq[l: l + 3].upper()
try:
# searches oligonucs for the specific kmer and returns its index in that array
index = self.oligonucs.index(kmer)
Expand Down
41 changes: 20 additions & 21 deletions pc3mer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

class Pc3mer(Pseknc):
def __init__(self, classes=None, binary=True, folder_for_output=None,
file_name_prefix=""):
file_name_prefix="", L=58):
"""
:param classes:
Expand All @@ -20,7 +20,7 @@ def __init__(self, classes=None, binary=True, folder_for_output=None,
if binary and classes:
assert len(classes) == 2

super().__init__(binary=binary)
super().__init__(binary=binary, L=L)

self.name = 'pc3mer'
self.folder_for_output = folder_for_output
Expand Down Expand Up @@ -133,7 +133,7 @@ def encode_fasta_file(self, input_file_path, output_path=None, folder_for_output
"""

print(f"Encoding {input_file_path} into pc3mer...")
file_in = Pc3mer.read_fasta_file(input_file_path)
file_in = Pc3mer.read_fasta_file(input_file_path, L=L)

# path to save outputs
if folder_for_output is None:
Expand Down Expand Up @@ -228,31 +228,30 @@ def encode_fasta_file(self, input_file_path, output_path=None, folder_for_output
return feature_vector

@staticmethod
def read_fasta_file(input_file_path):
def read_fasta_file(input_file_path, L=58):
"""
Reads the file and verifies that it is a valid fasta format file before
returning it as a list of lines
:param input_file_path:
:return:
"""
if not os.path.isfile(input_file_path):
print("The input fasta file {} does not exist.".format(input_file_path))
assert False
assert os.path.isfile(input_file_path), "The input fasta file {} does not exist.".format(input_file_path)

with open(input_file_path, "r") as fid:
file_in = fid.readlines()

line0 = file_in[0]
line1 = file_in[1][:-1]
fid.close()
if not(line0.startswith(">")) or len(line1) != L:
print(
f"The file at {input_file_path} is in the wrong format. Please, change it into fasta format with"+\
f" sequences of {L}bp"
)
assert False, f"The file at {input_file_path} is in wrong format. Please, change it into fasta format "+\
f"with sequences with {L}bp"
else:
with open(input_file_path, "r") as fid:
file_in = fid.readlines()

line0 = file_in[0]
line1 = file_in[1][:-1]
fid.close()
if not(line0.startswith(">")) or len(line1) != 58:
print(
"The file at {} is in wrong format. Please, change it into fasta format with sequences with 58bp".
format(input_file_path)
)
assert False
else:
return file_in
return file_in

def convert_fasta_file_to_pc3mer_stats(self, input_file_path, folder_for_output=None):
"""
Expand Down

0 comments on commit b73e6c1

Please sign in to comment.