""" Functions for finding motifs and converting fasta formatted files to csv format using regular expressions Author: Tai Montgomery, 10/30/18 """ import re import sys def motif_finder(input_file, motif): """ Count all non-overlapping occurences of a sequence motif """ try: file_handle = open(input_file) except: print('file could not be opened') return -1 with file_handle: file = file_handle.read() file = re.sub('\n', '', file).upper() new_motif = motif.replace('N', '.') print('Motif matches: ' + str(len(re.findall(new_motif, file)))) def fasta_to_csv(input_file, output_file): """ Convert fasta file to csv """ try: infile = open(input_file) outfile = open(output_file, 'w') except: print('file could not be opened') return -1 with infile, outfile: file = infile.read() tab_separated = re.sub('>(.*)\n', r'\1,', file) outfile.write(tab_separated) if __name__ == '__main__': # For testing motif_finder, uncomment next two lines #input_file, motif = sys.argv[1:3] #input_file, motif, motif_finder(input_file, motif) # For testing fasta_to_csv, uncomment next two lines #input_file, output_file = sys.argv[1:3] #print(fasta_to_csv(input_file, output_file))