# TEST SEQUENCE
#
# aggGAACtATGcaTCtatTcaTAttTActacACGACTCGCgcacaggatAcctTaACGAttACcaaaAAAAAaaaa
#
# Reads an mRNA sequence, trims the 5 prime UTR (everything before the first
# ATG) and the 3 prime UTR (everything from the nearest Stop Codon onwards),
# and reports the resulting CDS.

# The three Stop Codons, in the order in which they are reported.
STOP_CODONS = ("TAA", "TAG", "TGA")


def find_cds(mrna):
    """Return the upper-cased sequence from the first ATG onwards.

    The 5 prime UTR (everything before the first ATG) is trimmed off.
    Returns None when the sequence contains no ATG: str.find would report
    -1, and slicing with [-1:] would silently keep only the last base.
    """
    mrna_upper = mrna.upper()
    start_position = mrna_upper.find("ATG")
    if start_position == -1:
        return None
    return mrna_upper[start_position:]


def first_stop_positions(cds):
    """Return the index of the first TAA, TAG and TGA in cds, as a tuple.

    A codon that does not occur is reported as -1 (the str.find convention).
    str.find is used rather than str.rfind: rfind returns the LAST
    occurrence, which is the wrong place to trim whenever a Stop Codon
    occurs more than once.

    The search is not frame-aware: a match in any reading frame is reported.
    """
    return tuple(cds.find(codon) for codon in STOP_CODONS)


def nearest_stop(positions, cds_length):
    """Return the smallest position that is not -1, or cds_length if none.

    The -1 "not found" markers must be ignored: min() would otherwise pick
    -1 as the nearest Stop, and slicing with [:-1] would merely drop the
    last base instead of leaving the sequence untrimmed.
    """
    found = [position for position in positions if position != -1]
    if not found:
        return cds_length
    return min(found)


def main():
    """Prompt for a sequence and print the Stop Codon report and the CDS."""
    #
    # First request a sequence, then look for the ATG that is the Start of
    # the CDS, working on an Upper Case copy with the 5 prime UTR trimmed off
    #
    mrna_original = input("Enter the Sequence to be processed: ")
    cds = find_cds(mrna_original)
    if cds is None:
        print("No ATG start codon found in the sequence")
        return
    #
    # Count the number of each type of Stop Codon
    # and print the results
    #
    taa_count = cds.count('TAA')
    tag_count = cds.count('TAG')
    tga_count = cds.count('TGA')
    print("TAA_Count %4d\nTAG_Count %4d\nTGA_Count %4d\n" % (taa_count, tag_count, tga_count))
    #
    # Find the Stop Codon nearest the front.
    # The individual positions are printed as found (-1 means "absent");
    # only the overall First Stop falls back to the length of the CDS.
    #
    positions = first_stop_positions(cds)
    first_stop = nearest_stop(positions, len(cds))
    print("First TAA is at: %4d\nFirst TAG is at: %4d\nFirst TGA is at: %4d\nFirst Stop is at: %4d" % (positions + (first_stop,)))
    #
    # Trim off the 3 prime UTR (the Stop Codon itself is not kept)
    #
    cds = cds[:first_stop]
    #
    # Check the CDS has no Frame Shift.
    # This matters because the Stop Codon search above ignores the frame.
    #
    print("CDS length is %4d\nCDS frame is %4d {if this is not zero, one is in trouble!}" % (len(cds), len(cds) % 3))
    #
    # Display the resulting CDS
    #
    print("the CDS sequence is: %s" % (cds))


if __name__ == "__main__":
    main()