Solution to challenge #1

seq = open("SingleSeq.fasta")

for line in seq:
    print line

Back to the lesson


Solution to challenge #2

# Copy SingleSeq.fasta, line by line, into SingleSeq-2.fasta.
# Both files are closed automatically when the with-block ends.
with open("SingleSeq.fasta") as seq, open("SingleSeq-2.fasta", "w") as seq_2:
    for line in seq:
        seq_2.write(line)

Back to the lesson


Solution to challenge #3

A number of possible solutions

seq = open("SingleSeq.fasta")

for line in seq:
    if line[0] == '>':
        print line

seq = open("SingleSeq.fasta")

for line in seq:
    if line[0] == '>':
        print line
seq = open("SingleSeq.fasta")

for line in seq:
    if line[0] != '>':
        print line
seq = open("SingleSeq.fasta")

for line in seq:
    if line[0] != '>':
        print line

Back to the lesson


Solution to challenge #4

# Write only the header line(s) (lines starting with '>') to header.txt.
# Both files are closed automatically when the with-block ends.
with open('SingleSeq.fasta') as fasta, open('header.txt', 'w') as header:
    for line in fasta:
        if line[0] == '>':
            header.write(line)

Back to the lesson


Solution to challenge #5

# Write only the sequence lines (lines not starting with '>') to seq.txt.
# Both files are closed automatically when the with-block ends.
with open('SingleSeq.fasta') as fasta, open('seq.txt', 'w') as seq:
    for line in fasta:
        if line[0] != '>':
            seq.write(line)

Back to the lesson


Solution to challenges #4 and #5 merged

# Split the FASTA file in a single pass: header lines go to header.txt,
# all other lines go to seq.txt.
# The three files are closed automatically when the with-block ends.
with open('SingleSeq.fasta') as fasta, \
        open('header.txt', 'w') as header, \
        open('seq.txt', 'w') as seq:
    for line in fasta:
        if line[0] == '>':
            header.write(line)
        else:
            seq.write(line)

Back to the lesson


Solution to challenge #6

seq_fasta = open("SingleSeq.fasta")

seq = ''

for line in seq_fasta:
    if line[0] == '>':
        header = line
    else:
        seq = seq + line.strip()

print header, seq

Back to the lesson


Solution to challenge #7

seq_fasta = open("SingleSeq.fasta")

seq = ''

for line in seq_fasta:
    if line[0] == '>':
        header = line
    else:
        seq = seq + line.strip()

num_cys = seq.count("C")

print header, seq, num_cys

Back to the lesson


Solution to challenge #8

seq_fasta = open("SingleSeq.fasta")

seq = ''
header = ''

for line in seq_fasta:
    if line[0] == '>':
        if "Homo sapiens" in line:
            header = line
    else:
        if header:
            seq = seq + line

if header:
    print header + seq
else:
    print "The record is not from H. sapiens"

Note the use of if header:

At first glance the condition looks incomplete: header is not compared with anything.

In Python empty objects in ‘if’ conditions are interpreted as False by default.

Therefore header here is treated as Boolean:

  • if it is empty it will be interpreted as False
  • once it is filled, it becomes True

Back to the lesson


Solution to challenge #9

# Write the header line of every record to headers.txt.
# Both files are closed automatically when the with-block ends.
with open('SwissProt-Human.fasta') as fasta, open('headers.txt', 'w') as headers:
    for line in fasta:
        if line[0] == '>':
            headers.write(line)

>sp|P31946|1433B_HUMAN 14-3-3 protein beta/alpha OS=Homo sapiens
>sp|P62258|1433E_HUMAN 14-3-3 protein epsilon OS=Homo sapiens
>sp|Q04917|1433F_HUMAN 14-3-3 protein eta OS=Homo sapiens GN=YWHAH

Back to the lesson


Solution to challenge #10

# Write the sequence lines of every record to seqs.txt; a '\n' is written
# in place of each header line.
# Both files are closed automatically when the with-block ends.
with open('SwissProt-Human.fasta') as fasta, open('seqs.txt', 'w') as seqs:
    for line in fasta:
        if line[0] == '>':
            seqs.write('\n')
        else:
            seqs.write(line)
            # NOTE(review): the original page showed the statement below on its
            # own after seqs.close(), where it would raise ValueError (write to
            # a closed file). Presumably it is an alternative to the write
            # above -- confirm.
            # seqs.write(line.strip() + '\n')

Back to the lesson


Solution to challenge #11

# Write the accession of every record, one per line, to SwissProt-Human-AC.txt.
# Both files are closed automatically when the with-block ends.
with open('SwissProt-Human.fasta') as human_fasta, \
        open('SwissProt-Human-AC.txt', 'w') as Outfile:
    for line in human_fasta:
        if line[0] == '>':
            # The accession is the second '|'-separated field of the header,
            # e.g. '>sp|P31946|1433B_HUMAN ...' gives 'P31946'.
            AC = line.split('|')[1]
            Outfile.write(AC + '\n')

Back to the lesson


Solution to challenge #12

One possible solution:

fasta = open('sprot_prot.fasta')

seq = ''

for line in fasta:
    if line[0] == '>' and seq == '':
        header = line[4:10]
    elif line[0] != '>':
        seq = seq + line.strip()
    elif line[0] == '>' and seq != '':
        cys_num = seq.count('C')
        print header, ': ', cys_num
        header = line[4:10]
        seq = ''

cys_num = seq.count('C')
print header, ': ', cys_num

Another possible solution:

fasta = open('sprot_prot.fasta')

seq = ''

for line in fasta:
    if line[0]=='>':
        if seq:
            cys_num = seq.count('C')
            print header, ':' , cys_num
        header = line.split('|')[1]
        seq = ''
    else:
        seq = seq + line.strip()

Back to the lesson


Solution to challenge #13

One possible solution:

# Copy to homo_sapiens.fasta only the records whose header contains
# "Homo sapiens".
# Both files are closed automatically when the with-block ends.
with open('sprot_prot.fasta') as fasta, \
        open('homo_sapiens.fasta', 'w') as output:
    seq = ''
    for line in fasta:
        if line[0] == '>' and seq == '':
            # Header with no sequence collected yet: just remember it.
            header = line
        elif line[0] != '>':
            seq = seq + line
        elif line[0] == '>' and seq != '':
            # A new header closes the previous record: write it if it is human.
            if "Homo sapiens" in header:
                output.write(header + seq)
            header = line
            seq = ''

    # The last record is not followed by another header, so it is handled here.
    if "Homo sapiens" in header:
        output.write(header + seq)

Another possible solution:

# Copy to sprot_human.fasta only the records whose header contains
# "Homo sapiens".
# Both files are closed automatically when the with-block ends.
with open('sprot_prot.fasta') as fasta, \
        open('sprot_human.fasta', 'w') as output:
    seq = ''
    for line in fasta:
        if line[0] == '>':
            # A new header closes the previous record (if any).
            if seq:
                if "Homo sapiens" in header:
                    output.write(header + seq)
            header = line
            seq = ''
        else:
            seq = seq + line

    # The last record is not followed by another header, so it must be
    # written after the loop; without this step it would be silently dropped.
    if seq and "Homo sapiens" in header:
        output.write(header + seq)

Back to the lesson


Solution II to challenge #14

# Copy to SwissProtHuman-Filtered.fasta only the records whose sequence
# starts with 'M' and contains more than one 'W'.
# Both files are closed automatically when the with-block ends.
with open('SwissProtHuman.fasta', 'r') as fasta, \
        open('SwissProtHuman-Filtered.fasta', 'w') as outfile:
    seq = ''
    for line in fasta:
        if line[0:1] == '>' and seq == '':
            # Header with no sequence collected yet: just remember it.
            header = line
        elif line[0:1] != '>':
            seq = seq + line
        elif line[0:1] == '>' and seq != '':
            # A new header closes the previous record: test it, then start over.
            TRP_num = seq.count('W')
            if seq.startswith('M') and TRP_num > 1:
                outfile.write(header + seq)
            seq = ''
            header = line

    # The last record is not followed by another header, so it is tested here.
    # startswith() is used instead of seq[0] so that an empty seq (no sequence
    # line read at all) gives False instead of raising IndexError.
    TRP_num = seq.count('W')
    if seq.startswith('M') and TRP_num > 1:
        outfile.write(header + seq)

Back to the lesson


Solution II to challenge #15

# Convert a GenBank file to FASTA: the accession becomes the header line and
# the lines that follow ORIGIN become the sequence lines.
# Both files are closed automatically when the with-block ends.
with open("ap006852.gbk") as InputFile, \
        open("ap006852.fasta", "w") as OutputFile:
    in_sequence = False  # True while reading the lines that follow ORIGIN

    for line in InputFile:
        if line[0:9] == 'ACCESSION':
            # The accession is the second whitespace-separated field.
            AC = line.split()[1]
            OutputFile.write('>' + AC + '\n')
        elif line[0:6] == 'ORIGIN':
            in_sequence = True
        elif line[0:2] == '//':
            # '//' ends the record: stop collecting so that it is not written
            # out as an empty sequence line, and so that the annotation lines
            # of a following record are not mistaken for sequence.
            in_sequence = False
        elif in_sequence:
            # Drop the first field (the position number) and join the rest.
            seq = ''.join(line.split()[1:])
            if seq:
                OutputFile.write(seq + '\n')

Back to the lesson


Back

Back to main page.