Finding a Protein Motif
Author: L. Grondin
http://rosalind.info/problems/mprt/
Sample input
A2Z669 B5ZC00 P07204_TRBM_HUMAN P20840_SAG1_YEAST
Sample output
B5ZC00 85 118 142 306 395 P07204_TRBM_HUMAN 47 115 116 382 409 P20840_SAG1_YEAST 79 109 135 248 306 348 364 402 485 501 614
Source code: mprt-grondilu.pl
use v6;

# UniProt identifiers processed when no input file is given on the command line.
my @default-data = qw{ A2Z669 B5ZC00 P07204_TRBM_HUMAN P20840_SAG1_YEAST };

# N-glycosylation motif N{P}[ST]{P}:
# an N, then any residue but P, then S or T, then any residue but P.
my $N-glycosylation = rx/ N <-[P]> <[ST]> <-[P]> /;

# Returns the FASTA text for $id.
#
# A file named "<id>.fasta" inside $base-path is used when it exists;
# otherwise the record is downloaded with wget.  A failed download yields
# whatever wget wrote to stdout (normally the empty string) and a warning
# on stderr, so the caller simply finds no motif for that identifier.
sub fetch-fasta($id, $base-path) {
    my $cached = $*SPEC.catfile($base-path, $id ~ '.fasta').IO;
    return $cached.slurp if $cached.e;

    # NOTE(review): legacy UniProt URL kept unchanged; confirm it still
    # resolves (or redirects) for every identifier form used as input.
    my $url = 'http://www.uniprot.org/uniprot/' ~ $id ~ '.fasta';

    my $text   = '';
    my $status = -1;
    try {
        # List-form run: $id reaches wget as a single argument and is never
        # parsed by a shell, unlike the previous qqx{...} interpolation.
        my $proc = run 'wget', '-O', '-', '-q', $url, :out;
        $text    = $proc.out.slurp(:close);
        $status  = $proc.exitcode;
    }
    note "warning: could not download FASTA record for '$id'" if $status != 0;
    return $text;
}

# Returns the 1-based start position of every (possibly overlapping)
# occurrence of the N-glycosylation motif in $sequence.
sub motif-positions(Str $sequence) {
    return $sequence.match($N-glycosylation, :overlap).map(*.from + 1);
}

# Entry point.
#
# $input-file: optional path to a file holding one UniProt identifier per
#              line; when omitted (or empty) @default-data is used.
#
# For every identifier whose protein sequence contains the motif, prints the
# identifier on one line and the space-separated 1-based motif positions on
# the next.  Identifiers without a match print nothing.
sub MAIN($input-file = Nil) {
    my $list-path = ($input-file // '').Str;

    # Trim each line and drop empty ones so stray whitespace or blank lines
    # in the input file do not turn into bogus identifiers.
    my @ids = $list-path.chars
        ?? $list-path.IO.lines.map(*.trim).grep(*.chars)
        !! @default-data;

    my $base-path = $*PROGRAM-NAME.IO.dirname;

    for @ids -> $id {
        # Keep only purely alphabetic lines: this drops the ">" header line
        # and leaves the residue lines, which are joined into one sequence.
        my $sequence = fetch-fasta($id, $base-path)
            .lines
            .grep(/^ <.alpha>+ $/)
            .join;

        my @positions = motif-positions($sequence);
        next unless @positions;

        say $id;
        say @positions.join(' ');
    }
}