Repeatedly update hashtables and k-nucleotide strings

Author: Daniel Carrera

http://benchmarksgame.alioth.debian.org/u32/performance.php?test=knucleotide

Based on the submission for Perl 5.

USAGE: perl6 k-nucleotide.p6 k-nucleotide.input

Expected output

A 30.298
T 30.157
C 19.793
G 19.752

AA 9.177
TA 9.137
AT 9.136
TT 9.094
AC 6.000
CA 5.999
GA 5.986
AG 5.985
TC 5.970
CT 5.970
GT 5.957
TG 5.956
CC 3.915
CG 3.910
GC 3.908
GG 3.902

14717GGT
4463GGTA
472GGTATT
902GGTATTTTAATT
902GGTATTTTAATTGGTATTTTAATTTATAGT

Source code: k-nucleotide.p6

use v6;

sub MAIN($input-file = $*SPEC.catdir($*PROGRAM-NAME.IO.dirname, "k-nucleotide.input")) {
    my $fh = open $input-file.IO;

    # Read FASTA file and extract DNA sequence THREE.
    my $sequence = '';
    my $lines;

    while $fh.get -> $line {
        last if ($line.substr(0,6) eq '>THREE');
    }
    while $fh.get -> $line {
        last if ($line.substr(0,1) eq '>');
        $sequence = $sequence ~ lc $line.subst(/\n/,'');
    }

    # Count nucleotide sequences
    my $len = $sequence.chars;
    my (@keys,%table,$sum,$frame_size);
    for 1..2 -> $frame_size {
        %table = ();
        update_hash($frame_size);

        # Print.
        $sum = $len - $frame_size + 1;
        for %table.sort: {$^b.value <=> $^a.value||$^a.key leg $^b.key} {
            printf "%s %.3f\n", .key, .value*100/$sum;
        }
        print "\n";
    }

    for <ggt ggta ggtatt ggtattttaatt ggtattttaatttatagt> -> $seq {
        %table = ();
        update_hash($seq.chars);
        printf "%3d    $seq\n", (%table{$seq} || 0);
    }

    # Procedure to update a hashtable of k-nucleotide keys and count values
    # for a particular reading-frame.
    sub update_hash($frame_size) {
        for 0..($len - $frame_size) -> $i {
            %table{$sequence.substr($i,$frame_size)}++;
        }
    }
}