Compare two text files

From Augix' Wiki

Jump to: navigation, search
#!/usr/bin/perl
chdir "/picb/compbio5/augix/cDNA/soap/test";
open FA, "/picb/compbio5/augix/cDNA/soap/test/hg18.fa" or die $!;
while (<FA>) {
  if (/^>/) {
    /^>(.*)/;
    $chr = $1;
    push @chr, $chr;
    print $chr."\n";
  }
  else {
    chomp;
    $fa{$chr} .= $_;
  }
 
}
close FA;
 
sub DNArevcomp {
  my $dna = $_[0];
  my $revcomp = reverse($dna);
  $revcomp =~ tr/ACGTacgt/TGCAtgca/;
  return $revcomp;
}
 
 
open OUTPUT, ">seq.hg18.fa" or die $!;
$count=0;
foreach $chr(@chr) {
  $l = length $fa{$chr};
  $n = int ($l / 2800);
  print "chr: $chr\n
         length: $l\n
         n: $n\n";
  foreach $i(1..$n) {
    $pos = int rand($l-36);
    $seq = substr $fa{$chr}, $pos, 36;
    next if grep /N/, $seq;
    if ( rand(2) >1 ) {
      $seq = DNArevcomp($seq);
      $strand = "-";
    }
    else {$strand="+"}
    $pos ++;
    print OUTPUT ">$chr:$pos:$strand\n$seq\n";
    $count++;
    print $count."\n";
    exit 0 if $count == 1000000;   # only produce 1000000 sequences
  }
}
 
close OUTPUT;

second one

Personal tools