Compare two text files
From Augix' Wiki
#!/usr/bin/perl chdir "/picb/compbio5/augix/cDNA/soap/test"; open FA, "/picb/compbio5/augix/cDNA/soap/test/hg18.fa" or die $!; while (<FA>) { if (/^>/) { /^>(.*)/; $chr = $1; push @chr, $chr; print $chr."\n"; } else { chomp; $fa{$chr} .= $_; } } close FA; sub DNArevcomp { my $dna = $_[0]; my $revcomp = reverse($dna); $revcomp =~ tr/ACGTacgt/TGCAtgca/; return $revcomp; } open OUTPUT, ">seq.hg18.fa" or die $!; $count=0; foreach $chr(@chr) { $l = length $fa{$chr}; $n = int ($l / 2800); print "chr: $chr\n length: $l\n n: $n\n"; foreach $i(1..$n) { $pos = int rand($l-36); $seq = substr $fa{$chr}, $pos, 36; next if grep /N/, $seq; if ( rand(2) >1 ) { $seq = DNArevcomp($seq); $strand = "-"; } else {$strand="+"} $pos ++; print OUTPUT ">$chr:$pos:$strand\n$seq\n"; $count++; print $count."\n"; exit 0 if $count == 1000000; # only produce 1000000 sequences } } close OUTPUT;
second one

