#!/usr/bin/perl
# One code to find them all -- perl utility to extract information from RepeatMasker output files
# Copyright (C) 2014 Bailly-Bechet Marc
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

### This script sums all *copynumber* files
### from a previous run of one_code_to_find_them_all.pl

############### SCRIPT USAGE #####################
### To use, simply run:
###
### ./sum_copynumber.pl --dir directory_name
###
### where directory_name is the name of a directory
### containing multiple *copynumber.csv files
### (the script is recursive)
### The sum of all these files will be printed in standard output
###
### To redirect this output in a file filename do:
###
### ./sum_copynumber.pl --dir directory_name > filename
###
###################################################

use strict;
use warnings;

use FileHandle;
use Getopt::Long;
use File::Find;
use File::Basename;

my $dir;
GetOptions('dir=s' => \$dir);

# Check definedness first so a missing --dir does not evaluate -d on undef.
unless (defined $dir && -d $dir) {
    die("Dying! \nOption --dir must be given a directory argument\n");
}

# Element classes that get a per-element breakdown in the output.
my @GROUPS = ("DNA", "LINE", "SINE", "LTR");

# Zero-based input columns that are summed; they are stored at
# indices 0..3 of each accumulator array (column index minus 3).
my @COUNT_COLUMNS = (3 .. 6);

# %mem : group -> first-column value -> "field1||field2" -> [4 summed columns]
# %smem: summary label (first column starting with '#') -> [4 summed columns]
my %mem;
my %smem;

find(\&Wanted_Copynumber, $dir);

# File::Find callback.  For every file whose name ends in
# ".copynumber.csv", add its rows to %mem (per-element rows) or %smem
# (summary rows, i.e. rows whose first column starts with '#').
# File::Find has already changed into the file's directory, so the
# basename in $_ can be opened directly.
sub Wanted_Copynumber {
    return unless /.*\.copynumber\.csv$/;

    my $file = $_;
    warn "Found copynumber file $file\n";

    # Three-argument open with a lexical handle: the file name can never
    # be misread as an open mode, and the handle cannot leak between files.
    open(my $fh, '<', $file) or die("Cannot open file $file ($!); dying");

    LINE:
    while (my $line = <$fh>) {
        next LINE if $line =~ /^Family/;    # column header row
        chomp $line;

        my @F = split ' ', $line;
        next LINE unless @F;                # blank line
        if (@F < 7) {
            # Columns 0..6 are all needed; a shorter row cannot be summed.
            warn "Skipping malformed line $. of $file\n";
            next LINE;
        }

        my $class = $F[0];
        my $counts;
        if ($class =~ /^\#/) {
            $counts = ($smem{$class} ||= []);
        }
        else {
            # Decide the group for THIS row only.  A row of any other
            # class is skipped instead of being filed under whichever
            # group the previous row happened to belong to.
            my ($group) = $class =~ /^(DNA|LINE|SINE|LTR)/;
            next LINE unless defined $group;

            my $name = $F[1] . "||" . $F[2];
            $counts = ($mem{$group}{$class}{$name} ||= []);
        }

        for my $col (@COUNT_COLUMNS) {
            my $value = $F[$col];
            $value = 0 if $value eq "NA";   # "NA" contributes nothing
            $counts->[$col - 3] += $value;
        }
    }

    close($fh);
    return;
}

# Render the four accumulated columns as tab-prefixed fields.  A total
# that was never seen in any input file is printed as an empty field.
sub _format_counts {
    my ($counts) = @_;
    my @values = @{ $counts || [] }[0 .. 3];
    return join('', map { "\t" . (defined $_ ? $_ : '') } @values);
}

# Print one "All_elements" summary row for the given %smem label.
sub _print_total {
    my ($label) = @_;
    print "$label\tAll_elements\tNA", _format_counts($smem{$label}), "\n";
    return;
}

# Keys are sorted so that the output is identical from run to run.
for my $group (@GROUPS) {
    my $families = $mem{$group} || {};
    for my $family (sort keys %{$families}) {
        for my $elem (sort keys %{ $families->{$family} }) {
            my @G = split /\|\|/, $elem;
            print "$family\t$G[0]\t$G[1]",
                _format_counts($families->{$family}{$elem}), "\n";
        }
        _print_total("###" . $family);
    }
    _print_total("######Type:" . $group);
}

for my $other ("#########Type:EVERYTHING_TE",
               "######Type:Low_complexity",
               "######Type:Satellite",
               "######Type:Simple_repeat",
               "######Type:Unknown") {
    _print_total($other);
}