#!/usr/bin/perl -w
# remediator.pl version 1.55 8/3/07
# Copyright 2007, Jeffrey J. Headd and Robert Immormino
# revision 1.02 - JJH 070726 - fixed HN2/H2 N-terminus ambiguity
# revision 1.52 - JJH 070727 - added KIN file support
# revision 1.53 - JJH 070731 - updated hash tables to correct conversions
# revision 1.54 - JJH 070731 - fixed KIN file support for new probe dot format
# revision 1.55 - RI 070803 - added support for DU bases

# With no (or a false) first argument, show the usage text and stop.
if (!$ARGV[0]) {
    help();
    exit(0);
}

#-----------------------------------------------------------------------
# help: print the usage text to standard output and exit with status 0.
# Takes no arguments and never returns to its caller.
sub help {
    print <<"END_HELP";

*******************************************************************
remediator.pl: version 1.55 8/3/07
Copyright 2007, Jeffrey J. Headd and Robert Immormino
LAST CIF DICTIONARY UPDATE: 070815

USAGE: remediator.pl [-options] input_file

options:
  -h           outputs this help message
  -pdb         takes a .pdb formatted file as input (default)
  -kin         takes a .kin kinemage formatted file as input (under development)
  -oldout      output file will use the PDBv2.3 naming conventions
  -remediated  output file will use the remediated naming conventions (default)

remediator.pl is generally intended to convert from PDBv2.3 to PDBv3.0.
This changes files from the pre-wwPDB format into the wwPDB remediated format.
Output is directed to standard out.

EXAMPLES:   remediator.pl 404D.pdb > 404Dr.pdb
            remediator.pl -kin 1BAB.kin > 1BABr.kin


END_HELP
    exit(0);
}
#-----------------------------------------------------------------------

# Option state.  These are package globals on purpose: pdb_mod and kin_mod
# below read $remediated / $oldout directly.
$oldout         = 0;
$remediated     = 0;
$dopdb          = 0;
$dokin          = 0;
$input_pdb_flag = 0;
$input_kin_flag = 0;
$input          = "NULL";    # sentinel meaning "no input file seen yet"

# Classify every command-line argument as a flag or as the single input file.
for my $arg (@ARGV) {
    if (substr($arg, 0, 1) eq "-") {
        if    ($arg eq "-h")          { help(); exit(0); }
        elsif ($arg eq "-oldout")     { $oldout = 1; }
        elsif ($arg eq "-remediated") { $remediated = 1; }
        elsif ($arg eq "-pdb")        { $dopdb = 1; }
        elsif ($arg eq "-kin")        { $dokin = 1; }
        else {
            print STDERR "Unrecognized flag ".$arg." IGNORED\n";
        }
    }
    elsif ($arg =~ m/\.pdb/) {
        if ($input_pdb_flag == 0 && $input_kin_flag == 0) {
            $input          = $arg;
            $input_pdb_flag = 1;
        }
        else {
            print STDERR "Too many options!! Exiting...\n";
            help();
            exit(0);
        }
    }
    elsif ($arg =~ m/\.kin/) {
        if ($input_pdb_flag == 0 && $input_kin_flag == 0) {
            $input          = $arg;
            $input_kin_flag = 1;
        }
        else {
            print STDERR "Too many options!! Exiting...\n";
            help();
            exit(0);
        }
    }
    else {
        if ($input_pdb_flag == 1 || $input_kin_flag == 1) {
            print STDERR "Too many options!! Exiting...\n";
            help();
            exit(0);
        }
        elsif ($dokin == 1) {
            print STDERR "Unrecognized input file extension!! Assuming .kin file...\n";
            # Use the argument being examined, not $ARGV[0], which is a flag
            # whenever options precede the file name.
            $input          = $arg;
            $input_kin_flag = 1;
        }
        else {
            print STDERR "Unrecognized input file extension!! Assuming .pdb file...\n";
            $input          = $arg;
            $input_pdb_flag = 1;
        }
    }
}

if ($input eq "NULL") {
    print STDERR "User must specify input file!! Exiting...\n";
    help();
    exit(0);
}

# PDB mode is the default when neither -pdb nor -kin was given.
if ($dopdb == 0 && $dokin == 0) {
    $dopdb = 1;
}

if ($dopdb == 1 && $input_pdb_flag == 0) {
    print STDERR "Wrong file type for PDB mode!! Exiting...\n";
    help();
    exit(0);
}
elsif ($dokin == 1 && $input_kin_flag == 0) {
    print STDERR "Wrong file type for KIN mode!! Exiting...\n";
    help();
    exit(0);
}

# NOTE(review): $input is always defined at this point, so this branch never
# fires; presumably a file-existence test was intended -- confirm.
if (!defined $input) {
    print "\n Sorry, input file was not found!! Exiting...\n";
    help();
    exit(0);
}

# Remediated output is the default when no output convention was requested.
if ($remediated == 0 && $oldout == 0) {
    $remediated = 1;
}

# build_old2new / build_new2old are defined outside this section; presumably
# they fill %resn_exch, the lookup table pdb_mod consults -- confirm.
if ($remediated == 1) {
    build_old2new();
}
elsif ($oldout == 1) {
    build_new2old();
}

# 'or' (not '||') so that the die applies to open() itself; with '||' the
# failure branch was attached to the file-name string and could never run.
open(IN, '<', $input) or die "Could not open input file!! Exiting...\n";

if ($dopdb == 1) {
    pdb_mod();
}
elsif ($dokin == 1) {
    kin_mod();
}
close IN;

#--pdb_mod sub-routine-------------------------------------------------------------------------------------
# pdb_mod: read PDB records from the IN filehandle, rename atom/residue
# fields through %resn_exch, and print the result to standard output.
#
# Lines are buffered in $print_line for as long as consecutive coordinate
# records share the same 8-character key taken from offset 18 of the line;
# when that key changes the buffered text gets the nucleic-acid and
# N-terminal hydrogen fix-ups and is printed.  Takes no arguments; works on
# package globals ($line, $previous, $current, $print_line, %resn_exch, ...).
sub pdb_mod {
    # Record names are compared against the first six characters of the line,
    # so each name is padded to exactly six characters.
    my %is_coord_record = map { $_ => 1 }
        ("ATOM  ", "HETATM", "TER   ", "ANISOU", "SIGATM", "SIGUIJ", "LINK  ");

    $previous   = "NULL";
    $current    = "NULL";
    $print_line = "";

    while ($line = <IN>) {
        chomp($line);

        if ($is_coord_record{ substr($line, 0, 6) }) {
            $entry    = substr($line, 12, 8);
            $previous = $current;
            $current  = substr($line, 18, 8);

            # Lookup key: first four characters of $entry, one blank, then
            # the three characters starting at offset 5 of $entry.
            $clean_entry = substr($entry, 0, 4)." ".substr($entry, 5, 3);
            if ($resn_exch{$clean_entry}) {
                substr($line, 12, 4) = substr($resn_exch{$clean_entry}, 0, 4);
                substr($line, 17, 3) = substr($resn_exch{$clean_entry}, 5, 3);
            }
        }

        if ($previous eq "NULL") {
            $previous = $current;
        }
        if ($current eq $previous) {
            $print_line .= $line."\n";
        }
        if ($current ne $previous) {
            # NOTE(review): the blanks inside these patterns are positional in
            # a fixed-column format; the spacing is kept exactly as found in
            # this copy of the file -- confirm against the PDB column layout.
            if ($print_line =~ m/(.\S..) .[A,C,T,G,I,U] /) {
                _rename_nucleic_bases();
            }
            _restore_nterm_2h();
            print $print_line;
            $print_line = $line."\n";
        }
    }

    # Flush whatever is still buffered at end of input.  The N-terminal fix
    # is applied independently of the nucleic-acid test, as in the loop above.
    if ($print_line =~ m/(.\S..) [A,C,T,G,I,U] /) {
        _rename_nucleic_bases();
    }
    _restore_nterm_2h();
    print $print_line;
}

# _rename_nucleic_bases: add (remediated mode) or strip (-oldout mode) the
# "D" prefix on the one-letter base name inside the buffered $print_line.
# The base letter is the second character of $previous.  Nothing is changed
# when the buffer contains an O2' / O2* atom name (presumably marking the
# residue as RNA -- confirm).
sub _rename_nucleic_bases {
    return if $print_line =~ m/O2[\',\*] ./;

    $DNA_base = substr($previous, 1, 1);
    if ($remediated == 1) {
        $print_line =~ s/(.\S..) $DNA_base /$1 D$DNA_base /g;
        $print_line =~ s/(TER.{15}) $DNA_base /$1D$DNA_base /g;
    }
    elsif ($oldout == 1) {
        $print_line =~ s/(.\S..) D$DNA_base /$1 $DNA_base /g;
        $print_line =~ s/(TER.{15})D$DNA_base /$1 $DNA_base /g;
    }
}

# _restore_nterm_2h: in -oldout mode only, rename " HN2" to "2H" inside the
# buffered $print_line, but only when the buffer also holds a 1H or 3H atom
# for the same residue name (the HN2/H2 N-terminus ambiguity mentioned in
# the revision 1.02 note).
sub _restore_nterm_2h {
    return unless $oldout == 1;

    my $amino = 'ALA|ARG|ASN|ASP|ASX|CSE|CYS|GLN|GLU|GLX|GLY|HIS|ILE|LEU|LYS|MET|MSE|PHE|PRO|SER|THR|TRP|UNK|TYR|VAL';
    if ($print_line =~ m/ HN2 ($amino)/) {
        $res = $1;
        if ($print_line =~ m/1H $res/ || $print_line =~ m/3H $res/) {
            $print_line =~ s/ HN2 ($amino)/2H $1/g;
        }
    }
}
#----------------------------------------------------------------------------------------------------------
#--kin_mod sub-routine-------------------------------------------------------------------------------------
# NOTE(review): kin_mod continues past the end of the reviewed section; the fragment below is left exactly as found.
sub kin_mod{ %RNA_ID=(); #pre-screen for RNA vs. DNA for -remediated if($remediated == 1){ while($line=){ chomp($line); my(@records) = $line =~ m/\{.{8,}?\}/g; for($i=0;$i){ chomp($line); my(@records) = $line =~ m/\{.{8,}?\}/g; for($i=0;$i