#!/usr/bin/python
# remediator_python.py - version 1.55 8/15/07
# Copyright 2007, Jeffrey J. Headd and Robert Immormino
# revision 1.55 - JJH 070808 - added support for DU DNA base
#               - JJH 070808 - added compiled RE object for HN2 RES special case
#               - JJH 070815 - updated name of hash dictionary file
"""Convert atom and residue names in a PDB file between two naming schemes.

The name table is read from ``master_hash.txt`` in the current working
directory; the converted file is written to standard output.
"""

import sys
import getopt
import os
import string
import re

# Name table, one "new:old" pair per line; opened relative to the current
# working directory.
HASH_FILE = "master_hash.txt"

# Record names are compared against line[0:6], so every entry must be exactly
# six characters wide (shorter names are padded with blanks).
COORD_RECORDS = ("ATOM  ", "HETATM", "TER   ", "ANISOU", "SIGATM", "SIGUIJ",
                 "LINK  ")

USAGE_TEXT = """
************************************
remediator_python.py: version 1.55 8/8/07
Copyright 2007, Jeffrey J. Headd and Robert Immormino

USAGE: remediator.pl [--options] input_file

options:
  --help        outputs this help message
  --pdb         takes a .pdb formatted file as input
  --old         output file will use the PDBv2.3 naming conventions
  --remediated  output file will use the remediated naming conventions (default)

remediator.pl is generally inteded to convert from PDBv2.3 to PDBv3.0.
This changes files from the pre-wwPDB format into the wwPDB remediated format.
Output is directed to standard out.

EXAMPLE: remediator.pl 404D.pdb > 404Dr.pdb
"""

# " HN2" atom name + blank alternate-location column + residue name.
aa_re = re.compile(' HN2 (ALA|ARG|ASN|ASP|ASX|CSE|CYS|GLN|GLU|GLX|GLY|HIS|ILE|LEU|LYS|MET|MSE|PHE|PRO|SER|THR|TRP|UNK|TYR|VAL)')

# Four-character atom name, two blanks, any character, then a base letter.
# NOTE(review): the blank count was reconstructed so that the base letter
# falls on the same column that _convert_residue reads it from (index 1 of
# line[18:26]); confirm against a pristine copy of the script.
nucleic_re = re.compile(r'.\S..  .[ACTGIU]')

# An O2' / O2* atom anywhere in the buffered text suppresses the D-prefix step.
ribose_o2_re = re.compile(r"O2['*] .")


def usage(stream=None):
    """Write the command-line help text.

    stream -- file-like object to write to; defaults to sys.stdout.
    """
    if stream is None:
        stream = sys.stdout
    stream.write(USAGE_TEXT + "\n")


def _fail(message):
    """Report a command-line error plus the help text on stderr; exit 1."""
    # stdout carries the converted file, so diagnostics must not go there.
    sys.stderr.write("\n**REMEDIATOR ERROR: %s\n" % message)
    usage(sys.stderr)
    sys.exit(1)


def _load_atom_exchange(path, to_remediated):
    """Return the name-lookup dict for one conversion direction.

    Each non-blank line of *path* is "new:old".  When *to_remediated* is true
    the dict maps old -> new, otherwise new -> old.
    """
    table = {}
    with open(path) as handle:
        for raw in handle:
            pair = raw.rstrip()
            if not pair:
                continue  # tolerate blank lines instead of failing to unpack
            new, old = pair.split(':')
            if to_remediated:
                table[old] = new
            else:
                table[new] = old
    return table


def _convert_residue(text, residue_id, remediated_out, old_out):
    """Apply the residue-level renames to one buffered chunk of lines.

    text       -- newline-joined lines that are about to be printed
    residue_id -- line[18:26] of the last coordinate record in *text*,
                  or None when no coordinate record has been seen yet
    Returns the converted text.
    """
    if (nucleic_re.search(text) and ribose_o2_re.search(text) is None
            and residue_id is not None and len(residue_id) > 1):
        raw_base = residue_id[1]       # column 19: last residue-name character
        base = re.escape(raw_base)     # never let file data act as regex syntax
        # Every substitution replaces a span with one of equal width so the
        # fixed columns to the right are not shifted.
        if remediated_out:
            text = re.sub(r'(.\S..)   ' + base + ' ',
                          lambda m: m.group(1) + '  D' + raw_base + ' ', text)
            text = re.sub(r'(TER.{15}) ' + base + ' ',
                          lambda m: m.group(1) + 'D' + raw_base + ' ', text)
        elif old_out:
            text = re.sub(r'(.\S..)  D' + base + ' ',
                          lambda m: m.group(1) + '   ' + raw_base + ' ', text)
            text = re.sub(r'(TER.{15})D' + base + ' ',
                          lambda m: m.group(1) + ' ' + raw_base + ' ', text)
    if old_out:
        hit = aa_re.search(text)
        if hit:
            res = hit.group(1)
            # HN2 becomes 2H only when the chunk already uses 1H/2H names.
            if ('1H   ' + res) in text or ('2H   ' + res) in text:
                text = text.replace(' HN2 ' + res, '2H   ' + res)
    return text


def _remediate(lines, atom_exch, remediated_out, old_out, out):
    """Rename atoms line by line and write the result to *out*.

    Lines are buffered while consecutive coordinate records share the same
    line[18:26]; each buffered chunk is passed through _convert_residue
    before it is written.
    """
    previous = None
    current = None
    buffered_id = None   # residue id of the last coordinate line in `pending`
    pending = []

    def flush():
        text = "\n".join(pending) + "\n"
        text = _convert_residue(text, buffered_id, remediated_out, old_out)
        out.write(text.rstrip() + "\n")

    for line in lines:
        line = line.rstrip()
        is_coord = line[0:6] in COORD_RECORDS
        if is_coord:
            entry = line[12:20]
            previous = current
            current = line[18:26]
            # atom name + blanked alternate-location column + residue name
            clean_entry = entry[0:4] + " " + entry[5:8]
            if clean_entry in atom_exch:
                # NOTE(review): this replaces every occurrence of the atom
                # name in the line, not only columns 12-15; kept as found.
                line = line.replace(clean_entry[0:4],
                                    atom_exch[clean_entry][0:4])
        if previous is None:
            previous = current
        if previous == current:
            pending.append(line)
        else:
            flush()
            pending = [line]
        if is_coord:
            buffered_id = current
    flush()


def main(argv=None):
    """Run the converter.

    argv -- argument list without the program name; defaults to sys.argv[1:].
    Returns 0 on success.  Exits with status 1 on a usage error or a missing
    input file, and with status 0 after --help.
    """
    if argv is None:
        argv = sys.argv[1:]
    try:
        opts, args = getopt.getopt(argv, 'hpor',
                                   ['help', 'pdb', 'old', 'remediated'])
    except getopt.GetoptError:
        usage(sys.stderr)
        sys.exit(1)

    old_out = False
    remediated_out = False
    for opt, _ in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        # -p/--pdb is accepted but has no effect: PDB is the only input type.
        if opt in ("-o", "--old"):
            old_out = True
        if opt in ("-r", "--remediated"):
            remediated_out = True

    if len(args) < 1:
        _fail("User must specify input filename")
    if len(args) > 1:
        _fail("too many input files specified")
    if old_out and remediated_out:
        _fail("cannot output old and remediated names simultaneously")
    if not old_out and not remediated_out:
        remediated_out = True

    filename = args[0]
    if not os.path.isfile(filename):
        sys.stderr.write("\n**REMEDIATOR ERROR: cannot find %s\n" % (filename))
        sys.exit(1)

    atom_exch = _load_atom_exchange(HASH_FILE, remediated_out)
    with open(filename) as pdb_file:
        _remediate(pdb_file, atom_exch, remediated_out, old_out, sys.stdout)
    return 0


if __name__ == "__main__":
    sys.exit(main())