#!/usr/local/bin/perl -w

# repdb v. 7 Mar 1996, Dave Schweisguth <dcs@proton.chem.yale.edu>
# Interconverts various flavors of PDB format
# See http://www.pdb.bnl.gov/Format.doc/Format.Home.html

### Preliminaries

require 5.001;			    # Perl 5.000 required, 5.001m recommended
use strict;			    # Require optional-but-desirable practices

### Parameters

# Environment

(my $whatami = $0) =~ s|.*/||;	    # `basename $0`
my $isatty = -t STDIN;

# Configuration

my %formats = (
    'insight', {
	'insight', \&same,
	'midas', \&unknown,
	'xplor', \&insight2xplor,
    },
    'midas', {
	'insight', \&unknown,
	'midas', \&same,
	'xplor', \&unknown,
    },
    'xplor', {
	'insight', \&unknown,
	'midas', \&xplor2midas,
	'xplor', \&same,
    },
);
my $in = 'insight';		    # One of keys %formats
my $out = 'xplor';		    # One of keys %{$formats{$in}}

# Initialization (don't change these)

my(@pdb);

### Arguments and error-checking

# Parse args

my($arg, $sign, $first, $rest);
while (@ARGV and ($sign, $first, $rest) = ($ARGV[0] =~ /^([\-+])(.)(.*)/)) {
    if ($sign eq '+' && $first !~ /[\0]/) { # -/+ switches (none at the moment)
	&usage("$whatami: $sign$first is not an option.\n");
    }
    if ($first =~ /[io]/) {	# Switches with arguments
    	shift;
    	$arg = $rest ne '' ? $rest : @ARGV ? shift :
      	    &usage("$whatami: $sign$first requires an argument.\n");
    } elsif ($rest eq '') {
	shift;
    } else {
    	$ARGV[0] = "$sign$rest";
    }
    if	  ($first eq 'i') { $in = $arg; }
    elsif ($first eq 'o') { $out = $arg; }
    elsif ($first eq 'u') { &usage(0); }
    else		  { &usage("$whatami: $sign$first is not an option.\n"); }
}

sub usage {
    my($message) = $_[0];

    warn $message if $message;
    warn <<EOP;
Usage: $whatami [-iou] file [...]
-i xxx	Read input format xxx
-o xxx	Write output format xxx
-u	This message

Available input/output formats: insight midas xplor
Available conversions:
insight -> xplor
xplor -> midas
EOP
    exit !! $message;
}

# Post-process args

die "$whatami: Specify an input file or provide one on standard input.\n"
    if $isatty && ! @ARGV;

unless (grep(/^$in/i && ($in = $_), keys %formats)) {
    die "$whatami: Input format $in is not known.\n";
}

unless (grep(/^$out/i && ($out = $_), keys %{$formats{$in}})) {
    die "$whatami: Output format $out is not known.\n";
}

### Do it

if ($formats{$in}{$out} eq \&same) {    # Perfectly disgusting
    &same;
} elsif ($formats{$in}{$out}) {
    @pdb = <>;
    &{$formats{$in}{$out}};
    print @pdb;
} else {
    &unknown;
}

exit;

### Subroutines

# $whatami, $in, $out and $pdb are global

sub same {
    die "$whatami: Input format $in and output format $out are the same.\n";
}

sub unknown {
    die "$whatami: Don't know how to convert input format $in to output format $out.\n";
}

# Change atom names from Insight to Xplor (= PDB standard?)
#   (Insight considers column 17 (altLoc) part of the atom name)
# Change residue names from short to long
# Set segid (cols. 73-76) to "sgid" for easy global substitution in an editor

sub insight2xplor {

    # Each base amino has one proton which basepairs and one which doesn't.
    # In JPR's paramater files, the basepairing proton is H?2.
    # In Insight, the basepairing proton is H?2 for C and G and H61 for A.

    my %atoms = (
	"1H2  ",    " H21 ",
	"2H2  ",    " H22 ",
	"1H4  ",    " H41 ",
	"2H4  ",    " H42 ",
	"1H5' ",    " H5' ",
	"2H5' ",    "H5'' ",
	"1H6  ",    " H62 ",
	"2H6  ",    " H61 ",
	" HO2'",    " HO  ",
    );
    
    my %residues = (
	'  A',	'ADE',
	'  C',	'CYT',
	'  G',	'GUA',
	'  U',	'URI',
    );
    
    foreach (@pdb) {

	# Change atom names from Insight to Xplor (= PDB standard?)
	#   (Insight considers column 17 (altLoc) part of the atom name)
	# Change residue names from short to long

	if (/^(ATOM  .{6})(.{5})(.{3})(.*)/) {
	    $_ = $1 . ($atoms{$2} ? $atoms{$2} : $2) .
		($residues{$3} ? $residues{$3} : $3) . "$4\n";
	}
	
	# Set segid (cols. 73-76) to "sgid" for easy global substitution in an
	#   editor
	
	s/^(ATOM  .{66})    /${1}sgid/;
    }
}

# Use first character of segid for chain ID
# Give unnumbered remarks remark number 5 (unspecified format)

sub xplor2midas {

    foreach (@pdb) {

	# Use first character of segid for chain ID

	if (/^(ATOM  .{15})(.)(.{50})(.{4})(.*)/) {
	    if ($2 eq ' ') {
		$_ = $1 . substr($4, 0, 1) . "$3$4$5\n";
	    }
	}

	# Give unnumbered remarks remark number 5 (unspecified format)

	if (/^REMARK/) {
	    if (! /^REMARK (?:  | \d|\d\d)\d /) {
		s/^REMARK ?(.*)/REMARK   5 $1/;
	    }
	}

    }
}
