#!/usr/local/bin/perl -w =pod cpd2xplor v. 20 May 1996, Dave Schweisguth Converts a cpd to a list of distance constraints in Xplor format, filtering out uninformative peaks and translating intensity scores to distance ranges Caveats: Filtering is hardcoded Handles two-dimensional data only To do: Do more sanity checking on NOE buildup (see comments below) Use sort_as_spin to decide whether to swap spins when building %constraints Add peak numbers, assignments, shifts (will need to precalculate these), etc. to error messages Check that crosspeaks - on opposite sides of the diagonal - at different mixing times have the same chemical shifts. On the other hand, if check-cpd could cross-check chemical shifts in ssds and cpds it would prevent these errors indirectly. =cut ### Preliminaries require 5.002; # Perl 5.002 required use strict; # Require optional-but-desirable practices use vars qw($whatami); # Exempt globals from 'use strict' use Churn qw(partner retrieve_cpd retrieve_ssd %spins); ### Parameters # Environment ($whatami = $0) =~ s|.*/||; # `basename $0` my $isatty = -t STDIN; # Configuration my $nss5 = 1; my $filter = 0; my $Filter = 1; my $verbose = 0; my %distance = ( # Distance ranges for intensity scores short => '2.4 0.6 0.6', medium => '3 1 1', long => '4 1 1', exch => '3.8 1 1', ); my %spin_class = ( # Spin classes for sorting "h1" => 0, "h3" => 0, "h2" => 1, "h41" => 1, "h42" => 1, "h6" => 1, "h8" => 1, "h1'" => 2, "h5" => 2, "h2'" => 3, "h3'" => 3, "h4'" => 3, "h5'" => 3, "h5''" => 3 ); # Initialization (don't change these) $| = 1; # Interleave STDOUT and STDERR properly my($ssdfile, $ssd, @infiles, $infile, $cpd, $peak, $dim, @asg, $badasg, @type, @res, @spin, $partner, $swap, %constraints, $constraint, $i, $resi, $spini, $j, $resj, $spinj, @earlier, @later, $infile2, @scores, @intensity, $distance); ### Arguments and error-checking # Parse args my($arg, $sign, $first, $rest); while (@ARGV and ($sign, $first, $rest) = ($ARGV[0] =~ /^([\-+])(.)(.*)/)) { if ($sign eq '+' && $first !~ /[5fFqv]/) { # -/+ switches &usage("$sign$first is not an option.\n"); } if ($first =~ /[\0]/) { # Switches with arguments (none at the moment) shift; $arg = $rest ne '' ? $rest : @ARGV ? shift : &usage("$sign$first requires an argument.\n"); } elsif ($rest eq '') { shift; } else { $ARGV[0] = "$sign$rest"; } if ($first eq '5') { $nss5 = $sign eq '-' ? 1 : 0; } elsif ($first eq 'f') { $filter = $sign eq '-' ? 1 : 0; } elsif ($first eq 'F') { $Filter = $sign eq '-' ? 1 : 0; } elsif ($first eq 'q') { $verbose -= $sign eq '-' ? 1 : -1; } elsif ($first eq 'v') { $verbose += $sign eq '-' ? 1 : -1; } elsif ($first eq 'u') { &usage(0); } else { &usage("$sign$first is not an option.\n"); } } sub usage { warn $_[0] ? "$whatami: $_[0]" : '', < 1; next PEAK; } } # Deconstruct assignment name ($type[$dim], $res[$dim], $spin[$dim]) = $asg[$dim] =~ /^(.)(\d+)(.*)/; } # Filter out uninformative constraints if ($filter && ( # n sugar - n sugar ( $res[0] == $res[1] && $spin[0] =~ /^h[1-5]''?$/ && $spin[1] =~ /^h[1-5]''?$/ ) || # n 6/8 - n sugar (except 5'/5'') ( $res[0] == $res[1] && ( $spin[0] =~ /^h[68]$/ && $spin[1] =~ /^h[1-4]'$/ || $spin[1] =~ /^h[68]$/ && $spin[0] =~ /^h[1-4]'$/ ) ) # New uninformative filter conditions go here )) { warn "$whatami: In $infiles[$infile], $asg[0]-$asg[1] is uninformative; filtering.\n" if $verbose > 1; next; } if ($Filter && ( # (g)n imino - (c)n' h41, h42 ( defined($partner = &partner($ssd, $res[0])) && $partner == $res[1] && ($spin[0] =~ /^h4[12]$/ || $spin[1] =~ /^h4[12]$/) ) || # (g)n imino - (c)n' h5 ( defined($partner = &partner($ssd, $res[0])) && $partner == $res[1] && ( $type[0] eq 'c' && $spin[0] eq 'h5' || $type[1] eq 'c' && $spin[1] eq 'h5' ) ) || # (g)n imino - n+1 anomeric ( $res[0] + 1 == $res[1] && $spin[0] eq 'h1' && $spin[1] eq "h1'" || $res[1] + 1 == $res[0] && $spin[1] eq 'h1' && $spin[0] eq "h1'" ) || # (g)n imino - n'+1 anomeric ( defined($partner = &partner($ssd, $res[0])) && $partner + 1 == $res[1] && $spin[0] eq 'h1' && $spin[1] eq "h1'" || defined($partner = &partner($ssd, $res[1])) && $partner + 1 == $res[0] && $spin[1] eq 'h1' && $spin[0] eq "h1'" ) # New exchange filter conditions go here )) { warn "$whatami: In $infiles[$infile], $asg[0]-$asg[1] comes from exchange; filtering.\n" if $verbose > 1; next; } # If $asg[1] should come before $asg[1] in sort order, swap them. # (This is the same algorithm as sort_as_spin; they should be merged.) # Note for later whether or not we swapped. if ( ( $res[0] <=> $res[1] || $spin_class{$spin[0]} <=> $spin_class{$spin[1]} || $spin[0] cmp $spin[1] ) == 1 ) { @asg = @asg[1, 0]; $swap = 1; } else { $swap = 0; } $constraints{$asg[0]}{$asg[1]}[$infile][$swap] = $peak; } } foreach $i (&sort_as_spin(keys %constraints)) { ($resi, $spini) = ($i =~ /^.(\d+)(.*)/); foreach $j (&sort_as_spin(keys %{$constraints{$i}})) { ($resj, $spinj) = ($j =~ /^.(\d+)(.*)/); $constraint = $constraints{$i}{$j}; # Deref for brevity foreach $infile (0 .. $#infiles) { if (! defined $$constraint[$infile]) { # Deal with missing peak. @earlier = (); @later = (); foreach $infile2 (0 .. $#infiles) { if (defined $$constraint[$infile2]) { push(@earlier, $infile2) if $infile2 < $infile; push(@later, $infile2) if $infile2 > $infile; } } if (@earlier) { # A peak present at a shorter mixing time should always be # present at a longer mixing time. warn "$whatami: $i-$j and/or $j-$i is in @infiles[@earlier], but neither is in $infiles[$infile]!\n" if $verbose > -1; } if (@later) { # A peak present at a longer mixing time may well not be # present at a shorter mixing time. warn "$whatami: Neither $i-$j nor $j-$i is in $infiles[$infile], but one or both is in @infiles[@later].\n" if $verbose > 0; } $intensity[$infile] = 'a'; } else { # Merge unswapped and swapped intensity scores into @intensity @scores = ( # Deref for brevity $$constraint[$infile][0]{intensity}, $$constraint[$infile][1]{intensity} ); if (! defined $scores[0]) { warn "$whatami: In $infiles[$infile], $j-$i is defined but $i-$j is not.\n" if $verbose > 0; $intensity[$infile] = $scores[1]; } elsif (! defined $scores[1]) { warn "$whatami: In $infiles[$infile], $i-$j is defined but $j-$i is not.\n" if $verbose > 0; $intensity[$infile] = $scores[0]; } else { if ( $scores[0] eq 'e' && $scores[1] =~ /^smw$/ || $scores[0] =~ /^smw$/ && $scores[1] eq 'e' ) { warn "$whatami: In $infiles[$infile], $i-$j has an intensity score of 'e' in one quadrant and 's', 'm' or 'w' in the other. Skipping ...\n"; $intensity[$infile] = 'a'; } elsif ($scores[0] eq 'e' || $scores[1] eq 'e') { $intensity[$infile] = 'e'; } else { $intensity[$infile] = &max_intensity(@scores); } } if ($intensity[$infile] eq 'a') { warn "$whatami: In $infiles[$infile], $i-$j is defined to be absent.\n" if $verbose > 1; } } } # Merge @intensity into $distance. The algorithm is AAFS': # # - If a peak involves an exchangeable proton, the corresponding # distance is "exch" no matter what the mixing time. # - If a nonexchangeable peak is strong or medium at the first mixing # time, the corresponding distance is "short". # - If a nonexchangeable peak is strong or medium at the second mixing # time, the corresponding distance is "medium". # - If a nonexchangeable peak is a) weak at the first mixing time and # not strong or medium at the second, b) weak at the second mixing # time or c) present at any later mixing time, the corresponding # distance is "long". # # Note that # - peaks which are strong or medium at the first mixing time and # weak or absent at the second don't cause a warning # - peaks which are exchangeable at one mixing time and # nonexchangeable at another don't cause a warning $distance = $intensity[0] eq 'e' ? 'exch' : $intensity[0] =~ /^[sm]$/ ? 'short' : @infiles == 1 ? '' : $intensity[1] eq 'e' ? 'exch' : $intensity[1] =~ /^[sm]$/ ? 'medium' : ''; if ($distance eq '') { foreach $infile (0 .. $#infiles) { $distance = $intensity[$infile] eq 'e' ? 'exch' : $intensity[$infile] =~ /^[smw]$/ ? 'long' : ''; } } # If distance is set, tweak and print constraint. If not, complain. if ($distance) { $distance = $distance{$distance}; # Sorry about that if ($nss5) { foreach ($spini, $spinj) { if (/^h5''?$/) { $distance =~ s/^(\S+)\s+(\S+)\s+(\S+)/sprintf("$1 %3.1f %3.1f", $2 + 1.78, $3 + 1.78)/e; } } } print "assign (resid $resi and name $spini) ", "(resid $resj and name $spinj) $distance\n"; } else { warn "$whatami: In all input files, $i-$j is absent or defined to be absent.\n" if $verbose > 1; } } } exit; ### Subroutines # Sort as spin names sub sort_as_spin { # Use the Schwartzian transform (from perlfunc(1)'s entry on sort) for # speed. It's only twice as fast as a simple sort in my case (21 residues) # but it's ten times as much fun. Read it from the bottom up. map { $_->[0] } sort { # Numerically by residue number || numerically by spin class || # lexically by spin name $$a[1] <=> $$b[1] || $spin_class{$$a[2]} <=> $spin_class{$$b[2]} || $$a[2] cmp $$b[2] } map { # Precalculate an array of arrays of $_, residue number, spin name [ $_, $_ =~ /^.(\d+)(.*)/ ] } @_; } # Return the greater of two strong/medium/weak/absent intensity scores sub max_intensity { foreach (qw(s m w a)) { if ($_[0] eq $_ || $_[1] eq $_) { return $_ }; } }