#!/usr/local/bin/perl -w =pod check-cpd v. 16 Jul 1996, Dave Schweisguth Checks a cpd for errors: spins which are null (unassigned) which are ambi(guous) or junk * whose residue type and number is inconsistent with the specified ssd whose type is inconsistent with their residue type which have a chemical shift inconsistent with their type which have a chemical shift different from that in the specified ssd which have an unrecognizeable name which are defined to be absent * intensities e where no partner is exchangeable smw where one or more partners is exchangeable other than esmwa which are missing peaks which have the same assignments as other peaks which involve non-adjacent residues * only in verbose mode Caveats: - Peaks in three (or more)-dimensional data with (e.g.) D1 adjacent to D2 and D2 to D3 but D1 not adjacent to D3 cause a warning. Bugs: - Handles molecules with more than one segment both incorrectly and inconsistently. =cut ### Preliminaries require 5.002; # Perl 5.002 required use strict; # Require optional-but-desirable practices use vars qw($whatami); # Exempt globals from 'use strict' use Churn qw(@matrix_params partner retrieve_cpd retrieve_ssd); ### Parameters # Environment ($whatami = $0) =~ s|.*/||; # `basename $0` my $isatty = -t STDIN; # Configuration my $intensity = 1; # Check for missing intensities? my $tolerance = 0.03; # Acceptable difference between cpd and ssd my $verbose = 0; my %shift_range = ( # Ranges of expected chemical shifts "h2'" => [3.9, 5], "h3'" => [3.9, 5], "h4'" => [3.9, 5], "h5'" => [3.9, 5], "h5''" => [3.9, 5], "h1'" => [5, 6], "h5" => [5, 6], "h2" => [6.9, 8.5], "h6" => [6.9, 8.5], "h8" => [6.9, 8.5], "h41" => [6.5, 7.5], "h42" => [8, 9], "h1" => [10, 15], "h3" => [10, 15], ); my %is_exch = ( # Exchangeable protons h41 => 1, h42 => 1, h1 => 1, h3 => 1, ); # Initialization (don't change these) $| = 1; # Interleave STDOUT and STDERR properly my($ssdfile, $ssd, @cpdfiles, $cpdfile, $cpd, %seen, $peak, $this_intensity, $unique, $exch, $tag, $dim, @shift, $peakbanner, $asg, $dimbanner, $type, $res, $spin, $type_ok, $spin_ok, $seg, $dim0, $res0, $dim1, $res1, $partner); ### Arguments and error-checking # Parse args my($arg, $sign, $first, $rest); while (@ARGV and ($sign, $first, $rest) = ($ARGV[0] =~ /^([\-+])(.)(.*)/)) { if ($sign eq '+' && $first !~ /[iqv]/) { # -/+ switches &usage("$sign$first is not an option.\n"); } if ($first =~ /[t]/) { # Switches with arguments (none at the moment) shift; $arg = $rest ne '' ? $rest : @ARGV ? shift : &usage("$sign$first requires an argument.\n"); } elsif ($rest eq '') { shift; } else { $ARGV[0] = "$sign$rest"; } if ($first eq 'i') { $intensity = $sign eq '-' ? 1 : 0; } elsif ($first eq 't') { $tolerance = $arg; } elsif ($first eq 'q') { $verbose -= $sign eq '-' ? 1 : -1; } elsif ($first eq 'v') { $verbose += $sign eq '-' ? 1 : -1; } elsif ($first eq 'u') { &usage(0); } else { &usage("$sign$first is not an option.\n"); } } sub usage { warn $_[0] ? "$whatami: $_[0]" : '', < 0; $this_intensity = 0; # Ambi/junk spins don't need intensity $unique = 0; # Ambi/junk spins may not be unique } elsif ($asg =~ /^(.)(\d+)(.+)/) { ($type, $res, $spin) = ($1, $2, $3); $spin = "\L$spin"; $exch |= defined $is_exch{$spin}; # Save for later # Worry about assignment validity # Is residue $res really $type? # Does residue $res really have a spin $spin? $type_ok = 0; $spin_ok = 0; foreach $seg (keys %{$$ssd{segs}}) { $type_ok |= $$ssd{segs}{$seg}{$res}{type} eq $type; $spin_ok |= defined $$ssd{segs}{$seg}{$res}{spins}{$spin}; } print "$dimbanner has incorrect residue type.\n" unless $type_ok; print "$dimbanner has incorrect spin.\n" unless $spin_ok; # Worry about chemical shift ranges if ( defined $shift_range{$spin} && ( $shift[$dim] < $shift_range{$spin}[0] || $shift[$dim] > $shift_range{$spin}[1] ) ) { print "$dimbanner has out-of-range shift.\n"; } # Check shift in cpd against shift in ssd if ($type_ok && $spin_ok) { $seg = (keys %{$$ssd{segs}})[0]; if ($$ssd{segs}{$seg}{$res}{spins}{$spin} eq '?') { warn "$dimbanner is unknown in ssd!\n"; } elsif ($$ssd{segs}{$seg}{$res}{spins}{$spin} eq 'no') { warn "$dimbanner is not-observed (NO) in ssd!\n"; } elsif (abs($shift[$dim] - $$ssd{segs}{$seg}{$res}{spins}{$spin}) > $tolerance) { warn "$dimbanner is $$ssd{segs}{$seg}{$res}{spins}{$spin} in ssd!\n"; } } } else { print "$dimbanner has an unparseable name.\n"; } } # Worry about intensities if ($this_intensity) { if (defined $$peak{intensity}) { if ($$peak{intensity} eq 'a') { print "$peakbanner is defined to be absent.\n" if $verbose > 0; } elsif ($$peak{intensity} eq 'e') { if (! $exch) { print "$peakbanner has intensity $$peak{intensity} but does not involve an exchangeable proton.\n"; } } elsif ($$peak{intensity} =~ /^[smw]$/) { if ($exch) { print "$peakbanner has intensity $$peak{intensity} but involves an exchangeable proton.\n"; } } else { print "$peakbanner has intensity $$peak{intensity}.\n"; } } else { print "$peakbanner has no intensity.\n"; } } # Worry about duplicate peaks if ($unique) { if ($seen{$tag}) { print "$peakbanner and $seen{$tag} have the same assignments.\n"; } else { $seen{$tag} = "peak $$peak{item} (" . sprintf("%.2f,%.2f", @shift) . ')'; } } # Worry about adjacency of residue numbers # Check all pairs of assignments in crosspeak. This will complain about # peaks in three (or more)-dimensional data with (e.g.) D1 adjacent to # D2 and D2 to D3 but D1 not adjacent to D3. If one knew which # dimensions were expected to be adjacent (either by convention, which # would probably inconvenience the user, or by explicit definition in # the cpd) one could check only the right pairs. foreach $dim0 (0 .. $#{$$peak{dim}} - 1) { if (($res0) = $$peak{dim}[$dim0]{asg} =~ /^.(\d+)/) { foreach $dim1 ($dim0 + 1 .. $#{$$peak{dim}}) { if (($res1) = $$peak{dim}[$dim1]{asg} =~ /^.(\d+)/) { unless ( abs($res0 - $res1) <= 1 || defined($partner = &partner($ssd, $res0)) && abs($partner - $res1) <= 1 ) { print "$peakbanner involves non-adjacent residues.\n"; } } } } } } } exit; ### Subroutines # Convert points to ppm sub point2ppm { my($sfreq, $swidth, $refpt, $refsh, $datsiz, $point) = @_; $refsh + ($refpt - $point) * $swidth / $datsiz / $sfreq; }