#!/usr/local/bin/perl -w =pod table2ssd v. 23 Jul 1996, Dave Schweisguth Incorporates a human-readable/writable table of chemical shifts into an ssd. Input file format: # Res H1/H3 NH1 NH2 H6/H8 H2/H5 H1' H2' H3' H4' H5' H5'' P #---- ----- --- --- ----- ----- ---- ---- ---- ---- ---- ---- ----- G25 NO NO NO 8.13 NA 5.81 4.93 4.71 4.57 4.26 4.41 NO G26 13.25 NO NO 7.60 NA 5.90 4.56 4.56 4.27 4.51 -3.68 C27 NA 6.79 8.41 7.66 5.25 5.51 4.55 4.54 4.45 4.14 -4.26 A28 NA NO NO 7.96 6.98 5.94 4.66 4.68 4.50 4.15 -3.77 Each line must contain 13 entries. Entries are separated by any number of spaces and/or a SINGLE tab. The first entry is a residue name, consting of three elements, each optional: - the nucleotide type ("A", "C", "G" or "U"), - the segment name (not shown in the example above) - the residue number The first letter (if any) of the first entry is interpreted as a residue type if possible and a segment name (or part thereof) if not. Subsequent entries may be - a chemical shift - blank (i.e. two consecutive tabs) or "?" to indicate an unknown chemical shift - "NO" to indicate a spin which has definitively been "not observed" - "NA" to indicate that a column is "not applicable" Segments may be specified in the format used by ssd2table segment x g1 u2 a3 c4 or like so gx1 ux2 ax3 cx4 or even like so gx1 u2 a3 c4 The rules for segment names are described in the comments to seq2ssd. Case is ignored except in segment names. Blank lines and comments (lines beginning with #, ! or ;) are ignored. The exit code is the number of errors which were encountered. Bugs: - Diagnostic messages do not always preserve the case of the input text about which they complain. Caveats: - Deals only with a fixed table format, which happens to include only proton and phosphorus. - Lines in the input table are not required to refer to unique residues or to be in any particular order. - A tab at the end of a line of chemical shifts counts as an entry. =cut ### Preliminaries require 5.002; # Perl 5.002 required use strict; # Require optional-but-desirable practices use vars qw($whatami); # Exempt globals from 'use strict' use Churn qw(%schema %spins retrieve_ssd store_fd); ### Parameters # Environment (my $whatami = $0) =~ s|.*/||; # `basename $0` my $isatty = -t STDIN; # Configuration my $overwrite = 0; my $tolerance = 0.03; my $verbose = 0; my $seg = 'a'; # Default segment name my $default_is_current = 1; # We haven't changed the default seg name my $have_used_default = 0; # We've used the default segment name my $res = 0; # One less than the default first resiuue # number # Initialization (don't change these) $| = 1; # Interleave STDOUT and STDERR properly my $exit = 0; # Exit code my $types = join('', keys %spins); my($ssd, $banner, @entries, $type, $temp_seg, $temp_res, $i, $entry, $key, $spin, $asg); ### Arguments and error-checking # Parse arguments my($arg, $sign, $first, $rest); while (@ARGV and ($sign, $first, $rest) = ($ARGV[0] =~ /^([\-+])(.)(.*)/)) { if ($sign eq '+' && $first !~ /[oqv]/) { # -/+ switches (none at the moment) &usage("$sign$first is not an option.\n"); } if ($first =~ /[t]/) { # Switches with arguments (none at the moment) shift; $arg = $rest ne '' ? $rest : @ARGV ? shift : &usage("$sign$first requires an argument.\n"); } elsif ($rest eq '') { shift; } else { $ARGV[0] = "$sign$rest"; } if ($first eq 'o') { $overwrite = $sign eq '-' ? 1 : 0; } elsif ($first eq 'q') { $verbose -= $sign eq '-' ? 1 : -1; } elsif ($first eq 't') { $tolerance = $arg; } elsif ($first eq 'v') { $verbose += $sign eq '-' ? 1 : -1; } elsif ($first eq 'u') { &usage(0); } else { &usage("$sign$first is not an option.\n"); } } sub usage { warn $_[0] ? "$whatami: $_[0]" : '', <) { $banner = "$whatami: $ARGV line $."; if (/^$/ || /^[#!;]/) { # Comment next; } elsif (/^seg[a-z]*\s+([a-z_]+)$/i) { # Segment definition if ($default_is_current && $have_used_default) { warn "$banner: The first segment is defined in mid-table.\n" if $verbose > -1; } if (defined $$ssd{segs}{$1}) { $seg = $1; $default_is_current = 0; } else { warn "$banner: Segment $1 does not exist in ssd!\n" if $verbose > -1; $exit++; } } elsif (/^[a-z_]*(?:-?\d+)?( *[ \t] *\S*){12} *\n$/i) { # Chemical shift table line # Split line s/[ \n]*$//; @entries = split(/ *[ \t] */, $_, 13); # LIMIT field preserves trailing '' # Extract segment name, residue number and residue type from first entry ($type, $temp_seg, $temp_res) = (shift @entries) =~ /^([$types]?)([a-z_]*)((?:-?\d+)?)/i; # Segment name if ($temp_seg eq '') { # Guess a segment name if ($default_is_current) { $have_used_default = 1; } } else { # Use the given segment name if (! defined $$ssd{segs}{$temp_seg}) { warn "$banner: Segment $temp_seg does not exist in ssd!\n" if $verbose > -1; $exit++; next; } $seg = $temp_seg; $default_is_current = 0; } # Residue number if ($temp_res eq '') { # Guess a residue number $res++; } else { # Use the given residue number $res = $temp_res; } if (! defined $$ssd{segs}{$seg}{$res}) { warn "$banner: " . (keys %{$$ssd{segs}} > 1 ? "Segment $seg r" : 'R') . "esidue $res does not exist in ssd!\n" if $verbose > -1; $exit++; next; } # Residue type if ($type eq '') { # Use the residue type from ssd $type = $$ssd{segs}{$seg}{$res}{type}; } else { # Use the given residue type $type = lc($type); if ($$ssd{segs}{$seg}{$res}{type} ne $type) { warn "$banner: " . (keys %{$$ssd{segs}} > 1 ? "Segment $seg r" : 'R') . "esidue $res is type $$ssd{segs}{$seg}{$res}{type}, not type $type in ssd!\n" if $verbose > -1; $exit++; next; } } # Incorporate entries into ssd $i = 0; foreach $entry (@entries) { $entry = lc($entry); $banner = "$whatami: $ARGV line $. column " . ($i + 2) . " (\"$entry\")"; $key = $schema{$type}[$i]; $spin = \$$ssd{segs}{$seg}{$res}{spins}{$key} unless $key eq 'na'; $asg = uc($type) . (keys %{$$ssd{segs}} > 1 ? $seg : '') . $res . uc($key); if ($entry eq 'na') { # Check first to see if this column can ever have an entry for # this residue, then if the appropriate spin is defined in the # ssd. The latter might be false when the former is true if the # residue is special in some way, e.g. a 5' residue without a # phosphate. if ($key eq 'na' || ! defined $$spin) { warn "$banner: Entry is not-applicable (NA).\n" if $verbose > 0; } else { warn "$banner: $asg is not-applicable (NA), but should be ?, not-observed (NO) or a chemical shift!\n" if $verbose > -1; $exit++; } } elsif ($entry eq '' || $entry eq '?') { if ($key eq 'na' || ! defined $$spin) { warn "$banner: Entry is unknown but should be not-applicable (NA)!\n" if $verbose > -1; $exit++; } else { warn "$banner: $asg is unknown.\n" if $verbose > 0; } } elsif ($entry eq 'no') { if ($key eq 'na' || ! defined $$spin) { warn "$banner: Entry is not-observed (NO) but should be not-applicable (NA)!\n" if $verbose > -1; $exit++; } else { if ($$spin eq '?') { $$spin = $entry; warn "$banner: $asg set to not-observed (NO).\n" if $verbose > 0; } else { warn "$banner: $asg is not-observed (NO) in table and $$spin in ssd.\n" if $verbose > 0; if ($overwrite) { $$spin = $entry; warn "$banner: $asg set to not-observed (NO).\n" if $verbose > 0; } } } } elsif ($entry =~ /^-?(\d*)\.?(\d*)/ && "$1$2" ne '') { # A number if ($key eq 'na' || ! defined $$spin) { warn "$banner: Entry is assigned but should be not-applicable (NA)!\n" if $verbose > -1; $exit++; } else { if ($$spin eq '?') { $$spin = $entry; warn "$banner: $asg set to $entry.\n" if $verbose > 0; } elsif (abs($$spin - $entry) < $tolerance) { warn "$banner: $asg is $entry in table and $$spin in ssd.\n" if $verbose > 0; if ($overwrite) { $$spin = $entry; warn "$banner: $asg set to $entry.\n" if $verbose > 0; } } else { warn "$banner: $asg is $entry in table and $$spin in ssd!\n" if $verbose > -1; $exit++; if ($overwrite) { $$spin = $entry; warn "$banner: $asg set to $entry.\n" if $verbose > 0; } } } } else { # Unparseable entry if ($key eq 'na' || ! defined $$spin) { warn "$banner: Entry is unparseable (\"$entry\") but should be not-applicable (NA)!\n" if $verbose > -1; $exit++; } else { warn "$banner: $asg is unparseable (\"$entry\")!\n" if $verbose > -1; $exit++; } } $i++; } } else { # Unparseable line warn "$banner: Line is unparseable:\n$_" if $verbose > -1; $exit++; } } &store_fd($ssd, 'STDOUT'); exit $exit;