#!/usr/local/bin/perl -w

=pod

peaks2cpd v. 7 Apr 1996, Dave Schweisguth

Converts a Felix peaks entity file into a cpd ("CrossPeak Database") to be
used by other CHURN programs

Caveats:

- Multiple elements of @{$cpd{'dim'}} may point to the same anonymous hash.
  This might cause problems if one wanted to modify the contents of one
  element but not the other.

Input file format:

Create the input file by saving a Felix peaks entity into a text file.
Assignments ("asg" fields) may follow the following conventions:

- An assignment may be "null", to indicate that it has not yet been assigned,
  "ambi", to indicate that it is known to be ambiguous, or "junk", to indicate
  that it is believed to be an artifact.

- If the last character of an assignment is "s", "m", "w", "e" or "a", it will
  be removed and used as an intensity score. This does not conflict with
  conventional notation for spins in RNA (e.g. "g1h8"), which always ends with
  a digit or prime. It also does not conflict with "null", "ambi" or "junk".
  It can be turned off with the '+i' option.

cpd ("CrossPeak Database") structure:

    %cpd = {
        name => Filename from c** line
        dim => [                # Array of dimensions
            {                   # Dimension structure
                sfreq => Spectrometer frequency (MHz) in this dimension
                swidth => Spectral width (Hz) in this dimension
                refpt => Reference point in this dimension
                refsh => Reference shift in this dimension
                datsiz => Data size in this dimension
            },
            ...
        ],
        peaks => [              # Array of crosspeaks
            {                   # Crosspeak structure
                item => Item number in the input file
                dim => [        # Array of dimensions
                    {           # Dimension structure
                        cen => Center of peak in this dimension
                        wid => Width of peak in this dimension
                        ptr => ???
                        asg => name
                    },
                    ...
                ],
                intensity => Intensity score
                cc => ???
            },
            ...
        ],
    };

=cut

### Preliminaries

require 5.002;                  # Perl 5.002 required
use strict;                     # Require optional-but-desirable practices
use vars qw($whatami);          # Exempt globals from 'use strict'
use Churn qw(@matrix_params is_num store_fd);

### Parameters

# Environment
($whatami = $0) =~ s|.*/||;     # `basename $0`; used as the prefix of every message
my $isatty = -t STDIN;          # True when standard input is a terminal

# Configuration
my $dim = 2;                    # Dimensionality if unspecified
my $exch = 0;                   # Fill in undefined intensities with 'e'
my $hetero = 0;                 # Heteronuclear data?
my $intensity = 1;              # Extract peak intensities

# Initialization (don't change these)
my $dimd = 1;                   # $dim default flag; cleared once -d supplies a value
my $regexp1 = '(.{10})(.{10}) (.{5}) (.{16})'; # Regexp for 1D peak fields
# NOTE(review): $params is not used anywhere in the surviving text of this
# file; it may have been used in the span that is missing below.
my($params, $regexp, $i, $j, %cpd, $intdim, @words, $offset, $this_intensity);

### Arguments and error-checking

# Parse args.  Each leading argument of the form -x... or +x... is split into
# its sign, its first letter, and whatever follows; the loop stops at the
# first argument that does not look like a switch.
my($arg, $sign, $first, $rest);
while (@ARGV and ($sign, $first, $rest) = ($ARGV[0] =~ /^([\-+])(.)(.*)/)) {
    # Only e, h and i may be given with a '+' sign; anything else is rejected.
    if ($sign eq '+' && $first !~ /[ehi]/) {
        &usage("$sign$first is not an option.\n");
    }
    if ($first =~ /[d]/) {      # Switches with arguments
        shift;
        # The argument is either glued to the switch (-d3) or is the next
        # word on the command line (-d 3).
        $arg = $rest ne '' ? $rest : @ARGV ? shift : &usage("$sign$first requires an argument.\n");
    } elsif ($rest eq '') {
        # A lone switch: consume it.
        shift;
    } else {
        # Bundled switches (-eh): drop the letter just read and leave the
        # remainder, with the same sign, for the next pass.
        $ARGV[0] = "$sign$rest";
    }
    if ($first eq 'd') {
        $dim = $arg;
        $dimd = 0;
    } elsif ($first eq 'e') {
        $exch = $sign eq '-' ? 1 : 0;
        # Turning on -e also turns on intensity extraction.
        $intensity |= $exch
    } elsif ($first eq 'h') {
        # NOTE(review): unlike -e and -i, '-h' yields 0 and '+h' yields 1
        # here.  Confirm that this polarity is intended.
        $hetero = $sign eq '-' ? 0 : 1;
    } elsif ($first eq 'i') {
        $intensity = $sign eq '-' ? 1 : 0;
    } elsif ($first eq 'u') {
        &usage(0);
    } else {
        &usage("$sign$first is not an option.\n");
    }
}

# NOTE(review): the text of this file appears to be damaged at this point.
# Everything between the "<" below and " 0) {" seems to be missing --
# presumably the usage here-document, the end of sub usage, and the head of a
# loop over the dimensions that fills $cpd{dim} from the remaining
# command-line arguments.  That cannot be confirmed from here; restore the
# span from a pristine copy before relying on this script.  The surviving
# tokens are left exactly as found.
sub usage {
    # Prefix the message with the program name when one was passed.
    warn $_[0] ? "$whatami: $_[0]" : '', < 0) {
        # This dimension shares dimension 0's hash (same reference), which is
        # the aliasing described under "Caveats" in the POD above.
        $cpd{dim}[$i] = $cpd{dim}[0];
    } else {
        # One value is shifted off the argument list per name in
        # @matrix_params (imported from Churn).
        foreach $j (@matrix_params) {
            $cpd{dim}[$i]{$j} = shift;
        }
    }
}

# Input file: with no file arguments and a terminal on stdin there is nothing
# to read.
die "$whatami: Specify an input file or provide one on standard input.\n" if $isatty && ! @ARGV;

### Do it

# Read every input line (named files, else stdin) and classify it by prefix.
while (<>) {
    if (/^c\*\*(.*)/) {         # c** line; read filename
        $cpd{name} = $1;
    } elsif (/^xpk (...)/) {    # xpk line; read internal dimension
        ($intdim = $1) =~ s/\s//g;
        if ($dimd) {
            # No -d was given: take the dimensionality from the file and
            # build the peak-line pattern from one 1D pattern per dimension.
            $dim = $intdim;
            $regexp = $regexp1 x $dim;
        } else {
            if ($dim != $intdim) {
                warn "$whatami: Dimensionalities from -d ($dim) and $ARGV ($intdim) disagree!\n";
            }
        }
    # NOTE(review): /o compiles this pattern once, with whatever $regexp
    # holds the first time this test is reached; later changes to $regexp are
    # not picked up.  In the surviving text $regexp is assigned only in the
    # xpk branch above, and only when -d was not given -- verify against the
    # missing span.
    } elsif (@words = /^ (.{5})$regexp(.{10})/o) { # Peak line
        # Trim whitespace from matched words ($i aliases each element, so
        # @words is modified in place)
        foreach $i (@words) {
            $i =~ s/^\s*(.*?)\s*$/$1/;
        }

        # Load words into %peak structure
        my %peak = ();
        $offset = 0;            # Index of the next unread element of @words
        $this_intensity = 1;    # Cleared if any assignment is null/ambi/junk
        $peak{item} = $words[$offset++];
        foreach $i (0 .. $dim - 1) {
            foreach $j ('cen', 'wid', 'ptr', 'asg') {
                $peak{dim}[$i]{$j} = $words[$offset++];
            }
            # Strip a trailing intensity letter from the assignment; $& is
            # the text the substitution just matched, i.e. that letter.
            if ($intensity && $peak{dim}[$i]{asg} =~ s/[smwea]$//) {
                if (defined $peak{intensity}) {
                    warn "$whatami: Peak intensity score in multiple dimensions at $ARGV line $.:\n$_";
                }
                $peak{intensity} = $&;
            }
            # Placeholder assignments are not expected to carry a score.
            if ($peak{dim}[$i]{asg} =~ /^(?:null|ambi|junk)/) {
                $this_intensity = 0;
            }
        }
        $peak{cc} = $words[$offset++];

        # A peak that should have a score but has none gets 'e' when -e is in
        # effect, and a warning otherwise.
        if ($this_intensity && ! defined $peak{intensity}) {
            if ($exch) {
                $peak{intensity} = 'e';
            } else {
                warn "$whatami: No peak intensity score at $ARGV line $.:\n$_";
            }
        }
        push(@{$cpd{peaks}}, \%peak);
    } else {
        warn "$whatami: Can't parse $ARGV input line $.:\n", $_;
    }
}

# Hand the finished structure to Churn's store_fd, naming STDOUT as the
# destination.
&store_fd(\%cpd, 'STDOUT');
exit;