26 July 2011

8. Very rudimentary peak-finding

The following scripts looks through a file with one x column and several y columns, and returns values larger than a certain cutoff.

Usage:
findpeaks filename columnnumber cutoffvalue
e.g.
findpeaks test.dat 3 0.01

findpeaks:
#!/usr/bin/python2.6
import sys
infile=sys.argv[1]
colno=int(sys.argv[2])
cutoff=float(sys.argv[3])

minsig=100
maxsig=0


spectra=open(infile,'r')



for spectrum in spectra:
spectrum=spectrum.rstrip('\n')
spectrum=spectrum.split(' ')
# print spectrum

try:
spectrum[colno]=float(spectrum[colno])
if spectrum[colno]>cutoff:
print spectrum[1],' ',spectrum[colno]

if spectrum[colno]<minsig:
minsig=spectrum[colno]
if spectrum[colno]>maxsig:
maxsig=spectrum[colno]


except:
a=0
maxsig=0
print "not working: "

print "max sig: ",maxsig,", minsig: ",minsig
spectra.close

20 July 2011

7. Processing 1D Bruker nmr data

Bruker 1D binary NMR files can be processed using a combination of cat, grep, sed, gawk and od, together with python and octave (w/ octave-optim) for some fancy line-fitting.

 brukdig2asc:
 #!/bin/bash
#usage: brukdig2asc
SW=`cat acqus | grep 'SW_h' | sed 's/\=/\t/g' | gawk '{print $2}'| tr -d '\n'`
TD=`cat acqus | grep 'TD=' | sed 's/\=/\t/g' | gawk '{print $2}'| tr -d '\n'`
O=`cat acqus | grep '$O1=' | sed 's/\=/\t/g' | gawk '{print $2}'`
SFO=`cat acqus | grep 'SFO1=' | sed 's/\=/\t/g' | gawk '{print $2}'`
#TIME=16384
#SWEEP=23809.5238095238
#AQ=`echo "1/(23809.5238095238/(16384/2))" | bc -lq`
cp fid fid.bin
ls fid.bin | cpio -o | cpio -i --swap -u
od -An -t dI -v -w8 fid.bin| gawk '{print NR,$1,$2}'| sed '1,64d' >fid.asc1
pynmr $SW $TD $O $SFO
makespec

pynmr:
#!/usr/bin/python2.6
import sys
#print str(sys.argv)
sweepwidth=float(sys.argv[1])
nopts=int(sys.argv[2])
centrefreq=float(sys.argv[3])
basefreq=float(sys.argv[4])

aq=1/(sweepwidth/(nopts/2))
#print str(sweepwidth),str(nopts)
f=open('fid.asc1','r')
g=open('fid.asc','w')
for line in f:
    line=line.rstrip('\n')
    line=line.split(' ')
#    print line
    freq=float(line[0])/(nopts/2)*sweepwidth+(centrefreq-sweepwidth/2)
    line[0]=(float(line[0])/(nopts/2))*aq
    g.write(str(line[0])+'\t'+str(line[1])+'\t'+str(line[2])+'\t'+str(freq)+'\n')
f.close
g.close

makespec:

#!/bin/bash
octave --silent --eval "fid=load('fid.asc');
#make xaxis
[nopts b]=size(fid);
aq=max(fid(:,1));
sw=nopts/aq;
freqx=linspace(0,sw,nopts)';

#apodizing
lb=5/10000;
fid(:,2)=fid(:,2).*exp(-lb.*freqx);
fid(:,3)=fid(:,3).*exp(-lb.*freqx);

#phasing
spec=[fid(:,1) real(fftshift(fft(fid(:,2)+i*fid(:,3)))) imag(fftshift(fft(fid(:,2)+i*fid(:,3))))];
[a b]=size(spec); spec(a/2,2:3)=[0 0];
phc=linspace(0,2*pi,180);
maxsig=0;k=1;
for n=1:180;
        localmax=max( real( (spec(:,2)+i*spec(:,3)).*exp(i*phc(n)) ));
        if (localmax>maxsig)
                maxsig=localmax;
                k=n;
        endif
endfor;
#simple baseline
absd=inline('m+t*0','t','m');
guess=0;
[f m kvg iter corp covp covr stdresid z r2]=leasqr(fid(:,4),real((spec(:,2)+i*spec(:,3)).*exp(i*phc(k))),guess,absd);
#disp(m)
#disp(sqrt(diag(covp)))

#make spectrum
spectrum=[fid(:,4) real((spec(:,2)+i*spec(:,3)).*exp(i*phc(k)))-m imag((spec(:,2)+i*spec(:,3)).*exp(i*(phc(k)+pi/2)))-m];

#fitting
pkg load optim
[a b]=max(spectrum(:,2));
centre=fid(b,4);
guess=[10 max(spec(:,2))]; #centre width height
#disp(guess)
lorentzian=inline('p(2)*(1/pi)*(p(1)/2)./((t-centre).^2+(0.5*p(1))^2)','t','p');
[f p r2]=leasqr(fid((b-150):(b+150),4),spectrum((b-150):(b+150),3),guess,lorentzian);

#filter out artefacts from fitting set
filtered=[0 0];
res=floor((max(fid(:,4))-min(fid(:,4)))/nopts);
for l=(b-ceil(5*p(1)/res)):(b+5*ceil(p(1))/res)
delta=lorentzian(fid(l,4),p)-spectrum(l,2);

if (delta>(lorentzian(fid(l,4),p))/1.2)
# do nothing
else
filtered=[filtered; fid(l,4) spectrum(l,2)];
endif
endfor

filtered=[ filtered(2:size(filtered(:,2)),1) filtered(2:(size(filtered(:,2))),2)  ];
[f p r2]=leasqr(filtered(:,1),filtered(:,2),p,lorentzian);

#disp(p')
#disp(r2)
params=[centre centre/67.8 max(lorentzian(fid(:,4),p)) p(1) 1.000 p(2)];
disp(params)
#save
spex=[fid(:,4) real((spec(:,2)+i*spec(:,3)).*exp(i*phc(k)))-m imag((spec(:,2)+i*spec(:,3)).*exp(i*(phc(k)+pi/2)))-m lorentzian(fid(:,4),p)];
save spectrum.dat spex;"

6. Miscellaneous scripts

The following two scripts look through a set of files with paired xy values but not necessarily the same number of x values in each file, extracts all the x values from all the files (script numero uno), then makes sure that all x values are present in all files (and zero-fills to make up for missing data). In short, it's a way of creating files that can be used to generate a matrix of values where all rows have the same number of fields.

The first script:
homogenise.sh
 #!/bin/bash
for e in {0..200..10} {220..300..20}
    do
        cat $e.dat | gawk '{ printf("%s\t",$1) }'
        echo ""
    done

The second script:

makelist.py
#!/usr/bin/python2.6
import sys
infile=sys.argv[1]

f=open(infile,'r')
arr=[]

for line in f:
    line=line.rstrip('\n')
    line=line.split('\t')
    try:
        for i in line:
            i=float(i)
            if i not in arr:
                arr+=[i]
    except:
        arr=arr       

f.close

arr.sort()
mylist=arr
last = mylist[-1]
for i in range(len(mylist)-2, -1, -1):
    if last == mylist[i]:
        del mylist[i]
    else:
        last = mylist[i]

voltages=[0,10,20,30,40,50,60,70,80,90,100,110,120,130,140,150,160,170,180,190,200,220,240,260,280,300]
myys=[0]*len(mylist)

for n in voltages:
    print "voltage: ',n,'\n'
    f=open(str(n)+'.dat','r')
    g=open(str(n)+'full.dat','w')
    arrx=[]
    arry=[]

    for line in f:
        line=line.rstrip('\n')
        line=line.split(' ')
        try:
            line[0]=float(line[0])
            line[1]=float(line[1])
            arrx+=[line[0]]
            arry+=[line[1]]
          
        except:
            #do nothing
            print "fail"

    for i in range(0,len(arrx)-1):

        try:
            myys[mylist.index(arrx[i])]=arry[i]
        except:
            a=0           

    for i in range(0,len(myys)-1):
        g.write(str(mylist[i])+'\t'+str(myys[i])+'\n')

    f.close
    g.close