OK, I have a set of scripts that work for me now. They're clunky and it's certainly not optimal, but they work.
Using qsub, I run a master script:
Code:
#!/bin/sh
#PBS -N pbsdsh
#PBS -l nodes=5:ppn=12
#
# Master job script, submitted with qsub.
#   1. Runs parsejobs.m under octave from the job directory.
#   2. Uses pbsdsh to start thrrun.sh on the processors allocated to the job.
#
# Required environment: PBS_O_WORKDIR (thrrun.sh is looked up there).

#cd $PBS_O_WORKDIR
# Stop immediately if the job directory is unreachable; otherwise octave and
# pbsdsh would run from whatever directory the job happened to start in.
cd /data/home/mkass/Jobs/aniak || exit 1

# Fail with a clear message instead of invoking "/thrrun.sh" when unset.
: "${PBS_O_WORKDIR:?PBS_O_WORKDIR is not set; submit this script with qsub}"

# Do not launch the workers if the octave step failed.
octave parsejobs.m || exit 1

pbsdsh -v sh "$PBS_O_WORKDIR/thrrun.sh"
I have an Octave script, parsejobs, that creates a script for each processor — one per thread on every node, 60 in total (I think this is how Skynet starts).
It's a little extra complicated because my 'line numbers' are not sequential, and (for reasons evident in the next script) I need sequential, numeric values for each directory.
It's in this script that the jobs are divided up and assigned to each processor.
Code:
% parsejobs -- divide the entries of linelist.txt among nNodes*nThr workers
% and write one shell script per worker.
%
% Inputs : linelist.txt (whitespace-separated tokens, one per job)
% Outputs: parsejobs.mat (variable opspthread, nNodes-by-nThr job counts)
%          lines.mat     (variable lines, cell column of the tokens)
%          thrrun.<k>.temp.sh for k = 0 .. nNodes*nThr-1
%
% Each generated script processes a contiguous range of sequential job
% numbers (1..nSounds); the job number, not the token text, names the
% data file (data/<n>.txt) and the working directory (<n>/).

nNodes = 5;
nThr = 12;
nWorkers = nNodes*nThr;

% Read every token in a single pass.  Counting with a "while ~feof(fid)"
% loop around fscanf can register one phantom empty token when the file ends
% with a newline, which inflates the job count by one.
txt = fileread('linelist.txt');
lines = regexp(txt, '\S+', 'match');
lines = lines(:);
nSounds = numel(lines);

% Jobs per node: an even share, with the remainder spread over the first nodes.
opspnode = floor(nSounds/nNodes) * ones(nNodes,1);
ro = mod(nSounds,nNodes);
opspnode(1:ro) = opspnode(1:ro) + 1;

% Jobs per thread within each node, split the same way.
opspthread = zeros(nNodes,nThr);
for ii = 1:nNodes
  b = floor(opspnode(ii)/nThr);
  ro = mod(opspnode(ii),nThr);
  opspthread(ii,:) = b;
  opspthread(ii,1:ro) = b + 1;
end

save('parsejobs.mat', 'opspthread');
save('lines.mat', 'lines');

% reshape walks the matrix column by column, so consecutive worker numbers
% step through the node rows first.
% NOTE(review): confirm this ordering is the intended mapping to worker ids.
opsthrcat = reshape(opspthread,nWorkers,1);

% Last and first job number handled by each worker.  A worker with zero jobs
% gets firstJob == lastJob+1, so its loop body never runs.
lastJob = cumsum(opsthrcat);
firstJob = lastJob - opsthrcat + 1;

% Loop body shared by every generated script.
%  - The per-job work runs inside a subshell "( ... )" so that a failed cd
%    cannot cause "rm" to run in the parent directory or leave the script in
%    the wrong directory for later iterations.
%  - The counter uses POSIX arithmetic instead of the bash-only "let", so the
%    script also works when it is started with plain sh.
loopBody = { ...
  'mkdir -p "$COUNTER"', ...
  'cp data/"$COUNTER".txt "$COUNTER"/', ...
  'cp makeEM1DFMobs.m "$COUNTER"/', ...
  'cp writeEM1DFMobsV3.m "$COUNTER"/', ...
  'cp em1dfmgen.m "$COUNTER"/', ...
  'cp start.mod "$COUNTER"/', ...
  'cp ref.mod "$COUNTER"/', ...
  'cp em1dfm/* "$COUNTER"/', ...
  '(', ...
  'cd "$COUNTER" || exit 1', ...
  'octave makeEM1DFMobs.m', ...
  'octave em1dfmgen.m', ...
  'wine em1dfm.exe', ...
  'rm -f ./*.exe', ...
  ')', ...
  'COUNTER=$((COUNTER+1))', ...
  'done'};

%% Make one script per worker
for ii = 1:nWorkers
  fname = sprintf('thrrun.%d.temp.sh', ii-1);
  fid = fopen(fname,'wt');
  if fid < 0
    error('parsejobs: cannot open %s for writing', fname);
  end
  fprintf(fid,'%s\n','#!/bin/bash');
  fprintf(fid,'%s\n','cd /data/home/mkass/Jobs/aniak || exit 1');
  fprintf(fid,'COUNTER=%d\n',firstJob(ii));
  fprintf(fid,'while [ "$COUNTER" -lt %d ]; do\n',lastJob(ii)+1);
  % The format is reused for every element of loopBody: one line each.
  fprintf(fid,'%s\n',loopBody{:});
  fclose(fid);
end
And finally, this script executes each script written out above.
Code:
#!/bin/sh
# Per-processor launcher started by pbsdsh: runs the generated script whose
# number matches this task's PBS_VNODENUM.
#
# Required environment: PBS_VNODENUM.

# Abort rather than look for the script in the wrong directory.
cd /data/home/mkass/Jobs/aniak || exit 1

# Without this check an unset variable would silently expand to
# "thrrun..temp.sh".
: "${PBS_VNODENUM:?PBS_VNODENUM is not set; start this script through pbsdsh}"

# The generated scripts carry a #!/bin/bash shebang, which is ignored when
# the file is passed to an interpreter as an argument, so run them with bash
# as they declare.
bash "thrrun.${PBS_VNODENUM}.temp.sh"
So there you have it. An operation that would have taken 13 days was done in 5 hours. There's plenty of room for improvement, but this works for me. Cheers!