#!/bin/csh -f
#
# send mail when the job is aborted (a) and when it ends (e)
#PBS -m ae
## reserve 2 node(s), and run two processes on each node
##PBS -l nodes=2:ppn=2
## Request time for job
##PBS -l walltime=00:01:00
# Output from the myrinet interface: 1 = verbose, 0 = non-verbose
setenv GMPIRUN_VERBOSE 0
# Terminate the job one second after any process terminates
setenv GMPIRUN_KILL 1

## NOTE: PBS NODE AND TIME COMMANDS ARE NOW PASSED ON THE COMMAND LINE ONLY
 
# run_name must be supplied on the qsub command line (-v run_name=...).
# It is used below to build the input file name, so abort if it is missing
# or empty, and say why so the job output explains the failure.
# The two tests are separate because csh expands "$run_name" before
# evaluating the expression, which would itself fail on an unset variable.
if ( $?run_name == 0 ) then
	echo "ERROR: run_name is not set; submit with qsub -v run_name=NAME"
	exit -1
endif
if ( "$run_name" == "" ) then
	echo "ERROR: run_name is empty; submit with qsub -v run_name=NAME"
	exit -1
endif

# Locations and names used by this job.
# run_name is passed in from the qsub command line; PBS_JOBID is not set
# in this file (presumably provided by PBS in the job environment).
set FROMDIR = "/home/nla101/epoc/"
set DATADIR = "/data/nla101/"
set prog = Z1EPOC
# input file name derived from the run name
set RUNFLAGS = ${run_name}.input
# full path of the executable
set APPLICATION = ${FROMDIR}${prog}
# per-job directory under FROMDIR that results are copied back to
set COPYDIR = ${FROMDIR}${PBS_JOBID}
 

##sample calls
#qsub  -l nodes=1:ppn=2,walltime=00:10:00 ~/scripts/run_epoc.csh -N test -W depend=afterok:40948 -v run_name=test,checkp=40948,checkf=check
#qsub  -l nodes=1:ppn=2,walltime=00:10:00 ~/scripts/run_epoc.csh -N test -v run_name=test,checkp=40948,checkf=check
#qsub  -l nodes=1:ppn=2,walltime=00:10:00 ~/scripts/run_epoc.csh -N test -v run_name=test,checkp=yes
#qsub  -l nodes=1:ppn=2,walltime=00:10:00 ~/scripts/run_epoc.csh -N test -v run_name=test

## checkp and checkf control checkpointing and are optional. run_name must be included, as must the -l options.
## If checkp is specified but checkf is not, the checkpoint file used is $FROMDIR$run_name.check,
## and in this case the value of checkp is not used. If checkf is specified, the checkpoint file used is
## $FROMDIR$checkp.erik.beo.org/$run_name.$checkf, which is copied to $RUNDIR$run_name.check. In this case
## the value of checkp matters: it is the PBS_JOBID number that names the directory of an already completed run.
## If the -W depend=afterok:# option is given, the job will not run until the other job completes successfully.

## can use the ~/scripts/submit.csh file for easy submission
#csh ~/scripts/submit.csh 1 00:10:00 test
#csh ~/scripts/submit.csh 1 00:10:00 test yes
#csh ~/scripts/submit.csh 1 00:10:00 test 40948 check
#csh ~/scripts/submit.csh 1 00:10:00 test 40948 check 40948


# Name of the per-job run directory (DATADIR followed by the PBS job id).
# This only sets the variable; no directory is created here.
set RUNDIR = ${DATADIR}${PBS_JOBID}

# Work from the data directory on the local disk
cd ${DATADIR}

# Run the parallel part by sourcing run_para.csh into this shell, so it
# sees the variables set above and can set variables for the code below.
# NOTE(review): presumably run_para.csh creates RUNDIR and defines $nodes,
# which is used later in this file but never set here - confirm.
source /home/nla101/scripts/run_para.csh

 
# Copy the results from every node back to COPYDIR, then remove the
# per-job run directory (RUNDIR) on that node.
# NOTE(review): $nodes is not defined in this file - presumably set by the
# sourced run_para.csh; confirm.

# Safety check: with an empty job id RUNDIR would be DATADIR itself, and the
# rm -rf below would wipe the whole data area.
if ( "${RUNDIR}" == "${DATADIR}" ) then
   echo "ERROR: RUNDIR equals DATADIR, is PBS_JOBID empty? Nothing copied or removed"
   exit -1
endif

# Make the destination directory; -p keeps an already existing one from
# being an error. Stop if it cannot be created so no node data is removed.
mkdir -p ${COPYDIR}
if ( $status != 0 ) then
   echo "ERROR: cannot create ${COPYDIR}, leaving data on the nodes"
   exit -1
endif

echo "copying data back"
foreach i ( $nodes )
   # One remote shell per node. The && makes sure the data is deleted only
   # after a successful copy. Copying ${RUNDIR}/. instead of ${RUNDIR}/*
   # also picks up hidden files and does not fail on an empty directory.
   # (A separate 'rsh cd' would have no effect: each rsh starts a new shell.)
   rsh ${i} "cp -rf ${RUNDIR}/. ${COPYDIR}/. && rm -rf ${RUNDIR}"
end