⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mpirun.ch_v3.in

📁 Path MPICH-V for MPICH the MPI Implementation
💻 IN
字号:
#! /bin/sh
#
#  MPICH-V3
#  Copyright (C) 2002, 2003 Groupe Cluster et Grid, LRI, Universite de Paris Sud
#
#
#  This file is part of MPICH-V3.
#
#  MPICH-V3 is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  MPICH-V3 is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with MPICH-V3; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#  $Id: mpirun.ch_v3.in,v 1.2 2004/03/23 19:47:36 herault Exp $
#
# Overview
# --------
# Builds three files and then launches $bindir/mpirun.v2run on them:
#   $xwpgfile          one line per event logger (EL), checkpoint server (CS),
#                      checkpoint scheduler (SC) and compute node (CN)
#   $xwcommandsfile    key=value list of the helper commands and settings
#   $xwavailfile       one line per entry of $security
#
# Inputs read from the environment or from the sourced helper files
# ($MPIRUN_HOME/mpirun.args and $MPIRUN_HOME/mpirun.pg_v2):
#   MPIRUN_HOME, HOST, np, nnodes, progname, machinelist, nprocuselist,
#   eventLoggerPortList, checkpointServerPortList, dualCapabilityList,
#   security, and the optional tuning variables tested with -z / -n below.
#
# Exit status: the status of the mpirun.v2run command when it is run,
# 1 otherwise.

exitstatus=1

# Default configurations for ch_v2 device
# Event logger related
rel=4
del=1
threadsEL=3
useExplicitELonly="0"
# Checkpoint server related
rcs=4
dcs=1
useExplicitCSonly="0"

just_testing=0
MPI_HOST=$HOST
nolocal=1
curarch=2
dispatcherHost=$HOST
dispatcherPort=5555
bindir=${MPIRUN_HOME}
if [ -z "$rshcmd" ] ; then
  rshcmd=`which rsh`
fi
elcmd="$bindir/mpirun.v2eventlogger"
wrapcmd="$bindir/mpirun.v2d"
killcmd="$bindir/mpirun.v2kill"
cscmd="$bindir/mpirun.v2checkpointserver"
sccmd="$bindir/mpirun.v2checkpointscheduler"
tmpCS="/tmp"
if [ -z "$pel_default" ] ; then pel_default=5001 ; fi
if [ -z "$pcs_default" ] ; then pcs_default=4001 ; fi
if [ -z "$pdp_default" ] ; then pdp_default=5555; fi
# The closing bracket must be its own word; without the space the test
# fails with "missing ]" and the default is never applied.
if [ -z "$pComm" ] ; then pComm=9000; fi
if [ -z "$cschedPort" ] ; then cschedPort=2003; fi
dispatcherPort=$pdp_default
MAX_CLUSTER_SIZE=16
xwworkdir="."
mpirun_verbose=0
# NOTE(review): job_id only receives its default ($$) further down, so
# unless it is already set here this name ends in a bare "." -- confirm
# whether mpirun.args is expected to overwrite xwpgfile.
xwpgfile="v2pgfile.$job_id"

if [ -z "$argsset" ] ; then
   . $MPIRUN_HOME/mpirun.args
   argsset=1
fi

if [ -z "$keep_xwfile" ] ; then
  keep_xwfile=0
fi
if [ -z "$machineFile" ] ; then
  machineFile="machines.ch_v2"
fi
if [ -z "$job_id" ] ; then
  job_id=$$
fi
v2_tmp="$PWD/.$job_id/"
xwavailfile="$xwpgfile.avail"
xwcommandsfile="$xwpgfile.commands"

# We now proceed with the different initializations of
# the number of event loggers and checkpoint servers
if [ -n "$eventLoggerRatio" ] ; then
    # This used to assign to "rcm", which nothing reads, so the requested
    # ratio was silently ignored.
    rel=`expr $eventLoggerRatio + 0`
fi
if [ -n "$eventLoggerNumber" ] ; then
    npEL=`expr $eventLoggerNumber + 0`
    rel=`expr $np / $npEL`
    # More loggers than processes gives a ratio of 0, which would later be
    # used as a divisor; one process per logger is the closest valid ratio.
    if [ "$rel" -lt 1 ] 2>/dev/null ; then rel=1 ; fi
else
    npEL=`expr $np / $rel + 1`
fi

if [ -n "$checkPointServerRatio" ] ; then
    rcs=`expr $checkPointServerRatio + 0`
fi
if [ -n "$checkPointServerNumber" ] ; then
    npCS=`expr $checkPointServerNumber + 0`
    rcs=`expr $np / $npCS`
    # Same guard as for the event loggers above.
    if [ "$rcs" -lt 1 ] 2>/dev/null ; then rcs=1 ; fi
else
    npCS=`expr $np / $rcs + 1`
fi

npTotal=`expr $np + $npEL + $npCS`

if [ -n "$xwSecurityRatio" ] ; then
# NOTE: we calculate on the number of processors, but the margin is a number of NODES of course...
    xwSecurityMargin=`expr $xwSecurityRatio  \* $npTotal / 100 `
else
    if [ -z "$xwSecurityMargin" ] ; then
        # The default ratio is 30%
        xwSecurityMargin=`expr 30  \* $npTotal / 100 `
    fi
fi

#
# Construct the procgroup file.
# If p4 was built with comm=shared, set MPI_MAX_CLUSTER_SIZE to 16 unless
# it has already been set.
if [ "$COMM" = "shared" ] ; then
    if [ -z "$MPI_MAX_CLUSTER_SIZE" ] ; then
        MPI_MAX_CLUSTER_SIZE=16
    fi
else
    MPI_MAX_CLUSTER_SIZE=1
fi

. $MPIRUN_HOME/mpirun.pg_v2
# machinelist has the hosts
# eventloggerPortList has the event logger ports
# checkpointServerPortList has the checkpoint servers ports
# dualcapabilitylist has the list indicating if a machine can be both EL and CS
# archuselist has the architectures
# nprocuselist has the number of processors
#
# We use this form instead of "local 0" in-case the user is trying to
# select a second network whose names are not those returned by
# "hostname".  For example, a system with a DEC Gigiswitch, Myricom
# network, or IP over the IBM SP2 switch (HPS).
prognamemain=$progname

if [ -z "$xwpgfileGiven"  ] ; then
    #xwpgfile="$PWD_TRIAL/PI$$"
    # We need to explicitly redirect stderr.
    # Under Solaris, an echo that fails aborts the script (!)  There is
    # no way around this using echo, so we use cat instead.
    cat <<EOF 2>/dev/null > "$xwpgfile"
Test
EOF
    if [ ! -s "$xwpgfile" ] ; then
        # May not be able to write there.  Try the user's home directory
        # NOTE(review): xwavailfile and xwcommandsfile were derived from the
        # old name above and are not moved along with it.
        xwpgfile=$HOME/$xwpgfile
    else
        /bin/rm -rf "$xwpgfile"
    fi

    # cnt is the "index" into the list of machines
    procNum=1
    nprocval=`echo $nprocuselist | cut -d' ' -f1`
    # for the "local" entry, the number is the number of ADDITIONAL
    # processes.
    if [ -z "$nprocval" ] ; then
        # just in case...
        nprocval=1
    fi
    nprocval=`expr $nprocval - 1`
    archval=$archlocal
    proginstance=`echo $progname | sed "s/%a/$archval/g"`
    prognamemain=$proginstance
    if [ "$just_testing" = 1 ] ; then
        if [ "$nolocal" = 0 ] ; then
            if [ -z "$nproclocal" ] ; then
                nprocval=0
            else
                nprocval=`expr $nproclocal - 1`
            fi
        else
            procNum=2
        fi
    else
        if [ "$nolocal" = 0 ] ; then
            if [ -z "$MPI_HOST" ] ; then
                echo "No value for MPI_HOST!"
                echo "MPI_HOST is set either from your environment or by"
                echo "processing for an MPI machine type of ch_p4, ch_tcp, "
                echo "ch_nexus, or sgi_mp.  The machine type you used was $machine ."
                echo "Use the -machine <machinename> argument to select a "
                echo "specific machine type."
                exit 1
            fi
            if [ -z "$nproclocal" ] ; then
                nprocval=0
            else
                nprocval=`expr $nproclocal - 1`
            fi
        else
            procNum=2
        fi
    fi
    if [ "$just_testing" = 1 ] ; then
        cmdline="echo"
    else
        cmdline="eval"
    fi

# Print the first IPv4 address that host(1) reports for the name in $1, or
# nothing when the lookup yields none.  Matching on " has address " skips
# the "has IPv6 address" lines, whose fourth field is the word "address",
# and stopping at the first hit keeps the result on a single line.
resolveIP() {
  host "$1" | awk '/ has address / { print $4; exit }'
}

# The following function is used to get the address of the machine if it's
# connected to a high performance network. Returns the normal ip if none exists.
# Reads:  hostIP, ipTranslationFile
# Writes: fastIP (and newIP when a translation file is consulted)
getFastIP() {
  fastIP=$hostIP
  if [ -n "$ipTranslationFile" ] && [ -n "$hostIP" ] ; then
    # -F: the dots of an address are literal text, not regex wildcards.
    # NOTE(review): this is still a substring match on the whole line, so
    # 10.0.0.1 also selects a 10.0.0.10 line -- tighten once the format of
    # the translation file is confirmed.
    newIP=`grep -F -- "$hostIP" "$ipTranslationFile" | cut -d' ' -f 2`
    if [ -n "$newIP" ] ; then
      fastIP=$newIP
    fi
  fi
}

# Take one free CPU away from machine number $1 (1-based) in $nprocuselist.
# Uses cpuidx, nprocs and newcpulist as scratch variables.
takeFreeCpu() {
  newcpulist=""
  cpuidx=0
  for nprocs in $nprocuselist ; do
    cpuidx=`expr $cpuidx + 1`
    if [ "$cpuidx" = "$1" ] ; then
      nprocs=`expr $nprocs - 1`
    fi
    newcpulist="$newcpulist $nprocs"
  done
  nprocuselist="$newcpulist"
}

    # We first take care of the event loggers
    eventLoggerUsed=""
    fastEventLoggerUsed=""
    eventLoggerPortUsed=""
    elidx=0
    elinc=0
#DEBUG
    elDistribution=1
#    elNb=2
#ENDDEBUG
    i=1
    elNb=$npEL
    while [ "$elidx" -lt $elNb ] ; do
        port=`exec echo $eventLoggerPortList | cut -d' ' -f $i`
        freecpus=`exec echo $nprocuselist | cut -d' ' -f $i`
        if [ "$port" = -1 ] || [ "$freecpus" = 0 ] ; then
            i=`expr $i + 1` #TODO: check for infinite loops!
        else
            hostName=`exec echo $machinelist | cut -d' ' -f $i`
            hostIP=`resolveIP "$hostName"`
            getFastIP
            eventLoggerUsed="$eventLoggerUsed $hostIP"
            fastEventLoggerUsed="$fastEventLoggerUsed $fastIP"
            eventLoggerPortUsed="$eventLoggerPortUsed $port"
            echo "EL $hostIP $fastIP $port"
            # Now we update the cpu list as well as the checkpointserver property
            #  if the node cannot perform both capabilities at the same time
            newcpulist=""
            # Start from an empty list on every iteration; without this the
            # list kept growing and only the first logger host was masked.
            newCSPList=""
            cpuidx=0
            dc=`exec echo $dualCapabilityList | cut -d' ' -f $i`
            # Both lists are rebuilt in one walk, which is why this loop does
            # not call takeFreeCpu.
            for nprocs in $nprocuselist ; do
                cpuidx=`expr $cpuidx + 1`
                if [ $cpuidx = $i ] ; then
                    cpuval=`expr $freecpus - 1`
                    newcpulist="$newcpulist $cpuval"
                    cspe="-1"
                else
                    newcpulist="$newcpulist $nprocs"
                    cspe=`exec echo $checkpointServerPortList | cut -d" " -f $cpuidx`
                fi
                newCSPList="$newCSPList $cspe"
            done
            nprocuselist="$newcpulist"
            if [ "$dc" != "1" ] ; then checkpointServerPortList="$newCSPList"; fi
            elidx=`expr $elidx + 1`
            i=`expr $i + $elDistribution`
        fi
    done > "$xwpgfile"

    # We now take care of the checkpoint servers
    checkpointServerUsed=""
    fastCheckpointServerUsed=""
    checkpointServerPortUsed=""
    csidx=0
    csinc=0
#DEBUG
    csDistribution=1
#    csNb=2
#ENDDEBUG
    i=1
    csNb=$npCS
    while [ "$csidx" -lt $csNb ] ; do
        port=`exec echo $checkpointServerPortList | cut -d' ' -f $i`
        freecpus=`exec echo $nprocuselist | cut -d' ' -f $i`
        if [ "$port" = -1 ] || [ "$freecpus" = 0 ] ; then
            i=`expr $i + 1` #TODO: check for infinite loops!
        else
            hostName=`echo $machinelist | cut -d' ' -f $i`
            hostIP=`resolveIP "$hostName"`
            getFastIP
            checkpointServerUsed="$checkpointServerUsed $hostIP"
            fastCheckpointServerUsed="$fastCheckpointServerUsed $fastIP"
            checkpointServerPortUsed="$checkpointServerPortUsed $port"
            echo "CS $hostIP $fastIP $port $tmpCS"
            # Now we update the cpu list
            takeFreeCpu "$i"
            csidx=`expr $csidx + 1`
            i=`expr $i + $csDistribution`
        fi
    done >> "$xwpgfile"

    # Then the checkpoint scheduler(s)
    checkpointSchedulerUsed=`echo $eventLoggerUsed | cut -d' ' -f 1`
    fastCheckpointSchedulerUsed=""
    {
      hostIP="$checkpointSchedulerUsed"
      getFastIP
      fastCheckpointSchedulerUsed="$fastCheckpointSchedulerUsed $fastIP"
      echo "SC $hostIP $fastIP $cschedPort"
    } >> "$xwpgfile"
#DEBUG
    current_scs=$fastIP
    current_scp=$cschedPort
#DEBUG

    rank=0
    i=0  # The index of the machine in machinelist
    firstpass=1
    placedThisPass=0  # set once the current sweep has placed a process
    allMachines=""
    while [ $rank -lt $np ] ; do
        i=`expr $i + 1`
        if [ $i -gt $nnodes ]; then
            # A complete sweep after the first pass that placed nothing means
            # every machine is out of CPUs; the loop could never finish.
            if [ $firstpass -eq 0 ] && [ $placedThisPass -eq 0 ] ; then
                echo "Not enough free processors to place $np processes : Exiting." >&2
                exit 1
            fi
            i=1
            firstpass=0
            placedThisPass=0
        fi

        # No need to go further if there are no CPUs free on this machine
        freecpus=`exec echo $nprocuselist | cut -d' ' -f $i`
        if [ "$freecpus" = 0 ] ; then continue ; fi

        machine=`exec echo $machinelist | cut -d' ' -f $i`
        hostIP=`resolveIP "$machine"`

        # As we would rather use as many machines as possible, try to avoid using
        # the EL and CS machines, for the first pass only of course.
        # The "used" lists hold IP addresses, so the comparison is done on the
        # resolved address, padded with blanks to match whole entries only.
        if [ $firstpass -eq 1 ] && [ -n "$hostIP" ] ; then
            case " $eventLoggerUsed $checkpointServerUsed " in
                *" $hostIP "*) continue ;;
            esac
        fi

        elidx=`expr $rank / $rel + 1`
        current_els=`exec echo $fastEventLoggerUsed | cut -d' ' -f $elidx`
        current_elp=`exec echo $eventLoggerPortUsed | cut -d' ' -f $elidx`
        cpidx=`expr $rank / $rcs + 1`
        current_cps=`exec echo $fastCheckpointServerUsed | cut -d' ' -f $cpidx`
        current_cpp=`exec echo $checkpointServerPortUsed | cut -d' ' -f $cpidx`
        getFastIP
        myPort=`expr $pComm + $rank`
        echo "CN $rank $machine $hostIP $fastIP $myPort $current_els $current_elp $current_cps $current_cpp $current_scs $current_scp"

        # We now remove 1 processor from the machine in the nprocuselist variable
        takeFreeCpu "$i"
        placedThisPass=1

        rank=`expr $rank + 1`
    done >> "$xwpgfile"

    # We now write the command file
    {
        echo "pwd=$PWD"
        echo "rshcmd=$rshcmd"
        echo "cscmd=$cscmd"
        echo "sccmd=$sccmd"
        echo "elcmd=$elcmd"
        echo "prog=$progname"
        echo "wrapper=$wrapcmd"
        echo "kill=$killcmd"
        echo "just_testing=$just_testing"
        echo "v2tmp=$v2_tmp"
        echo "keep_v2file=$keep_xwfile"
    } > "$xwcommandsfile"

    # The avail file, specifying which nodes can be used, and with what options.
    # Each entry of $security is a colon separated record.  IFS is put back
    # after every use: left at ":" it would later split the host:port value
    # inside the unquoted $progArguments.
    savedIFS=$IFS
    IFS=" "
    for node in $security ; do
        IFS=":"
        set -- $node
        IFS=$savedIFS
        hostname=$1
        ip=`resolveIP "$hostname"`
        nprocs=$2
        elp=$3
        csp=$4
        dc=$5
        echo "$hostname $ip $nprocs $elp $csp $dc"
    done > "$xwavailfile"
    IFS=$savedIFS

    # make sure that all the files needed by runprog have been written
    if [ ! -r "$xwpgfile" ] ; then
        echo Failed to write "$xwpgfile" : Exiting.
        exit 1
    fi
    if [ ! -r "$xwcommandsfile" ] ; then
        echo Failed to write "$xwcommandsfile" : Exiting.
        exit 1
    fi
    if [ ! -r "$xwavailfile" ] ; then
        echo Failed to write "$xwavailfile" : Exiting.
        exit 1
    fi

    # NOW we launch the program
    if [ -z "$debug_command" ] ; then
      debug_command=`resolveIP "$HOST"`
      debug_command="$debug_command:1976"
    fi
    progArguments="$bindir/mpirun.v2run -f $xwpgfile -g $job_id -p $dispatcherPort -debug $debug_command"
    if [ -n "$checkpointTime" ] ; then
      progArguments="$progArguments -checkpoint $checkpointTime"
    fi
    if [ -n "$debugFile" ] ; then
      progArguments="$progArguments -debugfile $debugFile"
      echo $progArguments
    fi
    if [ -z "$noRun" ] ; then
      # NOTE(review): eval re-parses the assembled string, so shell
      # metacharacters in any of the interpolated values are interpreted.
      eval $progArguments
      # Report how the run actually ended instead of the initial 1.
      exitstatus=$?
    fi

    # when the execution is finished, it's time to clean
    if [ $keep_xwfile -ne "1" ]; then
      rm -f "$xwpgfile"
      rm -f "$xwcommandsfile"
      rm -f "$xwavailfile"
    fi
fi

exit $exitstatus

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -