⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 vprivcheckpoint.c

📁 Path MPICH-V for MPICH the MPI Implementation
💻 C
字号:
/*
  MPICH-V
  Copyright (C) 2002, 2003 Groupe Cluster et Grid, LRI, Universite de Paris Sud

  This file is part of MPICH-V.

  MPICH-V is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  MPICH-V is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with MPICH-V; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

  $Id: vprivcheckpoint.c,v 1.5 2006/01/24 19:35:01 rodrigue Exp $
*/

#include "config.h"
#include "vprivcheckpoint.h"

#include <errno.h>
#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <fcntl.h>
#include <wait.h>
#include <time.h>
#include <libcr.h>

#include "debug.h"
#include "mpid.h"
#include "mpiddev.h"
#include "mpimem.h"

extern void VPreCheckpoint(void);
extern void VPostCheckpoint(void);

/* Path of the ad-hoc trace file written around the BLCR calls. */
#define VPRIV_DEBUG_LOG "/tmp/debug.log"

static int VGROUP;
static int VRANK;

/* Set by blcr_on_checkpoint(): non-zero when running in a restarted image. */
static int blcr_is_restart = 0;

/* PID of the forked checkpoint clone, 0 once it has been reaped.
   Written from the SIGCHLD handler, hence volatile. */
static volatile pid_t cpid;

/* SIGCHLD disposition that was in place before on_child() was installed. */
static struct sigaction soa;

/**
 * Open the debug trace file.
 * @param mode fopen() mode string
 * @return the stream, or NULL when the file cannot be opened
 */
static FILE *debug_open(const char *mode)
{
  return fopen(VPRIV_DEBUG_LOG, mode);
}

/**
 * printf-style write to the debug trace; silently does nothing when the
 * trace file could not be opened (f == NULL).
 */
static void debug_printf(FILE *f, const char *fmt, ...)
{
  va_list ap;

  if (f == NULL)
    return;
  va_start(ap, fmt);
  vfprintf(f, fmt, ap);
  va_end(ap);
}

/**
 * Close the debug trace; accepts NULL.
 */
static void debug_close(FILE *f)
{
  if (f != NULL)
    fclose(f);
}

/**
 * SIGCHLD handler installed while a checkpoint clone is alive.
 *
 * Reaps every terminated child.  When the checkpoint clone itself is reaped,
 * cpid is cleared and the previous SIGCHLD disposition (soa) is restored.
 * For any other child the previous handler, if it was a real function, is
 * invoked with the same arguments.
 */
static void on_child(int s, siginfo_t *i, void *ptr)
{
  /* waitpid()/sigaction() may overwrite errno of the interrupted code. */
  int saved_errno = errno;
  pid_t p;

  for (;;)
    {
      p = waitpid(-1, NULL, WNOHANG);
      if (p <= 0)
        break;

      if (p == cpid)
        {
          cpid = 0;
          sigaction(SIGCHLD, &soa, NULL);
        }
      else if (soa.sa_flags & SA_SIGINFO)
        {
          if ((soa.sa_sigaction != (void *)SIG_DFL) &&
              (soa.sa_sigaction != (void *)SIG_IGN) &&
              (soa.sa_sigaction != NULL))
            soa.sa_sigaction(s, i, ptr);
        }
      else
        {
          if ((soa.sa_handler != SIG_DFL) &&
              (soa.sa_handler != SIG_IGN) &&
              (soa.sa_handler != NULL))
            soa.sa_handler(s);
        }
    }

  errno = saved_errno;
}

/**
 * BLCR callback registered by vprivcheckpoint_performcheckpoint().
 *
 * Lets BLCR take the checkpoint and records in blcr_is_restart whether the
 * process is continuing or has been restarted from the image.
 *
 * NOTE(review): the callback is registered with CR_SIGNAL_CONTEXT; if that
 * means signal-handler context, the stdio calls below are not
 * async-signal-safe -- confirm against the BLCR documentation.
 *
 * @param arg unused
 * @return always 0
 */
static int blcr_on_checkpoint(void *arg)
{
  FILE *f_debug;
  int rc;

  (void)arg;

  rc = cr_checkpoint(CR_CHECKPOINT_READY);
  /* BLCR documents: > 0 restart, 0 continue, < 0 failure.  A failed
     checkpoint must not be mistaken for a restart, otherwise the clone
     would resume the application next to its parent. */
  blcr_is_restart = (rc > 0);

  f_debug = debug_open("a");
  debug_printf(f_debug, "%d call of the callback: %p, %d\n",
               getpid(), (void *)&blcr_is_restart, blcr_is_restart);
  if (rc < 0)
    debug_printf(f_debug, "cr_checkpoint failed: %d\n", rc);
  if (blcr_is_restart)
    debug_printf(f_debug, "We have been restarted callback\n");
  else
    debug_printf(f_debug, "We are continuing\n");
  debug_printf(f_debug, "out of callback : blcr_is_restart = %p, %d\n",
               (void *)&blcr_is_restart, blcr_is_restart);
  debug_close(f_debug);

  return 0;
}

/**
 * Record the group/rank of this process and initialize BLCR.
 *
 * A cr_init() failure is reported through qerror(); whether qerror()
 * returns is not visible from this file.
 *
 * @param group group identifier, used to build the checkpoint file name
 * @param rank  rank identifier, used to build the checkpoint file name
 * @return 0
 */
int vprivcheckpoint_init(int group, int rank)
{
  VGROUP = group;
  VRANK = rank;

  printi("ckpt", "Initializing BLCR driver checkpoint lib for %d:%d\n", VGROUP, VRANK);

  if (cr_init() < 0)
    qerror("unable to initialize BLCR in MPI application: %s", cr_strerror(errno));

  return 0;
}

/**
 * Perform the actual checkpoint.
 *
 * Forks a clone of the process; the clone asks BLCR to checkpoint itself to
 * TMPDIR/<group>:<rank>.ckpt.pipe and then exits.  When the image is later
 * restarted, execution resumes inside the clone's code path, which calls
 * VPostCheckpoint() and returns 0 to the caller.
 *
 * @param ckpt_wait non-zero: block until the clone has terminated;
 *                  zero: return as soon as the clone has been forked
 * @return -1 on failure, 0 on success
 */
int vprivcheckpoint_performcheckpoint(int ckpt_wait)
{
  FILE *f_debug;
  struct timespec req;
  struct sigaction sa;
  sigset_t chld_mask, saved_mask, wait_mask;
  char filename[256];
  int len;

  len = snprintf(filename, sizeof(filename), TMPDIR"/%d:%d.ckpt.pipe", VGROUP, VRANK);
  if (len < 0 || (size_t)len >= sizeof(filename))
    {
      printe("checkpoint file name too long");
      return -1;
    }

  printi("ckpt", "before signal treatment start");

  /* Keep SIGCHLD blocked from before the handler is installed until the
     parent either waits in sigsuspend() or is done.  This closes two races:
     the clone exiting before cpid has been assigned, and the signal being
     delivered between the cpid test and the wait (lost wake-up). */
  sigemptyset(&chld_mask);
  sigaddset(&chld_mask, SIGCHLD);
  sigprocmask(SIG_BLOCK, &chld_mask, &saved_mask);

  sa.sa_sigaction = on_child;
  sigemptyset(&sa.sa_mask);
  sa.sa_flags = SA_SIGINFO;
  if (sigaction(SIGCHLD, &sa, &soa) == -1)
    {
      printe("sigaction SIGCHLD");
      sigprocmask(SIG_SETMASK, &saved_mask, NULL);
      return -1;
    }

  /*  generate a clone of the process, close all opened socket, write
      checkpoint image and exit the clone */
  switch (cpid = fork())
    {
    case 0:
      /* The clone has no children of its own: give it (and therefore the
         checkpoint image) the application's original SIGCHLD disposition,
         so that a restarted process does not keep on_child() installed. */
      sigaction(SIGCHLD, &soa, NULL);
      sigprocmask(SIG_SETMASK, &saved_mask, NULL);

      printi("ckpt", "Child process launched");

      /*  let's go for checkpoint  */
      printi("ckpt", "let s go for checkpoint lib calls");
      if (cr_init() < 0)
        printe("cr_init failed: %s", cr_strerror(errno));
      else
        printi("cr_init", "forked process initiated");

      VPreCheckpoint();

      /*  finally we let BLCR proceed to the checkpoint  */
      f_debug = debug_open("w");
      debug_printf(f_debug, "checkpoint by BLCR\n");
      debug_printf(f_debug, "call of blcr init done\n");
      cr_register_callback(blcr_on_checkpoint, NULL, CR_SIGNAL_CONTEXT);
      debug_printf(f_debug, "CKPT_FILENAME = %s\n", filename);
      /* The trace is closed before the checkpoint request and reopened
         right after it. */
      debug_close(f_debug);

      cr_request_file(filename);

      f_debug = debug_open("a");
      debug_printf(f_debug, "the lib blcr fonctions are called (blcr_is_restart = %p, %d)\n",
                   (void *)&blcr_is_restart, blcr_is_restart);

      if (!blcr_is_restart)
        {
          /* Original clone: wait until BLCR reports idle, then exit. */
          for (;;)
            {
              if (cr_status() == CR_STATE_IDLE)
                break;
              debug_printf(f_debug, "Still checkpointing : status = %d\n", cr_status());
              req.tv_sec = 1;
              req.tv_nsec = 0;
              nanosleep(&req, NULL);
            }
          debug_printf(f_debug, "This is not a restart: exiting\n");
          debug_close(f_debug);
          _exit(0);
        }

      /* Restarted image: resume the application. */
      debug_printf(f_debug, "this is a restart\n");
      blcr_is_restart = 0;
      debug_close(f_debug);

      VPostCheckpoint();

      printi("ckpt", "Restart completed");
      return 0;

    case -1:
      /* No clone was created: undo the handler installation. */
      sigaction(SIGCHLD, &soa, NULL);
      sigprocmask(SIG_SETMASK, &saved_mask, NULL);
      return -1;

    default:
      if (ckpt_wait)
        {
          printi("ckpt", "Waiting for clone to checkpoint");
          /* Wait with SIGCHLD deliverable even if the caller had it blocked. */
          wait_mask = saved_mask;
          sigdelset(&wait_mask, SIGCHLD);
          while (cpid != 0)
            sigsuspend(&wait_mask);
        }
      sigprocmask(SIG_SETMASK, &saved_mask, NULL);
      break;
    }

  printi("ckpt", "checkpoint %s", ckpt_wait ? "complete" : "running");
  return 0;
}

/**
 * Finalize the checkpoint driver.  Currently only emits a trace message;
 * no resource is released here.
 * @return 0
 */
int vprivcheckpoint_finalize(void)
{
  printi("ckpt", "checkpoint_finalize: pipe closed");
  return 0;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -