📄 genericcheckpoint.h
字号:
/** @file genericCheckpoint.h defines the general protocol-independant API for checkpointing */#ifndef __INCLUDE_GENERIC_CHECKPOINT_H__#define __INCLUDE_GENERIC_CHECKPOINT_H__#include "config.h"#include "debug.h"#include <sys/types.h>#include <sys/socket.h>#include <netinet/in.h>typedef struct CktpSock{ int proto; int data; int file; int protocur; int datacur; char name[64];} CkptSock;/******************************************************************************//** Fills CkptSock struct with some default values indicating that it is not * connected * @param s: the struct to fill */void initCheckpointSock(CkptSock *s);/** Connect to checkpoint server * @param s: CkptSock struct to modify * @param addr: address of the checkpoint server * @return -1 on error, 0 on success. */int connectCheckpointServer(CkptSock *s, const struct sockaddr *addr);/** open local checkpoint file for writting checkpoint data * @param s: CkptSock struct to modify * @param group: group of this MPI application * @param rank: rank of this MPI process * @param seq: sequence number of this checkpoint. -1 if sequence number has no * meaning in your high level implementation. * @return like open. */int openWCheckpointLocalFile(CkptSock *s, int group, int rank, int seq);/** downloads the checkpoint into cin->pipe file (cin being obtained through prestart_begin with a nice side effect) * synchronous * assumes that it is already connected to a server * called from chl of daemons when restarting * @param fd: the file descriptor to download the checkpoint to * @return -1 if error, 0 otherwise */int downloadCheckpoint();/** open local checkpoint file for writting checkpoint data * @param s: CkptSock struct to modify * @param group: group of this MPI application * @param rank: rank of this MPI process * @param seq: sequence number of this checkpoint. -1 if sequence number has no * meaning in your high level implementation. * @return like open. */int openRCheckpointLocalFile(CkptSock *s, int group, int rank, int seq);/** Acknowledge protocol, after this function has been called, the checkpoint image can be * considered to be safely stored on the checkpoint server if any, and on * the local file, if any. * @param s: Checkpoint descriptor set to use * @param totalsize: total data size of the checkpoint data transmitted to server * @return -1 if confirmation failed from checkpoint server if requested, * -2 if confirmation failed from local checkpoint file * (if requested, this error status means that checkpoint server * successfully recieved image) * 0 when no error occured. */int sendCheckpointConfirm(CkptSock *s, int totalsize);/** Close sockets to checkpoint server. * @param s: socket set associated to this checkpoint * @return like close on error, 0 otherwise. */int closeCheckpointServer(CkptSock *s);int closeCheckpointLocalFile(CkptSock *s);/* implementation independant protocol functions */int putCheckpointProto(CkptSock *s, int mygroup, int myrank, int seqnumber, int protosize);int getCheckpointProto(CkptSock *s, int *mygroup, int *myrank, int *seqnumber, int *protosize, int *totalsize);/* send/recv functions */int sendCheckpointProtoData(CkptSock *s, const void *buffer, int size);int syncSendCheckpointProtoData(CkptSock *s, const void *buffer, int size);int sendCheckpointImageData(CkptSock *s, const void *buffer, int size);int syncSendCheckpointImageData(CkptSock *s, const void *buffer, int size);int recvCheckpointProtoData(CkptSock *s, void *buffer, int size);int syncRecvCheckpointProtoData(CkptSock *s, void *buffer, int size);int recvCheckpointImageData(CkptSock *s, void *buffer, int size);int syncRecvCheckpointImageData(CkptSock *s, void *buffer, int size);#endif /* __INCLUDE_GENERIC_CHECKPOINT_H__ */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -