📄 launchprocess.cpp
字号:
sprintf(pszError, "Unable to read the result of the geterror command on '%s'\r\nError %d", arg->pszHost, WSAGetLastError()); MessageBox(NULL, pszError, "Critical Error", MB_OK); easy_closesocket(sock); SetEvent(arg->pDlg->m_hAbortEvent); delete arg; return; } if (strcmp(pszStr, "ERROR_SUCCESS")) { if (arg->i == 0 && !arg->pDlg->m_bNoMPI) { sprintf(pszError, "Failed to launch the root process:\n%s\n%s\n", arg->pszCmdLine, pszStr); } else { sprintf(pszError, "Failed to launch process %d:\n'%s'\n%s\n", arg->i, arg->pszCmdLine, pszStr); } //UnmapDrives(sock, arg->pDlg->m_pDriveMapList); sprintf(pszStr, "freeprocess %d", launchid); WriteString(sock, pszStr); ReadString(sock, pszStr); WriteString(sock, "done"); easy_closesocket(sock); MessageBox(NULL, pszError, "Critical Error", MB_OK); SetEvent(arg->pDlg->m_hAbortEvent); delete arg; return; } } // Get the port number and redirect input to the first process if (arg->i == 0 && !arg->pDlg->m_bNoMPI) { /* // Check if the root process is alive sprintf(pszStr, "getexitcode %d", launchid); if (WriteString(sock, pszStr) == SOCKET_ERROR) { printf("Error: Unable to send a getexitcode command to '%s'\r\nError %d", arg->pszHost, WSAGetLastError());fflush(stdout); easy_closesocket(sock); SetEvent(g_hAbortEvent); delete arg; return; } if (!ReadStringTimeout(sock, pszStr, g_nLaunchTimeout)) { printf("ERROR: Unable to read the result of the root getexitcode command on '%s': error %d", arg->pszHost, WSAGetLastError()); WriteString(sock, "done"); easy_closesocket(sock); SetEvent(g_hAbortEvent); delete arg; return; } if (stricmp(pszStr, "ACTIVE") != 0) { printf("ERROR: Root process has unexpectedly exited.\n"); WriteString(sock, "done"); easy_closesocket(sock); SetEvent(g_hAbortEvent); delete arg; return; } */ // barrier to let the root process do the put sprintf(pszStr, "barrier name=%s count=2", arg->pszJobID); if (WriteString(sock, pszStr) == SOCKET_ERROR) { printf("ERROR: Unable to write the barrier command: error %d", WSAGetLastError()); easy_closesocket(sock); SetEvent(arg->pDlg->m_hAbortEvent); delete arg; return; } /* if (!ReadString(sock, pszStr)) { printf("ERROR: Unable to read the result of the barrier command on '%s': error %d", arg->pszHost, WSAGetLastError()); easy_closesocket(sock); SetEvent(arg->pDlg->m_hAbortEvent); delete arg; return; } */ if (!ReadStringTimeout(sock, pszStr, g_nLaunchTimeout)) { error = WSAGetLastError(); if (error != 0) { printf("ERROR: Unable to read the result of the barrier command on '%s': error %d", arg->pszHost, error); } else { sprintf(pszStr, "getexitcode %d", launchid); if (WriteString(sock, pszStr) == SOCKET_ERROR) { printf("Error: Unable to send a getexitcode command to '%s'\r\nError %d", arg->pszHost, WSAGetLastError());fflush(stdout); easy_closesocket(sock); SetEvent(arg->pDlg->m_hAbortEvent); delete arg; return; } if (!ReadStringTimeout(sock, pszStr, g_nLaunchTimeout)) { printf("ERROR: Unable to read the result of the root getexitcode command on '%s': error %d", arg->pszHost, WSAGetLastError()); //UnmapDrives(sock, arg->pDlg->m_pDriveMapList); sprintf(pszStr, "freeprocess %d", launchid); WriteString(sock, pszStr); ReadString(sock, pszStr); WriteString(sock, "done"); easy_closesocket(sock); SetEvent(arg->pDlg->m_hAbortEvent); delete arg; return; } if (stricmp(pszStr, "ACTIVE") == 0) { printf("ERROR: timed-out waiting for the root process to call MPI_Init\n"); if (g_bUseJobHost) { // Save this process's information to the job database PutJobProcessInDatabase(arg, nPid); } } else { printf("ERROR: The root process has unexpectedly exited.\n"); if (g_bUseJobHost) { sprintf(pszStr, "geterror %d", launchid); WriteString(sock, pszStr); pszStr[0] = '\0'; ReadStringTimeout(sock, pszStr, MPD_DEFAULT_TIMEOUT); // Save this process's information to the job database PutJobProcessInDatabase(arg, nPid); UpdateJobKeyValue(0, "error", pszStr); } //UnmapDrives(sock, arg->pDlg->m_pDriveMapList); sprintf(pszStr, "freeprocess %d", launchid); WriteString(sock, pszStr); ReadString(sock, pszStr); WriteString(sock, "done"); easy_closesocket(sock); SetEvent(arg->pDlg->m_hAbortEvent); delete arg; return; } //UnmapDrives(sock, arg->pDlg->m_pDriveMapList); sprintf(pszStr, "freeprocess %d", launchid); WriteString(sock, pszStr); ReadString(sock, pszStr); } WriteString(sock, "done"); easy_closesocket(sock); SetEvent(arg->pDlg->m_hAbortEvent); delete arg; return; } if (strncmp(pszStr, "SUCCESS", 8)) { printf("ERROR: barrier failed on '%s':\n%s", arg->pszHost, pszStr); //UnmapDrives(sock, arg->pDlg->m_pDriveMapList); sprintf(pszStr, "freeprocess %d", launchid); WriteString(sock, pszStr); ReadString(sock, pszStr); WriteString(sock, "done"); easy_closesocket(sock); SetEvent(arg->pDlg->m_hAbortEvent); delete arg; return; } // after the barrier, the data is available so do the get sprintf(pszStr, "dbget name=%s key=port", pszStartupDB); if (WriteString(sock, pszStr) == SOCKET_ERROR) { printf("ERROR: Unable to write '%s': error %d", pszStr, WSAGetLastError()); easy_closesocket(sock); SetEvent(arg->pDlg->m_hAbortEvent); delete arg; return; } if (!ReadString(sock, pszStr)) { printf("ERROR: Unable to get the root port: error %d", WSAGetLastError()); easy_closesocket(sock); SetEvent(arg->pDlg->m_hAbortEvent); delete arg; return; } if (strncmp(pszStr, DBS_FAIL_STR, strlen(DBS_FAIL_STR)+1) == 0) { printf("ERROR: Unable to get the root port:\n%s", pszStr); //UnmapDrives(sock, arg->pDlg->m_pDriveMapList); sprintf(pszStr, "freeprocess %d", launchid); WriteString(sock, pszStr); ReadString(sock, pszStr); WriteString(sock, "done"); easy_closesocket(sock); SetEvent(arg->pDlg->m_hAbortEvent); delete arg; return; } // save the gotten data arg->pDlg->m_nRootPort = atoi(pszStr); // destroy the database since it is no longer necessary sprintf(pszStr, "dbdestroy name=%s", pszStartupDB); if (WriteString(sock, pszStr) == SOCKET_ERROR) { printf("ERROR: Unable to write '%s' to socket[%d]\n", pszStr, sock); //ExitProcess(0); easy_closesocket(sock); SetEvent(arg->pDlg->m_hAbortEvent); delete arg; return; } // read result if (!ReadString(sock, pszStr)) { printf("ERROR: ReadString failed to read the result of dbdestroy: error %d\n", WSAGetLastError()); //ExitProcess(0); easy_closesocket(sock); SetEvent(arg->pDlg->m_hAbortEvent); delete arg; return; } if (strnicmp(pszStr, DBS_FAIL_STR, strlen(DBS_FAIL_STR)+1) == 0) { printf("Unable to destroy the database '%s' on '%s'\n%s", pszStartupDB, arg->pszHost, pszStr);fflush(stdout); //UnmapDrives(sock, arg->pDlg->m_pDriveMapList); sprintf(pszStr, "freeprocess %d", launchid); WriteString(sock, pszStr); ReadString(sock, pszStr); WriteString(sock, "done"); easy_closesocket(sock); SetEvent(arg->pDlg->m_hAbortEvent); delete arg; return; } } if (g_bUseJobHost) { // Save this process's information to the job database PutJobProcessInDatabase(arg, nPid); } // Start to wait for the process to exit sprintf(pszStr, "getexitcodewait %d", launchid); if (WriteString(sock, pszStr) == SOCKET_ERROR) { sprintf(pszError, "Unable to send a getexitcodewait command to '%s'\r\nError %d", arg->pszHost, WSAGetLastError()); MessageBox(NULL, pszError, "Critical Error", MB_OK); easy_closesocket(sock); SetEvent(arg->pDlg->m_hAbortEvent); delete arg; return; } i = InterlockedIncrement(&arg->pDlg->m_nNumProcessSockets) - 1; arg->pDlg->m_pProcessSocket[i] = sock; arg->pDlg->m_pProcessLaunchId[i] = launchid; arg->pDlg->m_pLaunchIdToRank[i] = arg->i; } else { sprintf(pszError, "MPIRunLaunchProcess: Connect to %s failed, error: %s\n", arg->pszHost, pszStr); MessageBox(NULL, pszError, "Critical Error", MB_OK); SetEvent(arg->pDlg->m_hAbortEvent); } memset(arg->pszPassword, 0, 100); delete arg;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -