📄 startmulticoremaster.m
字号:
% Leave while-loop immediately after result was loaded. Semaphore
% will be removed below.
break
end
% Decide whether the slave handling this job has exceeded the maximum
% wait time. The semaphore of the parameter file taken earlier is still
% held while the working file is inspected.
if ~existfile(workingFileName)
  % Without a working file there is nothing to wait for: flag the job as
  % timed out so that the loop is left and the master does the job.
  if showFileAccessWarnings
    disp(sprintf('Warning: Working file %s not found.', workingFileName));
  end
  jobTimedOut = true;
else
  if debugMode
    disp(sprintf('Master found working file nr %d.', curFileNr));
  end
  % Processing time (current time minus time stamp of the working file)
  % compared against the wait limit.
  % NOTE(review): the factor 86400 presumably converts a date number in
  % days into seconds so that it matches mbtime -- confirm.
  jobTimedOut = (mbtime - getfiledate(workingFileName) * 86400) > maxMasterWaitTime;
end

if jobTimedOut
  % The slave seems to be dead or too slow: leave the loop so that the
  % master does the job itself. The semaphore is removed after the loop.
  if debugMode
    disp(sprintf('Job nr %d has timed out.', curFileNr));
  end
  break
end

% Still within the wait limit: release the semaphore and pause for a
% moment before checking again.
if debugMode
  disp(sprintf('Job nr %d has NOT timed out.', curFileNr));
end
removefilesemaphore(sem);
if debugMode
  disp(sprintf('Waiting for result (file nr %d).', curFileNr));
end
pause(curPauseTime);
% Lengthen the pause by startPauseTime per round, capped at maxPauseTime.
curPauseTime = min(curPauseTime + startPauseTime, maxPauseTime);
end % while 1
end % if parameterFileExisting
% Release the semaphore that is still held at this point.
removefilesemaphore(sem);

% If no result could be loaded, the master evaluates the job itself.
if ~resultLoaded
  if debugMode
    disp(sprintf('Master evaluates job nr %d.', curFileNr));
    t0 = mbtime; % start time, only needed for the debug message below
  end
  for k = parIndex
    % Decide per evaluation how to pass the arguments: a cell is expanded
    % into separate input arguments, anything else is passed as one
    % argument. The test must look at parameterCell{k}, the value that is
    % actually expanded; testing the batch variable "parameters" instead
    % can select the {:} expansion for a non-cell parameter set.
    if iscell(parameterCell{k})
      resultCell{k} = feval(functionHandleCell{k}, parameterCell{k}{:});
    else
      resultCell{k} = feval(functionHandleCell{k}, parameterCell{k});
    end
  end
  % Count the job as done by the master and refresh the progress display.
  nrOfFilesMaster = nrOfFilesMaster + 1;
  waitbar__('update', nrOfFiles, nrOfFilesMaster, nrOfFilesSlaves);
  if debugMode
    disp(sprintf('Master finished job nr %d in %.2f seconds.', curFileNr, mbtime - t0));
  end
end

% Move to the next file of the top-down pass.
fileNr = fileNr + 1;
if debugMode
  disp(sprintf('Moving to next file (%d -> %d).', curFileNr, curFileNr + 1));
end
end % if masterIsWorker
%--------------------------------------------------------------------------
% Termination test: have all results been collected?
%--------------------------------------------------------------------------
% fileNr - 1 is the file/job that was last computed/loaded when working
% down the list from top to bottom; fileNr2 + 1 is the file/job that was
% last computed/loaded when checking for results from bottom to top. If
% the job following the last top-down job is the last bottom-up job,
% nothing is left to do.
if (fileNr2 + 1) == (fileNr - 1) + 1
  if debugMode
    disp('********************************');
    disp(sprintf('All work is done (fileNr = %d, fileNr2 = %d).', fileNr, fileNr2));
  end
  % leave big while-loop
  break
end

%--------------------------------------------------------------------------
% Bottom-to-top pass over the file list: collect results
%--------------------------------------------------------------------------
% Start the pass with the initial pause time.
curPauseTime = startPauseTime;
if debugMode
  disp(sprintf('********** 2. Working from bottom to top (file nr %d)', fileNr2));
end
while 1 % in this while-loop, fileNr2 will be decremented if results are found
% No file left above the first one: the bottom-up pass is finished.
if fileNr2 < 1
  if debugMode
    disp('********************************');
    disp(sprintf('All work is done (fileNr2 = %d).', fileNr2));
  end
  break
end

% Derive the file names and the evaluation indices of the current job.
curFileNr = fileNr2; % for simpler copy&paste
parameterFileName = strrep(parameterFileNameTemplate, 'XX', sprintf('%04d', curFileNr));
workingFileName   = strrep(parameterFileName, 'parameters', 'working');
resultFileName    = strrep(parameterFileName, 'parameters', 'result');
parIndex = ((curFileNr - 1) * nrOfEvalsAtOnce + 1) : min(nrOfEvals, curFileNr * nrOfEvalsAtOnce);

% Only the parameter file gets a semaphore (to reduce overhead); that
% semaphore also guards the accesses to the result file below.
sem = setfilesemaphore(parameterFileName);

% Try to load the result of the current job.
if existfile(resultFileName)
  [result, resultLoaded] = loadresultfile__(resultFileName, showFileAccessWarnings);
  if resultLoaded && debugMode
    disp(sprintf('Result file nr %d loaded.', curFileNr));
  end
else
  resultLoaded = false;
  if debugMode
    disp(sprintf('Result file nr %d was not found.', curFileNr));
  end
end
if resultLoaded
% The result has been loaded, so the semaphore can be released.
removefilesemaphore(sem);

% Store the loaded result, increment the slave file counter and refresh
% the progress display.
resultCell(parIndex) = result;
nrOfFilesSlaves = nrOfFilesSlaves + 1;
waitbar__('update', nrOfFiles, nrOfFilesMaster, nrOfFilesSlaves);

% Reset the per-job state before looking at the next file.
parameterFileRegCounter = 0;
parameterFileFoundTime = NaN;
curPauseTime = startPauseTime;

% Step one file up the list.
fileNr2 = fileNr2 - 1;

if (fileNr2 + 1) == (fileNr - 1) + 1
  % all results have been collected
  break
end

% Not finished yet: continue with the next file.
if debugMode
  disp(sprintf('***** Moving to next file (%d -> %d).', curFileNr, curFileNr-1));
end
continue
else
% No result is available yet. Use the parameter file and the working file
% to find out whether the job has timed out.
parameterFileExisting = existfile(parameterFileName);
if ~parameterFileExisting
  if debugMode
    disp(sprintf('Parameter file nr %d was NOT existing.', curFileNr));
  end
  % The parameter file has been taken by a slave, which should be working
  % on the job.
  if existfile(workingFileName)
    if debugMode
      disp(sprintf('Master found working file nr %d.', curFileNr));
    end
    % Use the time stamp of the working file to detect a timeout.
    jobTimedOut = (mbtime - getfiledate(workingFileName) * 86400) > maxMasterWaitTime;
  else
    % The parameter file was taken but no working file has been
    % generated, which is not normal. The master will generate the
    % parameter file again or do the job.
    if showFileAccessWarnings
      disp(sprintf('Warning: Working file %s not found.', workingFileName));
    end
    jobTimedOut = true;
  end
else
  if debugMode
    disp(sprintf('Parameter file nr %d was existing.', curFileNr));
  end
  % An existing parameter file means that no other process has started
  % working on the job yet, which is normal most of the time.
  if ~isnan(parameterFileFoundTime)
    % The same parameter file has been seen before: the job has timed out
    % if it has been waiting longer than the limit, i.e. no slave process
    % seems to be alive.
    jobTimedOut = (mbtime - parameterFileFoundTime) > maxMasterWaitTime;
  else
    % First sighting: remember the current time to decide later whether
    % the job has timed out.
    parameterFileFoundTime = mbtime;
    jobTimedOut = false;
  end
end % if ~parameterFileExisting
% Do the job or generate parameter file again if job has timed out.
if jobTimedOut
% Report the timeout in debug mode.
if debugMode
disp(sprintf('Job nr %d has timed out.', curFileNr));
end
if parameterFileExisting
% The job timed out although the parameter file still exists, so
% something seems to be wrong. A possible reason is that no slaves are
% alive anymore. The master will do the job itself.
% Delete the parameter file so that no slave process can load it.
mbdelete(parameterFileName, showFileAccessWarnings);
if debugMode
disp(sprintf('Parameter file nr %d deleted by master.', curFileNr));
end
else
% The job timed out and the parameter file does not exist.
% A possible reason is that a slave process was killed while working on
% the current job (if a slave is still working on the job and is just
% too slow, the parameter maxEvalTimeSingle should be chosen higher).
% The parameter file is generated again, hoping that another slave will
% finish the job. If all slaves are dead, the master will do the job
% later.
% Both variables are passed to save by name only, hence the %#ok markers.
functionHandles = functionHandleCell(parIndex); %#ok
parameters = parameterCell (parIndex); %#ok
save(parameterFileName, 'functionHandles', 'parameters');
% Count how often this parameter file has been generated again; the
% counter is compared against a limit after the semaphore is removed.
parameterFileRegCounter = parameterFileRegCounter + 1;
if debugMode
disp(sprintf('Parameter file nr %d was generated again (%d. time).', ...
curFileNr, parameterFileRegCounter));
end
end
% All file operations on this job are done: remove the semaphore.
removefilesemaphore(sem);
if parameterFileExisting || parameterFileRegCounter > 2
% Either the current job has timed out and the parameter file was not
% generated again, OR the same parameter file has been re-generated
% several times ==> the master does the job.
if debugMode
  disp(sprintf('Master evaluates job nr %d.', curFileNr));
  t0 = mbtime; % start time, only needed for the debug message below
end
for k = parIndex
  % Decide per evaluation how to pass the arguments: a cell is expanded
  % into separate input arguments, anything else is passed as one
  % argument. The test must look at parameterCell{k}, the value that is
  % actually expanded. The batch variable "parameters" is always a cell
  % when it was re-created as parameterCell(parIndex), so testing it would
  % send a non-cell parameter set into the {:} expansion.
  if iscell(parameterCell{k})
    resultCell{k} = feval(functionHandleCell{k}, parameterCell{k}{:});
  else
    resultCell{k} = feval(functionHandleCell{k}, parameterCell{k});
  end
end
% Count the job as done by the master and refresh the progress display.
nrOfFilesMaster = nrOfFilesMaster + 1;
waitbar__('update', nrOfFiles, nrOfFilesMaster, nrOfFilesSlaves);
if debugMode
  disp(sprintf('Master finished job nr %d in %.2f seconds.', curFileNr, mbtime - t0));
end
% The result has been computed, move to the next file.
fileNr2 = fileNr2 - 1;
% Reset the number of times the current parameter file was generated
% again.
parameterFileRegCounter = 0;
if debugMode
  disp(sprintf('Moving to next file (%d -> %d).', curFileNr, curFileNr-1));
end
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -