📄 qcdat.cpp
字号:
if (MSLevel<=1)
{
scan_start_ptr += 50;
continue;
}
if (ScanNumber != the_scan)
{
scan_start_ptr += 150;
continue;
}
precursorIntensityStr = strstr(ScanStr,"precursorIntensity=");
if (! precursorIntensityStr)
{
cout << "Warning: no precursor intensity found for scan " << ScanNumber << " pos: " << BufferPos << endl;
scan_start_ptr += 50;
continue;
}
else
{
precursorIntensity = ParseMassFromXML(precursorIntensityStr);
}
PrecursorStr = strstr(ScanStr, "<precursorMz");
if (PrecursorStr)
{
PrecursorStr = strstr(PrecursorStr, ">");
PrecursorMZ = ParseMassFromXML(PrecursorStr);
}
if (!PrecursorStr && MSLevel > 1)
{
printf("Warning: mzXML parser encountered a scan with no m/z: File %s Pos %d\n",
mzxml_name.c_str(), FilePos + BufferPos - BufferEnd);
scan_start_ptr += 50;
continue;
}
// check that this is a good spectrum to output
if (MSLevel>1 && PeakCount>2)
{
// read peaks
PeakStr = strstr(PrecursorStr, "<peaks");
if (PeakStr)
{
// Get byte order:
ByteOrderStr = strstr(PeakStr, "byteOrder=\"");
if (ByteOrderStr)
{
ByteOrderStr += 11;
if (!strncmp(ByteOrderStr, "network", 7))
{
ByteOrderLittle = 0;
}
if (!strncmp(ByteOrderStr, "big", 3))
{
ByteOrderLittle = 0;
}
if (!strncmp(ByteOrderStr, "little", 6))
{
ByteOrderLittle = 1;
}
}
PeakStr = strstr(PeakStr, ">");
}
if (!PeakStr)
{
cout << "Warning couldn't find peaks tag for scan: " << ScanNumber << " Buffer pos: " << BufferPos << " skipping..." << endl;
scan_start_ptr += 50;
continue;
}
PeakStr++;
PeakBuffer = PeakStr;
if (PeakBufferSize < PeakCount)
{
if (DecodedPeakBuffer)
{
char *dbf = DecodedPeakBuffer;
free(DecodedPeakBuffer);
DecodedPeakBuffer = NULL;
free(Peaks);
Peaks = NULL;
free(FilteredPeaks);
FilteredPeaks=NULL;
}
PeakBufferSize = (int)(PeakCount*1.5);
DecodedPeakBuffer = (char*)calloc(PeakBufferSize * 8 + 8, 1);
Peaks = (float*)calloc(PeakBufferSize * 2, sizeof(float));
FilteredPeaks = (float*)calloc(PeakBufferSize * 2, sizeof(float));
}
Trail = (PeakCount % 3);
if (!(PeakCount % 3))
{
PeakBuffer[PeakCount * 32/3] = '\0';
}
else
{
PeakBuffer[(PeakCount * 32/3) + Trail + 1] = '\0';
}
// cout << "dd " << spec_counter << " " << ScanNumber << endl;
b64_decode_mio( DecodedPeakBuffer, PeakBuffer);
for (FloatIndex = 0; FloatIndex < (2 * PeakCount); FloatIndex++)
{
#ifdef BYTEORDER_LITTLE_ENDIAN
if (!ByteOrderLittle)
{
char ByteSwap = DecodedPeakBuffer[FloatIndex*4];
DecodedPeakBuffer[FloatIndex*4] = DecodedPeakBuffer[FloatIndex*4 + 3];
DecodedPeakBuffer[FloatIndex*4 + 3] = ByteSwap;
ByteSwap = DecodedPeakBuffer[FloatIndex*4 + 1];
DecodedPeakBuffer[FloatIndex*4 + 1] = DecodedPeakBuffer[FloatIndex*4 + 2];
DecodedPeakBuffer[FloatIndex*4 + 2] = ByteSwap;
}
memcpy(Peaks + FloatIndex, DecodedPeakBuffer + FloatIndex * 4, 4);
#else
if (ByteOrderLittle)
{
char ByteSwap = DecodedPeakBuffer[FloatIndex*4];
DecodedPeakBuffer[FloatIndex*4] = DecodedPeakBuffer[FloatIndex*4 + 3];
DecodedPeakBuffer[FloatIndex*4 + 3] = ByteSwap;
ByteSwap = DecodedPeakBuffer[FloatIndex*4 + 1];
DecodedPeakBuffer[FloatIndex*4 + 1] = DecodedPeakBuffer[FloatIndex*4 + 2];
DecodedPeakBuffer[FloatIndex*4 + 2] = ByteSwap;
}
memcpy(Peaks + FloatIndex, DecodedPeakBuffer + FloatIndex * 4, 4);
#endif
}
// add spectrum
int i;
for (i=0; i<FloatIndex; i+=2)
{
cout << fixed << setprecision(3) << Peaks[i] << " " << Peaks[i+1] << endl;
}
exit(0);
// join and filter peaks
}
else
scan_start_ptr = ScanStr +50;
}
fclose(MZXMLFile);
cout << spec_counter << " spectra..." << endl << endl;
return spec_counter;
}
int DAT_Converter::parse_annotated_spectra_from_single_MZXML(
Config *config,
string& mzxml_name,
int file_idx,
map<mzXML_annotation,int>& ann_map)
{
static char* Buffer = NULL;
static char* DecodedPeakBuffer = NULL;
static float* Peaks = NULL;
static float* FilteredPeaks = NULL;
static int PeakBufferSize = 0;
int Trail;
static char* PrecursorStr;
int FloatIndex;
char* ByteOrderStr;
int ByteOrderLittle = 1;
int BytesToRead;
int BufferStartPos = 0;
int BytesRead;
int BufferEnd = 0;
FILE* MZXMLFile;
int ParseState = 0;
int FilePos = 0;
// allocate
if (! Buffer)
Buffer = (char*)calloc(XML_BUFFER_SIZE + 1, sizeof(char));
MZXMLFile = fopen(mzxml_name.c_str(), "rb");
if (!MZXMLFile)
{
cout << "Error: Can't open MZXML file " << mzxml_name << endl;
exit(1);
}
printf("File idx %d , '%s'...\n",file_idx, mzxml_name.c_str());
int idx_in_file = 0;
int spec_counter =0;
char *scan_start_ptr = NULL;
while (1)
{
char* ScanStr;
char* ScanNumberStr;
int ScanNumber;
char* MSLevelStr;
int MSLevel;
char *PrecursorStr;
mass_t PrecursorMZ;
char *retentionTimeStr;
float retentionTime;
char *precursorIntensityStr;
float precursorIntensity;
char* PeakStr;
char* PeakBuffer;
int BufferPos;
// Read more data, to fill up the buffer:
if ( ! scan_start_ptr ||
( (Buffer + BufferEnd - scan_start_ptr) < XML_BUFFER_HALF_SIZE) )
{
// try shunt half of the buffer
if (scan_start_ptr)
{
if (BufferEnd - XML_BUFFER_HALF_SIZE>0)
{
memmove(Buffer, Buffer + XML_BUFFER_HALF_SIZE, BufferEnd - XML_BUFFER_HALF_SIZE);
BufferEnd -= XML_BUFFER_HALF_SIZE;
scan_start_ptr -= XML_BUFFER_HALF_SIZE;
// cout << "MOVED!" << endl;
}
}
else
scan_start_ptr = Buffer;
BytesToRead = XML_BUFFER_SIZE - BufferEnd;
BytesRead = fread(Buffer + BufferEnd, sizeof(char), BytesToRead, MZXMLFile);
if (BytesRead<5)
break;
BufferEnd += BytesRead;
Buffer[BufferEnd] = '\0';
FilePos += BytesRead;
}
// Look for a new <scan tag opening:
// this scan cannot be done with strstr since there might be NULL termination
const char *last_pos = Buffer + BufferEnd - 5;
char *pos = scan_start_ptr;
while (++pos<last_pos)
{
if (*pos != '<')
continue;
if (*(pos+1)=='s' && *(pos+2)=='c' && *(pos+3)=='a' && *(pos+4)=='n')
break;
}
ScanStr = (pos<last_pos) ? pos : NULL;
if (ScanStr)
{
// if this is the case, read over more data to avoid the case where
// the spectrum's record is not all in the buffer
if (scan_start_ptr - Buffer > XML_BUFFER_HALF_SIZE)
{
scan_start_ptr = ScanStr-2;
continue;
}
BufferPos = ScanStr - Buffer;
}
else
{
BufferPos = 0;
}
if (!ScanStr )
{
scan_start_ptr = Buffer + BufferEnd-5;
continue;
}
ScanNumberStr = strstr(ScanStr, "num=");
if (!ScanNumberStr)
{
// printf("** Warning: mzXML parser encountered a scan with no scan number! File %s Pos %d\n",
// mzxml_name.c_str(), FilePos + BufferPos - BufferEnd);
ScanNumber = -1;
}
else
{
ScanNumber = ParseIntFromXML(ScanNumberStr);
scan_start_ptr = ScanNumberStr;
}
retentionTimeStr = strstr(ScanStr,"retentionTime=\"PT");
if (! retentionTimeStr)
{
retentionTime = -1;
}
else
{
retentionTime = ParseMassFromXML(retentionTimeStr);
}
char *PeakCountStr = strstr(ScanStr, "peaksCount=\"");
if (!PeakCountStr)
{
cout << "Warning: bad parsing peaks in mzxml. " << endl;
cout << "Scan: " << ScanNumber << " Pos: " << FilePos << endl;
scan_start_ptr += 50;
continue;
}
int PeakCount = ParseIntFromXML(PeakCountStr);
MSLevelStr = strstr(ScanStr, "msLevel=");
if (!MSLevelStr)
{
printf("** Warning: mzXML parser encountered a scan with no scan level! File %s Pos %d\n",
mzxml_name.c_str(), FilePos + BufferPos - BufferEnd);
scan_start_ptr += 50;
continue;
}
else
{
MSLevel = ParseIntFromXML(MSLevelStr);
}
if (MSLevel<=1)
{
scan_start_ptr += 50;
continue;
}
precursorIntensityStr = strstr(ScanStr,"precursorIntensity=");
if (! precursorIntensityStr)
{
cout << "Warning: no precursor intensity found for scan " << ScanNumber << " pos: " << BufferPos << endl;
scan_start_ptr += 50;
continue;
}
else
{
precursorIntensity = ParseMassFromXML(precursorIntensityStr);
}
PrecursorStr = strstr(ScanStr, "<precursorMz");
if (PrecursorStr)
{
PrecursorStr = strstr(PrecursorStr, ">");
PrecursorMZ = ParseMassFromXML(PrecursorStr);
}
if (!PrecursorStr && MSLevel > 1)
{
printf("Warning: mzXML parser encountered a scan with no m/z: File %s Pos %d\n",
mzxml_name.c_str(), FilePos + BufferPos - BufferEnd);
scan_start_ptr += 50;
continue;
}
// check if this spectrum is in the list of annotated spectra
// if not, don't output it.
bool is_annotated_spectrum = false;
map<mzXML_annotation,int>::const_iterator it;
mzXML_annotation ann_pos;
ann_pos.mzXML_file_idx = file_idx;
ann_pos.scan = ScanNumber;
it = ann_map.find(ann_pos);
if (it != ann_map.end())
is_annotated_spectrum = true;
// check that this is a good spectrum to output
if (MSLevel>1 && PeakCount>2 && is_annotated_spectrum)
{
// read peaks
PeakStr = strstr(PrecursorStr, "<peaks");
if (PeakStr)
{
// Get byte order:
ByteOrderStr = strstr(PeakStr, "byteOrder=\"");
if (ByteOrderStr)
{
ByteOrderStr += 11;
if (!strncmp(ByteOrderStr, "network", 7))
{
ByteOrderLittle = 0;
}
if (!strncmp(ByteOrderStr, "big", 3))
{
ByteOrderLittle = 0;
}
if (!strncmp(ByteOrderStr, "little", 6))
{
ByteOrderLittle = 1;
}
}
PeakStr = strstr(PeakStr, ">");
}
if (!PeakStr)
{
cout << "Warning couldn't find peaks tag for scan: " << ScanNumber << " Buffer pos: " << BufferPos << " skipping..." << endl;
scan_start_ptr += 50;
continue;
}
PeakStr++;
PeakBuffer = PeakStr;
if (PeakBufferSize < PeakCount)
{
if (DecodedPeakBuffer)
{
char *dbf = DecodedPeakBuffer;
free(DecodedPeakBuffer);
DecodedPeakBuffer = NULL;
free(Peaks);
Peaks = NULL;
free(FilteredPeaks);
FilteredPeaks=NULL;
}
PeakBufferSize = (int)(PeakCount*1.5);
DecodedPeakBuffer = (char*)calloc(PeakBufferSize * 8 + 8, 1);
Peaks = (float*)calloc(PeakBufferSize * 2, sizeof(float));
FilteredPeaks = (float*)calloc(PeakBufferSize * 2, sizeof(float));
}
Trail = (PeakCount % 3);
if (!(PeakCount % 3))
{
PeakBuffer[PeakCount * 32/3] = '\0';
}
else
{
PeakBuffer[(PeakCount * 32/3) + Trail + 1] = '\0';
}
b64_decode_mio( DecodedPeakBuffer, PeakBuffer);
for (FloatIndex = 0; FloatIndex < (2 * PeakCount); FloatIndex++)
{
#ifdef BYTEORDER_LITTLE_ENDIAN
if (!ByteOrderLittle)
{
char ByteSwap = DecodedPeakBuffer[FloatIndex*4];
DecodedPeakBuffer[FloatIndex*4] = DecodedPeakBuffer[FloatIndex*4 + 3];
DecodedPeakBuffer[FloatIndex*4 + 3] = ByteSwap;
ByteSwap = DecodedPeakBuffer[FloatIndex*4 + 1];
DecodedPeakBuffer[FloatIndex*4 + 1] = DecodedPeakBuffer[FloatIndex*4 + 2];
DecodedPeakBuffer[FloatIndex*4 + 2] = ByteSwap;
}
memcpy(Peaks + FloatIndex, DecodedPeakBuffer + FloatIndex * 4, 4);
#else
if (ByteOrderLittle)
{
char ByteSwap = DecodedPeakBuffer[FloatIndex*4];
DecodedPeakBuffer[FloatIndex*4] = DecodedPeakBuffer[FloatIndex*4 + 3];
DecodedPeakBuffer[FloatIndex*4 + 3] = ByteSwap;
ByteSwap = DecodedPeakBuffer[FloatIndex*4 + 1];
DecodedPeakBuffer[FloatIndex*4 + 1] = DecodedPeakBuffer[FloatIndex*4 + 2];
DecodedPeakBuffer[FloatIndex*4 + 2] = ByteSwap;
}
memcpy(Peaks + FloatIndex, DecodedPeakBuffer + FloatIndex * 4, 4);
#endif
}
// add spectrum
int DAT_file_idx = (int)(PrecursorMZ/mass_increment);
if (DAT_file_idx > max_dat_file_idx)
DAT_file_idx = max_dat_file_idx;
if (! dat_buffs[DAT_file_idx].ind_was_initialized)
{
ostringstream os, os_batch;
os << DAT_file_idx;
os_batch << batch;
string path = out_dir + "/" + name + "_" + os_batch.str() + "_" + os.str() + ".dat";
dat_buffs[DAT_file_idx].init(path,dat_buff_size);
}
// join and filter peaks
int new_num_peaks = join_and_filter_peak_list(config,PrecursorMZ,Peaks,
PeakCount,FilteredPeaks);
if (new_num_peaks>3)
{
dat_buffs[DAT_file_idx].add_spec_to_DAT_file(PrecursorMZ, 0, file_idx, ScanNumber, retentionTime,
precursorIntensity, new_num_peaks, (char *)FilteredPeaks);
spec_counter++;
}
scan_start_ptr = PeakStr + 8 * PeakCount;
}
else
scan_start_ptr = ScanStr +50;
}
fclose(MZXMLFile);
cout << spec_counter << " spectra..." << endl << endl;
return spec_counter;
}
/*******************************************************************************
Create DAT files only for annoated spectra
********************************************************************************/
void DAT_Converter::create_dat_files_for_anns(Config *config, char *mzXML_list, char *anns_file,
char *_out_dir, char *_name)
{
map<mzXML_annotation,int> ann_map;
read_mzXML_annotations_to_map(anns_file,ann_map);
vector<string> list;
read_paths_into_list(mzXML_list,list);
init_DAT_Converter((float)2000.0,(float)20.0,524288);
name = _name;
out_dir = _out_dir;
batch = 0;
int num_spectra_parsed=0;
int f;
for (f=0; f<list.size(); f++)
{
num_spectra_parsed += parse_annotated_spectra_from_single_MZXML(
config,
list[f],
f,
ann_map);
}
int d;
for (d=0; d<dat_buffs.size(); d++)
{
if (dat_buffs[d].ind_was_initialized && dat_buffs[d].pos > dat_buffs[d].buff)
dat_buffs[d].flush_buff();
}
cout << "Total spectra extracted and converted to DAT: " << num_spectra_parsed << endl;
create_list_file();
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -