⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 qcdat.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 3 页
字号:

		if (MSLevel<=1)
		{
			scan_start_ptr += 50;
			continue;
		}

		if (ScanNumber != the_scan)
		{
			scan_start_ptr += 150;
			continue;
		}



		precursorIntensityStr = strstr(ScanStr,"precursorIntensity=");
		if (! precursorIntensityStr)
		{
			cout << "Warning: no precursor intensity found for scan " << ScanNumber << " pos: " << BufferPos << endl;
			scan_start_ptr += 50;
			continue;
		}
		else
		{
			precursorIntensity = ParseMassFromXML(precursorIntensityStr);
		}


		PrecursorStr = strstr(ScanStr, "<precursorMz");
		if (PrecursorStr)
		{
			PrecursorStr = strstr(PrecursorStr, ">");
			PrecursorMZ = ParseMassFromXML(PrecursorStr);
		}

		if (!PrecursorStr && MSLevel > 1)
		{
			printf("Warning: mzXML parser encountered a scan with no m/z: File %s Pos %d\n", 
				mzxml_name.c_str(), FilePos + BufferPos - BufferEnd);

			scan_start_ptr += 50;
			continue;
		}
		
		//  check that this is a good spectrum to output
		if (MSLevel>1 && PeakCount>2)
		{

			// read peaks

			PeakStr = strstr(PrecursorStr, "<peaks");
			if (PeakStr)
			{
				// Get byte order:
				ByteOrderStr = strstr(PeakStr, "byteOrder=\"");
				if (ByteOrderStr)
				{
					ByteOrderStr += 11;
					if (!strncmp(ByteOrderStr, "network", 7))
					{
						ByteOrderLittle = 0;
					}
					if (!strncmp(ByteOrderStr, "big", 3))
					{
						ByteOrderLittle = 0;
					}
					if (!strncmp(ByteOrderStr, "little", 6))
					{
						ByteOrderLittle = 1;
					}
				}
				PeakStr = strstr(PeakStr, ">");
			}
			if (!PeakStr)
			{
				cout << "Warning couldn't find peaks tag for scan: " << ScanNumber << " Buffer pos: " << BufferPos << "  skipping..." << endl;
				scan_start_ptr += 50;
				continue;
				
			}

			PeakStr++;
			PeakBuffer = PeakStr;

			if (PeakBufferSize < PeakCount)
			{
				if (DecodedPeakBuffer)
				{
					char *dbf = DecodedPeakBuffer;
					free(DecodedPeakBuffer);
					DecodedPeakBuffer = NULL;
					free(Peaks);
					Peaks = NULL;
					free(FilteredPeaks);
					FilteredPeaks=NULL;
				}
				PeakBufferSize = (int)(PeakCount*1.5);
				DecodedPeakBuffer = (char*)calloc(PeakBufferSize * 8 + 8, 1);
				Peaks = (float*)calloc(PeakBufferSize * 2, sizeof(float));
				FilteredPeaks = (float*)calloc(PeakBufferSize * 2, sizeof(float));
			}
			
			Trail = (PeakCount % 3);
			if (!(PeakCount % 3))
			{
				PeakBuffer[PeakCount * 32/3] = '\0';
			}
			else
			{
				PeakBuffer[(PeakCount * 32/3) + Trail + 1] = '\0';
			}
	
		//	cout << "dd " << spec_counter << " " << ScanNumber << endl;

			b64_decode_mio( DecodedPeakBuffer, PeakBuffer);
			for (FloatIndex = 0; FloatIndex < (2 * PeakCount); FloatIndex++)
			{
		#ifdef BYTEORDER_LITTLE_ENDIAN
				if (!ByteOrderLittle)
				{
					char ByteSwap = DecodedPeakBuffer[FloatIndex*4];
					DecodedPeakBuffer[FloatIndex*4] = DecodedPeakBuffer[FloatIndex*4 + 3];
					DecodedPeakBuffer[FloatIndex*4 + 3] = ByteSwap;
					ByteSwap = DecodedPeakBuffer[FloatIndex*4 + 1];
					DecodedPeakBuffer[FloatIndex*4 + 1] = DecodedPeakBuffer[FloatIndex*4 + 2];
					DecodedPeakBuffer[FloatIndex*4 + 2] = ByteSwap;
				}
				memcpy(Peaks + FloatIndex, DecodedPeakBuffer + FloatIndex * 4, 4);
		#else
				if (ByteOrderLittle)
				{
					char ByteSwap = DecodedPeakBuffer[FloatIndex*4];
					DecodedPeakBuffer[FloatIndex*4] = DecodedPeakBuffer[FloatIndex*4 + 3];
					DecodedPeakBuffer[FloatIndex*4 + 3] = ByteSwap;
					ByteSwap = DecodedPeakBuffer[FloatIndex*4 + 1];
					DecodedPeakBuffer[FloatIndex*4 + 1] = DecodedPeakBuffer[FloatIndex*4 + 2];
					DecodedPeakBuffer[FloatIndex*4 + 2] = ByteSwap;
				}
				memcpy(Peaks + FloatIndex, DecodedPeakBuffer + FloatIndex * 4, 4);
		#endif
			}


			// add spectrum
			int i;
			for (i=0; i<FloatIndex; i+=2)
			{
				cout << fixed << setprecision(3) << Peaks[i] << " " << Peaks[i+1] << endl;
			}

			exit(0);
			// join and filter peaks
		}
		else
			scan_start_ptr = ScanStr +50;

	}

	fclose(MZXMLFile);
	cout << spec_counter << " spectra..." << endl << endl;

	return spec_counter;
}




int DAT_Converter::parse_annotated_spectra_from_single_MZXML(
								Config *config, 
								string& mzxml_name, 
								int file_idx,
								map<mzXML_annotation,int>& ann_map)
{
	static char* Buffer = NULL;
    static char* DecodedPeakBuffer = NULL;
    static float* Peaks = NULL;
	static float* FilteredPeaks = NULL;
	static int PeakBufferSize = 0;
    int Trail;
    static char* PrecursorStr;
    int FloatIndex;
    char* ByteOrderStr;
    int ByteOrderLittle = 1;

    int BytesToRead;
    int BufferStartPos = 0;
    int BytesRead;
    int BufferEnd = 0;
    FILE* MZXMLFile;
    int ParseState = 0;
    int FilePos = 0;
    

    // allocate
	if (! Buffer)
		Buffer = (char*)calloc(XML_BUFFER_SIZE + 1, sizeof(char));

    MZXMLFile = fopen(mzxml_name.c_str(), "rb");
    if (!MZXMLFile)
    {
        cout << "Error: Can't open MZXML file " <<  mzxml_name << endl;
        exit(1);
    }

    printf("File idx %d , '%s'...\n",file_idx, mzxml_name.c_str());

	int idx_in_file = 0;
	int spec_counter =0;
	char *scan_start_ptr = NULL;
    while (1)
    {
		char* ScanStr;
		char* ScanNumberStr;
		int ScanNumber;
		char* MSLevelStr;
		int MSLevel;
		char *PrecursorStr;
		mass_t PrecursorMZ;
		char *retentionTimeStr;
		float retentionTime;
		char *precursorIntensityStr;
		float precursorIntensity;
		char* PeakStr;
		char* PeakBuffer;
		int  BufferPos;

        // Read more data, to fill up the buffer:
	
		if ( ! scan_start_ptr || 
			( (Buffer + BufferEnd - scan_start_ptr) < XML_BUFFER_HALF_SIZE) )
		{
			// try shunt half of the buffer
			if (scan_start_ptr)
			{
				if (BufferEnd - XML_BUFFER_HALF_SIZE>0)
				{
					memmove(Buffer, Buffer + XML_BUFFER_HALF_SIZE, BufferEnd - XML_BUFFER_HALF_SIZE);
					BufferEnd -= XML_BUFFER_HALF_SIZE;
					scan_start_ptr -= XML_BUFFER_HALF_SIZE;

//					cout << "MOVED!" << endl;
				}
			}
			else
				scan_start_ptr = Buffer;

			BytesToRead = XML_BUFFER_SIZE - BufferEnd;
			BytesRead = fread(Buffer + BufferEnd, sizeof(char), BytesToRead, MZXMLFile);


			if (BytesRead<5)
				break;

			BufferEnd += BytesRead;
			Buffer[BufferEnd] = '\0';


			FilePos += BytesRead;
		}

        // Look for a new <scan tag opening:
		// this scan cannot be done with strstr since there might be NULL termination
		const char *last_pos = Buffer + BufferEnd - 5;
		char *pos = scan_start_ptr;

		while (++pos<last_pos)
		{
			if (*pos != '<')
				continue;

			if (*(pos+1)=='s' && *(pos+2)=='c' && *(pos+3)=='a' && *(pos+4)=='n')
				break;
		}
		ScanStr =  (pos<last_pos) ? pos : NULL;

        if (ScanStr)
        {
			// if this is the case, read over more data to avoid the case where
			// the spectrum's record is not all in the buffer
			if (scan_start_ptr - Buffer > XML_BUFFER_HALF_SIZE)
			{
				scan_start_ptr = ScanStr-2;
				continue;
			}

            BufferPos = ScanStr - Buffer;
        }
        else
        {
            BufferPos = 0;
        }

        if (!ScanStr )
        {
			scan_start_ptr = Buffer + BufferEnd-5;
            continue;
        }

        ScanNumberStr = strstr(ScanStr, "num=");
        if (!ScanNumberStr)
        {
          //  printf("** Warning: mzXML parser encountered a scan with no scan number!  File %s Pos %d\n", 
		  //		   mzxml_name.c_str(), FilePos + BufferPos - BufferEnd);

            ScanNumber = -1;
        }
        else
        {
            ScanNumber = ParseIntFromXML(ScanNumberStr);
			scan_start_ptr = ScanNumberStr;
        }

		retentionTimeStr = strstr(ScanStr,"retentionTime=\"PT");
		if (! retentionTimeStr)
		{
			retentionTime = -1;
		}
		else
		{
			retentionTime = ParseMassFromXML(retentionTimeStr);
		}


		char *PeakCountStr = strstr(ScanStr, "peaksCount=\"");
		if (!PeakCountStr)
		{
			cout << "Warning: bad parsing peaks in mzxml. " << endl;
			cout << "Scan: " << ScanNumber << " Pos: " << FilePos << endl;
			scan_start_ptr += 50;
			continue;
		}
		int PeakCount = ParseIntFromXML(PeakCountStr);
		
        MSLevelStr = strstr(ScanStr, "msLevel=");
        if (!MSLevelStr)
        {
            printf("** Warning: mzXML parser encountered a scan with no scan level!  File %s Pos %d\n", 
				mzxml_name.c_str(), FilePos + BufferPos - BufferEnd);

			scan_start_ptr += 50;
			continue; 
        }
        else
        {
            MSLevel = ParseIntFromXML(MSLevelStr);
        }

		if (MSLevel<=1)
		{
			scan_start_ptr += 50;
			continue;
		}


		precursorIntensityStr = strstr(ScanStr,"precursorIntensity=");
		if (! precursorIntensityStr)
		{
			cout << "Warning: no precursor intensity found for scan " << ScanNumber << " pos: " << BufferPos << endl;
			scan_start_ptr += 50;
			continue;
		}
		else
		{
			precursorIntensity = ParseMassFromXML(precursorIntensityStr);
		}


		PrecursorStr = strstr(ScanStr, "<precursorMz");
		if (PrecursorStr)
		{
			PrecursorStr = strstr(PrecursorStr, ">");
			PrecursorMZ = ParseMassFromXML(PrecursorStr);
		}

		if (!PrecursorStr && MSLevel > 1)
		{
			printf("Warning: mzXML parser encountered a scan with no m/z: File %s Pos %d\n", 
				mzxml_name.c_str(), FilePos + BufferPos - BufferEnd);

			scan_start_ptr += 50;
			continue;
		}
		

		// check if this spectrum is in the list of annotated spectra
		// if not, don't output it.
		bool is_annotated_spectrum = false;
		map<mzXML_annotation,int>::const_iterator it;
		mzXML_annotation ann_pos;
		ann_pos.mzXML_file_idx = file_idx;
		ann_pos.scan = ScanNumber;

		it = ann_map.find(ann_pos);
		if (it != ann_map.end())
			is_annotated_spectrum = true;

		//  check that this is a good spectrum to output
		if (MSLevel>1 && PeakCount>2 && is_annotated_spectrum)
		{

			// read peaks

			PeakStr = strstr(PrecursorStr, "<peaks");
			if (PeakStr)
			{
				// Get byte order:
				ByteOrderStr = strstr(PeakStr, "byteOrder=\"");
				if (ByteOrderStr)
				{
					ByteOrderStr += 11;
					if (!strncmp(ByteOrderStr, "network", 7))
					{
						ByteOrderLittle = 0;
					}
					if (!strncmp(ByteOrderStr, "big", 3))
					{
						ByteOrderLittle = 0;
					}
					if (!strncmp(ByteOrderStr, "little", 6))
					{
						ByteOrderLittle = 1;
					}
				}
				PeakStr = strstr(PeakStr, ">");
			}
			if (!PeakStr)
			{
				cout << "Warning couldn't find peaks tag for scan: " << ScanNumber << " Buffer pos: " << BufferPos << "  skipping..." << endl;
				scan_start_ptr += 50;
				continue;
			}

			PeakStr++;
			PeakBuffer = PeakStr;

			if (PeakBufferSize < PeakCount)
			{
				if (DecodedPeakBuffer)
				{
					char *dbf = DecodedPeakBuffer;
					free(DecodedPeakBuffer);
					DecodedPeakBuffer = NULL;
					free(Peaks);
					Peaks = NULL;
					free(FilteredPeaks);
					FilteredPeaks=NULL;
				}
				PeakBufferSize = (int)(PeakCount*1.5);
				DecodedPeakBuffer = (char*)calloc(PeakBufferSize * 8 + 8, 1);
				Peaks = (float*)calloc(PeakBufferSize * 2, sizeof(float));
				FilteredPeaks = (float*)calloc(PeakBufferSize * 2, sizeof(float));
			}
			
			Trail = (PeakCount % 3);
			if (!(PeakCount % 3))
			{
				PeakBuffer[PeakCount * 32/3] = '\0';
			}
			else
			{
				PeakBuffer[(PeakCount * 32/3) + Trail + 1] = '\0';
			}

			b64_decode_mio( DecodedPeakBuffer, PeakBuffer);
			for (FloatIndex = 0; FloatIndex < (2 * PeakCount); FloatIndex++)
			{
		#ifdef BYTEORDER_LITTLE_ENDIAN
				if (!ByteOrderLittle)
				{
					char ByteSwap = DecodedPeakBuffer[FloatIndex*4];
					DecodedPeakBuffer[FloatIndex*4] = DecodedPeakBuffer[FloatIndex*4 + 3];
					DecodedPeakBuffer[FloatIndex*4 + 3] = ByteSwap;
					ByteSwap = DecodedPeakBuffer[FloatIndex*4 + 1];
					DecodedPeakBuffer[FloatIndex*4 + 1] = DecodedPeakBuffer[FloatIndex*4 + 2];
					DecodedPeakBuffer[FloatIndex*4 + 2] = ByteSwap;
				}
				memcpy(Peaks + FloatIndex, DecodedPeakBuffer + FloatIndex * 4, 4);
		#else
				if (ByteOrderLittle)
				{
					char ByteSwap = DecodedPeakBuffer[FloatIndex*4];
					DecodedPeakBuffer[FloatIndex*4] = DecodedPeakBuffer[FloatIndex*4 + 3];
					DecodedPeakBuffer[FloatIndex*4 + 3] = ByteSwap;
					ByteSwap = DecodedPeakBuffer[FloatIndex*4 + 1];
					DecodedPeakBuffer[FloatIndex*4 + 1] = DecodedPeakBuffer[FloatIndex*4 + 2];
					DecodedPeakBuffer[FloatIndex*4 + 2] = ByteSwap;
				}
				memcpy(Peaks + FloatIndex, DecodedPeakBuffer + FloatIndex * 4, 4);
		#endif
			}


			// add spectrum
			int DAT_file_idx =  (int)(PrecursorMZ/mass_increment);
			if (DAT_file_idx > max_dat_file_idx)
				DAT_file_idx = max_dat_file_idx;

			if (! dat_buffs[DAT_file_idx].ind_was_initialized)
			{
				ostringstream os, os_batch;
				os << DAT_file_idx; 
				os_batch << batch;
				string path = out_dir + "/" + name + "_" + os_batch.str() + "_" + os.str() + ".dat";
				dat_buffs[DAT_file_idx].init(path,dat_buff_size);
			}

			// join and filter peaks
			int new_num_peaks = join_and_filter_peak_list(config,PrecursorMZ,Peaks,
														  PeakCount,FilteredPeaks);
			if (new_num_peaks>3)
			{
				dat_buffs[DAT_file_idx].add_spec_to_DAT_file(PrecursorMZ, 0, file_idx, ScanNumber, retentionTime,  
									 precursorIntensity, new_num_peaks, (char *)FilteredPeaks);
				spec_counter++;
			}
			scan_start_ptr = PeakStr + 8 * PeakCount;
		}
		else
			scan_start_ptr = ScanStr +50;
	}

	fclose(MZXMLFile);
	cout << spec_counter << " spectra..." << endl << endl;

	return spec_counter;
}





/*******************************************************************************
Create DAT files only for annoated spectra
********************************************************************************/
void DAT_Converter::create_dat_files_for_anns(Config *config, char *mzXML_list, char *anns_file,
								char *_out_dir, char *_name)
{
	map<mzXML_annotation,int> ann_map;
	read_mzXML_annotations_to_map(anns_file,ann_map);

	vector<string> list;
	read_paths_into_list(mzXML_list,list);

	init_DAT_Converter((float)2000.0,(float)20.0,524288);

	name	= _name;
	out_dir = _out_dir;
	batch	= 0;

	int num_spectra_parsed=0;
	int f;
	for (f=0; f<list.size(); f++)
	{

		num_spectra_parsed += parse_annotated_spectra_from_single_MZXML(
								config, 
								list[f],
								f,
								ann_map);
	}

	int d;
	for (d=0; d<dat_buffs.size(); d++)
	{
		if (dat_buffs[d].ind_was_initialized && dat_buffs[d].pos > dat_buffs[d].buff)
			dat_buffs[d].flush_buff();
	}
	
	
	cout << "Total spectra extracted and converted to DAT: " << num_spectra_parsed << endl;

	create_list_file();
}


⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -