⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 qcdat.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 3 页
字号:
//					cout << "MOVED!" << endl;
				}
			}
			else
				scan_start_ptr = Buffer;

			BytesToRead = XML_BUFFER_SIZE - BufferEnd;
			BytesRead = fread(Buffer + BufferEnd, sizeof(char), BytesToRead, MZXMLFile);


			if (BytesRead<5)
				break;

			BufferEnd += BytesRead;
			Buffer[BufferEnd] = '\0';


			FilePos += BytesRead;
		}

        // Look for a new <scan tag opening:
		// this scan cannot be done with strstr since there might be NULL termination
		const char *last_pos = Buffer + BufferEnd - 5;
		char *pos = scan_start_ptr;

		while (++pos<last_pos)
		{
			if (*pos != '<')
				continue;

			if (*(pos+1)=='s' && *(pos+2)=='c' && *(pos+3)=='a' && *(pos+4)=='n')
				break;
		}
		ScanStr =  (pos<last_pos) ? pos : NULL;

        if (ScanStr)
        {
			// if this is the case, read over more data to avoid the case where
			// the spectrum's record is not all in the buffer
			if (scan_start_ptr - Buffer > XML_BUFFER_HALF_SIZE)
			{
				scan_start_ptr = ScanStr-2;
				continue;
			}

            BufferPos = ScanStr - Buffer;
        }
        else
        {
            BufferPos = 0;
        }

        if (!ScanStr )
        {
			scan_start_ptr = Buffer + BufferEnd-5;
            continue;
        }

        ScanNumberStr = strstr(ScanStr, "num=");
        if (!ScanNumberStr)
        {
          //  printf("** Warning: mzXML parser encountered a scan with no scan number!  File %s Pos %d\n", 
		  //		   mzxml_name.c_str(), FilePos + BufferPos - BufferEnd);

            ScanNumber = -1;
        }
        else
        {
            ScanNumber = ParseIntFromXML(ScanNumberStr);
			scan_start_ptr = ScanNumberStr;
        }

		retentionTimeStr = strstr(ScanStr,"retentionTime=\"PT");
		if (! retentionTimeStr)
		{
			retentionTime = -1;
		}
		else
		{
			retentionTime = ParseMassFromXML(retentionTimeStr);
		}


		char *PeakCountStr = strstr(ScanStr, "peaksCount=\"");
		if (!PeakCountStr)
		{
			cout << "Warning: bad parsing peaks in mzxml. " << endl;
			cout << "Scan: " << ScanNumber << " Pos: " << FilePos << endl;
			scan_start_ptr += 50;
			continue;
		}
		int PeakCount = ParseIntFromXML(PeakCountStr);
		
        MSLevelStr = strstr(ScanStr, "msLevel=");
        if (!MSLevelStr)
        {
            printf("** Warning: mzXML parser encountered a scan with no scan level!  File %s Pos %d\n", 
				mzxml_name.c_str(), FilePos + BufferPos - BufferEnd);

			scan_start_ptr += 50;
			continue; 
        }
        else
        {
            MSLevel = ParseIntFromXML(MSLevelStr);
        }

		if (MSLevel<=1)
		{
			scan_start_ptr += 50;
			continue;
		}


		precursorIntensityStr = strstr(ScanStr,"precursorIntensity=");
		if (! precursorIntensityStr)
		{
			cout << "Warning: no precursor intensity found for scan " << ScanNumber << " pos: " << BufferPos << endl;
			scan_start_ptr += 50;
			continue;
		}
		else
		{
			precursorIntensity = ParseMassFromXML(precursorIntensityStr);
		}


		PrecursorStr = strstr(ScanStr, "<precursorMz");
		if (PrecursorStr)
		{
			PrecursorStr = strstr(PrecursorStr, ">");
			PrecursorMZ = ParseMassFromXML(PrecursorStr);
		}

		if (!PrecursorStr && MSLevel > 1)
		{
			printf("Warning: mzXML parser encountered a scan with no m/z: File %s Pos %d\n", 
				mzxml_name.c_str(), FilePos + BufferPos - BufferEnd);

			scan_start_ptr += 50;
			continue;
		}
		
		//  check that this is a good spectrum to output
		if (MSLevel>1 && PeakCount>2 && PrecursorMZ>= min_m_over_z && PrecursorMZ <= max_m_over_z)
		{

			// read peaks

			PeakStr = strstr(PrecursorStr, "<peaks");
			if (PeakStr)
			{
				// Get byte order:
				ByteOrderStr = strstr(PeakStr, "byteOrder=\"");
				if (ByteOrderStr)
				{
					ByteOrderStr += 11;
					if (!strncmp(ByteOrderStr, "network", 7))
					{
						ByteOrderLittle = 0;
					}
					if (!strncmp(ByteOrderStr, "big", 3))
					{
						ByteOrderLittle = 0;
					}
					if (!strncmp(ByteOrderStr, "little", 6))
					{
						ByteOrderLittle = 1;
					}
				}
				PeakStr = strstr(PeakStr, ">");
			}
			if (!PeakStr)
			{
				cout << "Warning couldn't find peaks tag for scan: " << ScanNumber << " Buffer pos: " << BufferPos << "  skipping..." << endl;
				scan_start_ptr += 50;
				continue;
			}

			PeakStr++;
			PeakBuffer = PeakStr;

			if (PeakBufferSize < PeakCount)
			{
				if (DecodedPeakBuffer)
				{
					char *dbf = DecodedPeakBuffer;
					free(DecodedPeakBuffer);
					DecodedPeakBuffer = NULL;
					free(Peaks);
					Peaks = NULL;
					free(FilteredPeaks);
					FilteredPeaks=NULL;
				}
				PeakBufferSize = (int)(PeakCount*1.5);
				DecodedPeakBuffer = (char*)calloc(PeakBufferSize * 8 + 8, 1);
				Peaks = (float*)calloc(PeakBufferSize * 2, sizeof(float));
				FilteredPeaks = (float*)calloc(PeakBufferSize * 2, sizeof(float));
			}
			
			Trail = (PeakCount % 3);
			if (!(PeakCount % 3))
			{
				PeakBuffer[PeakCount * 32/3] = '\0';
			}
			else
			{
				PeakBuffer[(PeakCount * 32/3) + Trail + 1] = '\0';
			}

			b64_decode_mio( DecodedPeakBuffer, PeakBuffer);
			for (FloatIndex = 0; FloatIndex < (2 * PeakCount); FloatIndex++)
			{
		#ifdef BYTEORDER_LITTLE_ENDIAN
				if (!ByteOrderLittle)
				{
					char ByteSwap = DecodedPeakBuffer[FloatIndex*4];
					DecodedPeakBuffer[FloatIndex*4] = DecodedPeakBuffer[FloatIndex*4 + 3];
					DecodedPeakBuffer[FloatIndex*4 + 3] = ByteSwap;
					ByteSwap = DecodedPeakBuffer[FloatIndex*4 + 1];
					DecodedPeakBuffer[FloatIndex*4 + 1] = DecodedPeakBuffer[FloatIndex*4 + 2];
					DecodedPeakBuffer[FloatIndex*4 + 2] = ByteSwap;
				}
				memcpy(Peaks + FloatIndex, DecodedPeakBuffer + FloatIndex * 4, 4);
		#else
				if (ByteOrderLittle)
				{
					char ByteSwap = DecodedPeakBuffer[FloatIndex*4];
					DecodedPeakBuffer[FloatIndex*4] = DecodedPeakBuffer[FloatIndex*4 + 3];
					DecodedPeakBuffer[FloatIndex*4 + 3] = ByteSwap;
					ByteSwap = DecodedPeakBuffer[FloatIndex*4 + 1];
					DecodedPeakBuffer[FloatIndex*4 + 1] = DecodedPeakBuffer[FloatIndex*4 + 2];
					DecodedPeakBuffer[FloatIndex*4 + 2] = ByteSwap;
				}
				memcpy(Peaks + FloatIndex, DecodedPeakBuffer + FloatIndex * 4, 4);
		#endif
			}


			// add spectrum
			int DAT_file_idx =  (int)(PrecursorMZ/mass_increment);
			if (DAT_file_idx > max_dat_file_idx)
				DAT_file_idx = max_dat_file_idx;

			if (! dat_buffs[DAT_file_idx].ind_was_initialized)
			{
				ostringstream os, os_batch;
				os << DAT_file_idx;
				os_batch << batch;
				string path = out_dir + "/" + name + "_" + os_batch.str() + "_" + os.str() + ".dat";
				dat_buffs[DAT_file_idx].init(path,dat_buff_size);
			}

			// join and filter peaks
			int new_num_peaks = join_and_filter_peak_list(config,PrecursorMZ,Peaks,
														  PeakCount,FilteredPeaks);
			if (new_num_peaks>3)
			{
				dat_buffs[DAT_file_idx].add_spec_to_DAT_file(PrecursorMZ, 0, file_idx, ScanNumber, retentionTime,  
									 precursorIntensity, new_num_peaks, (char *)FilteredPeaks);
				spec_counter++;
			}
			scan_start_ptr = PeakStr + 8 * PeakCount;
		}
		else
			scan_start_ptr = ScanStr +50;
	}

	fclose(MZXMLFile);
	cout << spec_counter << " spectra..." << endl << endl;

	return spec_counter;
}


void create_annotated_mgf_from_dat(Config *config, 
                                   char *dat_list,
                                   char *mzxml_list,
                                   char *anns_file,
                                   char *output_file)
{
        vector<string> list, mzxml_names;

        read_paths_into_list(dat_list,list);

        vector< vector<int> >    annotation_idxs;
        vector<mzXML_annotation> annotations;

        read_mzXML_annotations(mzxml_list,anns_file, annotation_idxs, annotations);

        FileManager fm;
        fm.init_from_list_file_and_add_annotations(config,dat_list,annotation_idxs,annotations,true);

        FileSet fs;
        fs.select_all_files(fm);
        const vector<SingleSpectrumFile *>& all_ssf = fs.get_ssf_pointers();


        BasicSpecReader bsr;
        int i;
        ofstream mgf_stream(output_file);

        QCPeak peaks[5000];
        for (i=0; i<all_ssf.size(); i++)
        {
                int num_spec_peaks = bsr.read_basic_spec(config,fm,all_ssf[i],peaks);
                BasicSpectrum bs;
                bs.num_peaks = num_spec_peaks;
                bs.peaks = peaks;
                bs.ssf = all_ssf[i];
                bs.ssf->charge = 2;


                DAT_single *dat_single = (DAT_single *)all_ssf[i];
                char single_name[256];
                sprintf(single_name,"spec_%d_%d",dat_single->mzxml_file_idx,dat_single->scan_number);

                bs.ssf->single_name = single_name;

                bs.output_to_mgf(mgf_stream,config);
        }

        mgf_stream.close();
}


/*
void create_annotated_mgf_from_mgf(Config *config, 
								   char *mgf_list,
								   char *anns_file,
								   char *output_file)
{
	vector<string> list;

	cout << "MGF: " << mgf_list << endl;
	cout << "ANN: " << anns_file << endl;
	cout << "OUT: " << output_file << endl;

	read_paths_into_list(mgf_list,list);

	cout << "working on " << list.size() << " paths" << endl;

	vector< vector<int> >    annotation_idxs;
	vector<mzXML_annotation> annotations;

	read_mzXML_annotations_limited(mgf_list,anns_file, annotation_idxs, annotations);

	cout << "Annotations: " << annotations.size() << endl;

	FileManager fm;
	fm.init_from_list_file_and_add_annotations(config,mgf_list,annotation_idxs,annotations,true);

	FileSet fs;
	fs.select_all_files(fm);
	const vector<SingleSpectrumFile *>& all_ssf = fs.get_ssf_pointers();


	BasicSpecReader bsr;
	int i;
	ofstream mgf_stream(output_file);

	QCPeak peaks[5000];
	for (i=0; i<all_ssf.size(); i++)
	{
		int num_spec_peaks = bsr.read_basic_spec(config,fm,all_ssf[i],peaks,false,true);
		BasicSpectrum bs;
		bs.num_peaks = num_spec_peaks;
		bs.peaks = peaks;
		bs.ssf = all_ssf[i];
		bs.ssf->charge = 2;


		MGF_single *mgf_single = (MGF_single *)all_ssf[i];
		char single_name[256];
		sprintf(single_name,"spec_%d_%d",mgf_single->file_idx,mgf_single->idx_in_file);

		bs.ssf->single_name = single_name;

		bs.output_to_mgf(mgf_stream,config);
	}

	mgf_stream.close();
}
*/




int parse_single_MZXML_file_print_peaks(Config *config, 
										   string& mzxml_name, 
										   int the_scan)
										   
{
	static char* Buffer = NULL;
    static char* DecodedPeakBuffer = NULL;
    static float* Peaks = NULL;
	static float* FilteredPeaks = NULL;
	static int PeakBufferSize = 0;
    int Trail;
    static char* PrecursorStr;
    int FloatIndex;
    char* ByteOrderStr;
    int ByteOrderLittle = 1;

    int BytesToRead;
    int BufferStartPos = 0;
    int BytesRead;
    int BufferEnd = 0;
    FILE* MZXMLFile;
    int ParseState = 0;
    int FilePos = 0;
    

    // allocate
	if (! Buffer)
		Buffer = (char*)calloc(XML_BUFFER_SIZE + 1, sizeof(char));

    MZXMLFile = fopen(mzxml_name.c_str(), "rb");
    if (!MZXMLFile)
    {
        cout << "Error: Can't open MZXML file " <<  mzxml_name << endl;
        exit(1);
    }

  
	int idx_in_file = 0;
	int spec_counter =0;
	char *scan_start_ptr = NULL;
    while (1)
    {
		char* ScanStr;
		char* ScanNumberStr;
		int ScanNumber;
		char* MSLevelStr;
		int MSLevel;
		char *PrecursorStr;
		mass_t PrecursorMZ;
		char *retentionTimeStr;
		float retentionTime;
		char *precursorIntensityStr;
		float precursorIntensity;
		char* PeakStr;
		char* PeakBuffer;
		int  BufferPos;

        // Read more data, to fill up the buffer:
	
		if ( ! scan_start_ptr || 
			( (Buffer + BufferEnd - scan_start_ptr) < XML_BUFFER_HALF_SIZE) )
		{
			// try shunt half of the buffer
			if (scan_start_ptr)
			{
				if (BufferEnd - XML_BUFFER_HALF_SIZE>0)
				{
					memmove(Buffer, Buffer + XML_BUFFER_HALF_SIZE, BufferEnd - XML_BUFFER_HALF_SIZE);
					BufferEnd -= XML_BUFFER_HALF_SIZE;
					scan_start_ptr -= XML_BUFFER_HALF_SIZE;

//					cout << "MOVED!" << endl;
				}
			}
			else
				scan_start_ptr = Buffer;

			BytesToRead = XML_BUFFER_SIZE - BufferEnd;
			BytesRead = fread(Buffer + BufferEnd, sizeof(char), BytesToRead, MZXMLFile);


			if (BytesRead<5)
				break;

			BufferEnd += BytesRead;
			Buffer[BufferEnd] = '\0';

			
			// remove all '\0' from buffer (these cause parsing errors.
			// replace them with ' '
		//	const char *end_ptr = Buffer + BufferEnd - 2;
		//	for (char *ptr=Buffer; ptr<end_ptr; ptr++)
		//		if (*ptr == '\0')
		//			*ptr = ' ';
			
			FilePos += BytesRead;
		}

        // Look for a new <scan tag opening:
		// this scan cannot be done with strstr since there might be NULL termination
		const char *last_pos = Buffer + BufferEnd - 5;
		char *pos = scan_start_ptr;

		while (++pos<last_pos)
		{
			if (*pos != '<')
				continue;

			if (*(pos+1)=='s' && *(pos+2)=='c' && *(pos+3)=='a' && *(pos+4)=='n')
				break;
		}
		ScanStr =  (pos<last_pos) ? pos : NULL;

        if (ScanStr)
        {
			// if this is the case, read over more data to avoid the case where
			// the spectrum's record is not all in the buffer
			if (scan_start_ptr - Buffer > XML_BUFFER_HALF_SIZE)
			{
				scan_start_ptr = ScanStr-2;
				continue;
			}

            BufferPos = ScanStr - Buffer;
        }
        else
        {
            BufferPos = 0;
        }

        if (!ScanStr )
        {
			scan_start_ptr = Buffer + BufferEnd-5;
            continue;
        }

        ScanNumberStr = strstr(ScanStr, "num=");
        if (!ScanNumberStr)
        {
          //  printf("** Warning: mzXML parser encountered a scan with no scan number!  File %s Pos %d\n", 
		  //		   mzxml_name.c_str(), FilePos + BufferPos - BufferEnd);

            ScanNumber = -1;
        }
        else
        {
            ScanNumber = ParseIntFromXML(ScanNumberStr);
			scan_start_ptr = ScanNumberStr;
        }

		retentionTimeStr = strstr(ScanStr,"retentionTime=\"PT");
		if (! retentionTimeStr)
		{
		//	printf("Error: mzXML parser encountered a scan with no retnetion time: File %s Pos %d\n", 
		//		mzxml_name.c_str(), FilePos + BufferPos - BufferEnd);
		//	cout <<"SCANSTR:"<<endl << ScanStr << endl;
         //   exit(1);
			retentionTime = -1;
		}
		else
		{
			retentionTime = ParseMassFromXML(retentionTimeStr);
		}


		char *PeakCountStr = strstr(ScanStr, "peaksCount=\"");
		if (!PeakCountStr)
		{
			cout << "Warning: bad parsing peaks in mzxml. " << endl;
			cout << "Scan: " << ScanNumber << " Pos: " << FilePos << endl;
			scan_start_ptr += 50;
			continue;
		//	exit(1);
		}
		int PeakCount = ParseIntFromXML(PeakCountStr);
		
        MSLevelStr = strstr(ScanStr, "msLevel=");
        if (!MSLevelStr)
        {
            printf("** Warning: mzXML parser encountered a scan with no scan level!  File %s Pos %d\n", 
				mzxml_name.c_str(), FilePos + BufferPos - BufferEnd);

			scan_start_ptr += 50;
			continue;
            
        }
        else
        {
            MSLevel = ParseIntFromXML(MSLevelStr);
        }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -