📄 filemanagement.cpp
字号:
int i;
vector<string> list;
read_paths_into_list(list_file,list);
mass_t min_m_over_z=0;
mass_t max_m_over_z=100000;
num_spectra.resize(20,0);
for (i=0; i<list.size(); i++)
{
if (list[i][0] == '#')
continue;
bool read_file = file_indicators[i];
int last_pos = list[i].length()-1;
if (list[i][last_pos] == '\n' || list[i][last_pos] == '\r' ||
list[i][last_pos] == '\t' || list[i][last_pos] == '\f')
last_pos--;
if (list[i][last_pos-2]=='d' && list[i][last_pos-1]=='t' &&
list[i][last_pos ]=='a')
{
DTA_file dta_file;
dta_file.scan_dta(list[i],config);
if (dta_file.m_over_z>=min_m_over_z && dta_file.m_over_z<max_m_over_z)
dta_files.push_back(dta_file);
}
else if (list[i][last_pos-2]=='m' && list[i][last_pos-1]=='g' &&
list[i][last_pos ]=='f')
{
MGF_file mgf_file;
mgf_file.mgf_name =list[i];
if (read_file)
{
mgf_file.initial_read(config,mgf_files.size(),true);
// change the single spectrum pointers in the mgf file record
// to include only those that have a mass that is in the permitted range
vector<MGF_single> good_singles;
int j;
for (j=0; j<mgf_file.single_spectra.size(); j++)
{
if (mgf_file.single_spectra[j].m_over_z >= min_m_over_z &&
mgf_file.single_spectra[j].m_over_z < max_m_over_z)
{
good_singles.push_back(mgf_file.single_spectra[j]);
}
}
mgf_file.single_spectra = good_singles;
}
else
mgf_file.single_spectra.clear();
mgf_files.push_back(mgf_file);
}
else if (list[i][last_pos-4] == 'm' &&
list[i][last_pos-3] == 'z' &&
list[i][last_pos-2]=='X' && list[i][last_pos-1]=='M' &&
list[i][last_pos ]=='L')
{
MZXML_file mzxml;
mzxml.mzxml_name =list[i];
if (read_file)
{
mzxml.initial_read(config,mzxml_files.size());
// keep only the single spectra of this mzXML file whose m/z lies in
// [min_m_over_z, max_m_over_z)
vector<MZXML_single> good_singles;
int j;
for (j=0; j<mzxml.single_spectra.size(); j++)
{
if (mzxml.single_spectra[j].m_over_z >= min_m_over_z &&
mzxml.single_spectra[j].m_over_z < max_m_over_z)
{
good_singles.push_back(mzxml.single_spectra[j]);
}
}
mzxml.single_spectra = good_singles;
}
else
mzxml.single_spectra.clear();
mzxml_files.push_back(mzxml);
}
else if (list[i][last_pos-2]=='d' && list[i][last_pos-1]=='a' &&
list[i][last_pos ]=='t')
{
DAT_file dat;
dat.dat_name =list[i];
if (read_file)
{
dat.initial_read(config,dat_files.size());
// keep only the single spectra of this DAT file whose m/z lies in
// [min_m_over_z, max_m_over_z)
vector<DAT_single> good_singles;
int j;
for (j=0; j<dat.single_spectra.size(); j++)
{
if (dat.single_spectra[j].m_over_z >= min_m_over_z &&
dat.single_spectra[j].m_over_z < max_m_over_z)
{
good_singles.push_back(dat.single_spectra[j]);
}
}
dat.single_spectra = good_singles;
}
else
dat.single_spectra.clear();
dat_files.push_back(dat);
}
else if (list[i][last_pos-2]=='m' && list[i][last_pos-1]=='s' &&
list[i][last_pos ]=='2')
{
MS2_file ms2_file;
ms2_file.ms2_name =list[i];
ms2_file.initial_read(config,ms2_files.size(),true);
// keep only the single spectra of this MS2 file whose m/z lies in
// [min_m_over_z, max_m_over_z)
vector<MS2_single> good_singles;
int j;
for (j=0; j<ms2_file.single_spectra.size(); j++)
{
if (ms2_file.single_spectra[j].m_over_z >= min_m_over_z &&
ms2_file.single_spectra[j].m_over_z < max_m_over_z)
{
good_singles.push_back(ms2_file.single_spectra[j]);
}
}
ms2_file.single_spectra = good_singles;
ms2_files.push_back(ms2_file);
}
else
{
cout << "Error: couldn't recognize file type for:: " << list[i] << endl;
exit(1);
}
}
count_num_spectra();
}
// Builds the MGF / mzXML / DAT file records from the paths in list_file,
// keeping only the single spectra that have an annotation.
//
// annotation_idxs[f][s] >= 0 marks spectrum s of file f as annotated; that
// value is copied into the spectrum's ann_idx. For mzXML files f is the
// position of the path in the list and s the scan number. MGF singles are
// looked up by their file_idx / scan_number and DAT singles by their
// mzxml_file_idx / scan_number. Files or scans that fall outside
// annotation_idxs are treated as unannotated.
//
// Paths starting with '#' are skipped. MGF and mzXML files whose own row in
// annotation_idxs (by list position) holds no annotation are recorded by name
// without being read; DAT files are always read. A path that does not end in
// "mgf", "mzXML" or "dat" prints an error and terminates the program with
// exit(1).
//
// Only mgf_files is cleared here; mzxml_files and dat_files are appended to.
void FileManager::init_from_list_file(Config *config, const char* list_file,
                                      const vector< vector<int> >& annotation_idxs)
{
    vector<string> list;
    read_paths_into_list(list_file,list);

    mgf_files.clear();

    const int num_paths     = static_cast<int>(list.size());
    const int num_ann_files = static_cast<int>(annotation_idxs.size());

    // stands in for list entries that have no row in annotation_idxs
    const vector<int> no_annotations;

    int total_dat_read=0;

    for (int i=0; i<num_paths; i++)
    {
        const string& path = list[i];
        if (! path.empty() && path[0] == '#')
            continue;

        // The file type is decided by the trailing characters of the path.
        // The length guards keep short or empty paths from being indexed
        // out of range; such paths end up in the "unrecognized" branch.
        const string::size_type len = path.length();
        const bool is_mgf   = (len >= 3 && path.compare(len-3, 3, "mgf")   == 0);
        const bool is_mzxml = (len >= 5 && path.compare(len-5, 5, "mzXML") == 0);
        const bool is_dat   = (len >= 3 && path.compare(len-3, 3, "dat")   == 0);

        // annotation row of this list entry (empty if the list is longer
        // than annotation_idxs)
        const vector<int>& anns = (i < num_ann_files) ? annotation_idxs[i] : no_annotations;
        const int ann_size = static_cast<int>(anns.size());

        // only MGF and mzXML files use this to decide whether to read at all
        bool has_anns = false;
        if (is_mgf || is_mzxml)
        {
            for (int a=0; a<ann_size && ! has_anns; a++)
                has_anns = (anns[a] >= 0);
        }

        if (is_mgf)
        {
            MGF_file mgf;
            mgf.mgf_name = path;

            if (has_anns)
            {
                mgf.initial_read(config,mgf_files.size(),true);
                cout << i << " " << mgf.mgf_name << " ";

                // keep only the singles that have an annotation
                vector<MGF_single> good_singles;
                for (size_t j=0; j<mgf.single_spectra.size(); j++)
                {
                    MGF_single& single = mgf.single_spectra[j];
                    const int mgf_file_idx = single.file_idx;
                    const int scan_number  = single.scan_number;

                    if (mgf_file_idx < 0 || mgf_file_idx >= num_ann_files)
                        continue;

                    const vector<int>& file_anns = annotation_idxs[mgf_file_idx];
                    if (scan_number < 0 ||
                        scan_number >= static_cast<int>(file_anns.size()) ||
                        file_anns[scan_number] < 0)
                        continue;

                    single.ann_idx = file_anns[scan_number];
                    good_singles.push_back(single);
                }
                cout << good_singles.size() << " ..." << endl;
                mgf.single_spectra = good_singles;
            }
            mgf_files.push_back(mgf);
        }
        else if (is_mzxml)
        {
            MZXML_file mzxml;
            mzxml.mzxml_name = path;

            if (has_anns)
            {
                // the list position is used as the mzXML file index
                mzxml.initial_read(config,i);

                // keep only the singles whose scan number has an annotation
                vector<MZXML_single> good_singles;
                for (size_t j=0; j<mzxml.single_spectra.size(); j++)
                {
                    MZXML_single& single = mzxml.single_spectra[j];
                    const int scan_num = single.scan_number;

                    if (scan_num >= 0 && scan_num < ann_size && anns[scan_num] >= 0)
                    {
                        single.ann_idx = anns[scan_num];
                        good_singles.push_back(single);
                    }
                }
                mzxml.single_spectra = good_singles;
            }
            mzxml_files.push_back(mzxml);
        }
        else if (is_dat)
        {
            DAT_file dat;
            dat.dat_name = path;
            dat.initial_read(config,dat_files.size());
            cout << dat.dat_name << " ";

            // keep only the singles that have an annotation; each DAT single
            // names the mzXML file it belongs to
            vector<DAT_single> good_singles;
            for (size_t j=0; j<dat.single_spectra.size(); j++)
            {
                DAT_single& single = dat.single_spectra[j];
                const int mzxml_file_idx = single.mzxml_file_idx;
                const int scan_number    = single.scan_number;

                if (mzxml_file_idx < 0 || mzxml_file_idx >= num_ann_files)
                    continue;

                const vector<int>& file_anns = annotation_idxs[mzxml_file_idx];
                if (scan_number < 0 ||
                    scan_number >= static_cast<int>(file_anns.size()) ||
                    file_anns[scan_number] < 0)
                    continue;

                single.ann_idx = file_anns[scan_number];
                good_singles.push_back(single);
            }
            cout << good_singles.size() << " ..." << endl;
            total_dat_read += good_singles.size();
            dat.single_spectra = good_singles;
            dat_files.push_back(dat);
        }
        else
        {
            cout << "Error: couldn't recognize file type for: " << list[i] << endl;
            exit(1);
        }
    }

    count_num_spectra();

    if (total_dat_read>0)
    {
        cout << "Read " << total_dat_read << " DAT spectra" << endl;
    }
}
// Builds the MGF / mzXML / DAT file records from the paths in list_file and
// writes the annotations into the single spectra.
//
// annotation_idxs[f][s] >= 0 is taken as an index into annotations for
// spectrum s of file f. For an annotated spectrum the peptide is parsed from
// annotations[idx].pep (MGF and mzXML spectra also receive
// annotations[idx].charge). Unannotated spectra are kept only when
// read_only_annotated is false. Files or scans that fall outside
// annotation_idxs are treated as unannotated.
// Assumes every non-negative entry of annotation_idxs is a valid index into
// annotations; this is not checked here.
//
// Paths starting with '#' are skipped. MGF and DAT files are always read. An
// mzXML file is read only if its own row in annotation_idxs (by list
// position) holds at least one annotation; otherwise it is recorded by name
// only. A path that does not end in "mgf", "mzXML" or "dat" prints an error
// and terminates the program with exit(1).
//
// Only mgf_files is cleared here; mzxml_files and dat_files are appended to.
void FileManager::init_from_list_file_and_add_annotations(Config *config, const char* list_file,
    const vector< vector<int> >& annotation_idxs, vector<mzXML_annotation>& annotations,
    bool read_only_annotated )
{
    vector<string> list;
    read_paths_into_list(list_file,list);

    mgf_files.clear();

    const int num_paths     = static_cast<int>(list.size());
    const int num_ann_files = static_cast<int>(annotation_idxs.size());

    // stands in for list entries that have no row in annotation_idxs
    const vector<int> no_annotations;

    int total_dat_read=0;

    for (int i=0; i<num_paths; i++)
    {
        const string& path = list[i];
        if (! path.empty() && path[0] == '#')
            continue;

        // The file type is decided by the trailing characters of the path.
        // The length guards keep short or empty paths from being indexed
        // out of range; such paths end up in the "unrecognized" branch.
        const string::size_type len = path.length();
        const bool is_mgf   = (len >= 3 && path.compare(len-3, 3, "mgf")   == 0);
        const bool is_mzxml = (len >= 5 && path.compare(len-5, 5, "mzXML") == 0);
        const bool is_dat   = (len >= 3 && path.compare(len-3, 3, "dat")   == 0);

        if (is_mgf)
        {
            MGF_file mgf;
            mgf.mgf_name = path;
            mgf.initial_read(config,mgf_files.size(),true);
            cout << mgf.mgf_name << " ";

            // annotate the singles; drop the unannotated ones if requested
            vector<MGF_single> good_singles;
            for (size_t j=0; j<mgf.single_spectra.size(); j++)
            {
                MGF_single& single = mgf.single_spectra[j];
                const int mgf_file_idx = single.file_idx;
                const int scan_number  = single.idx_in_file;

                // -1 unless a valid, non-negative annotation index is found
                int ann_val = -1;
                if (mgf_file_idx >= 0 && mgf_file_idx < num_ann_files)
                {
                    const vector<int>& file_anns = annotation_idxs[mgf_file_idx];
                    if (scan_number >= 0 &&
                        scan_number < static_cast<int>(file_anns.size()))
                        ann_val = file_anns[scan_number];
                }

                if (ann_val >= 0)
                {
                    single.peptide.parse_from_string(config,annotations[ann_val].pep);
                    single.charge = annotations[ann_val].charge;
                    good_singles.push_back(single);
                }
                else if (! read_only_annotated)
                    good_singles.push_back(single);
            }
            cout << good_singles.size() << " ..." << endl;
            mgf.single_spectra = good_singles;
            mgf_files.push_back(mgf);
        }
        else if (is_mzxml)
        {
            MZXML_file mzxml;
            mzxml.mzxml_name = path;
            cout << i << " " << list[i] << endl;

            // A list entry beyond the end of annotation_idxs has no
            // annotations, so the file is recorded without being read
            // whatever read_only_annotated says.
            const vector<int>& anns = (i < num_ann_files) ? annotation_idxs[i] : no_annotations;
            const int ann_size = static_cast<int>(anns.size());

            bool has_anns = false;
            for (int a=0; a<ann_size && ! has_anns; a++)
                has_anns = (anns[a] >= 0);

            if (has_anns)
            {
                // the list position is used as the mzXML file index
                mzxml.initial_read(config,i);

                vector<MZXML_single> good_singles;
                for (size_t j=0; j<mzxml.single_spectra.size(); j++)
                {
                    MZXML_single& single = mzxml.single_spectra[j];
                    const int scan_num = single.scan_number;

                    if (scan_num >= 0 && scan_num < ann_size && anns[scan_num] >= 0)
                    {
                        const int ann_val = anns[scan_num];
                        single.peptide.parse_from_string(config,annotations[ann_val].pep);
                        single.charge = annotations[ann_val].charge;
                        good_singles.push_back(single);
                    }
                    else if (! read_only_annotated)
                        good_singles.push_back(single);
                }
                mzxml.single_spectra = good_singles;
            }
            mzxml_files.push_back(mzxml);
        }
        else if (is_dat)
        {
            DAT_file dat;
            dat.dat_name = path;
            dat.initial_read(config,dat_files.size());
            cout << dat.dat_name << " ";

            // annotate the singles; each DAT single names the mzXML file it
            // belongs to
            vector<DAT_single> good_singles;
            for (size_t j=0; j<dat.single_spectra.size(); j++)
            {
                DAT_single& single = dat.single_spectra[j];
                const int mzxml_file_idx = single.mzxml_file_idx;
                const int scan_number    = single.scan_number;

                // -1 unless a valid, non-negative annotation index is found
                int ann_val = -1;
                if (mzxml_file_idx >= 0 && mzxml_file_idx < num_ann_files)
                {
                    const vector<int>& file_anns = annotation_idxs[mzxml_file_idx];
                    if (scan_number >= 0 &&
                        scan_number < static_cast<int>(file_anns.size()))
                        ann_val = file_anns[scan_number];
                }

                if (ann_val >= 0)
                {
                    // NOTE(review): unlike the MGF and mzXML branches, the
                    // charge is not copied from the annotation here - confirm
                    // this is intended.
                    single.peptide.parse_from_string(config,annotations[ann_val].pep);
                    good_singles.push_back(single);
                }
                else if (! read_only_annotated)
                    good_singles.push_back(single);
            }
            cout << good_singles.size() << " ..." << endl;
            total_dat_read += good_singles.size();
            dat.single_spectra = good_singles;
            dat_files.push_back(dat);
        }
        else
        {
            cout << "Error: couldn't recognize file type for: " << list[i] << endl;
            exit(1);
        }
    }

    count_num_spectra();

    if (total_dat_read>0)
    {
        cout << "Read " << total_dat_read << " DAT spectra" << endl;
    }
}
// Fills this record from the DTA file named by single_name.
//
// single_name must already be set; if it is empty an error is printed and the
// program terminates with exit(1). The file is loaded into a temporary
// Spectrum (read_from_dta followed by init_spectrum) and the peptide, charge,
// precursor masses, peak count and m/z reported by that Spectrum are copied
// into this record. The record's type is set to DTA.
void DTA_file::initial_read(Config *config)
{
    if (single_name.empty())
    {
        cout << "Error: must first copy name to DTA_file!" << endl;
        exit(1);
    }

    Spectrum spectrum;
    spectrum.read_from_dta(config,single_name.c_str());
    spectrum.init_spectrum();

    type           = DTA;
    peptide        = spectrum.get_peptide();
    charge         = spectrum.get_charge();
    org_pm_with_19 = spectrum.get_org_pm_with_19();
    pm_with_19     = spectrum.get_corrected_pm_with_19();
    num_peaks      = spectrum.get_num_peaks();
    m_over_z       = spectrum.get_m_over_z();
}
void MGF_file::initial_read(Config *config, int file_idx, bool quick_flag)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -