⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 asciiimport.cpp

📁 非常著名的曲线拟合程序
💻 CPP
字号:
// This is -*- C++ -*-// $Id: AsciiImport.cpp,v 1.8 1999/05/17 23:52:07 hp Exp $/*  * AsciiImport.cpp * * Copyright (C) 1998 Asger Alstrup Nielsen * * Developed by Asger Alstrup Nielsen <alstrup@diku.dk> * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA * 02111-1307, USA. */#include <config.h>#include "AsciiImport.h"#include "RealSet.h"#include "StringSet.h"void AsciiImport::autosetup(int learn_lines) {  warning_controller().sleep();  if (heading_lines() == -1) {    autosetup_heading_lines();  }  // Now, skip the headings while keeping track of the maximum width  ascii_reader().restart(true);  paperwidth = 0;  for (int i=0; i < heading_lines(); ++i) {    size_t l = string_tool.expand_tabs(ascii_reader().readline(), 				       tab_size()).length();    if (l == 0) return;    if (l > paperwidth) paperwidth = l;  }  // Then collect the learning material  learning_lines.clear();  for (int i=0; i < learn_lines; ++i) {    string line = ascii_reader().readline();    if (line.empty()) break;    size_t width = string_tool.expand_tabs(line, tab_size()).length();    if (width > paperwidth) paperwidth = width;    learning_lines.push_back(line);  }  // Now, determine and setup the kind of column divider we have  if (column_divider_ == 0) {    autosetup_column_divider();  } else {    column_divider().autosetup(learning_lines, paperwidth);  }  // Set up the data converters  autosetup_dataconverters();  // And finally, set up the data sets  autosetup_datasets();  warning_controller().wake_up();}// This imports the datavoid AsciiImport::import(int lines) {  ascii_reader().restart(lines != -1);  // Reset performance results  missing_ = 0;  errors_ = 0;  correct_ = 0;  // First, we create the empty headings  headings().clear();  while (headings().size() <= column_divider().columns()) {    headings().push_back(string());  }  // Then we read the headings  for (int i=0; i < heading_lines(); ++i) {    string line = ascii_reader().readline();    if (line.empty())      break;    string_tools::strings cols = column_divider().split(line);    string superflous = enforceSize(cols);    for (size_t j=0; j < cols.size(); ++j) {      headings()[j] = string_tool.concatenate_words(headings()[j], cols[j]);      // If there was superflous text, add it into the heading      if (!superflous.empty() && j == cols.size() - 1) {	headings()[j] = string_tool.concatenate_words(headings()[j], superflous);      }    }  }  for (unsigned int i=0; i < column_divider().columns(); ++i) {    datasets()[i]->set_label(headings()[i]);  }  if (progress_ != 0)    {      progress_->start();      progress_->set_operation("Reading file...");    }  // And then the data  int l = 0;  while (l < lines || lines == -1) {    if (progress_ != 0)      {        if (l % 100 == 0) // should really be based on time, but.          {            if (progress_->cancelled())              break; // stop the loop            if (lines == -1)              {                progress_->pulse();              }            else              {                progress_->set_percent(float(l)/float(lines));              }          }      }    // Get a row    string line = ascii_reader().readline();    if (line.empty())      break;    ++l;    // Split it up    string_tools::strings cols = column_divider().split(line);    string superflous = enforceSize(cols);    if (!superflous.empty()) {      ++errors_;      warning_controller().warn(string("Ignoring superflous text '") + superflous + string("'"));    }    // And feed it into the container    for (size_t i=0; i < cols.size(); ++i) {      string canonical_value;      if (cols[i].empty()) {	++missing_;      } else {	string superflous;	canonical_value = dataconverters()[i]->to_canonical(cols[i], superflous);	if (superflous == cols[i]) {	  ++errors_;	  warning_controller().warn(string("Error parsing string '") + cols[i] + string("'"));	} else if (!superflous.empty()) {	  warning_controller().warn(string("Superflous chars '") + superflous + 				    string("' parsing string '") + cols[i] + string("'"));	  // We count it as a half correct	  ++correct_;	} else {	  // A correct gets two points	  ++correct_;	  ++correct_;	}      }      if (canonical_value.empty() && !datasets()[i]->support_missing()) {	// If we didn't get any value, and do not support missing data,	// and give a warning to this effetc	warning_controller().warn(string("Using default value in row ") 				  + string_tool.int2str(datasets()							[i]->size() + 1) 				  + string(" column ")				  + string_tool.int2str(i)				  + string(" because of missing data."));      }      datasets()[i]->push_back(canonical_value);    }  }  if (progress_ != 0)    progress_->stop();}string AsciiImport::enforceSize(string_tools::strings & result) {  string msg;  size_t size = column_divider().columns();  // Is the size correct?  if (result.size() != size) {    string empty;    while (result.size() < size) {      result.push_back(empty);    }    while (result.size() > size) {      string last=result.back();      result.pop_back();      if (!msg.empty()) {	msg = string("' '") + msg;      }      msg = last + msg;    }  }  return msg;}// Try to determine number of heading linesvoid AsciiImport::autosetup_heading_lines() {  // Algorithm:  When the number of numbers in a line  // is higher than the number of letters, we have data.  // The characters ',', '.', 'e', 'E', tab and space  // are judged ambigious, and thus not included in either count.  ascii_reader().restart(true);  set_heading_lines(0);  for (int i=0; i < 10; ++i) {    string line = ascii_reader().readline();    if (line.empty())       return;    size_t count_of_number_chars = 0;    size_t count_of_heading_chars = 0;    for (size_t j = 0; j < line.length(); ++j) {      char buf[2]; buf[0] = line[j]; buf[1] = 0;      if (string_tool.is_number_char(buf))	++count_of_number_chars;      else if (!string_tool.ambigious_number_char(buf)) 	++count_of_heading_chars;    }    if (count_of_number_chars > count_of_heading_chars) {      set_heading_lines(i);      return;    }  }  warning_controller().warn("Confused about headings. Assuming there are no headings.");  set_heading_lines(0);}// Try to determine the best column divider setupvoid AsciiImport::autosetup_column_divider(){  // If we have one, get rid of it  if (column_divider_ != 0) {    delete column_divider_;    column_divider_ = 0;  }  // We have to decide which kind of divider we want  // Algorithm:  Try all available kinds, and then  // choose the one with the best performance  DataConverters original_dataconverters = dataconverters();  Datasets original_datasets = datasets();  FixedColumns * fixed = new FixedColumns(tab_size());  fixed->autosetup(learning_lines, paperwidth);  column_divider_ = fixed;  autosetup_dataconverters();  autosetup_datasets();  import(learning_lines.size());  int score = correct() - errors();  dataconverters_ = original_dataconverters;  datasets_ = original_datasets;    DelimitDivider * delimit = new DelimitDivider();  delimit->autosetup(learning_lines, paperwidth);  column_divider_ = delimit;  autosetup_dataconverters();  autosetup_datasets();  import(learning_lines.size());  dataconverters_ = original_dataconverters;  datasets_ = original_datasets;  if (score >= int(correct() - errors())) {    column_divider_ = fixed;    delete delimit;  } else {    column_divider_ = delimit;    delete fixed;  }}// Try to setup the best data converters for each columnvoid AsciiImport::autosetup_dataconverters() {  for (size_t i=dataconverters().size(); i < column_divider().columns(); ++i) {    vector<string> training_column;    for (string_tools::strings::const_iterator j = learning_lines.begin();          j != learning_lines.end(); ++j) {      string line = *j;      string_tools::strings cols = column_divider().split(line);      string superflous = enforceSize(cols);      training_column.push_back(cols[i]);    }    // Now try each DataConvert until we find the best    RealConverter realconverter;    realconverter.autosetup(training_column);    double score = 0.0;    for (vector<string>::const_iterator i= training_column.begin();         i != training_column.end(); ++i) {      score += realconverter.identify(*i);    }//    warning_controller().warn("Score for column " + string_tool.int2str(i) + " is: " + //                            string_tool.int2str(int(score * 100.0)));    score = score / training_column.size();    if (score >= 0.80) {      RealConverter * type = new RealConverter;      type->autosetup(training_column);      dataconverters().push_back(type);      RealSet * data = new RealSet;      datasets().push_back(data);    } else {      StringConverter * type = new StringConverter;      type->autosetup(training_column);      dataconverters().push_back(type);      StringSet * data = new StringSet;      datasets().push_back(data);    }  }}void AsciiImport::autosetup_datasets() {}void AsciiImport::delete_dataconverters(DataConverters& dc){  DataConverters::iterator i = dc.begin();  while (i != dc.end())    {      delete *i;      ++i;    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -