⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 columndivider.cpp

📁 非常著名的曲线拟合程序
💻 CPP
字号:
// This is -*- C++ -*-// $Id: ColumnDivider.cpp,v 1.2 1999/03/16 18:27:59 alstrup Exp $/*  * ColumnDivider.cpp * * Copyright (C) 1998 Asger Alstrup Nielsen * * Developed by Asger Alstrup Nielsen <alstrup@diku.dk> * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA * 02111-1307, USA. */#include <config.h>#include "ColumnDivider.h"#include <algorithm>// #include <iostream>void FixedColumns::autosetup(string_tools::strings const & lines, size_t paperwidth) {   spans_.clear();  // If we don't get any learning material, we define the  // column to be the entire line  if (lines.empty()) {    Span s(0, paperwidth);    spans_.push_back(s);    set_columns(1);    return;  }  learn = lines;  // Expand tabs  for (string_tools::strings::iterator i = learn.begin(); i < learn.end(); ++i) {    *i = string_tool.expand_tabs(*i, tab_size());  }  // How much non-whitespace is allowed in an empty column?  size_t threshold = size_t(learn.size() * 0.05);  if (learn.size() >= 10) {    threshold += 1;    // We cut some slack  }  // TODO: Refine this algorithm by first collecting the numbers  // for each column.  Then iterate over the numbers to filter  // out too small columns.  Then iterate to find the break-points  // in the middle of empty spans if no bias exist.  // When we have bias, we want to be more clever:  // We want to break [4 4 4 0 0 1 4 4] as [4 4 4] [0 0 1 4 4].  // We want to break [4 4 1 0 0 4 4 4] as [4 4 1 0 0] [4 4 4].  bool lastempty = true;  size_t colstart = 0;  bool open = false;  for (size_t i=0; i <= paperwidth; ++i) {    bool empty = countColumn(i, threshold) <= threshold;    if (!empty)      open = true;    else if (!lastempty) {      // We demand that a column is at least two characters wide      if ((i-1) - colstart > 1) {	Span s(colstart, i-1);	spans_.push_back(s);	colstart = i;	open = false;      }    }    lastempty = empty;  }  if (open) {    Span s(colstart, paperwidth);    spans_.push_back(s);  } else {    spans_.back().second = paperwidth;  }  set_columns(spans_.size());#if 0  for (Spans::const_iterator i= spans_.begin(); i != spans_.end(); i++) {    cout << "[" << i->first << ", " << i->second << "] ";  }  cout << endl;#endif}// Counts how many non-white-space characters we have in a particular column// If the count surpassed a threshold, we return the thresholdsize_t FixedColumns::countColumn(size_t col, size_t threshold) {  size_t not_whitespace = 0;  for (string_tools::strings::iterator j = learn.begin(); j < learn.end(); ++j) {    if (col < (*j).length() && (*j)[col] != ' ') {      ++not_whitespace;      if (not_whitespace > threshold)	return not_whitespace;    }  }  return not_whitespace;}// Split a line into stringsstring_tools::strings FixedColumns::split(string const & line) {  string l = string_tool.expand_tabs(line, tab_size());  string_tools::strings result;  for (Spans::const_iterator s = spans().begin(); s < spans().end(); ++s) {    size_t from = (*s).first;    size_t to = (*s).second;    string col;    if (from < l.length()) {      if (to > l.length())	to = l.length() - 1;      col = string_tool.substring(l, from, to);      if (handle_spill() && l[to] != ' ') {	// Ok, we have to handle spilling.	++to;	while (l[to] != ' ' && to < l.length()) {	  col += l[to];	  l.erase(to, 1);	}      }    }    result.push_back(col);  }  return result;}void DelimitDivider::autosetup(string_tools::strings const & lines, size_t paperwidth) {   // Without learning material, we assume whitespace is the delimiter  // and we only have one column  if (lines.empty()) {    set_delimiter(" ");     set_whitespace(" \t");    set_columns(1);    return;  }  // Else, we try each delimiter, and chooses the one with most columns  string_tools::strings delimiters;  string_tools::strings whitespaces;  delimiters.push_back(";");  whitespaces.push_back(" \t");  delimiters.push_back(":");  whitespaces.push_back(" \t");  delimiters.push_back("/");  whitespaces.push_back(" \t");  delimiters.push_back("|");  whitespaces.push_back(" \t");  delimiters.push_back("*");  whitespaces.push_back(" \t");  delimiters.push_back(",");  whitespaces.push_back(" \t");  delimiters.push_back(" ");  whitespaces.push_back(" \t");  delimiters.push_back("\t"); whitespaces.push_back(" \t");  size_t most_columns = 0;  size_t most_frequent = 0;  int best_delimiter = 0;  // How many of the training-lines had this number of columns?  Numbers delimiter_frequency;  for (size_t i = 0 ; i < delimiters.size(); ++i) {    set_delimiter(delimiters[i]);    set_whitespace(whitespaces[i]);//    cout << "Delimiter: '" << delimiter() << "' Whitespace: '" << whitespace() << "':\n";    // The number of columns for each training-line    Numbers columns;    for (string_tools::strings::const_iterator l=lines.begin(); l < lines.end(); ++l) {      size_t cols = split(*l).size();      columns.push_back(cols);//      cout << cols << " ";    }    // The number of columns for this delimiter is the one    // that appears the most    size_t most_com = most_common(columns);    size_t frequency = count(columns.begin(), columns.end(), most_com);//    cout << " most common: " << most_com << " freq: " << frequency << "\n";    if (most_com > 1 && frequency > most_frequent) {      most_columns = most_com;      best_delimiter = i;      most_frequent = frequency;    }  }#if 0  cout << "Most columns: " << most_columns << " frequency: " << most_frequent       << " delimiter '" << delimiters[best_delimiter].c_str() << "' whitespace '"       << whitespaces[best_delimiter].c_str() << "'\n";#endif  set_delimiter(delimiters[best_delimiter]);  set_whitespace(whitespaces[best_delimiter]);  set_columns(most_columns);}// Returns the most common value in a listsize_t DelimitDivider::most_common(Numbers const & list) {  size_t max_value = *max_element(list.begin(), list.end());  Numbers frequency(max_value + 1, 0);  for (Numbers::const_iterator i = list.begin(); i < list.end(); ++i) {    ++frequency[*i];  }  return max_element(frequency.begin(), frequency.end()) - frequency.begin();}string_tools::strings DelimitDivider::split(string const & l) {  // TODO: Add the special handling such that a delimiter ", "  //       with whitespace " \t" will make the delimiter match ",\t" too.  // This is needed to be able to handle strings like "12,2, 23,0, 25,2, 23,0"  string line = l;  string_tools::strings result;  while (!line.empty()) {    string col;    string_tool.split(line, delimiter(), col, line);    col = string_tool.trim(col, whitespace());    result.push_back(col);//       cout << "<" << col.c_str() << ">";    line = string_tool.cut(line, whitespace());  }//    cout << "\n";  return result;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -