⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tabdelim.cpp

📁 orange源码 数据挖掘技术
💻 CPP
📖 第 1 页 / 共 3 页
字号:
{ const_ITERATE(vector<string>, ai, atoms)
    if ((*ai).length())
      return false;
  return true;
}


/* Removes all empty strings from the end of 'atoms' (empty strings that are
   followed by a non-empty one are kept) and returns the number of atoms left. */
int trimAtomsList(vector<string> &atoms)
{
  // Peel trailing empty atoms off one by one; stops at the first non-empty
  // atom or when nothing is left.
  while (!atoms.empty() && atoms.back().empty())
    atoms.pop_back();

  return atoms.size();
}

/* Called right after a '\r' has been read: consumes the '\n' of a "\r\n" pair.
   Any other character is pushed back so that it starts the next line. */
static void readTabAtom_skipLfAfterCr(FILE *file)
{
  const int next = fgetc(file);
  if ((next != '\n') && (next != EOF))
    ungetc(next, file);
}


/*  Reads one line of a tab- or comma-delimited file and splits it into atoms.

    Atoms are separated by \t, and also by ',' if csv=true. A line ends with
    \n, \r or \r\n. Characters below ' ' that are not separators are dropped;
    spaces are stored as they are. Atoms are passed through trim() before they
    are stored, and trailing empty atoms are removed with trimAtomsList().
    A line whose first character is '|' is a comment; it is skipped together
    with its line terminator.

    If escapeSpaces=true, a backslash followed by a space yields a plain space.
    A backslash followed by anything else is kept, and the character after it
    is stored if it is not below ' ' (it is never treated as a separator or as
    the end of the line).

    fei.line is incremented once per call that attempts to read a line.

    Returns number of atoms, -1 for comment line and -2 for EOF.
    Calls raiseErrorWho() if the file is not opened or a read error occurred.
    */
int readTabAtom(TFileExampleIteratorData &fei, vector<string> &atoms, bool escapeSpaces, bool csv)
{
  atoms.clear();

  if (!fei.file)
    raiseErrorWho("TabDelimExampleGenerator", "file not opened");

  if (feof(fei.file))
    return -2;

  fei.line++;

  // fgetc() returns an int; storing it in a char would make the data byte 0xFF
  // indistinguishable from EOF (or make EOF undetectable where char is unsigned).
  int c;
  bool atLineStart = true;
  string atom;
  for(;;) {
    c = fgetc(fei.file);

    if (c==EOF)
      break;

    if (atLineStart && (c=='|')) {
      // comment line: discard everything up to and including the line terminator
      do
        c = fgetc(fei.file);
      while ((c!='\r') && (c!='\n') && (c!=EOF));
      if (c == '\r')
        readTabAtom_skipLfAfterCr(fei.file);
      return -1;
    }

    atLineStart = false;

    switch(c) {
      case '\r':
      case '\n':
        if (atom.length() || atoms.size())
          atoms.push_back(trim(atom));  // end of line
        if (c == '\r')
          readTabAtom_skipLfAfterCr(fei.file);
        return trimAtomsList(atoms);

      case '\t':
        atoms.push_back(trim(atom));
        atom = string();
        break;

      case ',':
        if (csv) {
          atoms.push_back(trim(atom));
          atom = string();
          break;
        }
        // not a separator here: fall through and store it

      case ' ':
        atom += static_cast<char>(c);
        break;

      case '\\':
        if (escapeSpaces) {
          c = fgetc(fei.file);
          if (c != ' ')
            atom += '\\';
        }
        // fall through: store the (possibly just read) character

      default:
        // drops control characters; EOF (negative) is never stored and is
        // picked up again by the fgetc() at the top of the loop
        if (c >= ' ')
          atom += static_cast<char>(c);
    };
  }
  
  if (ferror(fei.file))
    raiseErrorWho("TabDelimExampleGenerator", "error while reading line %i of file '%s'", fei.line, fei.filename.c_str());

  // The file ended without a line terminator.
  // NOTE(review): unlike everywhere else, the last atom is trimmed only for csv here -- confirm this is intended.
  if (atom.length() || atoms.size())
    atoms.push_back(csv ? trim(atom) : atom);

  return trimAtomsList(atoms);
}




// ********* Output ********* //


/* Writes the field delimiter before every field except the first one on a line.
   Expands in place and relies on three names being in scope where it is used:
   'ho' (a bool that is false until the first field has been handled),
   'delim' (the delimiter character) and 'file' (the output stream). */
#define PUTDELIM { if (ho) putc(delim, file); else ho = true; }

/* NOTE(review): the body is empty, so calling this function writes nothing to
   'file' -- confirm whether it is an unfinished stub or intentionally a no-op. */
void tabDelim_writeExample(FILE *file, const TExample &ex, char delim)
{ 
}


/* Writes every example produced by 'rg' to 'file', one line per example, with
   fields separated by 'delim'.

   Fields, in order:
     - the values of the variables of the generator's domain;
     - the values of the non-optional meta attributes of the example's domain;
     - one field listing the optional meta attributes of type FLOATVAR which
       the example has and whose value is not special, separated by spaces:
       "name" if the value equals 1.0 and "name=value" otherwise. The field
       is omitted if there is nothing to list.

   DK and DC, when not NULL, are written instead of values whose valueType is
   valueDK and valueDC, respectively; all other values are converted with
   val2filestr().

   All texts are written through an explicit "%s" format, so a '%' in a name,
   a value or in DK/DC is output literally instead of being interpreted as a
   conversion specification.
*/
void tabDelim_writeExamples(FILE *file, PExampleGenerator rg, char delim, const char *DK, const char *DC)
{
  // bound by reference: there is no need to copy the whole domain
  const TDomain &domain = rg->domain.getReference();
  TVarList::const_iterator vb(domain.variables->begin()), vi, ve(domain.variables->end());

  PEITERATE(ex, rg) {
    vi = vb;
    TExample::const_iterator ri((*ex).begin());
    string st;
    bool ho = false;

    for(; vi!=ve; vi++, ri++) {
      PUTDELIM;
      if (DK && ((*ri).valueType == valueDK))
        fprintf(file, "%s", DK);
      else if (DC && ((*ri).valueType == valueDC))
        fprintf(file, "%s", DC);
      else {
        (*vi)->val2filestr(*ri, st, *ex);
        fprintf(file, "%s", st.c_str());
      }
    }

    TMetaVector::const_iterator mb((*ex).domain->metas.begin()), mi, me((*ex).domain->metas.end());

    for(mi = mb; mi != me; mi++) {
      if (!(*mi).optional) {
        PUTDELIM;
        // Test the meta value itself; 'ri' has already run past the last
        // regular value and must not be dereferenced here.
        const TValue &metaValue = (*ex)[(*mi).id];
        if (DK && (metaValue.valueType == valueDK))
          fprintf(file, "%s", DK);
        else if (DC && (metaValue.valueType == valueDC))
          fprintf(file, "%s", DC);
        else {
          (*mi).variable->val2filestr(metaValue, st, *ex);
          fprintf(file, "%s", st.c_str());
        }
      }
    }
    
    // 'first' makes sure the delimiter is written once, and only if at least
    // one optional meta value is actually listed
    bool first = true;
    for(mi = mb; mi != me; mi++) {
      if ((*mi).optional) {
        const TVariable &var = (*mi).variable.getReference();
        if ((var.varType == TValue::FLOATVAR) && (*ex).hasMeta((*mi).id)) {
          const TValue &mval = (*ex).getMeta((*mi).id);
          if (!mval.isSpecial()) {
            if (first) {
              PUTDELIM;
              first = false;
            }
            else
              fprintf(file, " ");

            if (mval.floatV == 1.0)
              fprintf(file, "%s", var.name.c_str());
            else {
              var.val2filestr(mval, st, *ex);
              fprintf(file, "%s=%s", var.name.c_str(), st.c_str());
            }
          }
        }
      }
    }
    fprintf(file, "\n");
  }
}

/* Returns a copy of 's' in which every space is preceded by a backslash. */
string escSpaces(const string &s)
{
  string escaped;
  for(string::const_iterator ch(s.begin()), chEnd(s.end()); ch != chEnd; ++ch) {
    if (*ch == ' ')
      escaped += "\\ ";
    else
      escaped += *ch;
  }
  return escaped;
}

extern TOrangeType PyOrPythonVariable_Type;

/* Writes the type descriptor of 'var' to 'file':
     - TEnumVariable: "d"; but if listDiscreteValues is true and the variable
       has at least one value, the list of its values separated by spaces
       (each value is passed through escSpaces() first);
     - TFloatVariable: "continuous";
     - TStringVariable: "string";
     - TPythonVariable: "python" if the Python type of the variable is
       PyOrPythonVariable_Type, otherwise "python:" followed by the type's
       __name__.
   Calls raiseErrorWho() for any other variable type, or if the name of the
   Python type cannot be obtained.

   Values and names are written through a "%s" format so that a '%' in them is
   output literally.
*/
void printVarType(FILE *file, PVariable var, bool listDiscreteValues)
{
  TEnumVariable *enumv = var.AS(TEnumVariable);
  if (enumv) {
    TValue val;
    string sval;
    if (!enumv->firstValue(val) || !listDiscreteValues)
      fprintf(file, "d");
    else {
      enumv->val2str(val, sval); 
      fprintf(file, "%s", escSpaces(sval).c_str());
      while(enumv->nextValue(val)) {
        enumv->val2str(val, sval);
        fprintf(file, " %s", escSpaces(sval).c_str());
      }
    }
  }
  else if (var.is_derived_from(TFloatVariable))
    fprintf(file, "continuous");
  else if (var.is_derived_from(TStringVariable))
    fprintf(file, "string");
  else if (var.is_derived_from(TPythonVariable)) {
    if (var.counter->ob_type == (PyTypeObject *)&PyOrPythonVariable_Type)
      fprintf(file, "python");
    else {
      // Both calls return NULL on failure; neither result may be used unchecked.
      PyObject *pyclassname = PyObject_GetAttrString((PyObject *)(var.counter)->ob_type, "__name__");
      const char *classname = pyclassname ? PyString_AsString(pyclassname) : NULL;
      if (classname)
        fprintf(file, "python:%s", classname);
      // release the reference before reporting the error, so it is not leaked
      if (pyclassname) {
        Py_DECREF(pyclassname);
      }
      if (!classname)
        raiseErrorWho("tabDelim_writeDomain", "cannot determine the class name of a Python variable");
    }
  }  
  else
    raiseErrorWho("tabDelim_writeDomain", "tabDelim format supports only discrete, continuous and string variables");
}


void tabDelim_writeDomainWithoutDetection(FILE *file, PDomain dom, char delim, bool listDiscreteValues)
{ 
  TVarList::const_iterator vi, vb(dom->variables->begin()), ve(dom->variables->end());
  TMetaVector::const_iterator mi, mb(dom->metas.begin()), me(dom->metas.end());

  bool ho = false;
  bool hasOptionalFloats = false;

  // First line: attribute names
  for(vi = vb; vi!=ve; vi++) {
    PUTDELIM;
    fprintf(file, "%s", (*vi)->name.c_str());
  }
  for(mi = mb; mi!=me; mi++) {
    if (mi->optional) {
      if ((*mi).variable->varType == TValue::FLOATVAR)
        hasOptionalFloats = true;
    }
    else {
      PUTDELIM;
      fprintf(file, "%s", (*mi).variable->name.c_str());
    }
  }

  if (hasOptionalFloats) {
    PUTDELIM;
    fprintf(file, "__basket_foo");
  }

  fprintf(file, "\n");

  
  // Second line: types
  ho = false;
  for(vi = vb; vi!=ve; vi++) {
    PUTDELIM;
    printVarType(file, *vi, listDiscreteValues);
  }
  for(mi = mb; mi!=me; mi++) {
    if (mi->optional)
      continue;
    PUTDELIM;
    printVarType(file, (*mi).variable, listDiscreteValues);
  }

  if (hasOptionalFloats) {
    PUTDELIM;
    fprintf(file, "basket");
  }

  fprintf(file, "\n");


  // Third line: "meta" and "-ordered"
  ho = false;
  for(vb = vi = dom->attributes->begin(), ve = dom->attributes->end(); vi!=ve; vi++) {
    PUTDELIM;
    if (((*vi)->varType == TValue::INTVAR) && (*vi)->ordered)
      fprintf(file, "-ordered");
  }
  if (dom->classVar) {
    PUTDELIM;
    fprintf(file, "class");
  }
  for(mi = mb; mi!=me; mi++) {
    if (mi->optional)
      continue;
    PUTDELIM;
    fprintf(file, "meta");
    if (((*mi).variable->varType == TValue::INTVAR) && (*mi).variable->ordered)
      fprintf(file, " -ordered");
 }

 if (hasOptionalFloats)
   PUTDELIM;

 fprintf(file, "\n");
}


/* Tells whether the name of a discrete variable needs the prefix which marks it
   as discrete, because otherwise its values could be mistakenly read as
   continuous.

   Returns false if 'var' is not a TEnumVariable. Returns true if the variable
   has no values. Otherwise each value is examined, with commas treated as
   decimal dots:
     - a value which is a single digit, or which is empty, decides nothing;
     - a value which strtod() does not accept in its entirety (or which is
       longer than 63 characters) means the values cannot be taken for
       continuous, so the result is false;
     - any other value is a number.
   The result is true if at least one value was found to be a number.

   The whole value is given to strtod() at once, so numbers such as "-1", ".5"
   or "1e5", whose leading part is not a number by itself, are recognized.
*/
bool tabDelim_checkNeedsD(PVariable var)
{
  TEnumVariable *enumv = var.AS(TEnumVariable);
  if (!enumv)
    return false;

  TValue val;
  if (!enumv->firstValue(val))
    return true;

  bool floated = false;
  string sval;
  do {
    enumv->val2str(val, sval);
    if (sval.size()>63)
      return false;

    // one-digit values do not decide anything, and neither do empty ones
    if ((sval.size()==1) && (sval[0]>='0') && (sval[0]<='9'))
      continue;
    if (sval.empty())
      continue;

    // Convert commas into dots
    string number(sval);
    for(string::iterator ci(number.begin()), ce(number.end()); ci != ce; ++ci)
      if (*ci == ',')
        *ci = '.';

    // the value is a number only if strtod consumes all of it
    char *eptr;
    strtod(number.c_str(), &eptr);
    if (*eptr)
      return false;

    floated = true;
  } while (enumv->nextValue(val));
  
  // All values were either one digit or successfully interpreted as continuous
  // We need to return true if there were some that were not one-digit...
  return floated;
}


void tabDelim_writeDomainWithDetection(FILE *file, PDomain dom, char delim)
{
  bool ho = false;
  const_PITERATE(TVarList, vi, dom->attributes) {
    PUTDELIM;
    fprintf(file, "%s%s", (tabDelim_checkNeedsD(*vi) ? "D#" : ""), (*vi)->name.c_str());
  }
  
  if (dom->classVar) {
    PUTDELIM;
    fprintf(file, "%s%s", (tabDelim_checkNeedsD(dom->classVar) ? "cD#" : "c#"), dom->classVar->name.c_str());
  }


  bool hasOptionalFloats = false;

  const_ITERATE(TMetaVector, mi, dom->metas) {
    if (mi->optional) {
      if ((*mi).variable->varType == TValue::FLOATVAR)
        hasOptionalFloats = true;
    }
    else {
      PUTDELIM;
      fprintf(file, "%s%s", (tabDelim_checkNeedsD((*mi).variable) ? "mD#" : "m#"), (*mi).variable->name.c_str());
    }
  }

  if (hasOptionalFloats) {
    PUTDELIM;
    fprintf(file, "B#__basket_foo");
  }

  fprintf(file, "\n");
}


/* Writes the header describing 'dom' to 'file': the one-line form with
   prefixed names if 'autodetect' is true, otherwise the three-line form
   (the only one that uses 'listDiscreteValues'). */
void tabDelim_writeDomain(FILE *file, PDomain dom, bool autodetect, char delim, bool listDiscreteValues)
{
  if (!autodetect) {
    tabDelim_writeDomainWithoutDetection(file, dom, delim, listDiscreteValues);
    return;
  }

  tabDelim_writeDomainWithDetection(file, dom, delim);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -