⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 excel.cpp

📁 orange源码 数据挖掘技术
💻 CPP
📖 第 1 页 / 共 2 页
字号:
// row = example number (1..nExamples), or 0 for attribute row
// col = 0..nAttrs-1
void TExcelReader::cellAsVariant(const int &row, const int &col)
{
  VariantInit(&result);
  long pos[] = {rowsLow + row, columnsLow + col};
  SafeArrayGetElement(cells, pos, &result);
  if (result.vt == VT_ERROR)
    raiseError("invalid value in cell %s%i", column2Chars(col), row+1);
}


// Returns the content of the given cell converted to a NUL-terminated string
// in the system ANSI code page (CP_ACP).
//
// The returned pointer is the member `cellvalue`; it stays valid only until
// the next call that releases or replaces `cellvalue` (this function does so
// on entry). Raises an error if the cell cannot be converted to a string.
char *TExcelReader::cellAsText(const int &row, const int &col)
{
  // NOTE(review): cellvalue is allocated with mlnew char[...]; confirm that
  // mldelete is valid for array allocations.
  if (cellvalue) {
    mldelete cellvalue;
    cellvalue = NULL;
  }

  cellAsVariant(row, col);

  if (   (VariantChangeType(&result, &result, 0, VT_BSTR) != S_OK)
      || (result.vt != VT_BSTR)) {
    // release whatever the cell held before reporting the failure
    VariantClear(&result);
    raiseError("cannot convert the cell %s%i content into a string", column2Chars(col), row+1);
  }

  // A NULL BSTR is a legal representation of the empty string.
  const wchar_t *wide = result.bstrVal ? result.bstrVal : L"";

  // Ask for the required size first: in multi-byte code pages one wide
  // character can need more than one byte, so SysStringLen()+1 is not enough.
  const int needed = WideCharToMultiByte(CP_ACP, 0, wide, -1, NULL, 0, NULL, NULL);
  int res = 0;
  if (needed > 0) {
    cellvalue = mlnew char[needed];
    res = WideCharToMultiByte(CP_ACP, 0, wide, -1, cellvalue, needed, NULL, NULL);
  }

  VariantClear(&result);
  if (!res)
    raiseError("invalid value in cell %s%i", column2Chars(col), row+1);

  return cellvalue;
}


int TExcelReader::cellType(const int &row, const int &col) // 0 cannot be continuous, 1 can be continuous, 2 can even be coded discrete
{ cellAsVariant(row, col);

  if (result.vt == VT_R8) {
    float t = float(result.dblVal);
    if (floor(t) != t)
      return 1;
  }

  cellAsText(row, col);

  if (   !*cellvalue
      || !cellvalue[1] && (*cellvalue>='0') && (*cellvalue<='9'))
    return 2;

  float f;
  int ssr = sscanf(cellvalue, "%f", &f);
  return (ssr && (ssr!=EOF)) ? 1 : 0;
}


// Builds the domain described by the header row (row 0) of the sheet.
//
// A header cell may start with flags terminated by '#':
//   "i#name"                      - the column is ignored
//   "m#name", "c#name"            - meta attribute / class attribute, type is inferred
//   "D#name", "C#name", "S#name"  - ordinary attribute of type INTVAR / FLOATVAR / STRINGVAR
//   "mD#name", "cC#name", ...     - meta or class attribute with an explicit type
// Any other flag raises an error. When no type is given, it is inferred from
// the data cells using cellType(): FLOATVAR if every cell can be continuous
// and at least one cannot be coded discrete, INTVAR otherwise.
//
// specials receives one entry per column:
//   0 = normal, -1 = class, 1 = ignore, <-1 = meta id
// Meta columns are first marked with -2 and get the real id after the domain
// is prepared; if sourceDomain is given and matches, it is returned as is and
// the -2 placeholders are left in place.
//
// Raises an error if sourceDomain is given but does not match the file.
PDomain TExcelReader::constructDomain(vector<int> &specials, PVarList sourceVars, PDomain sourceDomain, bool dontCheckStored, bool dontStore)
{
  TDomainDepot::TAttributeDescriptions attributeDescriptions;
  TDomainDepot::TAttributeDescriptions metas;
  TDomainDepot::TAttributeDescription classDescription("", -1);

  for (int attrNo = 0; attrNo < nAttrs; attrNo++) {
    TDomainDepot::TAttributeDescription *attributeDescription = NULL;

    // cptr points into the buffer returned by cellAsText(); it must not be
    // used after cellType() is called below, since that releases the buffer.
    char *cptr = cellAsText(0, attrNo);
    char special = 0;
    int type = -1;

    // single flag: "x#name"
    if (*cptr && (cptr[1]=='#')) {
      if (*cptr == 'i') {
        specials.push_back(1);
        continue;
      }

      else if ((*cptr == 'm') || (*cptr == 'c'))
        special = *cptr;

      else if (*cptr == 'D')
        type = TValue::INTVAR;
      else if (*cptr == 'C')
        type = TValue::FLOATVAR;
      else if (*cptr == 'S')
        type = STRINGVAR;
      else
        raiseError("unrecognized flags in attribute name '%s'", cptr);

      cptr += 2;
    }

    // two flags: role followed by type, "xT#name"
    else if (*cptr && cptr[1] && (cptr[2]=='#')) {

      if (*cptr == 'i') {
        specials.push_back(1);
        continue;
      }
      else if ((*cptr == 'm') || (*cptr == 'c'))
        special = *cptr;
      else
        raiseError("unrecognized flags in attribute name '%s'", cptr);

      cptr++;
      if (*cptr == 'D')
        type = TValue::INTVAR;
      else if (*cptr == 'C')
        type = TValue::FLOATVAR;
      else if (*cptr == 'S')
        type = STRINGVAR;
      else
        raiseError("unrecognized flags in attribute name '%s'", cptr);

      cptr += 2; // we have already increased cptr once
    }

    switch (special) {
      case 0:
        attributeDescriptions.push_back(TDomainDepot::TAttributeDescription(cptr, type));
        attributeDescription = &attributeDescriptions.back();
        specials.push_back(0);
        break;

      case 'm':
        metas.push_back(TDomainDepot::TAttributeDescription(cptr, type));
        attributeDescription = &metas.back();
        specials.push_back(-2); // this will later be replaced with a real id
        break;

      case 'c':
        classDescription.name = cptr;
        classDescription.varType = type;
        attributeDescription = &classDescription;
        specials.push_back(-1);
        break;
    };

    if (type<0) {
      // 0 cannot be continuous, 1 can be continuous, 2 can even be coded discrete
      int minCellType = 2;
      for (int row = 1; row<=nExamples; row++) {
        const int tct = cellType(row, attrNo);
        if (tct < minCellType)
          minCellType = tct;
        // A cell that cannot be continuous settles the question. The minimum
        // has to record it, otherwise an earlier "can be continuous" cell
        // would still turn the column into FLOATVAR below.
        if (!minCellType)
          break;
      }

      attributeDescription->varType = minCellType==1 ? TValue::FLOATVAR : TValue::INTVAR;
    }
  }

  if (classDescription.varType >= 0)
    attributeDescriptions.push_back(classDescription);

  if (sourceDomain) {
    if (!domainDepot.checkDomain(sourceDomain.AS(TDomain), &attributeDescriptions, true, NULL))
      raiseError("given domain does not match the file");
    else
      return sourceDomain;
  }

  // Held in a vector so that the storage is released on every path, including
  // when prepareDomain raises. The spare element keeps the pointer valid and
  // non-null even when there are no metas.
  vector<int> metaIDs(metas.size() + 1);
  PDomain newDomain = domainDepot.prepareDomain(&attributeDescriptions, classDescription.varType>=0, &metas, sourceVars, NULL, dontStore, dontCheckStored, NULL, &metaIDs[0]);

  // replace the -2 placeholders with the ids assigned to the metas, in column order
  const int *mid = &metaIDs[0];
  ITERATE(vector<int>, ii, specials)
    if (*ii == -2)
      *ii = *(mid++);

  return newDomain;
}


void TExcelReader::readValue(const int &row, const int &col, PVariable var, TValue &value)
{ 
  if (cellvalue) {
    mldelete cellvalue;
    cellvalue = NULL;
  }

  cellAsVariant(row, col);

  if ((result.vt == VT_R8) && (var->varType == TValue::FLOATVAR))
    value = TValue(float(result.dblVal));

  else {
    int prevvt = result.vt;
    if (   (VariantChangeType(&result, &result, 0, VT_BSTR) != S_OK)
        || (result.vt != VT_BSTR))
      raiseError("cannot convert content of cell %s%i", column2Chars(col), row+1);
  }


  if ((result.vt & VT_BSTR) != 0) {
    const int blen = SysStringLen(result.bstrVal)+1;
    cellvalue = mlnew char[blen];
    const int res = WideCharToMultiByte(CP_ACP, 0, result.bstrVal, -1, cellvalue, blen, NULL, NULL);
    VariantClear(&result);
    if (!res)
      raiseError("invalid value in cell %s%i", column2Chars(col), row+1);

    try {
      var->str2val_add(cellvalue, value);
    }
    catch (mlexception err) {
      raiseError("cannot convert content of cell %s%i ('%s')", column2Chars(col), row+1, cellvalue);
    }
  }

    else {
      cellvalue = mlnew char[32];
      sprintf(cellvalue, "%8.6f", result.dblVal);
      var->str2val_add(cellvalue, value);
    }
}


// Reads rows 1..nExamples of the sheet into a newly allocated example table
// over `domain`. `specials` gives the role of each column: 0 = ordinary
// attribute, -1 = class, below -1 = meta attribute; other columns are skipped.
// The table is released again if reading any value raises an exception.
TExampleTable *TExcelReader::readExamples(PDomain domain, const vector<int> &specials)
{
  TExampleTable *table = mlnew TExampleTable(domain);
  PVariable &classVar = domain->classVar;

  try {
    for (int exampleNo = 1; exampleNo <= nExamples; exampleNo++) {
      TExample example(domain);

      // cursors over the ordinary attributes, their value slots and the metas
      TVarList::const_iterator vari(domain->attributes->begin());
      TExample::iterator exi(example.begin());
      TMetaVector::const_iterator meti(domain->metas.begin());

      vector<int>::const_iterator speci(specials.begin());
      for (int attrNo = 0; attrNo < nAttrs; attrNo++, speci++) {
        const int role = *speci;

        if (role == 0) {
          // ordinary attribute: fill the next value slot of the example
          readValue(exampleNo, attrNo, *vari, *exi);
          vari++;
          exi++;
        }
        else if (role == -1) {
          TValue classValue;
          readValue(exampleNo, attrNo, classVar, classValue);
          example.setClass(classValue);
        }
        else if (role < -1) {
          TValue metaValue;
          readValue(exampleNo, attrNo, (*meti).variable, metaValue);
          example.setMeta((*meti).id, metaValue);
          meti++;
        }
      }

      table->addExample(example);
    }
  }
  catch (...) {
    mldelete table;
    throw;
  }

  return table;
}


// Opens the given sheet of the file, constructs the domain from its header
// row and returns the table of examples read from the remaining rows.
TExampleTable *TExcelReader::operator ()(char *filename, char *sheet, PVarList sourceVars, PDomain sourceDomain, bool dontCheckStored, bool dontStore)
{
  openFile(filename, sheet);

  // role of each column, filled in by constructDomain
  vector<int> columnRoles;
  PDomain fileDomain = constructDomain(columnRoles, sourceVars, sourceDomain, dontCheckStored, dontStore);

  TExampleTable *examples = readExamples(fileDomain, columnRoles);
  return examples;
}

// Convenience wrapper: constructs a temporary TExcelReader and invokes it
// with the given arguments, returning the example table it produces.
TExampleTable *readExcelFile(char *filename, char *sheet, PVarList sourceVars, PDomain sourceDomain, bool dontCheckStored, bool dontStore)
{ return TExcelReader()(filename, sheet, sourceVars, sourceDomain, dontCheckStored, dontStore); }

// import orange; t = orange.ExampleTable(r"D:\ai\Domene\Imp\imp\merged2.xls")

#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -