📄 excel.cpp
字号:
// row = example number (1..nExamples), or 0 for attribute row
// col = 0..nAttrs-1
void TExcelReader::cellAsVariant(const int &row, const int &col)
{
VariantInit(&result);
long pos[] = {rowsLow + row, columnsLow + col};
SafeArrayGetElement(cells, pos, &result);
if (result.vt == VT_ERROR)
raiseError("invalid value in cell %s%i", column2Chars(col), row+1);
}
// Returns the content of cell (row, col) converted to a NUL-terminated string
// in the ANSI code page (CP_ACP).
//
// The returned pointer is the member buffer `cellvalue`; the buffer from the
// previous call is released at the start of this one, so the result is only
// valid until `cellvalue` is refilled.
// Raises an error if the cell cannot be converted to a string.
char *TExcelReader::cellAsText(const int &row, const int &col)
{
  // Drop the text kept from the previously read cell.
  if (cellvalue) {
    mldelete cellvalue;
    cellvalue = NULL;
  }

  cellAsVariant(row, col);

  if (   (VariantChangeType(&result, &result, 0, VT_BSTR) != S_OK)
      || (result.vt != VT_BSTR))
    raiseError("cannot convert the cell %s%i content into a string", column2Chars(col), row+1);

  // A NULL BSTR is a legal representation of the empty string.
  const wchar_t *wide = result.bstrVal ? result.bstrVal : L"";

  // Ask for the required size first: in multi-byte code pages one wide
  // character can need more than one byte, so "character count + 1" is not
  // a safe buffer size. With a source length of -1 the returned size
  // includes the terminating NUL.
  const int needed = WideCharToMultiByte(CP_ACP, 0, wide, -1, NULL, 0, NULL, NULL);
  int res = 0;
  if (needed > 0) {
    cellvalue = mlnew char[needed];
    res = WideCharToMultiByte(CP_ACP, 0, wide, -1, cellvalue, needed, NULL, NULL);
  }

  // `wide` points into the variant, so release it only after the conversion.
  VariantClear(&result);

  if (!res)
    raiseError("invalid value in cell %s%i", column2Chars(col), row+1);

  return cellvalue;
}
int TExcelReader::cellType(const int &row, const int &col) // 0 cannot be continuous, 1 can be continuous, 2 can even be coded discrete
{ cellAsVariant(row, col);
if (result.vt == VT_R8) {
float t = float(result.dblVal);
if (floor(t) != t)
return 1;
}
cellAsText(row, col);
if ( !*cellvalue
|| !cellvalue[1] && (*cellvalue>='0') && (*cellvalue<='9'))
return 2;
float f;
int ssr = sscanf(cellvalue, "%f", &f);
return (ssr && (ssr!=EOF)) ? 1 : 0;
}
// Builds the domain described by the attribute row (row 0) of the sheet, or
// checks that row against a given domain.
//
// specials receives one entry per column:
//   0 = normal, -1 = class, 1 = ignore, <-1 = meta id
// Meta columns are first recorded as -2; when a new domain is prepared, the
// placeholders are replaced by the ids that prepareDomain reports.
//
// A header cell may start with a flag prefix terminated by '#':
//   one flag:   i (ignore), m (meta), c (class), D, C or S (explicit type)
//   two flags:  i, m or c followed by D, C or S
// D selects TValue::INTVAR, C selects TValue::FLOATVAR, S selects STRINGVAR.
// Whatever follows the prefix is the attribute name. Columns without an
// explicit type get one inferred from the cells in rows 1..nExamples.
//
// If sourceDomain is given, it is returned when checkDomain accepts the
// attribute descriptions; otherwise an error is raised.
PDomain TExcelReader::constructDomain(vector<int> &specials, PVarList sourceVars, PDomain sourceDomain, bool dontCheckStored, bool dontStore)
{
  TDomainDepot::TAttributeDescriptions attributeDescriptions;
  TDomainDepot::TAttributeDescriptions metas;
  TDomainDepot::TAttributeDescription classDescription("", -1);

  for (int attrNo = 0; attrNo < nAttrs; attrNo++) {
    TDomainDepot::TAttributeDescription *attributeDescription = NULL;

    char *name = cellAsText(0, attrNo);
    char special = 0;
    int type = -1;
    char *cptr = name;

    if (*cptr && (cptr[1]=='#')) {
      // single flag character
      if (*cptr == 'i') {
        specials.push_back(1);
        continue;
      }
      else if ((*cptr == 'm') || (*cptr == 'c'))
        special = *cptr;
      else if (*cptr == 'D')
        type = TValue::INTVAR;
      else if (*cptr == 'C')
        type = TValue::FLOATVAR;
      else if (*cptr == 'S')
        type = STRINGVAR;
      else
        raiseError("unrecognized flags in attribute name '%s'", cptr);

      cptr += 2;
    }
    else if (*cptr && cptr[1] && (cptr[2]=='#')) {
      // two flag characters: role first, type second
      if (*cptr == 'i') {
        specials.push_back(1);
        continue;
      }
      else if ((*cptr == 'm') || (*cptr == 'c'))
        special = *cptr;
      else
        raiseError("unrecognized flags in attribute name '%s'", cptr);

      cptr++;
      if (*cptr == 'D')
        type = TValue::INTVAR;
      else if (*cptr == 'C')
        type = TValue::FLOATVAR;
      else if (*cptr == 'S')
        type = STRINGVAR;
      else
        raiseError("unrecognized flags in attribute name '%s'", cptr);

      cptr += 2; // we have already increased cptr once
    }

    switch (special) {
      case 0:
        attributeDescriptions.push_back(TDomainDepot::TAttributeDescription(cptr, type));
        attributeDescription = &attributeDescriptions.back();
        specials.push_back(0);
        break;

      case 'm':
        metas.push_back(TDomainDepot::TAttributeDescription(cptr, type));
        attributeDescription = &metas.back();
        specials.push_back(-2); // this will later be replaced with a real id
        break;

      case 'c':
        classDescription.name = cptr;
        classDescription.varType = type;
        attributeDescription = &classDescription;
        specials.push_back(-1);
        break;
    }

    if (type < 0) {
      // No explicit type: infer it from the data. From here on `name`/`cptr`
      // must not be used, since cellType() refills the buffer they point into.
      char minCellType = 2; // 0 cannot be continuous, 1 can be continuous, 2 can even be coded discrete
      for (int row = 1; row <= nExamples; row++) {
        const char tct = cellType(row, attrNo);
        if (tct < minCellType)
          minCellType = tct;
        // One cell that cannot be continuous settles the matter. Recording it
        // in minCellType (rather than only breaking) keeps the assignment
        // below from turning the column continuous because of earlier cells.
        if (!minCellType)
          break;
      }

      attributeDescription->varType = minCellType==1 ? TValue::FLOATVAR : TValue::INTVAR;
    }
  }

  if (classDescription.varType >= 0)
    attributeDescriptions.push_back(classDescription);

  if (sourceDomain) {
    if (!domainDepot.checkDomain(sourceDomain.AS(TDomain), &attributeDescriptions, true, NULL))
      raiseError("given domain does not match the file");
    else
      return sourceDomain;
  }

  // Storage for the meta ids reported by prepareDomain. A vector releases the
  // memory even if prepareDomain raises; the spare slot keeps &metaIDs[0]
  // valid when there are no meta attributes.
  vector<int> metaIDs(metas.size() + 1);
  PDomain newDomain = domainDepot.prepareDomain(&attributeDescriptions, classDescription.varType>=0, &metas, sourceVars, NULL, dontStore, dontCheckStored, NULL, &metaIDs[0]);

  // Replace the -2 placeholders with the reported ids, in column order.
  int *mid = &metaIDs[0];
  for (vector<int>::iterator ii = specials.begin(); ii != specials.end(); ++ii)
    if (*ii == -2)
      *ii = *(mid++);

  return newDomain;
}
// Reads cell (row, col) and converts it into `value` for variable `var`.
//
// The textual form of the cell is left in the member buffer `cellvalue`
// (the previous content of that buffer is released first).
// Raises an error if the cell cannot be converted to text or if `var`
// rejects the text.
void TExcelReader::readValue(const int &row, const int &col, PVariable var, TValue &value)
{
  if (cellvalue) {
    mldelete cellvalue;
    cellvalue = NULL;
  }

  cellAsVariant(row, col);

  // A numeric cell read into a float variable is kept as a number;
  // everything else is converted to a string.
  // NOTE(review): in the numeric case the "%8.6f" text below is also handed to
  // str2val_add with the same `value` - confirm which of the two is meant to win.
  if ((result.vt == VT_R8) && (var->varType == TValue::FLOATVAR))
    value = TValue(float(result.dblVal));
  else if (   (VariantChangeType(&result, &result, 0, VT_BSTR) != S_OK)
           || (result.vt != VT_BSTR))
    raiseError("cannot convert content of cell %s%i", column2Chars(col), row+1);

  // At this point result.vt is either VT_BSTR or VT_R8. Compare for equality:
  // VT_BSTR is a type code, not a bit flag.
  if (result.vt == VT_BSTR) {
    // A NULL BSTR is a legal representation of the empty string.
    const wchar_t *wide = result.bstrVal ? result.bstrVal : L"";

    // Query the required size first: in multi-byte code pages one wide
    // character can need more than one byte. With a source length of -1 the
    // returned size includes the terminating NUL.
    const int needed = WideCharToMultiByte(CP_ACP, 0, wide, -1, NULL, 0, NULL, NULL);
    int res = 0;
    if (needed > 0) {
      cellvalue = mlnew char[needed];
      res = WideCharToMultiByte(CP_ACP, 0, wide, -1, cellvalue, needed, NULL, NULL);
    }

    // `wide` points into the variant, so release it only after the conversion.
    VariantClear(&result);

    if (!res)
      raiseError("invalid value in cell %s%i", column2Chars(col), row+1);

    try {
      var->str2val_add(cellvalue, value);
    }
    catch (mlexception &) {
      // Report the failure together with the cell position and its text.
      raiseError("cannot convert content of cell %s%i ('%s')", column2Chars(col), row+1, cellvalue);
    }
  }
  else {
    // "%f" prints every integer digit: the largest finite double needs 309
    // digits plus sign, decimal point, six decimals and the NUL. A 32-byte
    // buffer overflowed for large magnitudes.
    const int numberBufferSize = 384;
    cellvalue = mlnew char[numberBufferSize];
    sprintf(cellvalue, "%8.6f", result.dblVal);
    var->str2val_add(cellvalue, value);
  }
}
// Reads rows 1..nExamples of the sheet into a newly allocated example table
// over `domain`.
//
// `specials` has one entry per column and decides where each cell goes:
//   0     -> next ordinary attribute of the example
//   -1    -> class value
//   < -1  -> next meta attribute of the domain
//   other -> column is skipped
// The table is released and the exception rethrown if reading fails.
TExampleTable *TExcelReader::readExamples(PDomain domain, const vector<int> &specials)
{
  TExampleTable *table = mlnew TExampleTable(domain);
  PVariable &classVar = domain->classVar;

  try {
    int row = 1;
    while (row <= nExamples) {
      TExample example(domain);

      // Cursors that advance in step with the columns of the sheet.
      vector<int>::const_iterator speci(specials.begin());
      TVarList::const_iterator vari(domain->attributes->begin());
      TMetaVector::const_iterator meti(domain->metas.begin());
      TExample::iterator exi(example.begin());

      for (int col = 0; col < nAttrs; col++, speci++) {
        const int role = *speci;

        if (role == 0) {
          // ordinary attribute: read straight into the example
          readValue(row, col, *vari, *exi);
          vari++;
          exi++;
        }
        else if (role == -1) {
          TValue classValue;
          readValue(row, col, classVar, classValue);
          example.setClass(classValue);
        }
        else if (role < -1) {
          TValue metaValue;
          readValue(row, col, (*meti).variable, metaValue);
          example.setMeta((*meti).id, metaValue);
          meti++;
        }
      }

      table->addExample(example);
      row++;
    }
  }
  catch (...) {
    // do not leak the partially filled table
    mldelete table;
    throw;
  }

  return table;
}
// Loads a sheet of an Excel file as an example table: opens the file,
// constructs (or checks) the domain from the attribute row and then reads
// the examples.
TExampleTable *TExcelReader::operator ()(char *filename, char *sheet, PVarList sourceVars, PDomain sourceDomain, bool dontCheckStored, bool dontStore)
{
  openFile(filename, sheet);

  // one entry per column, filled in by constructDomain
  vector<int> columnRoles;
  PDomain fileDomain = constructDomain(columnRoles, sourceVars, sourceDomain, dontCheckStored, dontStore);

  TExampleTable *examples = readExamples(fileDomain, columnRoles);
  return examples;
}
// Convenience wrapper: reads the given sheet with a temporary TExcelReader
// and returns the resulting example table.
TExampleTable *readExcelFile(char *filename, char *sheet, PVarList sourceVars, PDomain sourceDomain, bool dontCheckStored, bool dontStore)
{
  return TExcelReader().operator ()(filename, sheet, sourceVars, sourceDomain, dontCheckStored, dontStore);
}
// import orange; t = orange.ExampleTable(r"D:\ai\Domene\Imp\imp\merged2.xls")
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -