📄 tabdelim.cpp
字号:
{ const_ITERATE(vector<string>, ai, atoms)
if ((*ai).length())
return false;
return true;
}
/* Drops the empty strings at the end of the atom list (in place).
   Empty atoms that are followed by a non-empty one are kept.
   Returns the number of atoms left in the list. */
int trimAtomsList(vector<string> &atoms)
{
  // Peel empty atoms off the back until a non-empty one (or nothing) remains.
  while (atoms.size() && !atoms.back().length())
    atoms.pop_back();

  return atoms.size();
}
/* Reads one line of a tab- or comma-delimited file and splits it into atoms.

   Atoms are separated by '\t' and, if csv is true, also by ','. A line ends with
   '\n', '\r' or "\r\n". Other characters below ' ' are dropped. Every atom of a
   terminated line is passed through trim(), and trailing empty atoms are removed
   by trimAtomsList(). A line whose first character is '|' is a comment and is
   skipped up to and including its line terminator.

   If escapeSpaces is true, the sequence backslash-space yields a plain space;
   a backslash followed by anything else is kept as it is.

   Returns the number of atoms (0 for an empty line), -1 for a comment line and
   -2 if the end of file had already been reached before the call.
   Raises an error if the file is not open or if reading fails.
*/
int readTabAtom(TFileExampleIteratorData &fei, vector<string> &atoms, bool escapeSpaces, bool csv)
{
  atoms.clear();

  if (!fei.file)
    raiseErrorWho("TabDelimExampleGenerator", "file not opened");

  if (feof(fei.file))
    return -2;

  fei.line++;

  // fgetc returns an int. Storing it in a char makes EOF either undetectable
  // (unsigned char) or indistinguishable from the byte 0xFF (signed char).
  int c;
  int col = 0;
  string atom;

  for(;;) {
    c = fgetc(fei.file);

    if (c==EOF)
      break;

    if (!col && (c=='|')) {
      // Comment line: skip everything up to the end of the line.
      for (c=fgetc(fei.file); (c!='\r') && (c!='\n') && (c!=EOF); c=fgetc(fei.file));
      if (c == '\r') {
        // Swallow the LF of a CR LF pair so that it is not read as an extra empty line.
        c = fgetc(fei.file);
        if ((c != '\n') && (c != EOF))
          ungetc(c, fei.file);
      }
      return -1;
    }

    col++;

    switch(c) {
      case '\r':
      case '\n':
        if (atom.length() || atoms.size())
          atoms.push_back(trim(atom));  // end of line
        if (c == '\r') {
          // A lone CR ends the line; the character after it belongs to the next line.
          // ungetc is used instead of fseek, which was called with swapped arguments
          // and is not portable for relative offsets on text streams anyway.
          c = fgetc(fei.file);
          if ((c != '\n') && (c != EOF))
            ungetc(c, fei.file);
        }
        return trimAtomsList(atoms);

      case '\t':
        atoms.push_back(trim(atom));
        atom = string();
        break;

      case ',':
        if (csv) {
          atoms.push_back(trim(atom));
          atom = string();
          break;
        }
        // not csv: a comma is an ordinary character -- fall through

      case ' ':
        atom += static_cast<char>(c);
        break;

      case '\\':
        if (escapeSpaces) {
          c = fgetc(fei.file);
          if (c != ' ')
            atom += '\\';
          if (c < ' ') {
            // EOF or a control character (possibly a delimiter) follows the backslash:
            // hand it back so that the regular code above processes it instead of
            // silently losing a column or line break.
            if (c != EOF)
              ungetc(c, fei.file);
            break;
          }
        }
        // fall through: store the character itself

      default:
        // Control characters are dropped; bytes >= 128 arrive as positive ints and are kept.
        if (c >= ' ')
          atom += static_cast<char>(c);
    };
  }

  if (ferror(fei.file))
    raiseErrorWho("TabDelimExampleGenerator", "error while reading line %i of file '%s'", fei.line, fei.filename.c_str());

  // Last line of the file without a line terminator.
  // NOTE(review): unlike terminated lines, the final atom is trimmed only in csv mode;
  // this is kept as it was -- confirm whether the difference is intended.
  if (atom.length() || atoms.size())
    atoms.push_back(csv ? trim(atom) : atom);

  return trimAtomsList(atoms);
}
// ********* Output ********* //
/* Writes the delimiter in front of every field except the first one on a line.
   Relies on three names from the enclosing scope: `file` (output stream), `delim`
   (delimiter character) and a bool `ho`. `ho` has to be false at the start of a
   line; the first use sets it to true, later uses write `delim`. */
#define PUTDELIM { if (ho) putc(delim, file); else ho = true; }
/* NOTE(review): the body is empty in this copy of the file, so calling this function
   writes nothing and ignores all three arguments. Confirm whether this is an
   intentional stub before relying on it. */
void tabDelim_writeExample(FILE *file, const TExample &ex, char delim)
{
}
/* Writes every example of the generator to file, one example per line.

   Each line consists of, separated by delim:
     - the values of the domain's variables,
     - the values of the non-optional meta attributes of the example's domain,
     - one extra field listing the optional meta attributes of type FLOATVAR that
       the example has and whose value is not special. The entries are separated
       by spaces and written as "name" if the value equals 1.0 and as "name=value"
       otherwise. The field is omitted if there are no such entries.

   DK, DC: if not NULL, the text written in place of values whose valueType is
           valueDK or valueDC, respectively; otherwise the variable's val2filestr
           decides how such values are written.

   All text is written through an explicit "%s" format, so a '%' in a value, in a
   variable name or in DK/DC is written literally instead of being interpreted by fprintf.
*/
void tabDelim_writeExamples(FILE *file, PExampleGenerator rg, char delim, const char *DK, const char *DC)
{
  // Bound by reference: copying the whole domain just to read its variable list is not needed.
  const TDomain &domain = rg->domain.getReference();
  TVarList::const_iterator vb(domain.variables->begin()), vi, ve(domain.variables->end());

  PEITERATE(ex, rg) {
    TExample::const_iterator ri((*ex).begin());
    string st;
    bool ho = false;

    // Regular values, walked in parallel with the domain's variables.
    for(vi = vb; vi!=ve; ++vi, ++ri) {
      PUTDELIM;
      if (DK && ((*ri).valueType == valueDK))
        fprintf(file, "%s", DK);
      else if (DC && ((*ri).valueType == valueDC))
        fprintf(file, "%s", DC);
      else {
        (*vi)->val2filestr(*ri, st, *ex);
        fprintf(file, "%s", st.c_str());
      }
    }

    TMetaVector::const_iterator mb((*ex).domain->metas.begin()), mi, me((*ex).domain->metas.end());

    // Mandatory meta attributes.
    for(mi = mb; mi != me; ++mi) {
      if (!(*mi).optional) {
        PUTDELIM;
        // The DK/DC test has to look at the meta value itself. `ri` has already run
        // past the last regular value at this point and must not be dereferenced.
        const TValue &metaValue = (*ex)[(*mi).id];
        if (DK && (metaValue.valueType == valueDK))
          fprintf(file, "%s", DK);
        else if (DC && (metaValue.valueType == valueDC))
          fprintf(file, "%s", DC);
        else {
          (*mi).variable->val2filestr(metaValue, st, *ex);
          fprintf(file, "%s", st.c_str());
        }
      }
    }

    // Optional float meta attributes share a single, space-separated field.
    bool first = true;
    for(mi = mb; mi != me; ++mi) {
      if ((*mi).optional) {
        const TVariable &var = (*mi).variable.getReference();
        if ((var.varType == TValue::FLOATVAR) && (*ex).hasMeta((*mi).id)) {
          const TValue &mval = (*ex).getMeta((*mi).id);
          if (!mval.isSpecial()) {
            if (first) {
              PUTDELIM;
              first = false;
            }
            else
              fprintf(file, " ");

            if (mval.floatV == 1.0)
              fprintf(file, "%s", var.name.c_str());
            else {
              var.val2filestr(mval, st, *ex);
              fprintf(file, "%s=%s", var.name.c_str(), st.c_str());
            }
          }
        }
      }
    }

    fprintf(file, "\n");
  }
}
/* Returns a copy of s in which every space is preceded by a backslash.
   All other characters are copied unchanged. */
string escSpaces(const string &s)
{
  string escaped;

  for (string::const_iterator ch = s.begin(), stop = s.end(); ch != stop; ++ch) {
    if (*ch != ' ')
      escaped += *ch;
    else
      escaped += "\\ ";
  }

  return escaped;
}
// Type object defined in another translation unit; printVarType compares the Python
// type of a python variable against it to tell the plain type from its subclasses.
extern TOrangeType PyOrPythonVariable_Type;
void printVarType(FILE *file, PVariable var, bool listDiscreteValues)
{
TEnumVariable *enumv = var.AS(TEnumVariable);
if (enumv) {
TValue val;
string sval;
if (!enumv->firstValue(val) || !listDiscreteValues)
fprintf(file, "d");
else {
enumv->val2str(val, sval);
fprintf(file, escSpaces(sval).c_str());
while(enumv->nextValue(val)) {
enumv->val2str(val, sval);
fprintf(file, " %s", escSpaces(sval).c_str());
}
}
}
else if (var.is_derived_from(TFloatVariable))
fprintf(file, "continuous");
else if (var.is_derived_from(TStringVariable))
fprintf(file, "string");
else if (var.is_derived_from(TPythonVariable)) {
if (var.counter->ob_type == (PyTypeObject *)&PyOrPythonVariable_Type)
fprintf(file, "python");
else {
PyObject *pyclassname = PyObject_GetAttrString((PyObject *)(var.counter)->ob_type, "__name__");
fprintf(file, "python:%s", PyString_AsString(pyclassname));
Py_DECREF(pyclassname);
}
}
else
raiseErrorWho("tabDelim_writeDomain", "tabDelim format supports only discrete, continuous and string variables");
}
void tabDelim_writeDomainWithoutDetection(FILE *file, PDomain dom, char delim, bool listDiscreteValues)
{
TVarList::const_iterator vi, vb(dom->variables->begin()), ve(dom->variables->end());
TMetaVector::const_iterator mi, mb(dom->metas.begin()), me(dom->metas.end());
bool ho = false;
bool hasOptionalFloats = false;
// First line: attribute names
for(vi = vb; vi!=ve; vi++) {
PUTDELIM;
fprintf(file, "%s", (*vi)->name.c_str());
}
for(mi = mb; mi!=me; mi++) {
if (mi->optional) {
if ((*mi).variable->varType == TValue::FLOATVAR)
hasOptionalFloats = true;
}
else {
PUTDELIM;
fprintf(file, "%s", (*mi).variable->name.c_str());
}
}
if (hasOptionalFloats) {
PUTDELIM;
fprintf(file, "__basket_foo");
}
fprintf(file, "\n");
// Second line: types
ho = false;
for(vi = vb; vi!=ve; vi++) {
PUTDELIM;
printVarType(file, *vi, listDiscreteValues);
}
for(mi = mb; mi!=me; mi++) {
if (mi->optional)
continue;
PUTDELIM;
printVarType(file, (*mi).variable, listDiscreteValues);
}
if (hasOptionalFloats) {
PUTDELIM;
fprintf(file, "basket");
}
fprintf(file, "\n");
// Third line: "meta" and "-ordered"
ho = false;
for(vb = vi = dom->attributes->begin(), ve = dom->attributes->end(); vi!=ve; vi++) {
PUTDELIM;
if (((*vi)->varType == TValue::INTVAR) && (*vi)->ordered)
fprintf(file, "-ordered");
}
if (dom->classVar) {
PUTDELIM;
fprintf(file, "class");
}
for(mi = mb; mi!=me; mi++) {
if (mi->optional)
continue;
PUTDELIM;
fprintf(file, "meta");
if (((*mi).variable->varType == TValue::INTVAR) && (*mi).variable->ordered)
fprintf(file, " -ordered");
}
if (hasOptionalFloats)
PUTDELIM;
fprintf(file, "\n");
}
/* Tells whether the values of a discrete variable could be mistaken for continuous
   ones, in which case the caller marks the variable as discrete with a prefix.

   Returns
     - false if var is not a TEnumVariable;
     - true  if the variable has no values;
     - false as soon as a value is found that is longer than 63 characters or that
       is not a number (after replacing ',' by '.');
     - otherwise true if at least one value is a number other than a single digit,
       and false if all values are single digits.

   Each value is parsed as a whole. Parsing it prefix by prefix rejected valid numbers
   such as "-1", ".5" or "1e5", whose leading part alone is not a complete number.
*/
bool tabDelim_checkNeedsD(PVariable var)
{
  bool floated = false;
  TEnumVariable *enumv = var.AS(TEnumVariable);
  if (enumv) {
    TValue val;
    string sval;

    if (!enumv->firstValue(val))
      return true;

    do {
      enumv->val2str(val, sval);

      // Length limit kept from the earlier fixed-size buffer implementation.
      if (sval.size()>63)
        return false;

      // An empty value says nothing either way (as before, it is skipped).
      if (sval.empty())
        continue;

      // Single digits do not count as "floated".
      if ((sval.size()==1) && (sval[0]>='0') && (sval[0]<='9'))
        continue;

      // Convert commas into dots
      string number(sval);
      for(string::iterator ci = number.begin(); ci != number.end(); ++ci)
        if (*ci == ',')
          *ci = '.';

      // The value is a number only if strtod consumed something and stopped at the end.
      const char *text = number.c_str();
      char *eptr;
      strtod(text, &eptr);
      if ((eptr == text) || *eptr)
        return false;

      floated = true;
    } while (enumv->nextValue(val));
  }

  // All values were either one digit or successfully interpreted as continuous
  // We need to return true if there were some that were not one-digit...
  return floated;
}
void tabDelim_writeDomainWithDetection(FILE *file, PDomain dom, char delim)
{
bool ho = false;
const_PITERATE(TVarList, vi, dom->attributes) {
PUTDELIM;
fprintf(file, "%s%s", (tabDelim_checkNeedsD(*vi) ? "D#" : ""), (*vi)->name.c_str());
}
if (dom->classVar) {
PUTDELIM;
fprintf(file, "%s%s", (tabDelim_checkNeedsD(dom->classVar) ? "cD#" : "c#"), dom->classVar->name.c_str());
}
bool hasOptionalFloats = false;
const_ITERATE(TMetaVector, mi, dom->metas) {
if (mi->optional) {
if ((*mi).variable->varType == TValue::FLOATVAR)
hasOptionalFloats = true;
}
else {
PUTDELIM;
fprintf(file, "%s%s", (tabDelim_checkNeedsD((*mi).variable) ? "mD#" : "m#"), (*mi).variable->name.c_str());
}
}
if (hasOptionalFloats) {
PUTDELIM;
fprintf(file, "B#__basket_foo");
}
fprintf(file, "\n");
}
/* Writes the domain header: the one-line form with prefixed names if autodetect is
   true, the three-line form otherwise. listDiscreteValues is used by the three-line
   form only. */
void tabDelim_writeDomain(FILE *file, PDomain dom, bool autodetect, char delim, bool listDiscreteValues)
{
  if (!autodetect) {
    tabDelim_writeDomainWithoutDetection(file, dom, delim, listDiscreteValues);
    return;
  }

  tabDelim_writeDomainWithDetection(file, dom, delim);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -