📄 fileobject.c
字号:
Py_INCREF(Py_None);
return Py_None;
onioerror:
PyErr_SetFromErrno(PyExc_IOError);
clearerr(f->f_fp);
return NULL;
}
#endif /* HAVE_FTRUNCATE */
/* Report the current position of the file's stdio stream.
 *
 * f: the file object; its f_fp member is the FILE* that gets queried.
 *
 * Returns a new Python integer object holding the offset (a long integer
 * when HAVE_LARGEFILE_SUPPORT is defined, a plain int otherwise).  When
 * f_fp is NULL the result of err_closed() is returned instead.  When the
 * position query yields -1, IOError is raised from errno, the stream's
 * error indicators are cleared, and NULL is returned.
 */
static PyObject *
file_tell(PyFileObject *f)
{
    Py_off_t offset;

    if (f->f_fp == NULL)
        return err_closed();

    /* errno is zeroed first so that the value reported on failure
       belongs to this call and not to some earlier one. */
    Py_BEGIN_ALLOW_THREADS
    errno = 0;
    offset = _portable_ftell(f->f_fp);
    Py_END_ALLOW_THREADS

    if (offset != -1) {
#if defined(HAVE_LARGEFILE_SUPPORT)
        return PyLong_FromLongLong(offset);
#else
        return PyInt_FromLong(offset);
#endif
    }

    PyErr_SetFromErrno(PyExc_IOError);
    clearerr(f->f_fp);
    return NULL;
}
/* Return the descriptor number behind the file's stdio stream.
 *
 * f: the file object; fileno() is applied to its f_fp member.
 *
 * Returns a new Python int, or the result of err_closed() when f_fp
 * is NULL.
 */
static PyObject *
file_fileno(PyFileObject *f)
{
    long descriptor;

    if (f->f_fp == NULL)
        return err_closed();

    descriptor = (long) fileno(f->f_fp);
    return PyInt_FromLong(descriptor);
}
/* Flush the file's stdio stream with fflush().
 *
 * f: the file object whose f_fp stream is flushed.
 *
 * Returns a new reference to None on success.  When f_fp is NULL the
 * result of err_closed() is returned.  When fflush() reports failure,
 * IOError is raised from errno, the stream's error indicators are
 * cleared, and NULL is returned.
 */
static PyObject *
file_flush(PyFileObject *f)
{
    int status;

    if (f->f_fp == NULL)
        return err_closed();

    /* Zero errno up front so a failure reports this call's error code. */
    Py_BEGIN_ALLOW_THREADS
    errno = 0;
    status = fflush(f->f_fp);
    Py_END_ALLOW_THREADS

    if (status == 0) {
        Py_INCREF(Py_None);
        return Py_None;
    }

    PyErr_SetFromErrno(PyExc_IOError);
    clearerr(f->f_fp);
    return NULL;
}
/* Ask isatty() about the descriptor behind the file's stdio stream.
 *
 * f: the file object; the descriptor is taken from fileno(f->f_fp).
 *
 * Returns a new Python int carrying isatty()'s result unchanged, or the
 * result of err_closed() when f_fp is NULL.
 */
static PyObject *
file_isatty(PyFileObject *f)
{
    long answer;

    if (f->f_fp == NULL)
        return err_closed();

    Py_BEGIN_ALLOW_THREADS
    answer = isatty((int)fileno(f->f_fp));
    Py_END_ALLOW_THREADS

    return PyInt_FromLong(answer);
}
/* SMALLCHUNK is the amount new_buffersize() adds to a buffer that is not
   yet larger than SMALLCHUNK itself.  It is 8192 when stdio's BUFSIZ is
   smaller than that and SYMBIAN is not defined; otherwise it is BUFSIZ. */
#if BUFSIZ < 8192 && !defined(SYMBIAN)
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif
/* BIGCHUNK is the size up to which new_buffersize() keeps doubling, and
   the fixed increment it adds beyond that.  It is 16K (512 * 32) when int
   is narrower than 4 bytes, and 512K (512 * 1024) otherwise. */
#if SIZEOF_INT < 4
#define BIGCHUNK (512 * 32)
#else
#define BIGCHUNK (512 * 1024)
#endif
/* Pick the next size for a read buffer that currently holds currentsize
 * bytes.
 *
 * f:           the file being read; only its f_fp stream is used.
 * currentsize: the present buffer size (0 for the first allocation).
 *
 * With HAVE_FSTAT, when both the file size and the current position can
 * be determined and the position lies before the end, the result is
 * currentsize plus the bytes remaining plus one.  Otherwise a generic
 * growth policy applies: add SMALLCHUNK while the buffer is no larger
 * than SMALLCHUNK, double it up to BIGCHUNK, then add BIGCHUNK per step.
 */
static size_t
new_buffersize(PyFileObject *f, size_t currentsize)
{
#ifdef HAVE_FSTAT
    struct stat info;
    off_t here, eof;

    if (fstat(fileno(f->f_fp), &info) == 0) {
        eof = info.st_size;
        /* Calling lseek() *and* ftell() is deliberate.  Some stdio
           libraries flush their buffer inside ftell() and then fail in
           the lseek() they perform, which loses data for good.  So
           lseek() is probed first and ftell() is only called when the
           probe succeeds.  The lseek() result itself is not usable
           because it does not account for data sitting in the stdio
           buffer. */
#ifdef USE_GUSI2
        here = lseek(fileno(f->f_fp), 1L, SEEK_CUR);
        here = lseek(fileno(f->f_fp), -1L, SEEK_CUR);
#else
        here = lseek(fileno(f->f_fp), 0L, SEEK_CUR);
#endif
        if (here >= 0)
            here = ftell(f->f_fp);

        if (here < 0)
            clearerr(f->f_fp);
        else if (eof > here) {
            /* The extra byte lets the caller notice a file that grew. */
            return currentsize + eof - here + 1;
        }
    }
#endif
    if (currentsize <= SMALLCHUNK)
        return currentsize + SMALLCHUNK;

    /* Past SMALLCHUNK: double until BIGCHUNK is exceeded, then grow by
       BIGCHUNK each time. */
    if (currentsize <= BIGCHUNK)
        return currentsize + currentsize;
    return currentsize + BIGCHUNK;
}
/* Read bytes from the file into a new string object.
 *
 * f:    the file object; data is read from its f_fp stream.
 * args: argument tuple parsed with "|l:read" -- one optional long giving
 *       the number of bytes wanted.  When it is omitted or negative the
 *       function keeps reading, growing the buffer with new_buffersize(),
 *       until fread() returns short or returns nothing.
 *
 * Returns a new string object trimmed to the number of bytes actually
 * read.  Returns NULL with an exception set when
 *   - argument parsing fails,
 *   - the buffer size would exceed INT_MAX (OverflowError), either for
 *     the initial allocation or when the buffer has to grow,
 *   - allocating or resizing the string fails, or
 *   - fread() returns 0 with the stream's error indicator set (IOError
 *     from errno; the indicators are then cleared).
 * When f_fp is NULL the result of err_closed() is returned.
 */
static PyObject *
file_read(PyFileObject *f, PyObject *args)
{
    long bytesrequested = -1;
    size_t bytesread, buffersize, chunksize;
    PyObject *v;

    if (f->f_fp == NULL)
        return err_closed();
    if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
        return NULL;

    if (bytesrequested < 0)
        buffersize = new_buffersize(f, (size_t)0);
    else
        buffersize = bytesrequested;
    if (buffersize > INT_MAX) {
        PyErr_SetString(PyExc_OverflowError,
            "requested number of bytes is more than a Python string can hold");
        return NULL;
    }

    v = PyString_FromStringAndSize((char *)NULL, buffersize);
    if (v == NULL)
        return NULL;

    bytesread = 0;
    for (;;) {
        Py_BEGIN_ALLOW_THREADS
        errno = 0;
        chunksize = fread(BUF(v) + bytesread, 1,
                          buffersize - bytesread, f->f_fp);
        Py_END_ALLOW_THREADS

        if (chunksize == 0) {
            /* Nothing read: end of data unless the stream reports
               an error. */
            if (!ferror(f->f_fp))
                break;
            PyErr_SetFromErrno(PyExc_IOError);
            clearerr(f->f_fp);
            Py_DECREF(v);
            return NULL;
        }

        bytesread += chunksize;
        if (bytesread < buffersize)
            break;          /* short read: stop here */
        if (bytesrequested >= 0)
            break;          /* caller's request is fully satisfied; no
                               need for another zero-length fread() */

        /* Unbounded read and the buffer is full: grow it.  The new
           size needs the same INT_MAX guard as the initial one, since
           it is handed to the string API as well. */
        buffersize = new_buffersize(f, buffersize);
        if (buffersize > INT_MAX) {
            PyErr_SetString(PyExc_OverflowError,
                "requested number of bytes is more than a Python string can hold");
            Py_DECREF(v);
            return NULL;
        }
        /* No Py_DECREF on failure: per the C API documentation,
           _PyString_Resize() deallocates the string and sets v to
           NULL when it fails. */
        if (_PyString_Resize(&v, buffersize) < 0)
            return NULL;
    }

    /* Trim the unused tail.  If this fails v is NULL, which is the
       correct error return. */
    if (bytesread != buffersize)
        _PyString_Resize(&v, bytesread);
    return v;
}
/* Fill a caller-supplied writable buffer with data read from the file.
 *
 * f:    the file object; data is read from its f_fp stream.
 * args: parsed with "w#", which yields the buffer's address and its
 *       length as an int.
 *
 * fread() is called repeatedly until the buffer is full or a call
 * returns nothing.  Returns a new Python int with the number of bytes
 * stored.  Returns NULL when argument parsing fails, or when fread()
 * returns 0 with the stream's error indicator set (IOError from errno;
 * the indicators are then cleared).  When f_fp is NULL the result of
 * err_closed() is returned.
 */
static PyObject *
file_readinto(PyFileObject *f, PyObject *args)
{
    char *dest;
    int remaining;
    size_t filled, got;

    if (f->f_fp == NULL)
        return err_closed();
    if (!PyArg_Parse(args, "w#", &dest, &remaining))
        return NULL;

    for (filled = 0; remaining > 0; remaining -= got) {
        Py_BEGIN_ALLOW_THREADS
        errno = 0;
        got = fread(dest+filled, 1, remaining, f->f_fp);
        Py_END_ALLOW_THREADS

        if (got == 0) {
            if (ferror(f->f_fp)) {
                PyErr_SetFromErrno(PyExc_IOError);
                clearerr(f->f_fp);
                return NULL;
            }
            /* No data and no error: stop with what has been read. */
            break;
        }
        filled += got;
    }
    return PyInt_FromLong((long)filled);
}
/**************************************************************************
Routine to get next line using platform fgets().
Under MSVC 6:
+ MS threadsafe getc is very slow (multiple layers of function calls before+
after each character, to lock+unlock the stream).
+ The stream-locking functions are MS-internal -- can't access them from user
code.
+ There's nothing Tim could find in the MS C or platform SDK libraries that
can worm around this.
+ MS fgets locks/unlocks only once per line; it's the only hook we have.
So we use fgets for speed(!), despite that it's painful.
MS realloc is also slow.
Reports from other platforms on this method vs getc_unlocked (which MS doesn't
have):
Linux a wash
Solaris a wash
Tru64 Unix getline_via_fgets significantly faster
CAUTION: The C std isn't clear about this: in those cases where fgets
writes something into the buffer, can it write into any position beyond the
required trailing null byte? MSVC 6 fgets does not, and no platform is (yet)
known on which it does; and it would be a strange way to code fgets. Still,
getline_via_fgets may not work correctly if it does. The std test
test_bufio.py should fail if platform fgets() routinely writes beyond the
trailing null byte. #define DONT_USE_FGETS_IN_GETLINE to disable this code.
**************************************************************************/
/* Use this routine if told to, or by default on non-getc_unlocked()
* platforms unless told not to. Yikes! Let's spell that out:
* On a platform with getc_unlocked():
* By default, use getc_unlocked().
* If you want to use fgets() instead, #define USE_FGETS_IN_GETLINE.
* On a platform without getc_unlocked():
* By default, use fgets().
* If you don't want to use fgets(), #define DONT_USE_FGETS_IN_GETLINE.
*/
/* Without getc_unlocked(), the fgets() path is selected by default. */
#if !defined(USE_FGETS_IN_GETLINE) && !defined(HAVE_GETC_UNLOCKED)
#define USE_FGETS_IN_GETLINE
#endif
/* An explicit DONT_USE_FGETS_IN_GETLINE wins over both the default above
   and an explicit USE_FGETS_IN_GETLINE. */
#if defined(DONT_USE_FGETS_IN_GETLINE) && defined(USE_FGETS_IN_GETLINE)
#undef USE_FGETS_IN_GETLINE
#endif
#ifdef USE_FGETS_IN_GETLINE
/* Read one line of arbitrary length from fp using fgets().
 *
 * fp: the stdio stream to read from.
 *
 * Technique: the free part of the buffer is pre-filled with '\n' before
 * each fgets() call; afterwards the first '\n' found (and whether a '\0'
 * follows it) tells how much fgets() actually wrote.  Up to two fgets()
 * calls go into a stack buffer; only lines that do not fit there cause a
 * string object to be allocated and grown in INCBUFSIZE steps.
 *
 * Returns a new string object holding the line, including the '\n' when
 * one was read.  When fgets() returns NULL the stream's indicators are
 * cleared and whatever was gathered so far (possibly nothing) is
 * returned.  Returns NULL when PyErr_CheckSignals() reports nonzero at
 * that point, when creating or resizing the string fails, or when the
 * line would need more than INT_MAX bytes (OverflowError).
 */
static PyObject*
getline_via_fgets(FILE *fp)
{
/* INITBUFSIZE is the maximum line length that lets us get away with the fast
* no-realloc, one-fgets()-call path. Boosting it isn't free, because we have
* to fill this much of the buffer with a known value in order to figure out
* how much of the buffer fgets() overwrites. So if INITBUFSIZE is larger
* than "most" lines, we waste time filling unused buffer slots. 100 is
* surely adequate for most people's email archives, chewing over source code,
* etc -- "regular old text files".
* MAXBUFSIZE is the maximum line length that lets us get away with the less
* fast (but still zippy) no-realloc, two-fgets()-call path. See above for
* cautions about boosting that. 300 was chosen because the worst real-life
* text-crunching job reported on Python-Dev was a mail-log crawler where over
* half the lines were 254 chars.
* INCBUFSIZE is the amount by which we grow the buffer, if MAXBUFSIZE isn't
* enough. It doesn't much matter what this is set to: we only get here for
* absurdly long lines anyway.
*/
#define INITBUFSIZE 100
#define MAXBUFSIZE 300
#define INCBUFSIZE 1000
char* p; /* temp */
char buf[MAXBUFSIZE];
PyObject* v; /* the string object result */
char* pvfree; /* address of next free slot */
char* pvend; /* address one beyond last free slot */
size_t nfree; /* # of free buffer slots; pvend-pvfree */
size_t total_v_size; /* total # of slots in buffer */
/* Optimize for normal case: avoid _PyString_Resize if at all
* possible via first reading into stack buffer "buf".
*/
total_v_size = INITBUFSIZE; /* start small and pray */
pvfree = buf;
/* At most two passes: first over buf[0..INITBUFSIZE), then over the
 * remainder of buf up to MAXBUFSIZE.
 */
for (;;) {
Py_BEGIN_ALLOW_THREADS
pvend = buf + total_v_size;
nfree = pvend - pvfree;
memset(pvfree, '\n', nfree);
p = fgets(pvfree, nfree, fp);
Py_END_ALLOW_THREADS
if (p == NULL) {
/* fgets() delivered nothing on this call: clear the stream's
 * indicators and return what earlier passes gathered (an empty
 * string on the first pass).
 */
clearerr(fp);
if (PyErr_CheckSignals())
return NULL;
v = PyString_FromStringAndSize(buf, pvfree - buf);
return v;
}
/* fgets read *something* */
p = memchr(pvfree, '\n', nfree);
if (p != NULL) {
/* Did the \n come from fgets or from us?
* Since fgets stops at the first \n, and then writes
* \0, if it's from fgets a \0 must be next. But if
* that's so, it could not have come from us, since
* the \n's we filled the buffer with have only more
* \n's to the right.
*/
if (p+1 < pvend && *(p+1) == '\0') {
/* It's from fgets: we win! In particular,
* we haven't done any mallocs yet, and can
* build the final result on the first try.
*/
++p; /* include \n from fgets */
}
else {
/* Must be from us: fgets didn't fill the
* buffer and didn't find a newline, so it
* must be the last and newline-free line of
* the file.
*/
assert(p > pvfree && *(p-1) == '\0');
--p; /* don't include \0 from fgets */
}
v = PyString_FromStringAndSize(buf, p - buf);
return v;
}
/* yuck: fgets overwrote all the newlines, i.e. the entire
* buffer. So this line isn't over yet, or maybe it is but
* we're exactly at EOF. If we haven't already, try using the
* rest of the stack buffer.
*/
assert(*(pvend-1) == '\0');
if (pvfree == buf) {
pvfree = pvend - 1; /* overwrite trailing null */
total_v_size = MAXBUFSIZE;
}
else
break;
}
/* The stack buffer isn't big enough; malloc a string object and read
* into its buffer.
*/
total_v_size = MAXBUFSIZE + INCBUFSIZE;
v = PyString_FromStringAndSize((char*)NULL, (int)total_v_size);
if (v == NULL)
return v;
/* copy over everything except the last null byte */
memcpy(BUF(v), buf, MAXBUFSIZE-1);
pvfree = BUF(v) + MAXBUFSIZE - 1;
/* Keep reading stuff into v; if it ever ends successfully, break
* after setting p one beyond the end of the line. The code here is
* very much like the code above, except reads into v's buffer; see
* the code above for detailed comments about the logic.
*/
for (;;) {
Py_BEGIN_ALLOW_THREADS
pvend = BUF(v) + total_v_size;
nfree = pvend - pvfree;
memset(pvfree, '\n', nfree);
p = fgets(pvfree, nfree, fp);
Py_END_ALLOW_THREADS
if (p == NULL) {
clearerr(fp);
if (PyErr_CheckSignals()) {
Py_DECREF(v);
return NULL;
}
/* nothing new arrived: the line ends where this pass began */
p = pvfree;
break;
}
p = memchr(pvfree, '\n', nfree);
if (p != NULL) {
if (p+1 < pvend && *(p+1) == '\0') {
/* \n came from fgets */
++p;
break;
}
/* \n came from us; last line of file, no newline */
assert(p > pvfree && *(p-1) == '\0');
--p;
break;
}
/* expand buffer and try again */
assert(*(pvend-1) == '\0');
total_v_size += INCBUFSIZE;
/* the new size is cast to int below, so refuse anything larger */
if (total_v_size > INT_MAX) {
PyErr_SetString(PyExc_OverflowError,
"line is longer than a Python string can hold");
Py_DECREF(v);
return NULL;
}
/* NOTE(review): no Py_DECREF on this failure path -- presumably
 * _PyString_Resize disposes of v itself when it fails; confirm against
 * its documentation.
 */
if (_PyString_Resize(&v, (int)total_v_size) < 0)
return NULL;
/* overwrite the trailing null byte */
pvfree = BUF(v) + (total_v_size - INCBUFSIZE - 1);
}
/* p is one beyond the end of the line: trim v unless it is exactly full */
if (BUF(v) + total_v_size != p)
_PyString_Resize(&v, p - BUF(v));
return v;
#undef INITBUFSIZE
#undef MAXBUFSIZE
#undef INCBUFSIZE
}
#endif /* ifdef USE_FGETS_IN_GETLINE */
/* Internal routine to get a line.
Size argument interpretation:
> 0: max length;
<= 0: read arbitrary line
*/
/* Stream access macros.  With HAVE_GETC_UNLOCKED, GETC maps to
   getc_unlocked() and FLOCKFILE/FUNLOCKFILE map to flockfile() and
   funlockfile().  Without it, GETC is plain getc() and the two lock
   macros expand to nothing. */
#ifdef HAVE_GETC_UNLOCKED
#define GETC(f) getc_unlocked(f)
#define FLOCKFILE(f) flockfile(f)
#define FUNLOCKFILE(f) funlockfile(f)
#else
#define GETC(f) getc(f)
#define FLOCKFILE(f)
#define FUNLOCKFILE(f)
#endif
static PyObject *
get_line(PyFileObject *f, int n)
{
FILE *fp = f->f_fp;
int c;
char *buf, *end;
size_t n1, n2;
PyObject *v;
#ifdef USE_FGETS_IN_GETLINE
if (n <= 0)
return getline_via_fgets(fp);
#endif
n2 = n > 0 ? n : 100;
v = PyString_FromStringAndSize((char *)NULL, n2);
if (v == NULL)
return NULL;
buf = BUF(v);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -