X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Fgetndelim2.c;h=9dcb9feb46f663b7c0b477880569cf961134a497;hb=825d4b05a9f432fa39078a91b24d05da759a5ad6;hp=8930a5bd369f12b2f06d9623f635e288e44d7391;hpb=463890eaa2cb1f303b100f6889421761149e936f;p=gnulib.git diff --git a/lib/getndelim2.c b/lib/getndelim2.c index 8930a5bd3..9dcb9feb4 100644 --- a/lib/getndelim2.c +++ b/lib/getndelim2.c @@ -23,8 +23,10 @@ #include "getndelim2.h" -#include +#include #include +#include +#include #if USE_UNLOCKED_IO # include "unlocked-io.h" @@ -41,10 +43,21 @@ #include #include +#include "freadptr.h" +#include "freadseek.h" +#include "memchr2.h" + #ifndef SSIZE_MAX # define SSIZE_MAX ((ssize_t) (SIZE_MAX / 2)) #endif +/* Use this to suppress gcc's `...may be used before initialized' warnings. */ +#ifdef lint +# define IF_LINT(Code) Code +#else +# define IF_LINT(Code) /* empty */ +#endif + /* The maximum value that getndelim2 can return without suffering from overflow problems, either internally (because of pointer subtraction overflow) or due to the API (because of ssize_t). */ @@ -63,6 +76,7 @@ getndelim2 (char **lineptr, size_t *linesize, size_t offset, size_t nmax, ssize_t bytes_stored = -1; char *ptr = *lineptr; size_t size = *linesize; + bool found_delimiter; if (!ptr) { @@ -81,23 +95,69 @@ getndelim2 (char **lineptr, size_t *linesize, size_t offset, size_t nmax, if (nbytes_avail == 0 && nmax <= size) goto done; + /* Normalize delimiters, since memchr2 doesn't handle EOF. */ + if (delim1 == EOF) + delim1 = delim2; + else if (delim2 == EOF) + delim2 = delim1; + flockfile (stream); - for (;;) + found_delimiter = false; + do { - /* Here always ptr + size == read_pos + nbytes_avail. */ + /* Here always ptr + size == read_pos + nbytes_avail. + Also nbytes_avail > 0 || size < nmax. */ + + int c IF_LINT (= 0); + const char *buffer; + size_t buffer_len; - int c; + buffer = freadptr (stream, &buffer_len); + if (buffer) + { + if (delim1 != EOF) + { + const char *end = memchr2 (buffer, delim1, delim2, buffer_len); + if (end) + { + buffer_len = end - buffer + 1; + found_delimiter = true; + } + } + } + else + { + c = getc (stream); + if (c == EOF) + { + /* Return partial line, if any. */ + if (read_pos == ptr) + goto unlock_done; + else + break; + } + if (c == delim1 || c == delim2) + found_delimiter = true; + buffer_len = 1; + } /* We always want at least one byte left in the buffer, since we always (unless we get an error while reading the first byte) NUL-terminate the line buffer. */ - if (nbytes_avail < 2 && size < nmax) + if (nbytes_avail < buffer_len + 1 && size < nmax) { + /* Grow size proportionally, not linearly, to avoid O(n^2) + running time. */ size_t newsize = size < MIN_CHUNK ? size + MIN_CHUNK : 2 * size; char *newptr; + /* Increase newsize so that it becomes + >= (read_pos - ptr) + buffer_len. */ + if (newsize - (read_pos - ptr) < buffer_len + 1) + newsize = (read_pos - ptr) + buffer_len + 1; + /* Respect nmax. This handles possible integer overflow. */ if (! (size < newsize && newsize <= nmax)) newsize = nmax; @@ -118,26 +178,28 @@ getndelim2 (char **lineptr, size_t *linesize, size_t offset, size_t nmax, read_pos = size - nbytes_avail + ptr; } - c = getc (stream); - if (c == EOF) + /* Here, if size < nmax, nbytes_avail >= buffer_len + 1. + If size == nmax, nbytes_avail > 0. */ + + if (1 < nbytes_avail) { - /* Return partial line, if any. */ - if (read_pos == ptr) - goto unlock_done; + size_t copy_len = nbytes_avail - 1; + if (buffer_len < copy_len) + copy_len = buffer_len; + if (buffer) + memcpy (read_pos, buffer, copy_len); else - break; + *read_pos = c; + read_pos += copy_len; + nbytes_avail -= copy_len; } - if (nbytes_avail >= 2) - { - *read_pos++ = c; - nbytes_avail--; - } + /* Here still nbytes_avail > 0. */ - if (c == delim1 || c == delim2) - /* Return the line. */ - break; + if (buffer && freadseek (stream, buffer_len)) + goto unlock_done; } + while (!found_delimiter); /* Done - NUL terminate and return the number of bytes read. At this point we know that nbytes_avail >= 1. */