X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Freadtokens.c;h=fbed2ca7f6cffeb2aeb5c312dbd7a214e9e76e38;hb=1276a2c5f24c0c932426aca9c899fa524d2443f2;hp=d8d5ec8b707c6f334ce669b9d29e73030c50a685;hpb=548d6eee7d88e61814f06f6e39162006918a1938;p=gnulib.git diff --git a/lib/readtokens.c b/lib/readtokens.c index d8d5ec8b7..fbed2ca7f 100644 --- a/lib/readtokens.c +++ b/lib/readtokens.c @@ -1,10 +1,12 @@ /* readtokens.c -- Functions for reading tokens from an input stream. - Copyright (C) 1990-1991, 1999, 2001 Jim Meyering. - This program is free software; you can redistribute it and/or modify + Copyright (C) 1990-1991, 1999-2004, 2006, 2009-2014 Free Software + Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -12,203 +14,174 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + along with this program. If not, see <http://www.gnu.org/licenses/>. Written by Jim Meyering. */ -/* This almost supercedes xreadline stuff -- using delim="\n" +/* This almost supersedes xreadline stuff -- using delim="\n" gives the same functionality, except that these functions - would never return empty lines. + would never return empty lines. */ - To Do: - - To allow '\0' as a delimiter, I will have to change - interfaces to permit specification of delimiter-string - length.
- */ +#include <config.h> -#ifdef HAVE_CONFIG_H -# include <config.h> -#endif +#include "readtokens.h" +#include <limits.h> #include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdbool.h> -#ifdef STDC_HEADERS -# include <stdlib.h> -#endif - -#if defined (STDC_HEADERS) || defined(HAVE_STRING_H) -# include <string.h> -/* An ANSI string.h and pre-ANSI memory.h might conflict. */ -# if !defined (STDC_HEADERS) && defined (HAVE_MEMORY_H) -# include <memory.h> -# endif /* not STDC_HEADERS and HAVE_MEMORY_H */ -#else /* not STDC_HEADERS and not HAVE_STRING_H */ -# include <strings.h> -/* memory.h and strings.h conflict on some systems. */ -#endif /* not STDC_HEADERS and not HAVE_STRING_H */ - -#include "readtokens.h" -#include "unlocked-io.h" #include "xalloc.h" -#define STREQ(a,b) ((a) == (b) || ((a) && (b) && *(a) == *(b) \ - && strcmp(a, b) == 0)) +#if USE_UNLOCKED_IO +# include "unlocked-io.h" +#endif /* Initialize a tokenbuffer. */ void -init_tokenbuffer (tokenbuffer) - token_buffer *tokenbuffer; +init_tokenbuffer (token_buffer *tokenbuffer) { - tokenbuffer->size = INITIAL_TOKEN_LENGTH; - tokenbuffer->buffer = ((char *) xmalloc (INITIAL_TOKEN_LENGTH)); + tokenbuffer->size = 0; + tokenbuffer->buffer = NULL; } -/* Read a token from `stream' into `tokenbuffer'. +typedef size_t word; +enum { bits_per_word = sizeof (word) * CHAR_BIT }; + +static bool +get_nth_bit (size_t n, word const *bitset) +{ + return bitset[n / bits_per_word] >> n % bits_per_word & 1; +} + +static void +set_nth_bit (size_t n, word *bitset) +{ + size_t one = 1; + bitset[n / bits_per_word] |= one << n % bits_per_word; +} + +/* Read a token from STREAM into TOKENBUFFER. + A token is delimited by any of the N_DELIM bytes in DELIM. Upon return, the token is in tokenbuffer->buffer and - has a trailing '\0' instead of the original delimiter. + has a trailing '\0' instead of any original delimiter. The function value is the length of the token not including - the final '\0'. When EOF is reached (i.e. on the call - after the last token is read), -1 is returned and tokenbuffer - isn't modified. + the final '\0'.
Upon EOF (i.e. on the call after the last + token is read) or error, return -1 without modifying tokenbuffer. + The EOF and error conditions may be distinguished in the caller + by testing ferror (STREAM). - This function will work properly on lines containing NUL bytes - and on files that aren't newline-terminated. */ + This function works properly on lines containing NUL bytes + and on files that do not end with a delimiter. */ -long +size_t readtoken (FILE *stream, - const char *delim, - int n_delim, - token_buffer *tokenbuffer) + const char *delim, + size_t n_delim, + token_buffer *tokenbuffer) { char *p; - int c, i, n; - static const char *saved_delim = NULL; - static char isdelim[256]; - int same_delimiters; - - if (delim == NULL && saved_delim == NULL) - abort (); + int c; + size_t i, n; + word isdelim[(UCHAR_MAX + bits_per_word) / bits_per_word]; - same_delimiters = 0; - if (delim != saved_delim && saved_delim != NULL) + memset (isdelim, 0, sizeof isdelim); + for (i = 0; i < n_delim; i++) { - same_delimiters = 1; - for (i = 0; i < n_delim; i++) - { - if (delim[i] != saved_delim[i]) - { - same_delimiters = 0; - break; - } - } + unsigned char ch = delim[i]; + set_nth_bit (ch, isdelim); } - if (!same_delimiters) + /* skip over any leading delimiters */ + for (c = getc (stream); c >= 0 && get_nth_bit (c, isdelim); c = getc (stream)) { - const char *t; - saved_delim = delim; - for (i = 0; i < sizeof (isdelim); i++) - isdelim[i] = 0; - for (t = delim; *t; t++) - isdelim[(unsigned int) *t] = 1; + /* empty */ } p = tokenbuffer->buffer; n = tokenbuffer->size; i = 0; - - /* FIXME: don't fool with this caching BS. Use strchr instead. 
*/ - /* skip over any leading delimiters */ - for (c = getc (stream); c >= 0 && isdelim[c]; c = getc (stream)) - { - /* empty */ - } - for (;;) { - if (i >= n) - { - n = 3 * (n / 2 + 1); - p = xrealloc (p, (unsigned int) n); - } + if (c < 0 && i == 0) + return -1; + + if (i == n) + p = x2nrealloc (p, &n, sizeof *p); + if (c < 0) - { - if (i == 0) - return (-1); - p[i] = 0; - break; - } - if (isdelim[c]) - { - p[i] = 0; - break; - } + { + p[i] = 0; + break; + } + if (get_nth_bit (c, isdelim)) + { + p[i] = 0; + break; + } p[i++] = c; c = getc (stream); } tokenbuffer->buffer = p; tokenbuffer->size = n; - return (i); + return i; } -/* Return a NULL-terminated array of pointers to tokens - read from `stream.' The number of tokens is returned - as the value of the function. - All storage is obtained through calls to malloc(); +/* Build a NULL-terminated array of pointers to tokens + read from STREAM. Return the number of tokens read. + All storage is obtained through calls to xmalloc-like functions. %%% Question: is it worth it to do a single - %%% realloc() of `tokens' just before returning? */ + %%% realloc() of 'tokens' just before returning? 
*/ -int +size_t readtokens (FILE *stream, - int projected_n_tokens, - const char *delim, - int n_delim, - char ***tokens_out, - long **token_lengths) + size_t projected_n_tokens, + const char *delim, + size_t n_delim, + char ***tokens_out, + size_t **token_lengths) { token_buffer tb, *token = &tb; - int token_length; char **tokens; - long *lengths; - int sz; - int n_tokens; + size_t *lengths; + size_t sz; + size_t n_tokens; - n_tokens = 0; - if (projected_n_tokens > 0) - projected_n_tokens++; /* add one for trailing NULL pointer */ - else + if (projected_n_tokens == 0) projected_n_tokens = 64; + else + projected_n_tokens++; /* add one for trailing NULL pointer */ + sz = projected_n_tokens; - tokens = (char **) xmalloc (sz * sizeof (char *)); - lengths = (long *) xmalloc (sz * sizeof (long)); + tokens = xnmalloc (sz, sizeof *tokens); + lengths = xnmalloc (sz, sizeof *lengths); + n_tokens = 0; init_tokenbuffer (token); for (;;) { char *tmp; - token_length = readtoken (stream, delim, n_delim, token); + size_t token_length = readtoken (stream, delim, n_delim, token); if (n_tokens >= sz) - { - sz *= 2; - tokens = (char **) xrealloc (tokens, sz * sizeof (char *)); - lengths = (long *) xrealloc (lengths, sz * sizeof (long)); - } - - if (token_length < 0) - { - /* don't increment n_tokens for NULL entry */ - tokens[n_tokens] = NULL; - lengths[n_tokens] = -1; - break; - } - tmp = (char *) xmalloc ((token_length + 1) * sizeof (char)); + { + tokens = x2nrealloc (tokens, &sz, sizeof *tokens); + lengths = xnrealloc (lengths, sz, sizeof *lengths); + } + + if (token_length == (size_t) -1) + { + /* don't increment n_tokens for NULL entry */ + tokens[n_tokens] = NULL; + lengths[n_tokens] = 0; + break; + } + tmp = xnmalloc (token_length + 1, sizeof *tmp); lengths[n_tokens] = token_length; - tokens[n_tokens] = strncpy (tmp, token->buffer, - (unsigned) (token_length + 1)); + tokens[n_tokens] = memcpy (tmp, token->buffer, token_length + 1); n_tokens++; } @@ -216,5 +189,7 @@ readtokens 
(FILE *stream, *tokens_out = tokens; if (token_lengths != NULL) *token_lengths = lengths; + else + free (lengths); return n_tokens; }