1 /* Word breaks in UTF-8 strings.
2 Copyright (C) 2009-2013 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2009.
5 This program is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Lesser General Public License as published
7 by the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
/* NOTE(review): FUNC and U_MBTOUC_UNSAFE are defined just before
   "u-wordbreaks.h" is included, so they presumably parameterize that header
   for the UTF-8 variant -- confirm against u-wordbreaks.h.  */
26 #include "uniwbrk/wbrktable.h"
/* FUNC expands to u8_wordbreaks, the function called from main below.  */
28 #define FUNC u8_wordbreaks
/* U_MBTOUC_UNSAFE expands to u8_mbtouc_unsafe.  */
30 #define U_MBTOUC_UNSAFE u8_mbtouc_unsafe
31 #include "u-wordbreaks.h"
39 /* Read the contents of an input stream, and return it, terminated with a NUL byte. */
/* read_file: accumulates the bytes read from STREAM in the heap buffer BUF,
   growing BUF with realloc as needed.
   NOTE(review): only part of this function is visible in this excerpt; the
   return type, the declarations of buf, alloc, size, count and BUFSIZE, and
   the statements between the visible lines are not shown.  */
42 read_file (FILE *stream)
/* Loop until the end-of-file indicator is set on STREAM.  */
50 while (! feof (stream))
/* Make sure there is room for another BUFSIZE bytes beyond the SIZE bytes
   already accounted for.  */
52 if (size + BUFSIZE > alloc)
/* Grow the capacity by half ...  */
54 alloc = alloc + alloc / 2;
/* ... but never to less than what the next read requires.  */
55 if (alloc < size + BUFSIZE)
56 alloc = size + BUFSIZE;
/* NOTE(review): the realloc result overwrites BUF directly, so the old block
   would be lost if realloc fails; presumably harmless if the failure path
   terminates the program (that path is not visible here -- confirm).  */
57 buf = realloc (buf, alloc);
60 fprintf (stderr, "out of memory\n");
/* Read up to BUFSIZE bytes into the buffer at offset SIZE.  */
64 count = fread (buf + size, 1, BUFSIZE, stream);
/* Resize the buffer to SIZE + 1 bytes -- presumably an exact fit plus one
   byte for the terminating NUL; confirm against the missing lines.  */
76 buf = realloc (buf, size + 1);
79 fprintf (stderr, "out of memory\n");
/* main: reads standard input with read_file, passes it to u8_wordbreaks, and
   writes the input bytes back to standard output, with the UTF-8 encoding of
   U+2027 written before some of them.
   NOTE(review): only part of this function is visible in this excerpt; the
   condition that selects where U+2027 is written, the declaration of i, and
   the end of the function are not shown.  */
88 main (int argc, char * argv[])
92 /* Display all the word breaks in the input string. */
93 char *input = read_file (stdin);
/* NOTE(review): strlen returns size_t, so storing it in an int may truncate
   for very large input.  strlen also stops at the first NUL byte, so input
   following an embedded NUL would be ignored.  */
94 int length = strlen (input);
/* One byte of BREAKS per byte of INPUT.
   NOTE(review): no NULL check is visible before BREAKS is used -- confirm;
   malloc (0) for empty input may legitimately return NULL.  */
95 char *breaks = malloc (length);
/* Presumably fills BREAKS[0..LENGTH-1] with per-byte word-break flags --
   confirm against the u8_wordbreaks documentation.  */
98 u8_wordbreaks ((uint8_t *) input, length, breaks);
/* Walk the input one byte at a time.  */
100 for (i = 0; i < length; i++)
105 /* U+2027 in UTF-8 encoding */
106 putc (0xe2, stdout); putc (0x80, stdout); putc (0xa7, stdout);
/* Echo the input byte itself.  */
113 putc (input[i], stdout);