maint: update copyright
[gnulib.git] / lib / unistr / u8-mbtouc-aux.c
1 /* Conversion UTF-8 to UCS-4.
2    Copyright (C) 2001-2002, 2006-2007, 2009-2014 Free Software Foundation, Inc.
3    Written by Bruno Haible <bruno@clisp.org>, 2001.
4
5    This program is free software: you can redistribute it and/or modify it
6    under the terms of the GNU Lesser General Public License as published
7    by the Free Software Foundation; either version 3 of the License, or
8    (at your option) any later version.
9
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14
15    You should have received a copy of the GNU Lesser General Public License
16    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
17
18 #include <config.h>
19
20 /* Specification.  */
21 #include "unistr.h"
22
23 #if defined IN_LIBUNISTRING || HAVE_INLINE
24
/* Decode the multibyte UTF-8 sequence that starts at S, of which N bytes
   are available, and store the resulting character in *PUC.

   S[0] is read unconditionally, so at least one byte must be readable.
   NOTE(review): a lead byte below 0x80 is treated as invalid here;
   presumably the caller handles the ASCII case before calling this
   auxiliary function -- confirm against the inline wrapper in unistr.h.

   Return value: the number of bytes consumed.  This is the lead byte plus
   the well-formed continuation bytes (0x80..0xBF) that follow it within
   the first N bytes, but never more than the length announced by the lead
   byte.

   *PUC receives the decoded character when the sequence is complete and
   denotes a valid code point; in every other case (invalid lead byte,
   bad or missing continuation byte, overlong encoding, surrogate, value
   above U+10FFFF) it receives U+FFFD.  */
int
u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n)
{
  const unsigned int lead = s[0];
  ucs4_t result = 0xfffd;
  unsigned int payload_mask;
  int wanted;
  int used = 1;

  /* Classify the lead byte.  Bytes below 0xC2 are ASCII, continuation
     bytes or overlong 2-byte leads; bytes from 0xF8 upwards would start
     the obsolete 5- and 6-byte forms, which are not accepted.  */
  if (lead < 0xc2 || lead >= 0xf8)
    {
      *puc = result;
      return 1;
    }
  else if (lead < 0xe0)
    {
      wanted = 2;
      payload_mask = 0x1f;
    }
  else if (lead < 0xf0)
    {
      wanted = 3;
      payload_mask = 0x0f;
    }
  else
    {
      wanted = 4;
      payload_mask = 0x07;
    }

  /* Step over the continuation bytes that are both available and
     well-formed.  The bound on N is tested first, so no byte at or
     beyond S + N is ever read.  */
  while (used < wanted
         && (size_t) used < n
         && (s[used] ^ 0x80) < 0x40)
    used++;

  if (used == wanted)
    {
      /* The sequence is structurally complete.  Whether it denotes a
         valid code point depends only on the first two bytes.  */
      const unsigned int second = s[1];
      int acceptable;

      if (wanted == 2)
        /* Leads 0xC0 and 0xC1 were rejected above; nothing more to do.  */
        acceptable = 1;
      else if (wanted == 3)
        /* Reject overlong forms (E0 80..9F) and surrogates (ED A0..BF).  */
        acceptable = !(lead == 0xe0 && second < 0xa0)
                     && !(lead == 0xed && second >= 0xa0);
      else
        /* Reject overlong forms (F0 80..8F) and values > U+10FFFF.  */
        acceptable = !(lead == 0xf0 && second < 0x90)
                     && (lead < 0xf4 || (lead == 0xf4 && second < 0x90));

      if (acceptable)
        {
          /* Fold the payload bits: those of the lead byte first, then
             six bits from each continuation byte.  */
          unsigned int value = lead & payload_mask;
          int i;

          for (i = 1; i < wanted; i++)
            value = (value << 6) | (unsigned int) (s[i] ^ 0x80);
          result = value;
        }
    }

  *puc = result;
  return used;
}
239
240 #endif