maint: update copyright
[gnulib.git] / lib / unistr / u8-mbtouc-unsafe-aux.c
1 /* Conversion UTF-8 to UCS-4.
2    Copyright (C) 2001-2002, 2006-2007, 2009-2014 Free Software Foundation, Inc.
3    Written by Bruno Haible <bruno@clisp.org>, 2001.
4
5    This program is free software: you can redistribute it and/or modify it
6    under the terms of the GNU Lesser General Public License as published
7    by the Free Software Foundation; either version 3 of the License, or
8    (at your option) any later version.
9
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14
15    You should have received a copy of the GNU Lesser General Public License
16    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
17
18 #include <config.h>
19
20 /* Specification.  */
21 #include "unistr.h"
22
23 #if defined IN_LIBUNISTRING || HAVE_INLINE
24
/* Decode the UTF-8 multibyte sequence that starts at S, of which N bytes
   are available, and store the resulting character in *PUC.

   S[0] is read unconditionally, so at least one byte must be readable.

   Return value: the number of bytes of S that make up the character, or,
   when U+FFFD is stored because the sequence is invalid or incomplete,
   the number of bytes attributed to the bad sequence.  The result is
   always between 1 and 4.

   Only the 2-, 3- and 4-byte forms are decoded here.  Every other lead
   byte (below 0xC2, or 0xF8 and above) yields U+FFFD with a return value
   of 1; the 5- and 6-byte forms of the original UTF-8 definition are not
   supported.

   When CONFIG_UNICODE_SAFETY is not enabled, a sequence for which enough
   bytes are available is decoded without checking its trail bytes, so
   malformed input yields a meaningless value instead of U+FFFD.  */
int
u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n)
{
  const uint8_t lead = *s;

  if (lead >= 0xc2 && lead < 0xe0)
    {
      /* Two-byte form.  */
      if (n < 2)
        {
          /* incomplete multibyte character */
          *puc = 0xfffd;
          return 1;
        }
#if CONFIG_UNICODE_SAFETY
      if ((s[1] ^ 0x80) >= 0x40)
        {
          /* invalid multibyte character: S[1] is not a trail byte */
          *puc = 0xfffd;
          return 1;
        }
#endif
      /* The trail byte is folded in with '^ 0x80' rather than '& 0x3f';
         this matters only for unchecked input in unsafe mode.  */
      *puc = ((unsigned int) (lead & 0x1f) << 6)
             | (unsigned int) (s[1] ^ 0x80);
      return 2;
    }

  if (lead >= 0xe0 && lead < 0xf0)
    {
      /* Three-byte form.  */
      if (n < 3)
        {
          /* incomplete multibyte character: report the lead byte plus
             the trail bytes that are present.  */
          *puc = 0xfffd;
          if (n == 1 || (s[1] ^ 0x80) >= 0x40)
            return 1;
          return 2;
        }
#if CONFIG_UNICODE_SAFETY
      if ((s[1] ^ 0x80) >= 0x40)
        {
          /* invalid multibyte character */
          *puc = 0xfffd;
          return 1;
        }
      if ((s[2] ^ 0x80) >= 0x40)
        {
          /* invalid multibyte character */
          *puc = 0xfffd;
          return 2;
        }
      /* Reject overlong encodings (E0 80..9F) and the surrogate range
         U+D800..U+DFFF (ED A0..BF).  */
      if ((lead < 0xe1 && s[1] < 0xa0) || (lead == 0xed && s[1] >= 0xa0))
        {
          /* invalid multibyte character */
          *puc = 0xfffd;
          return 3;
        }
#endif
      *puc = ((unsigned int) (lead & 0x0f) << 12)
             | ((unsigned int) (s[1] ^ 0x80) << 6)
             | (unsigned int) (s[2] ^ 0x80);
      return 3;
    }

  if (lead >= 0xf0 && lead < 0xf8)
    {
      /* Four-byte form.  */
      if (n < 4)
        {
          /* incomplete multibyte character: report the lead byte plus
             the trail bytes that are present.  */
          *puc = 0xfffd;
          if (n == 1 || (s[1] ^ 0x80) >= 0x40)
            return 1;
          if (n == 2 || (s[2] ^ 0x80) >= 0x40)
            return 2;
          return 3;
        }
#if CONFIG_UNICODE_SAFETY
      if ((s[1] ^ 0x80) >= 0x40)
        {
          /* invalid multibyte character */
          *puc = 0xfffd;
          return 1;
        }
      if ((s[2] ^ 0x80) >= 0x40)
        {
          /* invalid multibyte character */
          *puc = 0xfffd;
          return 2;
        }
      if ((s[3] ^ 0x80) >= 0x40)
        {
          /* invalid multibyte character */
          *puc = 0xfffd;
          return 3;
        }
      /* Reject overlong encodings (F0 80..8F) and anything that would
         decode to a value above U+10FFFF (F4 90.. and lead bytes
         F5..F7).  */
      if ((lead < 0xf1 && s[1] < 0x90)
          || lead > 0xf4
          || (lead == 0xf4 && s[1] >= 0x90))
        {
          /* invalid multibyte character */
          *puc = 0xfffd;
          return 4;
        }
#endif
      *puc = ((unsigned int) (lead & 0x07) << 18)
             | ((unsigned int) (s[1] ^ 0x80) << 12)
             | ((unsigned int) (s[2] ^ 0x80) << 6)
             | (unsigned int) (s[3] ^ 0x80);
      return 4;
    }

  /* invalid multibyte character: LEAD cannot start a sequence handled
     above.  */
  *puc = 0xfffd;
  return 1;
}
259
260 #endif