tests/unistr/test-u8-mbtouc.c

   1 /* Test of u8_mbtouc() function.
   2    Copyright (C) 2010-2014 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  16
  17 /* Written by Bruno Haible <bruno@clisp.org>, 2010.  */
  18
  19 #include <config.h>
  20
  21 #include "unistr.h"
  22
  23 #include "macros.h"
  24
  25 #include "test-u8-mbtouc.h"
  26
  27 static void
  28 test_safe_function (int (*my_u8_mbtouc) (ucs4_t *, const uint8_t *, size_t))
  29 {
  30   ucs4_t uc;
  31   int ret;
  32
  33   /* Test behaviour required by ISO 10646-1, sections R.7 and 2.3c, namely,
  34      that a "malformed sequence" is interpreted in the same way as
  35      "a character that is outside the adopted subset".
  36      Reference:
  37        Markus Kuhn: UTF-8 decoder capability and stress test
  38        <http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt>
  39        <http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html>
  40    */
  41   /* 3.1. Test that each unexpected continuation byte is signalled as a
  42      malformed sequence of its own.  */
  43   {
  44     static const uint8_t input[] = { '"', 0x80, 0xBF, 0x80, 0xBF, '"' };
  45     uc = 0xBADFACE;
  46     ret = my_u8_mbtouc (&uc, input, 6);
  47     ASSERT (ret == 1);
  48     ASSERT (uc == 0x0022);
  49     uc = 0xBADFACE;
  50     ret = my_u8_mbtouc (&uc, input + 1, 5);
  51     ASSERT (ret == 1);
  52     ASSERT (uc == 0xFFFD);
  53     uc = 0xBADFACE;
  54     ret = my_u8_mbtouc (&uc, input + 2, 4);
  55     ASSERT (ret == 1);
  56     ASSERT (uc == 0xFFFD);
  57     uc = 0xBADFACE;
  58     ret = my_u8_mbtouc (&uc, input + 3, 3);
  59     ASSERT (ret == 1);
  60     ASSERT (uc == 0xFFFD);
  61     uc = 0xBADFACE;
  62     ret = my_u8_mbtouc (&uc, input + 4, 2);
  63     ASSERT (ret == 1);
  64     ASSERT (uc == 0xFFFD);
  65     uc = 0xBADFACE;
  66     ret = my_u8_mbtouc (&uc, input + 5, 1);
  67     ASSERT (ret == 1);
  68     ASSERT (uc == 0x0022);
  69   }
  70   /* 3.2. Lonely start characters.  */
  71   {
  72     ucs4_t c;
  73     uint8_t input[2];
  74
  75     for (c = 0xC0; c <= 0xFF; c++)
  76       {
  77         input[0] = c;
  78         input[1] = ' ';
  79
  80         uc = 0xBADFACE;
  81         ret = my_u8_mbtouc (&uc, input, 2);
  82         ASSERT (ret == 1);
  83         ASSERT (uc == 0xFFFD);
  84       }
  85   }
  86   /* 3.3. Sequences with last continuation byte missing.  */
  87   /* 3.3.1. 2-byte sequence with last byte missing.  */
  88   {
  89     static const uint8_t input[] = { '"', 0xC0, '"' };
  90     uc = 0xBADFACE;
  91     ret = my_u8_mbtouc (&uc, input, 3);
  92     ASSERT (ret == 1);
  93     ASSERT (uc == 0x0022);
  94     uc = 0xBADFACE;
  95     ret = my_u8_mbtouc (&uc, input + 1, 2);
  96     ASSERT (ret == 1);
  97     ASSERT (uc == 0xFFFD);
  98     uc = 0xBADFACE;
  99     ret = my_u8_mbtouc (&uc, input + 2, 1);
 100     ASSERT (ret == 1);
 101     ASSERT (uc == 0x0022);
 102   }
 103   /* 3.3.6. 2-byte sequence with last byte missing.  */
 104   {
 105     static const uint8_t input[] = { '"', 0xDF, '"' };
 106     uc = 0xBADFACE;
 107     ret = my_u8_mbtouc (&uc, input, 3);
 108     ASSERT (ret == 1);
 109     ASSERT (uc == 0x0022);
 110     uc = 0xBADFACE;
 111     ret = my_u8_mbtouc (&uc, input + 1, 2);
 112     ASSERT (ret == 1);
 113     ASSERT (uc == 0xFFFD);
 114     uc = 0xBADFACE;
 115     ret = my_u8_mbtouc (&uc, input + 2, 1);
 116     ASSERT (ret == 1);
 117     ASSERT (uc == 0x0022);
 118   }
 119   /* 3.3.2. 3-byte sequence with last byte missing.  */
 120   {
 121     static const uint8_t input[] = { '"', 0xE0, 0x80, '"' };
 122     uc = 0xBADFACE;
 123     ret = my_u8_mbtouc (&uc, input, 4);
 124     ASSERT (ret == 1);
 125     ASSERT (uc == 0x0022);
 126     uc = 0xBADFACE;
 127     ret = my_u8_mbtouc (&uc, input + 1, 3);
 128     ASSERT (ret == 2);
 129     ASSERT (uc == 0xFFFD);
 130     uc = 0xBADFACE;
 131     ret = my_u8_mbtouc (&uc, input + 3, 1);
 132     ASSERT (ret == 1);
 133     ASSERT (uc == 0x0022);
 134   }
 135   /* 3.3.7. 3-byte sequence with last byte missing.  */
 136   {
 137     static const uint8_t input[] = { '"', 0xEF, 0xBF, '"' };
 138     uc = 0xBADFACE;
 139     ret = my_u8_mbtouc (&uc, input, 4);
 140     ASSERT (ret == 1);
 141     ASSERT (uc == 0x0022);
 142     uc = 0xBADFACE;
 143     ret = my_u8_mbtouc (&uc, input + 1, 3);
 144     ASSERT (ret == 2);
 145     ASSERT (uc == 0xFFFD);
 146     uc = 0xBADFACE;
 147     ret = my_u8_mbtouc (&uc, input + 3, 1);
 148     ASSERT (ret == 1);
 149     ASSERT (uc == 0x0022);
 150   }
 151   /* 3.3.3. 4-byte sequence with last byte missing.  */
 152   {
 153     static const uint8_t input[] = { '"', 0xF0, 0x80, 0x80, '"' };
 154     uc = 0xBADFACE;
 155     ret = my_u8_mbtouc (&uc, input, 5);
 156     ASSERT (ret == 1);
 157     ASSERT (uc == 0x0022);
 158     uc = 0xBADFACE;
 159     ret = my_u8_mbtouc (&uc, input + 1, 4);
 160     ASSERT (ret == 3);
 161     ASSERT (uc == 0xFFFD);
 162     uc = 0xBADFACE;
 163     ret = my_u8_mbtouc (&uc, input + 4, 1);
 164     ASSERT (ret == 1);
 165     ASSERT (uc == 0x0022);
 166   }
 167   /* 3.3.8. 4-byte sequence with last byte missing.  */
 168   {
 169     static const uint8_t input[] = { '"', 0xF7, 0xBF, 0xBF, '"' };
 170     uc = 0xBADFACE;
 171     ret = my_u8_mbtouc (&uc, input, 5);
 172     ASSERT (ret == 1);
 173     ASSERT (uc == 0x0022);
 174     uc = 0xBADFACE;
 175     ret = my_u8_mbtouc (&uc, input + 1, 4);
 176     ASSERT (ret == 3);
 177     ASSERT (uc == 0xFFFD);
 178     uc = 0xBADFACE;
 179     ret = my_u8_mbtouc (&uc, input + 4, 1);
 180     ASSERT (ret == 1);
 181     ASSERT (uc == 0x0022);
 182   }
 183 }
 184
 185 int
 186 main ()
 187 {
 188   test_function (u8_mbtouc);
 189   test_safe_function (u8_mbtouc);
 190
 191   return 0;
 192 }