~singpolyma/biboumi

ref: 66887c225b63cecea62d17bcfae40cddef38c9d1 biboumi/tests/encoding.cpp -rw-r--r-- 2.1 KiB
66887c22 — Florent Le Coz Add a few tests 7 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#include "catch.hpp"

#include <utils/encoding.hpp>


// Exercises utils::is_valid_utf8() with accepted and rejected inputs.
TEST_CASE("UTF-8 validation")
{
  // Text piled up with combining marks; the validator is expected to
  // accept it.
  const char* combining_marks = "C̡͔͕̩͙̽ͫ̈́ͥ̿̆ͧ̚r̸̩̘͍̻͖̆͆͛͊̉̕͡o͇͈̳̤̱̊̈͢q̻͍̦̮͕ͥͬͬ̽ͭ͌̾ͅǔ͉͕͇͚̙͉̭͉̇̽ȇ͈̮̼͍͔ͣ͊͞͝ͅ ͫ̾ͪ̓ͥ̆̋̔҉̢̦̠͈͔̖̲̯̦ụ̶̯͐̃̋ͮ͆͝n̬̱̭͇̻̱̰̖̤̏͛̏̿̑͟ë́͐҉̸̥̪͕̹̻̙͉̰ ̹̼̱̦̥ͩ͑̈́͑͝ͅt͍̥͈̹̝ͣ̃̔̈̔ͧ̕͝ḙ̸̖̟̙͙ͪ͢ų̯̞̼̲͓̻̞͛̃̀́b̮̰̗̩̰̊̆͗̾̎̆ͯ͌͝.̗̙͎̦ͫ̈́ͥ͌̈̓ͬ";
  CHECK(utils::is_valid_utf8(combining_marks));

  // Raw byte sequences the validator is expected to reject.
  CHECK_FALSE(utils::is_valid_utf8("\xF0\x0F"));
  CHECK_FALSE(utils::is_valid_utf8("\xFE\xFE\xFF\xFF"));

  // Same check, this time reading the bytes out of a std::string. INFO is
  // placed here so that it only accompanies the final assertion.
  const std::string table_flip("Biboumi ╯°□°)╯︵ ┻━┻");
  INFO(table_flip);
  CHECK(utils::is_valid_utf8(table_flip.data()));
}

// Exercises utils::convert_to_utf8(): an identity conversion, a latin-1
// source, and a source decoded with a charset that cannot represent it.
TEST_CASE("UTF-8 conversion")
{
  const std::string table_flip("Biboumi ╯°□°)╯︵ ┻━┻");
  // REQUIRE (not CHECK): the sections below are pointless if the fixture
  // itself is not accepted as valid UTF-8.
  REQUIRE(utils::is_valid_utf8(table_flip.data()));

  SECTION("Converting UTF-8 to UTF-8 should return the same string")
    {
      const std::string converted = utils::convert_to_utf8(table_flip, "UTF-8");
      CHECK(utils::is_valid_utf8(converted.c_str()) == true);
      CHECK(converted == table_flip);
    }

  SECTION("Testing latin-1 conversion")
    {
      // The same word in both encodings: the single byte 0xA5 on the
      // latin-1 side corresponds to the yen sign on the UTF-8 side.
      const std::string yen_latin1("couc\xa5ou");
      const std::string yen_utf8("couc¥ou");

      SECTION("Convert proper latin-1 to UTF-8")
        {
          const std::string decoded = utils::convert_to_utf8(yen_latin1.c_str(), "ISO-8859-1");
          CHECK(decoded == yen_utf8);
        }
      SECTION("Check the behaviour when the decoding fails (here because we provide a wrong charset)")
        {
          // The byte that US-ASCII cannot decode is expected to come back
          // as the replacement character.
          const std::string decoded = utils::convert_to_utf8(yen_latin1, "US-ASCII");
          CHECK(decoded == "couc�ou");
        }
    }
}

// Exercises utils::remove_invalid_xml_chars(): two strings that must come
// back untouched, then one containing characters that must be dropped.
TEST_CASE("Remove invalid XML chars")
{
  const std::string table_flip("Biboumi ╯°□°)╯︵ ┻━┻");
  const std::string no_control_chars("𤭢€¢$");
  INFO(table_flip);

  // Inputs without any offending character are returned unchanged.
  CHECK(utils::remove_invalid_xml_chars(no_control_chars) == no_control_chars);
  CHECK(utils::remove_invalid_xml_chars(table_flip) == table_flip);

  // \a, U+0008, U+FFFE and U+FFFF are expected to disappear, while \r, \n
  // and the heart symbol are expected to stay.
  CHECK(utils::remove_invalid_xml_chars("\acouco\u0008u\uFFFEt\uFFFFe\r\n♥") == "coucoute\r\n♥");
}