# https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
#
# Unicode edge-case test vectors.
# Unless noted otherwise, each value spells one code point as its big-endian
# hex digits, one escape per byte (U+0390 is written "\x03\x90").

# Defines byte order and endianness (U+FEFF)
byte_order="\xFE\xFF"

# Reorder the display of text for RTL reading (U+202E)
right_to_left="\x20\x2E"

# Mongolian Vowel Separator (U+180E): invisible and has the whitespace property
# (was "\x18\x03", i.e. U+1803 MONGOLIAN FULL STOP, which is a visible character)
invisible_separator="\x18\x0e"

# Invisible zero-width character (U+2060)
word_join="\x20\x60"

# Reserved code point (U+FEFE)
reserved="\xfe\xfe"

# Invalid code points (U+FFFF, U+1FFFF, U+FDD0)
invalid1="\xff\xff"
invalid2="\x01\xff\xff"
# (was "\xfdd0": the second escape was missing, giving "\xfd" followed by the text "d0")
invalid3="\xfd\xd0"

# Unassigned code point (U+0FED)
unassigned="\x0f\xed"

# Illegal low half-surrogate (U+DEAD)
illegal_low="\xde\xad"

# Illegal high half-surrogate (U+DAAD)
illegal_high="\xda\xad"

# Private use area code point used by Apple for its logo (U+F8FF)
apple="\xf8\xff"

# Hostname normalization (U+FF0F)
fullwidth_solidus="\xff\x0f"

# Numerical mapping and a value (U+1D7D6)
bold_eight="\x01\xd7\xd6"

# U+00DF normalizes to "ss" during IDNA2003's mapping phase,
# different from its IDNA2008 mapping. See http://www.unicode.org/reports/tr46/
weird="\x00\xdf"

# U+FDFA expands by 11x (UTF-8) and 18x (UTF-16) under NFKC/NFKD
# (was U+FDFD / "\xfd\xfd", which is not the code point with this expansion)
expansion="\xfd\xfa"

# U+0390 expands by 3x (UTF-8) under NFD
expansion2="\x03\x90"

# U+1F82 expands by 4x (UTF-16) under NFD
expansion3="\x1F\x82"

# U+FB2C expands by 3x (UTF-16) under NFC
expansion4="\xFB\x2C"

# Lowercase expansion: https://twitter.com/jifa/status/625776454479970304
low_exp1="\x02\x3a"
low_exp2="\x02\x3e"
low_exp3="\x00\xdf"
low_exp4="\x1e\x9e"

# Null byte (U+0000)
null="\x00\x00"

# Unnamed raw byte sequences (not code-point spellings): overlong 6-byte
# UTF-8 forms whose payload bits decode to U+0000 and to U+002F ('/').
"\xfc\x80\x80\x80\x80\x80"
# (was "fc\x80...": the leading "\x" was missing, giving the literal text "fc")
"\xfc\x80\x80\x80\x80\xaf"

# Confusing new lines (unnamed): U+001B, U+0085, U+2028, U+2029
# NOTE(review): U+001B is ESCAPE, not a line break; U+000B (vertical tab) may
# have been intended. Value left unchanged pending confirmation.
"\x00\x1b"
"\x00\x85"
"\x20\x28"
"\x20\x29"