| |
| /pliant/util/encoding/utf8.pli |
| |
| 1 |
module "/pliant/install/ring2.pli" | |
| 2 |
module "/pliant/language/type/text/str32.pli" | |
| 3 |
| |
| 4 |
| |
| 5 |
function utf8_encode clear -> encoded | |
| 6 |
arg Str32 clear ; arg Str encoded | |
| 7 |
var Int length := 0 | |
| 8 |
for (var Int i) 0 clear:len-1 | |
| 9 |
var Int c := clear:i:number | |
| 10 |
if c<2^7 | |
| 11 |
length += 1 | |
| 12 |
eif c<2^11 | |
| 13 |
length += 2 | |
| 14 |
eif c<2^16 | |
| 15 |
length += 3 | |
| 16 |
eif c<2^21 | |
| 17 |
length += 4 | |
| 18 |
encoded set (memory_allocate length addressof:encoded) length true | |
| 19 |
var Address ptr := encoded characters | |
| 20 |
for (var Int i) 0 clear:len-1 | |
| 21 |
var Int c := clear:i:number | |
| 22 |
if c<2^7 | |
| 23 |
ptr map uInt8 := c ; ptr := ptr translate Byte 1 | |
| 24 |
eif c<2^11 | |
| 25 |
ptr map uInt8 := 0C0h+c\2^6 ; ptr := ptr translate Byte 1 | |
| 26 |
ptr map uInt8 := 080h+(c .and. 3Fh) ; ptr := ptr translate Byte 1 | |
| 27 |
eif c<2^16 | |
| 28 |
ptr map uInt8 := 0E0h+c\2^12 ; ptr := ptr translate Byte 1 | |
| 29 |
ptr map uInt8 := 080h+(c\2^6 .and. 3Fh) ; ptr := ptr translate Byte 1 | |
| 30 |
ptr map uInt8 := 080h+(c .and. 3Fh) ; ptr := ptr translate Byte 1 | |
| 31 |
eif c<2^21 | |
| 32 |
ptr map uInt8 := 0F0h+c\2^18 ; ptr := ptr translate Byte 1 | |
| 33 |
ptr map uInt8 := 080h+(c\2^12 .and. 3Fh) ; ptr := ptr translate Byte 1 | |
| 34 |
ptr map uInt8 := 080h+(c\2^6 .and. 3Fh) ; ptr := ptr translate Byte 1 | |
| 35 |
ptr map uInt8 := 080h+(c .and. 3Fh) ; ptr := ptr translate Byte 1 | |
| 36 |
| |
| 37 |
| |
| 38 |
function utf8_length8 c -> l | |
| 39 |
arg Int c l | |
| 40 |
if c<80h | |
| 41 |
l := 1 | |
| 42 |
eif c<0C0h | |
| 43 |
l := undefined | |
| 44 |
eif c<0E0h | |
| 45 |
l := 2 | |
| 46 |
eif c<0F0h | |
| 47 |
l := 3 | |
| 48 |
eif c<0F8h | |
| 49 |
l := 4 | |
| 50 |
else | |
| 51 |
l := undefined | |
| 52 |
| |
| 53 |
function utf8_length encoded -> length | |
| 54 |
arg Str encoded ; arg Int length | |
| 55 |
length := 0 | |
| 56 |
var Address src := encoded characters ; var Int remain := encoded len | |
| 57 |
while remain>0 | |
| 58 |
var Int l := utf8_length8 (src map uInt8) | |
| 59 |
if l=undefined | |
| 60 |
l := 1 | |
| 61 |
eif l<=remain | |
| 62 |
length += 1 | |
| 63 |
src := src translate uInt8 l ; remain -= l | |
| 64 |
| |
| 65 |
| |
| 66 |
function utf8_check encoded -> status | |
| 67 |
arg Str encoded ; arg Status status | |
| 68 |
var Address ptr := encoded characters ; var Int remain := encoded len | |
| 69 |
while remain>0 | |
| 70 |
var Int l := utf8_length8 (ptr map uInt8) | |
| 71 |
if l=undefined or l>remain | |
| 72 |
return failure | |
| 73 |
for (var Int i) 1 l-1 | |
| 74 |
if (((ptr translate uInt8 i) map uInt8) .and. 0C0h)<>80h | |
| 75 |
return failure | |
| 76 |
ptr := ptr translate uInt8 l ; remain -= l | |
| 77 |
status := success | |
| 78 |
| |
| 79 |
| |
| 80 |
function utf8_decode encoded -> clear | |
| 81 |
arg Str encoded ; arg Str32 clear | |
| 82 |
var Int length := utf8_length encoded | |
| 83 |
clear set (memory_allocate length*Char32:size addressof:clear) length true | |
| 84 |
var Address src := encoded characters ; var Int remain := encoded len | |
| 85 |
var Address dest := clear characters | |
| 86 |
while remain>0 | |
| 87 |
var Int c := src map uInt8 | |
| 88 |
var Int l := utf8_length8 c | |
| 89 |
if l=1 | |
| 90 |
dest map Int32 := c .and. 7Fh ; dest := dest translate Int32 1 | |
| 91 |
eif l=undefined | |
| 92 |
l := 1 | |
| 93 |
eif l<=remain | |
| 94 |
c := c .and. 07Fh\2^l | |
| 95 |
for (var Int i) 1 l-1 | |
| 96 |
c := c*2^6+(((src translate uInt8 i) map uInt8) .and. 3Fh) | |
| 97 |
dest map Int32 := c ; dest := dest translate Int32 1 | |
| 98 |
src := src translate uInt8 l ; remain -= l | |
| 99 |
check dest=(clear:characters translate Char32 length) | |
| 100 |
| |
| 101 |
| |
| 102 |
export utf8_encode utf8_decode | |
| 103 |
export utf8_length utf8_check | |
| |