看板 Mesak
作者 標題 [PHP] Unicode convert to decimal
時間 2012年12月22日 Sat. PM 05:40:17
http://php.net/manual/en/function.htmlentities.php
<?php
// Converts a UTF-8 string into an ASCII-only HTML string built from entities.
// - $utf8: the UTF-8 string to convert
// - $encodeTags: boolean. TRUE also escapes the ASCII HTML special characters
//   (e.g. "<" becomes "&lt;"); FALSE copies ASCII bytes through unchanged
// - return: returns the converted HTML string
//
// Two-byte characters become a named entity when htmlentities() knows one and
// a decimal numeric reference ("&#NNNN;") otherwise; three- and four-byte
// characters always become decimal numeric references.
// Malformed input is dropped instead of being decoded into garbage: stray
// continuation bytes, bytes 0xF8-0xFF, lead bytes whose sequence is truncated
// or contains a non-continuation byte, surrogates and values above U+10FFFF.
function utf8tohtml($utf8, $encodeTags=TRUE) {
    $result = '';
    // Byte length; computed once because the string is never modified.
    $length = strlen($utf8);
    for ($i = 0; $i < $length; $i++) {
        $char = $utf8[$i];
        $ascii = ord($char);
        if ($ascii < 128) {
            // One-byte (ASCII) character. Flags are explicit so the result
            // does not depend on the PHP version's htmlentities() defaults.
            $result .= ($encodeTags) ? htmlentities($char, ENT_QUOTES, 'UTF-8') : $char;
            continue;
        }

        // Work out how many continuation bytes the lead byte announces and
        // seed the code point with the payload bits of the lead byte.
        if ($ascii < 192) {
            // Continuation byte without a lead byte: not a start byte, drop it.
            continue;
        } else if ($ascii < 224) {
            $extra = 1;
            $unicode = 31 & $ascii;
        } else if ($ascii < 240) {
            $extra = 2;
            $unicode = 15 & $ascii;
        } else if ($ascii < 248) {
            $extra = 3;
            $unicode = 7 & $ascii;
        } else {
            // 0xF8-0xFF never start a UTF-8 sequence, drop the byte.
            continue;
        }

        // Truncated sequence at the end of the input: drop only the lead byte
        // so the loop never reads past the end of the string.
        if ($i + $extra >= $length) {
            continue;
        }

        // Every following byte must look like 10xxxxxx. If one does not, only
        // the lead byte is dropped and scanning resumes at the next byte, so
        // valid characters after a broken lead byte are not swallowed.
        $valid = true;
        for ($j = 1; $j <= $extra; $j++) {
            $next = ord($utf8[$i + $j]);
            if (($next & 192) !== 128) {
                $valid = false;
                break;
            }
            $unicode = $unicode * 64 + (63 & $next);
        }
        if (!$valid) {
            continue;
        }

        if ($extra === 1) {
            // Two-byte character: prefer a named entity such as "&eacute;".
            // htmlentities() returns '' for an invalid (overlong) sequence,
            // which drops it, and returns the input unchanged when no named
            // entity exists.
            $sequence = substr($utf8, $i, 2);
            $entity = htmlentities($sequence, ENT_QUOTES, 'UTF-8');
            // Fall back to a numeric reference so the output stays ASCII.
            // U+0080-U+009F are left untouched because browsers reinterpret
            // numeric references in that range as Windows-1252 characters.
            if ($entity === $sequence && $unicode >= 160) {
                $entity = "&#$unicode;";
            }
            $result .= $entity;
        } else if ($unicode <= 0x10FFFF && ($unicode < 0xD800 || $unicode > 0xDFFF)) {
            // Three- or four-byte character as a decimal numeric reference.
            // Surrogates and values above U+10FFFF are not valid references
            // and are dropped.
            $result .= "&#$unicode;";
        }
        // Skip the continuation bytes that were just consumed.
        $i += $extra;
    }
    return $result;
}
// Usage example: $anyUTF8string stands for the caller's own UTF-8 input.
print utf8tohtml($anyUTF8string, true);
?>
// Converts a UTF-8 string into an ASCII-only HTML string built from entities.
// - $utf8: the UTF-8 string to convert
// - $encodeTags: boolean. TRUE also escapes the ASCII HTML special characters
//   (e.g. "<" becomes "&lt;"); FALSE copies ASCII bytes through unchanged
// - return: returns the converted HTML string
//
// Two-byte characters become a named entity when htmlentities() knows one and
// a decimal numeric reference ("&#NNNN;") otherwise; three- and four-byte
// characters always become decimal numeric references.
// Malformed input is dropped instead of being decoded into garbage: stray
// continuation bytes, bytes 0xF8-0xFF, lead bytes whose sequence is truncated
// or contains a non-continuation byte, surrogates and values above U+10FFFF.
function utf8tohtml($utf8, $encodeTags=TRUE) {
    $result = '';
    // Byte length; computed once because the string is never modified.
    $length = strlen($utf8);
    for ($i = 0; $i < $length; $i++) {
        $char = $utf8[$i];
        $ascii = ord($char);
        if ($ascii < 128) {
            // One-byte (ASCII) character. Flags are explicit so the result
            // does not depend on the PHP version's htmlentities() defaults.
            $result .= ($encodeTags) ? htmlentities($char, ENT_QUOTES, 'UTF-8') : $char;
            continue;
        }

        // Work out how many continuation bytes the lead byte announces and
        // seed the code point with the payload bits of the lead byte.
        if ($ascii < 192) {
            // Continuation byte without a lead byte: not a start byte, drop it.
            continue;
        } else if ($ascii < 224) {
            $extra = 1;
            $unicode = 31 & $ascii;
        } else if ($ascii < 240) {
            $extra = 2;
            $unicode = 15 & $ascii;
        } else if ($ascii < 248) {
            $extra = 3;
            $unicode = 7 & $ascii;
        } else {
            // 0xF8-0xFF never start a UTF-8 sequence, drop the byte.
            continue;
        }

        // Truncated sequence at the end of the input: drop only the lead byte
        // so the loop never reads past the end of the string.
        if ($i + $extra >= $length) {
            continue;
        }

        // Every following byte must look like 10xxxxxx. If one does not, only
        // the lead byte is dropped and scanning resumes at the next byte, so
        // valid characters after a broken lead byte are not swallowed.
        $valid = true;
        for ($j = 1; $j <= $extra; $j++) {
            $next = ord($utf8[$i + $j]);
            if (($next & 192) !== 128) {
                $valid = false;
                break;
            }
            $unicode = $unicode * 64 + (63 & $next);
        }
        if (!$valid) {
            continue;
        }

        if ($extra === 1) {
            // Two-byte character: prefer a named entity such as "&eacute;".
            // htmlentities() returns '' for an invalid (overlong) sequence,
            // which drops it, and returns the input unchanged when no named
            // entity exists.
            $sequence = substr($utf8, $i, 2);
            $entity = htmlentities($sequence, ENT_QUOTES, 'UTF-8');
            // Fall back to a numeric reference so the output stays ASCII.
            // U+0080-U+009F are left untouched because browsers reinterpret
            // numeric references in that range as Windows-1252 characters.
            if ($entity === $sequence && $unicode >= 160) {
                $entity = "&#$unicode;";
            }
            $result .= $entity;
        } else if ($unicode <= 0x10FFFF && ($unicode < 0xD800 || $unicode > 0xDFFF)) {
            // Three- or four-byte character as a decimal numeric reference.
            // Surrogates and values above U+10FFFF are not valid references
            // and are dropped.
            $result .= "&#$unicode;";
        }
        // Skip the continuation bytes that were just consumed.
        $i += $extra;
    }
    return $result;
}
// Usage example: $anyUTF8string stands for the caller's own UTF-8 input.
print utf8tohtml($anyUTF8string, true);
?>
--
Mesak Blog
http://mesak.oow.me
--
※ 作者: mesak 時間: 2012-12-22 17:40:17
※ 看板: Mesak 文章推薦值: 0 目前人氣: 0 累積人氣: 125
回列表(←)
分享