Zum Basteln ;)
Funktioniert für UTF-8-Sequenzen mit 1 bis 3 Bytes; den Rest kannst du selbst bauen (Codevorlage siehe unten).
<?php
/**
 * Decodes a UTF-8 encoded byte string into a list of Unicode code points.
 *
 * Sequences of one to four bytes are supported. Continuation bytes are not
 * validated; a lead byte whose sequence is cut off by the end of the input is
 * emitted as a single raw byte value instead of reading past the end.
 */
class Codepoint{
    /**
     * Bytes of the most recently decoded string, 1-indexed (the key layout
     * produced by unpack('C*', ...)). Kept public because the original
     * implementation exposed it as a dynamic (public) property.
     *
     * @var array
     */
    public $OCTS = array();

    /**
     * Code points (decimal integers) produced by the most recent cps() call.
     *
     * @var array
     */
    public $CODEPOINTS = array();

    /**
     * Decodes the sequence that starts at position $idx and appends the
     * resulting code point to $this->CODEPOINTS.
     *
     * @param int $part Lead byte, i.e. $this->OCTS[$idx].
     * @param int $idx  1-based position of the lead byte in $this->OCTS.
     * @return int 1-based position of the next lead byte.
     */
    private function examine($part, $idx){
        // Number of bytes available after the lead byte.
        $remaining = count($this->OCTS) - $idx;

        if($part > 239 && $part < 248 && $remaining >= 3){ // 4 bytes
            $this->CODEPOINTS[] = (($part - 240) << 18)
                + (($this->OCTS[$idx + 1] - 128) << 12)
                + (($this->OCTS[$idx + 2] - 128) << 6)
                + ($this->OCTS[$idx + 3] - 128);
            return $idx + 4;
        }
        if($part > 223 && $part < 240 && $remaining >= 2){ // 3 bytes
            $this->CODEPOINTS[] = (($part - 224) << 12)
                + (($this->OCTS[$idx + 1] - 128) << 6)
                + ($this->OCTS[$idx + 2] - 128);
            return $idx + 3;
        }
        if($part > 191 && $part < 224 && $remaining >= 1){ // 2 bytes
            $this->CODEPOINTS[] = (($part - 192) << 6)
                + ($this->OCTS[$idx + 1] - 128);
            return $idx + 2;
        }

        // 1 byte (ASCII), or a byte that cannot start a complete sequence.
        $this->CODEPOINTS[] = $part;
        return $idx + 1;
    }

    /**
     * Returns the code points of a UTF-8 string as decimal integers.
     *
     * Every call starts from a clean state, so the same instance can be used
     * for any number of strings.
     *
     * @param string $str UTF-8 encoded input.
     * @return array List of code points, in input order.
     */
    public function cps($str){
        $str = (string)$str;
        // unpack() is skipped for empty input so the result is always an array.
        $this->OCTS = ($str === '') ? array() : unpack('C*', $str);
        $this->CODEPOINTS = array();

        $total = count($this->OCTS);
        for($i = 1; $i <= $total;){
            $i = $this->examine($this->OCTS[$i], $i);
        }
        return $this->CODEPOINTS;
    }
}
// Demo: decode a sample string (3-, 2- and 1-byte characters) and dump the
// resulting code points.
$decoder = new Codepoint();
$codepoints = $decoder->cps('€äöüA');
print_r($codepoints);
#StringView.loadUTF8CharCode = function (aChars, nIdx) {
#
# var nLen = aChars.length, nPart = aChars[nIdx];
#
# return nPart > 251 && nPart < 254 && nIdx + 5 < nLen ?
# /* (nPart - 252 << 32) is not possible in ECMAScript! So...: */
# /* six bytes */ (nPart - 252) * 1073741824 + (aChars[nIdx + 1] - 128 << 24) + (aChars[nIdx + 2] - 128 << 18) + (aChars[nIdx + 3] - 128 << 12) + (aChars[nIdx + 4] - 128 << 6) + aChars[nIdx + 5] - 128
# : nPart > 247 && nPart < 252 && nIdx + 4 < nLen ?
# /* five bytes */ (nPart - 248 << 24) + (aChars[nIdx + 1] - 128 << 18) + (aChars[nIdx + 2] - 128 << 12) + (aChars[nIdx + 3] - 128 << 6) + aChars[nIdx + 4] - 128
# : nPart > 239 && nPart < 248 && nIdx + 3 < nLen ?
# /* four bytes */(nPart - 240 << 18) + (aChars[nIdx + 1] - 128 << 12) + (aChars[nIdx + 2] - 128 << 6) + aChars[nIdx + 3] - 128
# : nPart > 223 && nPart < 240 && nIdx + 2 < nLen ?
# /* three bytes */ (nPart - 224 << 12) + (aChars[nIdx + 1] - 128 << 6) + aChars[nIdx + 2] - 128
# : nPart > 191 && nPart < 224 && nIdx + 1 < nLen ?
# /* two bytes */ (nPart - 192 << 6) + aChars[nIdx + 1] - 128
# :
# /* one byte */ nPart;
#
#}
?>
Die Codepoints werden dezimal ausgegeben; für hexadezimale Ausgabe siehe sprintf mit %X.
Array
(
[0] => 8364
[1] => 228
[2] => 246
[3] => 252
[4] => 65
)