Revision [790]
This is an old revision of HandlingUTF8 made by AndreaRossato on 2004-07-28 09:55:20.
Real Multilanguage Support
Here's some code to provide real multilanguage support.
The first 3 functions are helpers used by the functions that do the real encoding conversions.
str2utf8, str2ascii and str2iso8859 can take any encoded string and convert it into the desired encoding: ascii plus unicode entities for html output, iso8859-1 plus unicode entities for database storage, and utf8 for forms.
Unfortunately the ascii and iso8859 output is not compatible with htmlspecialchars, which would escape the & of every numeric entity again. This is the reason for the valid_xml function: it serves the same purpose as htmlspecialchars, but handles & correctly.
How do you use these functions? For instance,
- in formatters/wakka.php you should use:
- print($this->str2ascii($text));
- in wakka.php, function SavePage you should use:
- "body = '".mysql_escape_string(trim($this->str2iso8859($body)))."'");
- in handlers/page/edit.php you should use:
- "<textarea rows=\"40\" cols=\"60\" onkeydown=\"fKeyDown()\" name=\"body\" style=\"width: 100%; height: 400px\">".$this->valid_xml($this->str2utf8($body))."</textarea><br />\n"
And so on....
Check it out here.
The bits:
<?php
//Multilanguage support. We will use: utf-8 for user input, iso8859-1 + unicode for database storage and ascii + unicode for printing
/**
 * Decode a UTF-8 byte string into a list of Unicode code points.
 *
 * Handles 1-, 2-, 3- and 4-byte sequences. The decoder is deliberately
 * lenient: bytes are not validated as proper lead/continuation bytes, an
 * ASCII byte is emitted immediately even if a multi-byte sequence is still
 * pending, and an unfinished sequence at the end of the input is dropped.
 *
 * @param string $str UTF-8 encoded bytes
 * @return array integer code points, in input order
 */
function utf8_to_unicode($str) {
	$unicode = array();
	$values = array();
	$lookingFor = 1;
	$length = strlen($str); // hoisted: the input never changes inside the loop
	for ($i = 0; $i < $length; $i++) {
		$thisValue = ord($str[$i]);
		if ($thisValue < 128) {
			$unicode[] = $thisValue;
			continue;
		}
		if (count($values) == 0) {
			// First byte of a sequence decides its total length.
			if ($thisValue < 224) {
				$lookingFor = 2;
			} elseif ($thisValue < 240) {
				$lookingFor = 3;
			} else {
				// 0xF0 and above: 4-byte sequence (code points beyond U+FFFF)
				$lookingFor = 4;
			}
		}
		$values[] = $thisValue;
		if (count($values) == $lookingFor) {
			// Strip the marker bits from every byte and combine the payloads.
			if ($lookingFor == 4) {
				$number = (($values[0] % 8) * 262144) + (($values[1] % 64) * 4096)
					+ (($values[2] % 64) * 64) + ($values[3] % 64);
			} elseif ($lookingFor == 3) {
				$number = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
			} else {
				$number = (($values[0] % 32) * 64) + ($values[1] % 64);
			}
			$unicode[] = $number;
			$values = array();
			$lookingFor = 1;
		}
	}
	return $unicode;
}
/**
 * Rewrite numeric entities in the 128-159 range as the Unicode entities of
 * the Windows-1252 characters those byte values stand for.
 *
 * The search keys must be the literal entity text ("&#128;" etc.). If both
 * the search and the replacement are written as the already-decoded
 * character, every replacement is a no-op.
 *
 * Slot 129 is unassigned in Windows-1252 and is removed. Slots 141, 143,
 * 144 and 157 are also unassigned and are left untouched.
 *
 * @param string $str text possibly containing "&#128;" .. "&#159;" entities
 * @return string the same text with those entities mapped to Unicode entities
 */
function deCP1252 ($str) {
	static $map = array(
		"&#128;" => "&#8364;", // euro sign
		"&#129;" => "",        // unassigned
		"&#130;" => "&#8218;", // single low-9 quotation mark
		"&#131;" => "&#402;",  // latin small f with hook
		"&#132;" => "&#8222;", // double low-9 quotation mark
		"&#133;" => "&#8230;", // horizontal ellipsis
		"&#134;" => "&#8224;", // dagger
		"&#135;" => "&#8225;", // double dagger
		"&#136;" => "&#710;",  // modifier circumflex
		"&#137;" => "&#8240;", // per mille sign
		"&#138;" => "&#352;",  // S with caron
		"&#139;" => "&#8249;", // single left angle quotation mark
		"&#140;" => "&#338;",  // OE ligature
		"&#142;" => "&#381;",  // Z with caron
		"&#145;" => "&#8216;", // left single quotation mark
		"&#146;" => "&#8217;", // right single quotation mark
		"&#147;" => "&#8220;", // left double quotation mark
		"&#148;" => "&#8221;", // right double quotation mark
		"&#149;" => "&#8226;", // bullet
		"&#150;" => "&#8211;", // en dash
		"&#151;" => "&#8212;", // em dash
		"&#152;" => "&#732;",  // small tilde
		"&#153;" => "&#8482;", // trade mark sign
		"&#154;" => "&#353;",  // s with caron
		"&#155;" => "&#8250;", // single right angle quotation mark
		"&#156;" => "&#339;",  // oe ligature
		"&#158;" => "&#382;",  // z with caron
		"&#159;" => "&#376;",  // Y with diaeresis
	);
	// strtr() does one pass and never rescans its own output.
	return strtr($str, $map);
}
/**
 * Encode a single Unicode code point as a UTF-8 byte string.
 *
 * @param int|string $num code point, as an integer or a numeric string
 * @return string 1 to 4 UTF-8 bytes, or '' when $num is not numeric or is
 *                not an encodable code point (negative, a UTF-16 surrogate,
 *                or above U+10FFFF)
 */
function code2utf($num){
	if (!is_numeric($num)) return '';
	$num = (int) $num;
	// Surrogates and values beyond U+10FFFF have no valid UTF-8 form.
	if ($num < 0 || $num > 0x10FFFF || ($num >= 0xD800 && $num <= 0xDFFF)) return '';
	if ($num < 128) return chr($num);
	if ($num < 2048) return chr(($num >> 6) + 192) . chr(($num & 63) + 128);
	if ($num < 65536) return chr(($num >> 12) + 224) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
	return chr(($num >> 18) + 240) . chr((($num >> 12) & 63) + 128) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
}
//to print in a form
/**
 * Convert a string to UTF-8 and expand its decimal numeric entities
 * ("&#NNN;") into the corresponding UTF-8 characters.
 *
 * A string detected as UTF-8 only has its entities expanded. Any other
 * string is read byte by byte: bytes 128-159 are first written as numeric
 * entities so deCP1252() can remap them, and every other byte is encoded
 * with code2utf() as the code point of the same value.
 *
 * Entities are expanded in a single pass, so text produced by one expansion
 * is never expanded a second time, and "&#;" (no digits) is left alone.
 *
 * Side effect: sets the global mbstring detection order.
 *
 * @param string $str input in ASCII, UTF-8 or a single-byte encoding
 * @return string UTF-8 text with decimal numeric entities expanded
 */
function str2utf8($str) {
	mb_detect_order("ASCII, UTF-8, ISO-8859-1");
	if (mb_detect_encoding($str) != "UTF-8") {
		$bytes = $str;
		$str = "";
		$length = strlen($bytes);
		for ($i = 0; $i < $length; $i++) {
			$code = ord($bytes[$i]);
			if ($code >= 128 && $code < 160) {
				// Keep as an entity so deCP1252() can translate it.
				$str .= "&#" . $code . ";";
			} else {
				$str .= $this->code2utf($code);
			}
		}
		$str = $this->deCP1252($str);
	}
	$decoded = preg_replace_callback("/&#([0-9]+);/", array($this, '_entity2utf'), $str);
	// preg_replace_callback() returns null on a PCRE error; keep the text then.
	return ($decoded === null) ? $str : $decoded;
}

/**
 * preg_replace_callback() helper for str2utf8(): turn one matched decimal
 * entity into UTF-8.
 *
 * @param array $match regex match; $match[1] holds the decimal digits
 * @return string the UTF-8 bytes produced by code2utf()
 */
function _entity2utf($match) {
	return $this->code2utf($match[1]);
}
//to print html
/**
 * Produce a pure-ASCII rendering of a string for HTML output.
 *
 * The input is normalised with str2utf8(), split into code points, and
 * every code point above 127 is written as a decimal numeric entity. The
 * result is finally passed through deCP1252().
 *
 * @param string $str text in any encoding str2utf8() accepts
 * @return string ASCII text with "&#NNN;" entities for non-ASCII characters
 */
function str2ascii ($str) {
	$codepoints = $this->utf8_to_unicode($this->str2utf8($str));
	$html = '';
	foreach ($codepoints as $cp) {
		if ($cp > 127) {
			$html .= '&#' . $cp . ';';
		} else {
			$html .= chr($cp);
		}
	}
	return $this->deCP1252($html);
}
//for database storage
/**
 * Produce an ISO-8859-1 rendering of a string for database storage.
 *
 * The input is normalised with str2utf8() and split into code points.
 * Code points 0-127 and 160-255 are written as single bytes; everything
 * else (including 128-159) becomes a decimal numeric entity. The result is
 * finally passed through deCP1252().
 *
 * @param string $str text in any encoding str2utf8() accepts
 * @return string single-byte text with "&#NNN;" entities for the rest
 */
function str2iso8859 ($str) {
	$utf8 = $this->str2utf8($str);
	$out = "";
	foreach ($this->utf8_to_unicode($utf8) as $cp) {
		$singleByte = ($cp <= 127) || ($cp > 159 && $cp <= 255);
		$out .= $singleByte ? chr($cp) : ('&#' . $cp . ';');
	}
	return $this->deCP1252($out);
}
/**
 * Escape a string for use in XML/HTML while leaving existing entities intact.
 *
 * Double quotes and angle brackets are replaced by their named entities.
 * An ampersand is escaped only when it does not already start something
 * that looks like an entity (letters, digits or '#' followed by ';'), so
 * numeric entities such as "&#8364;" are preserved.
 *
 * @param string $str text that may already contain entities
 * @return string the escaped text
 */
function valid_xml ($str) {
	$str = str_replace("\"", "&quot;", $str);
	$str = str_replace("<", "&lt;", $str);
	$str = str_replace(">", "&gt;", $str);
	// Runs last: the entities inserted above are recognised and skipped.
	$str = preg_replace("/&(?![a-zA-Z0-9#]+?;)/", "&amp;", $str);
	return $str;
}
?>
//Multilanguage support. We will use: utf-8 for user input, iso8859-1 + unicode for database storage and ascii + unicode for printing
/**
 * Decode a UTF-8 byte string into a list of Unicode code points.
 *
 * Handles 1-, 2-, 3- and 4-byte sequences. The decoder is deliberately
 * lenient: bytes are not validated as proper lead/continuation bytes, an
 * ASCII byte is emitted immediately even if a multi-byte sequence is still
 * pending, and an unfinished sequence at the end of the input is dropped.
 *
 * @param string $str UTF-8 encoded bytes
 * @return array integer code points, in input order
 */
function utf8_to_unicode($str) {
	$unicode = array();
	$values = array();
	$lookingFor = 1;
	$length = strlen($str); // hoisted: the input never changes inside the loop
	for ($i = 0; $i < $length; $i++) {
		$thisValue = ord($str[$i]);
		if ($thisValue < 128) {
			$unicode[] = $thisValue;
			continue;
		}
		if (count($values) == 0) {
			// First byte of a sequence decides its total length.
			if ($thisValue < 224) {
				$lookingFor = 2;
			} elseif ($thisValue < 240) {
				$lookingFor = 3;
			} else {
				// 0xF0 and above: 4-byte sequence (code points beyond U+FFFF)
				$lookingFor = 4;
			}
		}
		$values[] = $thisValue;
		if (count($values) == $lookingFor) {
			// Strip the marker bits from every byte and combine the payloads.
			if ($lookingFor == 4) {
				$number = (($values[0] % 8) * 262144) + (($values[1] % 64) * 4096)
					+ (($values[2] % 64) * 64) + ($values[3] % 64);
			} elseif ($lookingFor == 3) {
				$number = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
			} else {
				$number = (($values[0] % 32) * 64) + ($values[1] % 64);
			}
			$unicode[] = $number;
			$values = array();
			$lookingFor = 1;
		}
	}
	return $unicode;
}
/**
 * Rewrite numeric entities in the 128-159 range as the Unicode entities of
 * the Windows-1252 characters those byte values stand for.
 *
 * The search keys must be the literal entity text ("&#128;" etc.). If both
 * the search and the replacement are written as the already-decoded
 * character, every replacement is a no-op.
 *
 * Slot 129 is unassigned in Windows-1252 and is removed. Slots 141, 143,
 * 144 and 157 are also unassigned and are left untouched.
 *
 * @param string $str text possibly containing "&#128;" .. "&#159;" entities
 * @return string the same text with those entities mapped to Unicode entities
 */
function deCP1252 ($str) {
	static $map = array(
		"&#128;" => "&#8364;", // euro sign
		"&#129;" => "",        // unassigned
		"&#130;" => "&#8218;", // single low-9 quotation mark
		"&#131;" => "&#402;",  // latin small f with hook
		"&#132;" => "&#8222;", // double low-9 quotation mark
		"&#133;" => "&#8230;", // horizontal ellipsis
		"&#134;" => "&#8224;", // dagger
		"&#135;" => "&#8225;", // double dagger
		"&#136;" => "&#710;",  // modifier circumflex
		"&#137;" => "&#8240;", // per mille sign
		"&#138;" => "&#352;",  // S with caron
		"&#139;" => "&#8249;", // single left angle quotation mark
		"&#140;" => "&#338;",  // OE ligature
		"&#142;" => "&#381;",  // Z with caron
		"&#145;" => "&#8216;", // left single quotation mark
		"&#146;" => "&#8217;", // right single quotation mark
		"&#147;" => "&#8220;", // left double quotation mark
		"&#148;" => "&#8221;", // right double quotation mark
		"&#149;" => "&#8226;", // bullet
		"&#150;" => "&#8211;", // en dash
		"&#151;" => "&#8212;", // em dash
		"&#152;" => "&#732;",  // small tilde
		"&#153;" => "&#8482;", // trade mark sign
		"&#154;" => "&#353;",  // s with caron
		"&#155;" => "&#8250;", // single right angle quotation mark
		"&#156;" => "&#339;",  // oe ligature
		"&#158;" => "&#382;",  // z with caron
		"&#159;" => "&#376;",  // Y with diaeresis
	);
	// strtr() does one pass and never rescans its own output.
	return strtr($str, $map);
}
/**
 * Encode a single Unicode code point as a UTF-8 byte string.
 *
 * @param int|string $num code point, as an integer or a numeric string
 * @return string 1 to 4 UTF-8 bytes, or '' when $num is not numeric or is
 *                not an encodable code point (negative, a UTF-16 surrogate,
 *                or above U+10FFFF)
 */
function code2utf($num){
	if (!is_numeric($num)) return '';
	$num = (int) $num;
	// Surrogates and values beyond U+10FFFF have no valid UTF-8 form.
	if ($num < 0 || $num > 0x10FFFF || ($num >= 0xD800 && $num <= 0xDFFF)) return '';
	if ($num < 128) return chr($num);
	if ($num < 2048) return chr(($num >> 6) + 192) . chr(($num & 63) + 128);
	if ($num < 65536) return chr(($num >> 12) + 224) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
	return chr(($num >> 18) + 240) . chr((($num >> 12) & 63) + 128) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
}
//to print in a form
/**
 * Convert a string to UTF-8 and expand its decimal numeric entities
 * ("&#NNN;") into the corresponding UTF-8 characters.
 *
 * A string detected as UTF-8 only has its entities expanded. Any other
 * string is read byte by byte: bytes 128-159 are first written as numeric
 * entities so deCP1252() can remap them, and every other byte is encoded
 * with code2utf() as the code point of the same value.
 *
 * Entities are expanded in a single pass, so text produced by one expansion
 * is never expanded a second time, and "&#;" (no digits) is left alone.
 *
 * Side effect: sets the global mbstring detection order.
 *
 * @param string $str input in ASCII, UTF-8 or a single-byte encoding
 * @return string UTF-8 text with decimal numeric entities expanded
 */
function str2utf8($str) {
	mb_detect_order("ASCII, UTF-8, ISO-8859-1");
	if (mb_detect_encoding($str) != "UTF-8") {
		$bytes = $str;
		$str = "";
		$length = strlen($bytes);
		for ($i = 0; $i < $length; $i++) {
			$code = ord($bytes[$i]);
			if ($code >= 128 && $code < 160) {
				// Keep as an entity so deCP1252() can translate it.
				$str .= "&#" . $code . ";";
			} else {
				$str .= $this->code2utf($code);
			}
		}
		$str = $this->deCP1252($str);
	}
	$decoded = preg_replace_callback("/&#([0-9]+);/", array($this, '_entity2utf'), $str);
	// preg_replace_callback() returns null on a PCRE error; keep the text then.
	return ($decoded === null) ? $str : $decoded;
}

/**
 * preg_replace_callback() helper for str2utf8(): turn one matched decimal
 * entity into UTF-8.
 *
 * @param array $match regex match; $match[1] holds the decimal digits
 * @return string the UTF-8 bytes produced by code2utf()
 */
function _entity2utf($match) {
	return $this->code2utf($match[1]);
}
//to print html
/**
 * Produce a pure-ASCII rendering of a string for HTML output.
 *
 * The input is normalised with str2utf8(), split into code points, and
 * every code point above 127 is written as a decimal numeric entity. The
 * result is finally passed through deCP1252().
 *
 * @param string $str text in any encoding str2utf8() accepts
 * @return string ASCII text with "&#NNN;" entities for non-ASCII characters
 */
function str2ascii ($str) {
	$codepoints = $this->utf8_to_unicode($this->str2utf8($str));
	$html = '';
	foreach ($codepoints as $cp) {
		if ($cp > 127) {
			$html .= '&#' . $cp . ';';
		} else {
			$html .= chr($cp);
		}
	}
	return $this->deCP1252($html);
}
//for database storage
/**
 * Produce an ISO-8859-1 rendering of a string for database storage.
 *
 * The input is normalised with str2utf8() and split into code points.
 * Code points 0-127 and 160-255 are written as single bytes; everything
 * else (including 128-159) becomes a decimal numeric entity. The result is
 * finally passed through deCP1252().
 *
 * @param string $str text in any encoding str2utf8() accepts
 * @return string single-byte text with "&#NNN;" entities for the rest
 */
function str2iso8859 ($str) {
	$utf8 = $this->str2utf8($str);
	$out = "";
	foreach ($this->utf8_to_unicode($utf8) as $cp) {
		$singleByte = ($cp <= 127) || ($cp > 159 && $cp <= 255);
		$out .= $singleByte ? chr($cp) : ('&#' . $cp . ';');
	}
	return $this->deCP1252($out);
}
/**
 * Escape a string for use in XML/HTML while leaving existing entities intact.
 *
 * Double quotes and angle brackets are replaced by their named entities.
 * An ampersand is escaped only when it does not already start something
 * that looks like an entity (letters, digits or '#' followed by ';'), so
 * numeric entities such as "&#8364;" are preserved.
 *
 * @param string $str text that may already contain entities
 * @return string the escaped text
 */
function valid_xml ($str) {
	$str = str_replace("\"", "&quot;", $str);
	$str = str_replace("<", "&lt;", $str);
	$str = str_replace(">", "&gt;", $str);
	// Runs last: the entities inserted above are recognised and skipped.
	$str = preg_replace("/&(?![a-zA-Z0-9#]+?;)/", "&amp;", $str);
	return $str;
}
?>
--AndreaRossato