PHP 的汉字编码转换一直是件比较麻烦的事。
该类内置了四种过滤方式:"&#[dec];"、"&#x[hex];"、"%u[hex]" 以及 "UTF-8 转码"(即 %XX 形式的百分号编码),
方便用户使用;同时也可以传入自定义的正则表达式作为过滤规则,进行自己需要的转换。
该类运行时需要读取码表文件 qswhU.php,可从这里下载:
http://www.blueidea.com/user/qswh/qswhU.zip
/**
 * Decodes escaped Unicode text into double-byte text through a lookup table.
 *
 * Supported escapes: numeric entities (&#dec; / &#xhex;), percent-encoded
 * UTF-8 (%XX%XX...), JavaScript-style %uXXXX, or any caller-supplied regex
 * whose first capture group holds the character code.
 *
 * Table layout, as used by the lookup code: line (high byte - 0x4D) of the
 * table file holds, for every low byte, four hex digits at offset low * 4
 * giving the two output bytes.
 * NOTE(review): the target encoding is presumably GB2312/GBK, judging by the
 * method name u2gb - confirm against the table file.
 */
class qswhU
{
    /**
     * Lines of the table file as returned by file(); empty when the file
     * could not be read.
     *
     * @var array
     */
    public $qswhData = array();

    /**
     * Loads the lookup table.
     *
     * A missing or unreadable file leaves the table empty instead of storing
     * false, so single-byte decoding keeps working and table lookups fall
     * back to "?".
     *
     * @param string $filename Path of the table file.
     */
    public function __construct($filename = "qswhU.php")
    {
        $lines = file($filename);
        $this->qswhData = is_array($lines) ? $lines : array();
    }

    /**
     * Legacy PHP 4-style constructor name, kept for code that calls it
     * explicitly. PHP 8 no longer treats it as a constructor, which is why
     * the loading logic lives in __construct().
     *
     * @param string $filename Path of the table file.
     */
    public function qswhU($filename = "qswhU.php")
    {
        $this->__construct($filename);
    }

    /**
     * Replaces every escape matched by $pattern with its decoded bytes.
     *
     * @param string     $str     Text containing escapes.
     * @param int|string $pattern Built-in filter index (0 = &#...;, 1 = %XX
     *                            UTF-8, 2 = %uXXXX) or a custom PCRE pattern
     *                            whose first capture group is the code.
     *
     * @return string|null Result of preg_replace_callback().
     *
     * @throws InvalidArgumentException When an integer index is out of range.
     */
    public function decode($str, $pattern = 0)
    {
        $builtin = array(
            '/&#(\w+);/iU',
            // Hex digits only, so text such as "100%zz" is left untouched.
            '/((%[0-9a-f]{2})+)/i',
            '/%u(\w{4,5})/iU',
        );

        if (is_int($pattern)) {
            if ($pattern < 0 || $pattern >= count($builtin)) {
                throw new InvalidArgumentException("Invalid Function");
            }
            $pattern = $builtin[$pattern];
        }

        return preg_replace_callback($pattern, array($this, "u2gb"), $str);
    }

    /**
     * preg_replace_callback() handler: converts one match to output bytes.
     *
     * @param array $arr Match array; [0] is the whole match, [1] the code.
     *
     * @return string Decoded bytes, or the original match text when the code
     *                cannot be parsed.
     */
    public function u2gb($arr)
    {
        $whole = isset($arr[0]) ? (string) $arr[0] : '';
        if (!isset($arr[1])) {
            return $whole;
        }
        $token = (string) $arr[1];

        // A token containing %XX pairs is a run of percent-encoded UTF-8 bytes.
        if (preg_match_all('/%([0-9a-f]{2})/i', $token, $found)) {
            return $this->decodeUtf8Bytes(array_map('hexdec', $found[1]));
        }

        $codePoint = $this->parseCodePoint($token, $whole);

        return $codePoint === null ? $whole : $this->lookup($codePoint);
    }

    /**
     * Decodes a list of UTF-8 byte values and maps each code point.
     * Truncated or malformed sequences yield "?".
     *
     * @param array $bytes Byte values (0-255) in input order.
     *
     * @return string
     */
    private function decodeUtf8Bytes($bytes)
    {
        $out = '';
        $count = count($bytes);
        $i = 0;

        while ($i < $count) {
            $lead = $bytes[$i++];

            // ">=" matters: 0xE0 and 0xF0 are themselves valid lead bytes.
            if ($lead >= 0xF0) {
                $extra = 3;
                $codePoint = $lead - 0xF0;
            } elseif ($lead >= 0xE0) {
                $extra = 2;
                $codePoint = $lead - 0xE0;
            } elseif ($lead >= 0xC0) {
                $extra = 1;
                $codePoint = $lead - 0xC0;
            } else {
                $extra = 0;
                $codePoint = $lead;
            }

            $complete = true;
            for (; $extra > 0; $extra--) {
                // Stop at the end of the run or at a byte that is not 10xxxxxx;
                // that byte is then re-read as the next lead byte.
                if ($i >= $count || ($bytes[$i] & 0xC0) !== 0x80) {
                    $complete = false;
                    break;
                }
                $codePoint = $codePoint * 0x40 + ($bytes[$i++] - 0x80);
            }

            $out .= $complete ? $this->lookup($codePoint) : '?';
        }

        return $out;
    }

    /**
     * Turns a captured code into a code point.
     *
     * Rules, in order: an "x" prefix means hex; an all-digit token inside an
     * "&#...;" match is decimal (so &#8364; is not mistaken for hex); any
     * other four-hex-digit token is hex (%uXXXX and custom filters); any
     * other all-digit token is decimal.
     *
     * @param string $token Captured code.
     * @param string $whole Whole matched text.
     *
     * @return int|null Code point, or null when the token is not numeric.
     */
    private function parseCodePoint($token, $whole)
    {
        $hexOnly = '/\A[0-9a-f]+\z/i';
        $decOnly = '/\A[0-9]+\z/';

        if ($token !== '' && strtolower($token[0]) === 'x') {
            $digits = (string) substr($token, 1);

            return preg_match($hexOnly, $digits) ? $this->clamp(hexdec($digits)) : null;
        }
        if (strncmp($whole, '&#', 2) === 0 && preg_match($decOnly, $token)) {
            return $this->clamp((float) $token);
        }
        if (strlen($token) === 4 && preg_match($hexOnly, $token)) {
            return (int) hexdec($token);
        }
        if (preg_match($decOnly, $token)) {
            return $this->clamp((float) $token);
        }

        return null;
    }

    /**
     * Caps oversized values so they stay integers and miss the table.
     *
     * @param int|float $value
     *
     * @return int
     */
    private function clamp($value)
    {
        return $value > 0x10FFFF ? 0x110000 : (int) $value;
    }

    /**
     * Maps one code point to output bytes.
     *
     * Values below 0x100 are emitted as a single byte; everything else is
     * looked up in the table. "?" is returned when the table has no usable
     * entry for the code point.
     *
     * @param int $codePoint
     *
     * @return string
     */
    private function lookup($codePoint)
    {
        if ($codePoint < 0x100) {
            return chr($codePoint);
        }
        if ($codePoint > 0xFFFF) {
            return '?';
        }

        $row = ($codePoint >> 8) - 0x4D;
        if (!is_array($this->qswhData) || !isset($this->qswhData[$row])) {
            return '?';
        }

        $cell = substr($this->qswhData[$row], ($codePoint & 0xFF) * 4, 4);
        if (!is_string($cell) || !preg_match('/\A[0-9a-f]{4}\z/i', $cell)) {
            return '?';
        }

        return chr(hexdec(substr($cell, 0, 2))) . chr(hexdec(substr($cell, 2, 2)));
    }
}
使用范例
// Usage example. Each statement sits on its own line: with the whole script
// on one line, the inline "//" comment disabled every statement after the
// constructor call.

// The argument may be omitted when the table file is named qswhU.php.
$qswh = new qswhU("qswhU.php");

// Default filter (&#[num];).
// NOTE(review): these two inputs contain no entities, presumably because the
// page that published the listing rendered them; they were likely the
// &#[dec]; and &#x[hex]; forms of the same text - confirm and restore.
echo "<xmp>不带参数(默认过滤为:&#[num];):";
echo "\n" . $qswh->decode("中文Abc");
echo "\n" . $qswh->decode("中文Abc");

// Built-in filter 1: percent-encoded UTF-8.
echo "\n调用内置过滤(UTF转码):" . $qswh->decode("%E4%B8%AD%E6%96%87%20!%22%23%24%25%26''()*%2B%2C%2F%3A%3B%3C%3D%3E%3F%40%5B%5D%5E%60%7B%7C%7D~%25Abc", 1);

// Built-in filter 2: JavaScript escape() style %uXXXX.
echo "\n调用内置过滤unescape(%u[num]):" . $qswh->decode("%u4E2D%u6587Abc", 2);

// Custom filter: any regex whose first capture group is the character code.
echo "\n自定义过滤([x+num]):" . $qswh->decode("[x4E2D][x6587][x41][x62][x63]", "/\[(\w+)\]/");
效果如下:
不带参数(默认过滤为:&#[num];):
中文Abc
中文Abc
调用内置过滤(UTF转码):中文 !"#$%&''()*+,/:;<=>?@[]^`{|}~%Abc
调用内置过滤unescape(%u[num]):中文Abc
自定义过滤([x+num]):中文Abc