3.预定义字符范围
以下为引用的内容: [:alpha:]同[a-zA-Z] [:alnum:]同[a-zA-Z0-9] [:cntrl:]匹配控制字符,比如制表符,换行符,退格符 [:digit:]同[0-9] [:graph:]所有ASCII 33~126范围内可以打印的字符(不含空格) [:lower:]同[a-z] [:punct:]标点符号 [:upper:]同[A-Z] [:space:]空白字符,可以是空格、水平制表符、垂直制表符、换行、换页、回车 [:xdigit:]十六进制符同[a-fA-F0-9] |
废话不多说,直接上我的源码吧,有什么不懂的可以上百度查查。
以下为引用的内容: <?php
/**
 * Sina rolling-news scraper.
 *
 * For each feed URL below, getinfo() downloads the feed, extracts the
 * title/link entries, downloads every linked article, converts the text
 * from GBK to UTF-8 and hands it to insertdb() together with a category id.
 */
header("Content-type: text/html; charset=utf-8");

// One call per feed; the second argument is the category id passed to insertdb().
getinfo("http://rss.sina.com.cn/rollnews/news/gn_total.js", 1);
getinfo("http://rss.sina.com.cn/rollnews/news/gj_total.js", 2);
getinfo("http://rss.sina.com.cn/rollnews/news/sh_total.js", 3);
getinfo("http://rss.sina.com.cn/rollnews/sports/sports_total.js", 4);
getinfo("http://rss.sina.com.cn/rollnews/tech/tech1_total.js", 5);
getinfo("http://rss.sina.com.cn/rollnews/finance/finance1_news_total.js", 6);
getinfo("http://rss.sina.com.cn/rollnews/ent/ent_total.js", 7);
getinfo("http://rss.sina.com.cn/rollnews/jczs/jczs_total.js", 8);

/**
 * Fetch one feed, then download and store every article it lists.
 *
 * Titles are read from the feed's `title:"..."` entries and article URLs
 * from its `link:"..."` entries. Processing of the feed stops at the first
 * insertdb() call that returns a falsy value.
 *
 * @param string $infourl URL of the feed to read.
 * @param int    $catid   Category id, passed unchanged to insertdb().
 * @return void
 */
function getinfo($infourl, $catid)
{
    $pagecontent = getwebcontent($infourl);

    preg_match_all('/title:"(.*?)"/', $pagecontent, $match);
    $titlearr = isset($match[1]) ? $match[1] : array();

    preg_match_all('/link:"(.*?)"/', $pagecontent, $match);
    $urlarr = isset($match[1]) ? $match[1] : array();

    // NOTE(review): link $i is paired with title $i - 1 and link 0 is never
    // fetched. Presumably the feed starts with one extra (channel-level)
    // link; the offset is kept as-is - confirm against the live feed.
    $total = count($urlarr);
    for ($i = 1; $i < $total; $i++) {
        $rawtitle = isset($titlearr[$i - 1]) ? $titlearr[$i - 1] : '';

        // iconv() returns false on failure; fall back to an empty string.
        $title = iconv("gbk", "utf-8", $rawtitle);
        if ($title === false) {
            $title = '';
        }

        // Print the converted title so the output matches the UTF-8
        // charset declared in the Content-type header.
        echo "go {$title}\n";

        $content = iconv("gbk", "utf-8", getnewscontent($urlarr[$i]));
        if ($content === false) {
            $content = '';
        }

        // No SQL escaping here: mysql_escape_string() was removed in PHP 7
        // and never took the connection charset into account. insertdb()
        // must bind these values as prepared-statement parameters.
        if (!insertdb($title, $content, $catid)) {
            break;
        }
    }
}

/**
 * Store one article. PLACEHOLDER - replace the body with your own
 * persistence code (the original article left this part to the reader).
 *
 * $title and $content arrive as UTF-8 text that has NOT been SQL-escaped;
 * bind them as parameters of a prepared statement (PDO or mysqli) instead
 * of concatenating them into a query string.
 *
 * @param string $title   Article title (UTF-8).
 * @param string $content Article body HTML (UTF-8).
 * @param int    $catid   Category id given to getinfo().
 * @return bool True to continue; a falsy value makes getinfo() stop
 *              processing the current feed.
 */
function insertdb($title, $content, $catid)
{
    throw new RuntimeException('insertdb() is a placeholder: write the data into your own database here.');
}

/**
 * Download an article page and return its cleaned-up body HTML.
 *
 * The body is the text between the `artibody` container and the
 * `publish_helper_end` marker. Links, the `otherContent_01` block, the
 * `blk-video` block and one specific clearing <div> are stripped from it.
 *
 * @param string $newsurl URL of the article page.
 * @return string The cleaned body, or '' when the page does not contain
 *                the expected container.
 */
function getnewscontent($newsurl)
{
    $newscontent = getwebcontent($newsurl);

    $bodypattern = '/<div class="blkContainerSblkCon" id="artibody">([\s\S]*?)<!-- publish_helper_end -->/';
    if (!preg_match($bodypattern, $newscontent, $match)) {
        // Download failed or the page layout is different: nothing to extract.
        return '';
    }
    $content = $match[1];

    // \b keeps the pattern from also matching <abbr>, <address>, <area>, ...
    $content = preg_replace('/<a\b.*?<\/a>/si', "", $content);
    $content = preg_replace('/<div style="overflow:hidden;zoom:1;" class="otherContent_01">.*?<\/div>/si', "", $content);
    $content = preg_replace('/<div class="blk-video">.*?<div class="clearcl"><\/div>/si', "", $content);

    // NOTE(review): "hiddden" is reproduced exactly from the original
    // snippet; presumably it mirrors the scraped markup - confirm.
    $content = str_replace("<div style=\"clear:both;height:0;visibility:hiddden;overflow:hidden;\"></div>", "", $content);

    return $content;
}

/**
 * Download a URL with cURL and return the trimmed response body.
 *
 * Redirects are followed and the connection timeout is 10 seconds.
 *
 * @param string $url URL to download.
 * @return string The response body with surrounding whitespace removed,
 *                or '' when the request fails.
 */
function getwebcontent($url)
{
    $ch = curl_init();
    if ($ch === false) {
        return '';
    }

    $timeout = 10;
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);

    $contents = curl_exec($ch);
    curl_close($ch);

    // With CURLOPT_RETURNTRANSFER set, curl_exec() returns false on failure.
    if (!is_string($contents)) {
        return '';
    }

    return trim($contents);
}
?> |
|