PHP源码:使用CURL抓取淘宝页面函数
2018-06-23 站长 站长日志
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
/**
 * Fetch the HTML of a Taobao page with cURL.
 *
 * The request carries browser-like headers (Firefox User-Agent, Accept,
 * Accept-Language) plus a fixed Cookie header, and HTTP redirects are followed.
 *
 * @param string $url Address of the page to fetch.
 * @return string|false The response body on success; false when $url is empty,
 *                      the cURL handle cannot be created, or the transfer fails.
 */
public function getTaoBaoHtml( $url ) {
    if ( empty( $url ) ) {
        return false;
    }

    $ch = curl_init();
    if ( $ch === false ) {
        return false;
    }

    // Target URL.
    curl_setopt( $ch, CURLOPT_URL, $url );

    // Browser-like request headers. The values are sent verbatim, so they must
    // not be wrapped in "{...}" placeholders: braces would become part of the
    // User-Agent string and of the first/last cookie in the Cookie header.
    // NOTE(review): the Cookie value looks like a captured browser session and
    // will presumably expire; consider passing it in from configuration.
    curl_setopt( $ch, CURLOPT_HTTPHEADER, array(
        'User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64; rv:26.0) Gecko/20100101 Firefox/26.0',
        'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language: zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3',
        'Cookie: cq=ccp%3D1; cna=a7suCzOmSTECAXgg9iCf4AtX; t=671b2069c7e8ac444da66d664a397a5f; tracknick=%5Cu4F0D%5Cu6653%5Cu8F8901; _tb_token_=nDiU1vCuzFd0; cookie2=c54709ffbe04a5ccb80283c34d6b00fa; pnm_cku822=128WsMPac%2FFS4KgNn%2BYfhzduo4U2NC0zh9cAS4%3D%7CWUCLjKhqr873bOIFQcMecSw%3D%7CWMEKRlV%2B3D9a6XWaidNWNQOSWXwaXugvQHzhxALh%7CX0YLbX78NUR2b2DHoxnIqZENQqR35TBZbfQ5vooI0b6GHZA3U1kr%7CXkdILogCr878ZK9I%2B%2FE3QjAD3lFJJaAZRA%3D%3D%7CXUeMwMR2s%2BTUQk8IPP5TNgWfUjQwonccMCxihTa0fRYgtjgfa4j6%7CXMYK7F8liOvH3hMUpzXkiaU%2FJw%3D%3D',
    ) );

    // We want the page body, so do not turn the request into a HEAD request.
    curl_setopt( $ch, CURLOPT_NOBODY, false );
    // Keep the response headers out of the returned string.
    curl_setopt( $ch, CURLOPT_HEADER, false );
    // Return the body from curl_exec() instead of printing it; this replaces
    // the previous output-buffer capture and lets failures be detected.
    curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true );
    // Follow HTTP redirects.
    curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, true );
    // Bound the request so an unresponsive server cannot block the caller forever.
    curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT, 10 );
    curl_setopt( $ch, CURLOPT_TIMEOUT, 30 );

    // With CURLOPT_RETURNTRANSFER set, curl_exec() yields the body as a string,
    // or false when the transfer fails.
    $html = curl_exec( $ch );
    curl_close( $ch );

    if ( $html === false ) {
        return false;
    }

    return $html;
}
|
上一篇:帝国cms多值字段分割处理教程