关于 PHP curl 和 curl 命令


Regarding PHP cURL and the curl command

对于某些用 curl 命令可以正常访问的站点,PHP curl 却返回空白页。

例如:执行 curl www.wikipedia.org 可以得到正常输出,但 PHP curl 只返回一个仅含 <html> 标签的空白页。

// Download $url with cURL, writing the response body to $html_file and
// cURL's stderr output to $html_err_file.
//
// Reads from the enclosing scope: $url, $timeout, $html_file, $html_err_file
// and $DEBUG (truthy enables error_log() tracing).
// Returns the curl_exec() result on success, or false when the output file
// cannot be created, nothing was downloaded, or the transfer failed.
$ch = curl_init(); // create the handle; the URL is assigned further down
//TBD: all setopt commands return true/false. Should be handled
//curl_setopt($ch, CURLOPT_USERAGENT, $_SERVER["HTTP_USER_AGENT"]); // set  useragent
// NOTE(review): CURLOPT_FILE is set later on the same handle and appears to
// take precedence over this option, so curl_exec() is expected to return a
// boolean rather than the body - confirm against the PHP manual.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); // follow redirects if any
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout); // max. seconds to wait while connecting
curl_setopt($ch, CURLOPT_FAILONERROR, 0); // 0 = HTTP error status codes do not abort the transfer
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); // iNetClean is web-crawling, no need to verify certificates
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, FALSE);
curl_setopt($ch, CURLOPT_ENCODING, ""); // accept all encodings - identity, deflate, gzip
// now set the URL
curl_setopt($ch, CURLOPT_URL, $url);

$fp_out = fopen($html_file, 'w');
if (!$fp_out) {
    if ($DEBUG) {
        error_log("couldn't create " . $html_file);
    }
    // There is nowhere to store the body, and handing a failed fopen() result
    // to curl_setopt() is an error, so release the handle and give up.
    curl_close($ch);
    return false;
}
if ($DEBUG) {
    error_log("file created " . $html_file);
}

$fp_err = fopen($html_err_file, 'w');
if (!$fp_err) {
    // The error log is best-effort: carry on without redirecting stderr.
    if ($DEBUG) {
        error_log("couldn't create " . $html_err_file);
    }
} else {
    if ($DEBUG) {
        error_log("error file created " . $html_err_file);
    }
    curl_setopt($ch, CURLOPT_STDERR, $fp_err);
}
curl_setopt($ch, CURLOPT_FILE, $fp_out); //rawurlencode($url) . "txt"); // for debugging only

$result = curl_exec($ch);
// Capture the message now; the handle is released before it is reported.
$curl_error = curl_error($ch);

// Release every resource before inspecting the file, on all exit paths.
// Closing the output stream flushes any buffered data, so the size check
// below sees what was really downloaded.
curl_close($ch);
fclose($fp_out);
if ($fp_err) {
    fclose($fp_err);
}
clearstatcache(true, $html_file); // drop any cached size for this path

//0 size file is created if no data is downloaded or URL does not exist such as pron00.com. Hence added handler to such errors.
if (!(@filesize($html_file) > 0)) {
    return false;
}
if ($result === false) {
    trigger_error($curl_error);
    if ($DEBUG) {
        error_log("Curl_exec fail");
    }
    return false;
}
return $result;

您可以使用以下代码执行此操作:

<?php
// Two-step fetch of $fb_page_url with a browser-like User-Agent:
//   1. GET the page, storing cookies and scraping the hidden __VIEWSTATE and
//      __EVENTVALIDATION form fields when the page has them.
//   2. POST those fields back to the same URL and, when $debug is truthy,
//      echo the returned page.
$debug = 1;
$fb_page_url = "http://www.wikipedia.org";
$cookies = 'cookies.txt';
touch($cookies); // create the cookie file up front if it does not exist yet
$uagent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/36.0.1985.125 Chrome/36.0.1985.125 Safari/537.36';

/**
    Get __VIEWSTATE & __EVENTVALIDATION
 */
$ch = curl_init($fb_page_url);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookies);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_USERAGENT, $uagent);
$html = curl_exec($ch);
if ($html === false) {
    // Report the failure instead of silently scraping a boolean.
    trigger_error('Initial request failed: ' . curl_error($ch), E_USER_WARNING);
    $html = '';
}
curl_close($ch);

// Pages without these hidden inputs leave the match arrays empty, so fall
// back to an empty string rather than reading a missing index.
$viewstate = array();
$eventValidation = array();
preg_match('~<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="(.*?)" />~', $html, $viewstate);
preg_match('~<input type="hidden" name="__EVENTVALIDATION" id="__EVENTVALIDATION" value="(.*?)" />~', $html, $eventValidation);
$viewstate = isset($viewstate[1]) ? $viewstate[1] : '';
$eventValidation = isset($eventValidation[1]) ? $eventValidation[1] : '';

/**
 Start Fetching process
 */
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $fb_page_url);
// Certificate verification is left at cURL's defaults, exactly as in the
// first request above; disabling it only here was inconsistent and exposed
// the POST to man-in-the-middle tampering.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookies);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookies);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 0); // 0 = no limit on the connect phase
curl_setopt($ch, CURLOPT_TIMEOUT, 9850); // overall limit for the transfer, in seconds
curl_setopt($ch, CURLOPT_USERAGENT, $uagent);
// Collecting all POST fields
$postfields = array();
//$postfields['__EVENTTARGET'] = ""; //this is for further clicking any link
//$postfields['__EVENTARGUMENT'] = ""; //this is for further clicking any link
$postfields['__LASTFOCUS'] = "";
$postfields['__VIEWSTATE'] = $viewstate;
$postfields['__EVENTVALIDATION'] = $eventValidation;
$postfields['hidStates'] = "";
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $postfields);
$ret = curl_exec($ch); // Get result as fetched web page.
if ($ret === false) {
    trigger_error('POST request failed: ' . curl_error($ch), E_USER_WARNING);
} elseif ($debug) {
    echo $ret;
}
curl_close($ch);
?>