如何只登录一次,就能从需要登录的链接抓取数据


How to scrape data from a link after logging in only once

我开发了一个脚本,每隔几分钟运行一次并获取数据。目前每次运行都要先登录,然后才能从特定链接获取数据。我想知道如何只登录一次,之后直接从该链接获取数据,而不必每次都重新登录。

登录页面会自动生成一个名为 "testname" 的隐藏字段,所以我首先抓取登录页面,并从中获取该隐藏字段的值。

抓取登录页面获取隐藏字段。

<?php
// Scrape a page that sits behind a login form, logging in only when needed.
//
// Session cookies are persisted in a cookie jar on disk. Each run first
// requests the protected page with the saved cookies; the login sequence
// (fetch hidden token -> POST credentials) is executed only when the site
// answers with the login form instead of the data.
//
// Result: $result holds the body of the protected page.
// Throws RuntimeException when a request fails or the hidden login token
// cannot be found.

$loginPageUrl = "http://www.example.com/login";
$loginPostUrl = "http://www.example.com/comapny/index.php";
$dataUrl      = "http://www.example.com/abc/detail.php";

// Absolute path: a relative file name resolves against the current working
// directory, so a run started from another directory (e.g. by a scheduler)
// would not find the cookies saved by a previous run.
$cookie = __DIR__ . '/cookies.txt';

// Captures the value of the auto-generated hidden login field "testname".
// Leading whitespace inside the value attribute is tolerated but not required.
$hiddenFieldPattern = '/<input type="hidden" name="testname" value="\s*(.*?)"/';

$ch = curl_init();
if ($ch === false) {
    throw new RuntimeException('Unable to initialise cURL');
}

// Options shared by every request on this handle; set once instead of
// repeating them before each curl_exec().
curl_setopt_array($ch, array(
    CURLOPT_COOKIEJAR      => $cookie, // cookies are written here when the handle is destroyed
    CURLOPT_COOKIEFILE     => $cookie, // cookies from a previous run are loaded from here
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_FOLLOWLOCATION => true,
    // The total timeout must not be shorter than the connect timeout,
    // otherwise the connect limit can never take effect.
    CURLOPT_CONNECTTIMEOUT => 10,
    CURLOPT_TIMEOUT        => 30,
));

// Step 1: request the protected page with whatever session is already saved.
curl_setopt($ch, CURLOPT_HTTPGET, true);
curl_setopt($ch, CURLOPT_URL, $dataUrl);
$result = curl_exec($ch);
if ($result === false) {
    throw new RuntimeException('Fetching data page failed: ' . curl_error($ch));
}

// Step 2: log in only if the saved session was not accepted.
// NOTE(review): assumes an unauthenticated request is answered with the login
// form (directly or via redirect), recognisable by its hidden "testname"
// field - confirm against the real site and adjust the check if needed.
if (preg_match($hiddenFieldPattern, $result) === 1) {
    // Fetch the login page to obtain a fresh hidden token.
    curl_setopt($ch, CURLOPT_URL, $loginPageUrl);
    $content = curl_exec($ch);
    if ($content === false) {
        throw new RuntimeException('Fetching login page failed: ' . curl_error($ch));
    }
    if (preg_match($hiddenFieldPattern, $content, $matches) !== 1) {
        throw new RuntimeException('Hidden login field "testname" not found on login page');
    }

    // Submit the credentials together with the hidden token.
    $fields = array('testname'=>$matches[1],'email' => 'username', 'password' => 'password','btnLogin'=>'login');
    curl_setopt($ch, CURLOPT_URL, $loginPostUrl);
    curl_setopt($ch, CURLOPT_REFERER, $loginPostUrl);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $fields);
    if (curl_exec($ch) === false) {
        throw new RuntimeException('Login request failed: ' . curl_error($ch));
    }

    // Switch the reused handle back to GET and request the data page again,
    // now with the freshly established session.
    curl_setopt($ch, CURLOPT_HTTPGET, true);
    curl_setopt($ch, CURLOPT_URL, $dataUrl);
    $result = curl_exec($ch);
    if ($result === false) {
        throw new RuntimeException('Fetching data page after login failed: ' . curl_error($ch));
    }
}

// Destroying the handle flushes the in-memory cookies to the cookie jar so
// the next run can reuse the session without logging in.
unset($ch);
   //# Log in and save the session cookies to a cookie file.
    // $Data is the login payload and must be defined before this snippet runs.
    // The request declares a JSON body, so anything that is not already a
    // string is JSON-encoded; passing an array straight to CURLOPT_POSTFIELDS
    // would send multipart/form-data under a JSON Content-Type header.
    $payload = is_string($Data) ? $Data : json_encode($Data);
    if ($payload === false) {
        throw new InvalidArgumentException('Login payload could not be encoded as JSON: ' . json_last_error_msg());
    }

    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, "path"); // placeholder: replace with the login URL
    curl_setopt($curl, CURLOPT_COOKIEJAR, "cookie.txt"); // cookies are written here when the handle is destroyed
    curl_setopt($curl, CURLOPT_HEADER, 0);
    curl_setopt($curl, CURLOPT_HTTPHEADER, array("Content-Type:application/json"));
    curl_setopt($curl, CURLOPT_POST, true);
    curl_setopt($curl, CURLOPT_POSTFIELDS, $payload);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, TRUE);
    $result = curl_exec($curl); // response body, or false when the request failed
    if ($result === false) {
        throw new RuntimeException('Login request failed: ' . curl_error($curl));
    }
    // Destroy the handle now so the cookie jar is flushed to disk before any
    // later request tries to read it.
    unset($curl);
    //# Reuse the saved session: send the cookies stored in the cookie file
    //# (CURLOPT_COOKIEFILE) instead of logging in again.
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, "path"); // placeholder: replace with the URL to scrape
    curl_setopt($curl, CURLOPT_COOKIEFILE, "cookie.txt"); // load cookies saved by the login request
    curl_setopt($curl, CURLOPT_HEADER, 0);
    curl_setopt($curl, CURLOPT_HTTPGET, TRUE);
    // Advertise compressed responses; cURL decodes them transparently.
    // The encoding name is "deflate" - a misspelt token would be sent to the
    // server verbatim in the Accept-Encoding header.
    curl_setopt($curl, CURLOPT_ENCODING, 'gzip,deflate');
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, TRUE);
    $result = curl_exec($curl); // response body, or false when the request failed