执行 queryXpath 之后,结果中的字符编码出现问题(带重音的字符变成了乱码)。
<?php
/**
 * Scrapes one product page from expert.com.pt and prints the extracted
 * fields (name, reference, price, image, short description, product URL,
 * category) as an array.
 *
 * Encoding note: DOMNode::C14N() always returns UTF-8, but Zend\Dom\Query
 * loads markup through libxml's HTML parser, which assumes ISO-8859-1 unless
 * the markup itself declares a charset. Every fragment that is re-parsed is
 * therefore prefixed with a charset <meta> tag; without it accented
 * characters come back as mojibake.
 */
use Zend\Dom\Query;
use Zend\Debug\Debug;

// Emit and process everything as UTF-8.
header('Content-Type: text/html; charset=utf-8');
mb_internal_encoding('utf-8');
mb_http_output('utf-8');
mb_regex_encoding('utf-8');
// The original also called mb_http_input('utf-8'); that function expects an
// input-type letter ("G", "P", "C", ...), not an encoding, so it was removed.

// Forward slash works on both Windows and Unix-like systems.
ini_set('include_path', 'ZendFramework-2.4.9/library');
require_once 'Zend/Loader/StandardAutoloader.php';
$autoloader = new \Zend\Loader\StandardAutoloader(array(
    'fallback_autoloader' => true,
));
$autoloader->register();

$url = "http://expert.com.pt/115-5-programas/14865-02-809-002-00263-meireles-maq-lavar-loica-mll-125-w-5604409141651.html";

// Download the page, following redirects, and keep the body in memory.
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
$content = curl_exec($ch);
$curlError = curl_error($ch); // must be read before the handle is closed
curl_close($ch);
if ($content === false) {
    throw new RuntimeException("Could not download {$url}: {$curlError}");
}

// Charset declaration prepended to every UTF-8 fragment handed to Query.
$utf8Meta = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';

// Returns the last node of a query result, or null when it is empty.
// "Last" mirrors the original loops, where each match overwrote the previous.
$lastNode = function ($nodes) {
    $last = null;
    foreach ($nodes as $node) {
        $last = $node;
    }
    return $last;
};

// Initialised up front so print_r() never sees an undefined variable.
$_arr = array();

// Non-ASCII characters become numeric entities, so the full page parses
// correctly whatever charset libxml assumes.
$pdom = new Query(mb_convert_encoding($content, 'HTML-ENTITIES', 'UTF-8'));
// $pdom->setEncoding('UTF-8');
// echo $pdom->getEncoding();
$result = $pdom->queryXpath('//*[@itemtype="http://schema.org/Product"]');

foreach ($result as $r) {
    if (!$r->hasChildNodes()) {
        continue;
    }

    // Re-parse only the product subtree; the meta prefix keeps it UTF-8.
    $dom2 = new Query($utf8Meta . $r->C14N());

    $node = $lastNode($dom2->queryXpath('//*[@itemprop="name"]'));
    if ($node !== null) {
        $_arr['name'] = $node->nodeValue;
    }

    $node = $lastNode($dom2->queryXpath('//*[@itemprop="sku"]'));
    if ($node !== null) {
        $_arr['ref'] = $node->nodeValue;
    }

    // The price text uses a decimal comma (e.g. "289,99"); take the first
    // number and normalise the comma to a dot before casting to float.
    $node = $lastNode($dom2->queryXpath('//*[@itemprop="price"]'));
    if ($node !== null
        && preg_match('/((?:[0-9]+,)*[0-9]+(?:\.[0-9]+)?)/', $node->nodeValue, $priceMatch) === 1
    ) {
        $_arr['price'] = (float) str_replace(',', '.', $priceMatch[0]);
    }

    // *[@itemprop="image"] would be the small picture; #bigpic is the large one.
    $node = $lastNode($dom2->queryXpath('//*[@id="bigpic"]'));
    if ($node !== null) {
        $_arr['image'] = $node->getAttribute('src');
    }

    // Kept as markup (not plain text). Alternative selector:
    // *[contains(@class,"product-desc")]
    $node = $lastNode($dom2->queryXpath('//*[@itemprop="description"]'));
    if ($node !== null) {
        $_arr['description_small'] = $node->C14N();
    }

    $node = $lastNode($dom2->queryXpath('//*[contains(@class,"pb-center-column col-xs-12 col-sm-4")]/p[4]/a'));
    if ($node !== null) {
        $_arr['url_prod'] = $node->getAttribute('href');
    }

    // The category comes from the breadcrumb, which lies outside the product
    // subtree, so it is looked up in the full-page document.
    $node = $lastNode($pdom->queryXpath('//*[contains(@class,"breadcrumb clearfix")]/a[4]'));
    if ($node !== null) {
        $_arr['categoria'] = $node->nodeValue;
    }
}

echo "<pre>";
print_r($_arr);
代码总是返回:
Array
(
[name] => Meireles - Maq. Lavar Loiâ§a MLL 125 W
[ref] => 02.809.002.00263
[price] => 289.99
[image] => http://expert.com.pt/180503-large_default/02-809-002-00263-
[categoria] => 5 Programas
)
问题已找到:
对于每一个 new Query($html),都必须在传入的 $html 开头加上下面的标签,否则解析器不会按 UTF-8 处理其中的内容:
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />