我需要提取嵌在一些 HTML 标签中的 JSON。如何使用正则表达式
从下面的 JSON 中提取名称(键)值?
<div id="gwt_products_display_results" class="gwt_products_display_results">
<span class="JSON" style="display: none;">
{
"products": [
{
"targetURL": "/athena-mineral-fabric-by-the-yard/262682",
"listIndex": "0",
"minimumPrice": 20,
"categoryOnSale": "false",
"mfPartNumber": "FF010ATM",
"hasAtLeastOneBuyableAndPublishedItem": "true",
"attributes": [],
"partNumber": "b_FF010ATM",
"itemAsProduct": "true",
"iapAttribute": "",
"productDetailTargetURL": "/athena-mineral-fabric-by-the-yard/262682",
"iapAttributeCode": "",
"beanType": "bundle",
"name": "Athena Mineral Fabric by the Yard",
"maxListPrice": 0,
"thumbNail": "null",
"hasSaleSKUs": false,
"productId": "262682",
"currencyCode": "USD",
"hasMoreColors": false,
"xPriceLabel": "null",
"minListPrice": 0,
"maximumPrice": 20,
"iapAttributeDisplayName": "",
"shortDescription": "null",
"listId": "SEARCHRESULTS",
"categoryId": "null"
},
{
"targetURL": "/athena-slate-fabric-by-the-yard/262683",
"listIndex": "1",
"minimumPrice": 20,
"categoryOnSale": "false",
"mfPartNumber": "FF010ATS",
"hasAtLeastOneBuyableAndPublishedItem": "true",
"attributes": [],
"partNumber": "b_FF010ATS",
"itemAsProduct": "true",
"iapAttribute": "",
"productDetailTargetURL": "/athena-slate-fabric-by-the-yard/262683",
"iapAttributeCode": "",
"beanType": "bundle",
"name": "Athena Slate Fabric by the Yard",
"maxListPrice": 0,
"thumbNail": "null",
"hasSaleSKUs": false,
"productId": "262683",
"currencyCode": "USD",
"hasMoreColors": false,
"xPriceLabel": "null",
"minListPrice": 0,
"maximumPrice": 20,
"iapAttributeDisplayName": "",
"shortDescription": "null",
"listId": "SEARCHRESULTS",
"categoryId": "null"
},
{
"targetURL": "/typewriter-keys-giclee/261307",
"listIndex": "2",
"minimumPrice": 259,
"categoryOnSale": "false",
"mfPartNumber": "WD813",
"hasAtLeastOneBuyableAndPublishedItem": "true",
"attributes": [
{
"S7 - Overlay 1": "blank"
}
],
"partNumber": "p_WD813",
"itemAsProduct": "true",
"iapAttribute": "",
"productDetailTargetURL": "/typewriter-keys-giclee/261307",
"iapAttributeCode": "",
"beanType": "product",
"name": "Typewriter Keys Giclee",
"maxListPrice": 0,
"thumbNail": "null",
"hasSaleSKUs": false,
"productId": "261307",
"currencyCode": "USD",
"hasMoreColors": false,
"xPriceLabel": "null",
"minListPrice": 0,
"maximumPrice": 259,
"iapAttributeDisplayName": "",
"shortDescription": "null",
"listId": "SEARCHRESULTS",
"categoryId": "null"
}
]
}
</span>
</div>
到目前为止我所尝试的是
<span class="JSON" style="display: none;">([\s\S]+?)<\/span>
您可以将其转换为数组,然后使用array_keys();
// Decode into associative arrays (second argument true). Without it
// json_decode() returns stdClass objects and $array['products'] throws
// "Cannot use object of type stdClass as array".
$array = json_decode($json, true);
// 'products' is a list, so array_keys() on it would only yield 0, 1, 2.
// Take the keys of the first product to get the property names.
$keys = array_keys($array['products'][0]);
为什么要用正则表达式?正如这里的其他人提到的,您可以使用 json_decode 将其解析为一个数组,然后对其进行处理。
但是如果你坚持使用正则表达式,我建议使用 /"(.+?)":/
只要你的 JSON 格式与上面所示完全一致,它就能匹配所有的键。
也就是说,你是从一段 HTML 字符串中获取这段 JSON 的。假设该变量是 $html,并且你坚持使用正则表达式,可以像下面这样先用正则表达式提取 JSON,然后再解码。要获取键,请使用 array_keys()。
// Capture the text between <span ... class="JSON" ...> and </span>.
// The /s modifier lets "." match newlines, since the JSON spans many lines.
// The "/" of the closing tag must be escaped as "\/" because "/" is the
// pattern delimiter.
if (preg_match('/<span.*?class="JSON".*?>(.+?)<\/span>/s', $html, $matches) === 1) {
    // true => decode objects as associative arrays so array_keys() works.
    $decoded_array = json_decode($matches[1], true);
    print_r($decoded_array);
    // Property names of the first product.
    $keys = array_keys($decoded_array['products'][0]);
    print_r($keys);
}
您可以使用 DOMDocument 和 DOMXPath 来查找包含 JSON 的 span 元素,
然后使用 json_decode 进行解码。下面是一个粗略的示例,供您参考:
<?php
// Extracts the JSON embedded in a hidden <span class="JSON"> element using
// DOMDocument/DOMXPath instead of a regular expression, decodes it, and
// prints the number of products followed by each product's name.

// Sample page; the JSON document is the text content of the span.
$html = '
<html>
<head>
<title>Example</title>
</head>
<body>
<div id="gwt_products_display_results" class="gwt_products_display_results">
<span class="JSON" style="display: none;">
{
"products": [
{
"targetURL": "/athena-mineral-fabric-by-the-yard/262682",
"listIndex": "0",
"minimumPrice": 20,
"categoryOnSale": "false",
"mfPartNumber": "FF010ATM",
"hasAtLeastOneBuyableAndPublishedItem": "true",
"attributes": [],
"partNumber": "b_FF010ATM",
"itemAsProduct": "true",
"iapAttribute": "",
"productDetailTargetURL": "/athena-mineral-fabric-by-the-yard/262682",
"iapAttributeCode": "",
"beanType": "bundle",
"name": "Athena Mineral Fabric by the Yard",
"maxListPrice": 0,
"thumbNail": "null",
"hasSaleSKUs": false,
"productId": "262682",
"currencyCode": "USD",
"hasMoreColors": false,
"xPriceLabel": "null",
"minListPrice": 0,
"maximumPrice": 20,
"iapAttributeDisplayName": "",
"shortDescription": "null",
"listId": "SEARCHRESULTS",
"categoryId": "null"
},
{
"targetURL": "/athena-slate-fabric-by-the-yard/262683",
"listIndex": "1",
"minimumPrice": 20,
"categoryOnSale": "false",
"mfPartNumber": "FF010ATS",
"hasAtLeastOneBuyableAndPublishedItem": "true",
"attributes": [],
"partNumber": "b_FF010ATS",
"itemAsProduct": "true",
"iapAttribute": "",
"productDetailTargetURL": "/athena-slate-fabric-by-the-yard/262683",
"iapAttributeCode": "",
"beanType": "bundle",
"name": "Athena Slate Fabric by the Yard",
"maxListPrice": 0,
"thumbNail": "null",
"hasSaleSKUs": false,
"productId": "262683",
"currencyCode": "USD",
"hasMoreColors": false,
"xPriceLabel": "null",
"minListPrice": 0,
"maximumPrice": 20,
"iapAttributeDisplayName": "",
"shortDescription": "null",
"listId": "SEARCHRESULTS",
"categoryId": "null"
}
]
}
</span>
</div>
</body>
</html>
';

// loadHTML() is an instance method. Calling it statically was deprecated in
// PHP 5 and is a fatal error in PHP 8, so instantiate the document first.
$document = new DOMDocument();
$document->loadHTML($html);
$xpath = new DOMXPath($document);

// Every <span class="JSON"> that is a direct child of a <div>.
$spans = $xpath->query('//div/span[@class="JSON"]');

foreach ($spans as $span) {
    // nodeValue is the span's text content, i.e. the raw JSON document.
    $catalog = json_decode($span->nodeValue);

    // json_decode() returns null on malformed input; skip anything that is
    // not an object carrying a "products" list.
    if (!is_object($catalog) || !isset($catalog->products) || !is_array($catalog->products)) {
        continue;
    }

    printf("We found %d products.\n", count($catalog->products));
    foreach ($catalog->products as $index => $product) {
        // $index is zero-based; display it one-based without mutating it.
        printf("Product #%d - %s.\n", $index + 1, $product->name);
    }
}
/*
Expected output:
We found 2 products.
Product #1 - Athena Mineral Fabric by the Yard.
Product #2 - Athena Slate Fabric by the Yard.
*/