
Hi guys..
In this post I will show how to fetch and extract data such as the title, description and images from a URL, similar to the link preview Facebook shows when you attach a URL.
I use PHP cURL to fetch the web page content as a string, and the PHP DOMDocument class to extract the title and the meta description.
Finally, I use the preg_match_all function to extract the image URLs from the page.
Here is the code:
/**
 * Download a URL with cURL and return the body only if it is an HTML page.
 *
 * Redirects are followed and response headers are excluded from the result.
 *
 * @param string $url Address of the page to download.
 * @return string|false The response body when the reported Content-Type
 *                      contains "text/html"; false when the transfer fails
 *                      or the response is some other content type.
 */
function file_get_contents_curl($url){
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    // NOTE(review): certificate verification is disabled, which allows
    // man-in-the-middle tampering of HTTPS pages. Kept as-is so hosts with
    // broken certificates keep working; enable it if that is not required.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);

    $data = curl_exec($ch);
    $info = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);

    // Release the handle on every path, not only when the page is HTML.
    curl_close($ch);

    // A failed transfer yields false data and/or no Content-Type at all.
    if ($data === false || !is_string($info)) {
        return false;
    }

    //checking mime types (media type names are case-insensitive)
    if (stripos($info, 'text/html') === false) {
        return false;
    }

    return $data;
}
//fetching url data via curl
$html = file_get_contents_curl($url);

// Defaults, so that every result variable is defined even when the download
// fails or the page lacks the corresponding element.
$title = '';
$description = '';
// Same shape preg_match_all() produces with PREG_PATTERN_ORDER and no matches:
// $img[0] holds the full matches, $img[1] holds the captured src values.
$img = [[], []];

// file_get_contents_curl() returns false on failure, and DOMDocument::loadHTML()
// rejects an empty string, so only parse when there is real content.
if (is_string($html) && $html !== '') {
    //parsing title and description begins here:
    $doc = new DOMDocument();

    // Real-world HTML is rarely well-formed; collect libxml warnings internally
    // instead of silencing everything with @, then restore the prior setting.
    $previousLibxmlSetting = libxml_use_internal_errors(true);
    $doc->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousLibxmlSetting);

    //get and display what you need:
    $nodes = $doc->getElementsByTagName('title');
    if ($nodes->length > 0) {
        $title = $nodes->item(0)->nodeValue;
    }

    // If several description meta tags exist, the last one wins.
    $metas = $doc->getElementsByTagName('meta');
    foreach ($metas as $meta) {
        if ($meta->getAttribute('name') === 'description') {
            $description = $meta->getAttribute('content');
        }
    }

    //fetch images
    // Matches the src attribute of <img> tags; the U modifier makes the
    // quantifiers lazy so the capture stops at the first closing quote.
    $image_regex = '/<img[^>]*src=["\'](.*)["\']/Ui';
    preg_match_all($image_regex, $html, $img, PREG_PATTERN_ORDER);
}
View Demo
0 comments:
Post a Comment