Breaking News
Loading...
Monday, October 27, 2014

Info Post
/*
 * Fetching URL data (title, description and images) from a website, similar
 * to Facebook's "attach URL" module.
 *
 * The page content is fetched as a string with PHP cURL, the title and the
 * meta description are extracted with a DOMDocument object, and finally the
 * image sources are collected with preg_match_all(). The original post links
 * to a live demo ("View Demo").
 */

/**
 * Download a page with cURL and return its body only when it is served as HTML.
 *
 * Redirects are followed. The cURL handle is released on every path.
 *
 * @param string $url Address of the page to fetch.
 *
 * @return string|false The response body, or false when the handle cannot be
 *                      created, the transfer fails, or the reported
 *                      Content-Type does not contain "text/html".
 */
function file_get_contents_curl($url)
{
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    // NOTE(review): peer verification is switched off, as in the original
    // snippet, so HTTPS responses are not authenticated. Enable it if the
    // fetched data is trusted for anything beyond a preview.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);

    $data = curl_exec($ch);
    $info = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);

    // Close before branching so the handle is not leaked on the failure paths.
    curl_close($ch);

    // A failed transfer yields false; a response without a Content-Type
    // header yields a non-string content type.
    if (!is_string($data) || !is_string($info)) {
        return false;
    }

    // Checking the MIME type; media types are compared case-insensitively.
    if (stripos($info, 'text/html') === false) {
        return false;
    }

    return $data;
}

// Fetching URL data via cURL. $html is false when the fetch failed or the
// resource is not HTML.
$html = file_get_contents_curl($url);

// Defaults used when the page could not be fetched or lacks the element.
$title = null;
$description = null;

// Parsing title and description begins here.
$doc = new DOMDocument();
if (is_string($html) && $html !== '') {
    // Collect markup warnings internally instead of silencing them with "@",
    // then restore whatever error mode the caller had.
    $previousLibxmlSetting = libxml_use_internal_errors(true);
    $doc->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousLibxmlSetting);
}

// Get what you need: the first <title> element, if the page has one.
$nodes = $doc->getElementsByTagName('title');
if ($nodes->length > 0) {
    $title = $nodes->item(0)->nodeValue;
}

// The last <meta name="description"> wins, as in the original loop.
$metas = $doc->getElementsByTagName('meta');
foreach ($metas as $meta) {
    if (strcasecmp($meta->getAttribute('name'), 'description') === 0) {
        $description = $meta->getAttribute('content');
    }
}

// Fetch images: capture the src value of every <img> tag. $img[0] holds the
// full matches and $img[1] the captured sources (PREG_PATTERN_ORDER).
$image_regex = '/<img[^>]*src=["\'](.*)["\']/Ui';
preg_match_all($image_regex, is_string($html) ? $html : '', $img, PREG_PATTERN_ORDER);

0 comments:

Post a Comment