| 
<?php
// Example script: load a page with the Html parser class, build a flat list
// of its tags, and prepare the inputs for the keyword density analysis below.
require 'class.Html.php';

$url = 'http://www.yahoo.com/';

$objHtmlParser = new Html($url);

$description = '';
$objHtmlParser->Clean();

// Parse the HTML page into a tree structure.
// The tree is stored in $objHtmlParser->tree, but it is not convenient to
// use directly yet.
//
// The argument is passed without a call-time "&": call-time
// pass-by-reference is a fatal error since PHP 5.4. If Parse() is meant to
// fill $description, its own signature has to declare the parameter by
// reference -- NOTE(review): confirm in class.Html.php.
$objHtmlParser->Parse($description);

// Collect all needed tags from the tree into a more useful flat list.
// As above, FindAllTags() must declare its second parameter by reference
// for $all_tags to be filled -- NOTE(review): confirm in class.Html.php.
$all_tags = array();
$objHtmlParser->FindAllTags($objHtmlParser->tree, $all_tags);

// Collect phrases made of 1, 2, 3 or 4 keywords.
// Add more sizes here if you need to analyze longer phrases.
$stat_prepare = array(1, 2, 3, 4);

// Example of reading data out of $all_tags: the charset declared by the page.
// Guarded with isset() so that a page without a content-type meta tag yields
// null (the value the unguarded lookup produced) without raising an
// undefined-index notice/warning.
$charset = isset($all_tags['meta content-type'][0]['props']['charset'])
    ? $all_tags['meta content-type'][0]['props']['charset']
    : null;
 
 //
 /**
  * Keyword density function.
  *
  * It returns a sorted array with all keyword phrases.
  * The returned array has the following structure:
  *
  * Array(
  *     [1] => Array(                     - phrases with 1 keyword
  *         [yahoo] => Array(             - tags where the phrase was found
  *             [title] => Array(
  *                 [count] => 1          - how many times the phrase repeats in this tag
  *                 [text]  => Yahoo!
  *             )
  *             [__total__] => 20,        - total repeats in the whole page
  *             .................................
  *         )
  *         .........................
  *     ),
  *     [2] => Array(                     - phrases with 2 keywords
  *         [real estate] => Array(       - tags where the phrase was found
  *             [a] => Array(
  *                 [count] => 2
  *                 [text]  => Real Estate; Yahoo! Real Estate -
  *             )
  *             [__total__] => 2,
  *             ............................
  *         ),
  *         ........................
  *     )
  * )
  */
 
 
 // Build the keyword statistics for every phrase size listed in $stat_prepare.
 $words_stat = $objHtmlParser->KeywordsDensity($all_tags, $stat_prepare);

 // $words_stat still contains phrases we do not need.
 // Keep a filtered (but unsorted) copy using a minimum threshold of 2 --
 // presumably this drops phrases repeated fewer than 2 times; verify against
 // deleteMinEntries() in class.Html.php.
 // NOTE(review): the loop below sorts the unfiltered $words_stat, not this
 // filtered copy -- confirm that this is intended.
 $words_top_notsorted = $objHtmlParser->deleteMinEntries($words_stat, 2);

 // Sort each group via SortWordsSataistic(). The group's '__total__' entry is
 // read before the sort and written back afterwards so that it is still
 // present on the sorted group.
 foreach ($words_stat as $phrase_size => $phrase_group) {
     $saved_total = $phrase_group['__total__'];

     $sorted_group = $objHtmlParser->SortWordsSataistic($words_stat[$phrase_size], 2);

     $words_stat[$phrase_size] = $sorted_group;
     $words_stat[$phrase_size]['__total__'] = $saved_total;
 }
 
 // Target encoding for everything extracted from the page.
 $charset_to = "UTF-8";

 // Dump the sorted statistics. This happens before the encoding conversion
 // below, so the output is still in the page's original encoding.
 print_r($words_stat);

 // Convert the statistics from the page charset to $charset_to, in case the
 // page uses Chinese or any other non-ASCII characters.
 $words_stat = $objHtmlParser->ConvertEncoding($words_stat, $charset, $charset_to);
 $words_top_notsorted = $objHtmlParser->ConvertEncoding($words_top_notsorted, $charset, $charset_to);

 // Examples of taking other data from the parsed HTML.
 // Each lookup is guarded with isset(): a page without a <title>, keywords or
 // description tag yields null (the value the unguarded lookup produced)
 // without raising an undefined-index notice/warning.
 $title = $objHtmlParser->helpConvertEncoding(
     isset($all_tags['title'][0]['text']) ? $all_tags['title'][0]['text'] : null,
     $charset,
     $charset_to
 );
 $keywords = $objHtmlParser->helpConvertEncoding(
     isset($all_tags['meta keywords'][0]['text']) ? $all_tags['meta keywords'][0]['text'] : null,
     $charset,
     $charset_to
 );
 $desc = $objHtmlParser->helpConvertEncoding(
     isset($all_tags['meta description'][0]['text']) ? $all_tags['meta description'][0]['text'] : null,
     $charset,
     $charset_to
 );
 $original_text = $objHtmlParser->helpConvertEncoding($objHtmlParser->original_text, $charset, $charset_to);
 
 
 
 ?>
 |