Our main class, amazon_rank_finder.php
<?php
// Report everything except E_STRICT notices, and print errors into the page.
error_reporting(E_ALL ^ E_STRICT);
ini_set('display_errors', 'on');

// ASIN used when the form has not been submitted or submitted nothing usable.
define('DEFAULT_ASIN', "1430235608");
$asin = DEFAULT_ASIN;

// Only a plain string is accepted: a request such as asin[]=x turns
// $_POST['asin'] into an array, which preg_replace() would return as an array
// and which would then break str_pad() and the cache file name built from it.
if (isset($_POST['asin']) && is_string($_POST['asin'])) {
    // Keep letters and digits only; everything else is stripped.
    $clean_asin = preg_replace("/[^a-zA-Z0-9]/", "", $_POST['asin']);
    // preg_replace() returns null on error; an empty result would produce the
    // cache file ".xml", so both cases keep the default instead.
    if ($clean_asin !== null && $clean_asin !== '') {
        $asin = $clean_asin;
    }
}
?>
<!doctype HTML>
<html>
<head>
<title>Amazon Rank Finder</title>
<link rel="stylesheet" href="style.css"/>
</head>
<body>
<?php /* PHP_SELF contains whatever path info the client appended to the URL,
         so it is escaped before being written into the action attribute. */ ?>
<form action="<?php echo htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES, 'UTF-8'); ?>" method="POST">
<?php /* The ASIN is shown left-padded with zeros to ten characters. */ ?>
<input value="<?php echo htmlspecialchars(str_pad($asin, 10, '0', STR_PAD_LEFT), ENT_QUOTES, 'UTF-8'); ?>"
type="text" name="asin" />
<input type="Submit"/>
</form>
<div>
<?php
require_once('config.php');
require_once('xml_io.php');
require_once('scraper.php');
require_once('view.php');
/**
 * Coordinates one rank lookup: loads the cached XML for a product id, either
 * refreshes it from the remote sites or renders the cached values, and prints
 * the collected HTML through the view.
 */
class AmazonRankFinder {
    /** Product id handled by the current run() call. */
    private $id = 0;
    /** List of domain suffixes (e.g. 'ca', 'com') handled by the current run() call. */
    private $domains = null;
    //objects
    /** XML-backed cache (XML_IO instance). */
    private $model = null;
    /** HTML buffer and renderer (View instance). */
    private $view = null;

    public function __construct() {
        $this->model = new XML_IO;
        $this->view = new View;
    }

    /**
     * Looks up the ranks of $id on every domain and prints the result.
     *
     * @param string $id      product id; also used as the cache file name stem
     * @param array  $domains domain suffixes appended to the site URL prefix
     */
    public function run($id, array $domains = array('ca', 'com', 'co.uk')) {
        $this->domains = $domains;
        $this->id = $id;
        $this->model->loadXML($this->id);
        if ($this->timeToCheck()) {
            // Persist only when at least one site answered. Writing after a
            // total failure would stamp the cache as fresh although nothing
            // was refreshed, hiding the failure until the interval expires.
            if ($this->checkRemoteContent()) {
                $this->model->updateAndWriteFile();
            }
        } else {
            $this->checkCachedContent();
        }
        $this->view->display($this->id);
    }

    /**
     * @return bool true when the cached data is older than RECHECK_INTERVAL seconds
     */
    private function timeToCheck() {
        return (time() - $this->model->getLastCheckTime() > RECHECK_INTERVAL);
    }

    /**
     * Fetches fresh data for every domain, stores it in the model and appends
     * the rendered result to the view.
     *
     * Title and image are requested only until the first successful fetch.
     *
     * @return bool true when at least one domain returned data
     */
    private function checkRemoteContent() {
        $need_title_and_image = true;
        $this->view->appendToBody("Checking id: $this->id now...<br/>");
        foreach ($this->domains as $domain) {
            $results = Scraper::fetchUpdatedData($domain, $this->id, $need_title_and_image);
            if (!$results) {
                // this site could not be fetched; try the next one
                continue;
            }
            if ($need_title_and_image) {
                $this->model->setTitle($results['title']);
                $this->model->setImage($results['image']);
                $this->view->appendToBody(html_entity_decode($this->view->productInformationAsHTML(
                    $results['title'], $results['image'])));
                $need_title_and_image = false;
                // container for the per-domain rank boxes; closed after the loop
                $this->view->appendToBody("<div style='float: left;'>");
            }
            //adds best/worst view
            $ranks = $this->model->updateXML($domain, $results['sales_rank']);
            if (!empty($ranks)) {
                $this->view->appendToBody(html_entity_decode(
                    $this->view->domainRankAsHTML($this->id, $domain, $ranks, false)));
            }
        }
        // The flag is cleared exactly when the container was opened, so the
        // closing tag is emitted only in that case; an unmatched </div> would
        // close the enclosing page element instead.
        $fetched_any = !$need_title_and_image;
        if ($fetched_any) {
            $this->view->appendToBody('</div>');
        }
        return $fetched_any;
    }

    /**
     * Renders title, image and per-domain ranks from the cached XML without
     * contacting the remote sites.
     */
    private function checkCachedContent() {
        $this->view->appendToBody(html_entity_decode($this->view->productInformationAsHTML(
            $this->model->getTitle(), $this->model->getImage())));
        $this->view->appendToBody("<div style='float: left;'>Last checked id: $this->id at " . date('m-d-Y h:i:sa', $this->model->getLastCheckTime()) . "<br/>");
        foreach ($this->domains as $domain) {
            // false when the domain has no entry yet; the query then yields an empty array
            $site_node = $this->model->siteExistsInXML($domain);
            $ranks = $this->model->queryRankNodesFromXML($site_node);
            if (!empty($ranks)) {
                $this->view->appendToBody(html_entity_decode(
                    $this->view->domainRankAsHTML($this->id, $domain, $ranks)));
            }
        }
        $this->view->appendToBody('</div>');
    }
}
// A lookup runs only when the form was submitted with an asin field.
if (isset($_POST, $_POST['asin'])) {
    $amazon_rank_finder = new AmazonRankFinder();
    $amazon_rank_finder->run(
        $asin,
        array('ca', 'com', 'co.uk')
    );
}
?>
</div>
</body>
</html>
Our config.php file
<?php
// Cached data older than this many seconds is fetched again.
define('RECHECK_INTERVAL', 3600); //one hour

// Product page address: prefix + domain suffix + '/dp/' + product id.
define('AMAZON_URL_PREFIX', 'http://www.amazon.');
define('AMAZON_URL_SUFFIX', '/dp/');

// Selectors handed to pq() to pick elements out of the fetched page.
define('TITLE_DOM_ID', '#btAsinTitle');
define('IMAGE_DOM_ID', '#prodImage');
define('SALES_RANK_DOM_ID', '#SalesRank');

// Time zone used by date() when the last check time is displayed.
date_default_timezone_set('America/Regina');
?>
Our scraper.php file
<?php
require_once('config.php');
require_once('phpQuery.php');
/**
 * Fetches a product page and extracts the pieces the application stores.
 */
class Scraper {
    /**
     * Downloads the product page of $id from one domain and selects the
     * sales-rank element and, optionally, the title and image elements.
     *
     * @param string $domain    domain suffix appended to AMAZON_URL_PREFIX
     * @param string $id        product id appended after AMAZON_URL_SUFFIX
     * @param bool   $get_title when true, title and image are selected as well
     * @return array|false array with keys 'sales_rank', 'title' and 'image'
     *                     holding whatever pq() returns for each selector
     *                     ('title' and 'image' are "" when not requested),
     *                     or false when the page could not be fetched
     */
    public static function fetchUpdatedData( $domain, $id, $get_title = false ) {
        $title = $image = "";
        //http://www.amazon.ca/dp/1234567890
        $url = AMAZON_URL_PREFIX.$domain.AMAZON_URL_SUFFIX.$id;
        $contents = file_get_contents( $url );
        if ($contents === false) {
            //return early if URL is not found
            return false;
        }
        phpQuery::newDocument( $contents );
        if ($get_title) {
            $title = pq( TITLE_DOM_ID );
            $image = pq( IMAGE_DOM_ID );
        }
        return array('sales_rank' => pq( SALES_RANK_DOM_ID ),
            'title' => $title,
            'image' => $image);
    }

    /**
     * Extracts the numeric rank from the sales-rank markup.
     *
     * Both spellings of the label are accepted, "Bestsellers" and
     * "Best Sellers"; with only the first one, pages using the second
     * spelling always parsed to -1.
     *
     * @param string $description markup containing e.g.
     *                            "Amazon Bestsellers Rank:</b> #1,234 in Books"
     * @return int the rank with thousands separators removed, or -1 when the
     *             text contains no rank
     */
    public static function parseRankFromDescription( $description ) {
        // "~" is the delimiter because the pattern contains "/", "#" and braces.
        $pattern = '~Amazon Best\s?sellers Rank:</b>\s*#?(([0-9]{0,3},)?([0-9]{0,3},)?[0-9]{1,3}) in Books~i';
        // Only the first occurrence is used, so a single match is enough.
        if (preg_match( $pattern, (string) $description, $matches ) === 1) {
            return intval( str_replace( ',', '', $matches[ 1 ] ) );
        }
        return -1;
    }
}
?>
Our xml_io.php file
<?php
require_once('config.php');
// Skeleton written to disk for a product that has no cache file yet.
// The template contains nothing to interpolate, so it is declared as a nowdoc.
$default_xml_string = <<<'EOT'
<?xml version="1.0" encoding="UTF-8" ?>
<info>
<lastcheck></lastcheck>
<title></title>
<image></image>
<sites/>
</info>
EOT;
define( 'DEFAULT_XML_STRING', $default_xml_string );
/**
 * Reads and writes the per-product XML cache file "<id>.xml".
 *
 * The file holds the time of the last check, the product title and image
 * (stored entity-encoded) and one <domain cc="..."> element per site with
 * <current_rank>, <best_rank> and <worst_rank> children.
 */
class Xml_IO {
    private $dom = null;
    private $id = 0;
    private $filename = null;
    // Both were previously created on the fly in loadXML() and were therefore
    // public; they are declared explicitly (dynamic properties are deprecated
    // as of PHP 8.2) and keep that visibility.
    public $xpath = null;
    public $sites_node = null;
    //nodes
    private $lastchecktime_node = null;
    private $title_node = null;
    private $image_node = null;
    //node textcontent
    private $lastchecktime = 0;
    private $title = null;
    private $image = null;
    // Set by setTitle()/setImage(), cleared by loadXML(). Values read from the
    // file are already entity-encoded and must not be encoded again on write.
    private $title_changed = false;
    private $image_changed = false;

    public function __construct() {
        $this->dom = new DOMDocument();
    }

    /**
     * Writes the default skeleton to the current file name when no such file exists.
     */
    public function createFileIfItDoesNotExist() {
        if (!file_exists( $this->filename )) {
            file_put_contents( $this->filename, DEFAULT_XML_STRING );
        }
    }

    /**
     * Loads "<id>.xml" (creating it first when missing) and caches the nodes
     * and values the other methods work on.
     *
     * @param string $id product id; used verbatim as the file name stem
     */
    public function loadXML( $id ) {
        $this->id = $id;
        $this->filename = $this->id.'.xml';
        $this->createFileIfItDoesNotExist();
        if (!$this->dom->load( $this->filename )) {
            // An unreadable or malformed file would leave every node below
            // null; start from the empty skeleton instead.
            $this->dom->loadXML( DEFAULT_XML_STRING );
        }
        $this->xpath = new DOMXPath( $this->dom );
        $this->lastchecktime_node = $this->xpath->query( "//lastcheck" )->item( 0 );
        $this->title_node = $this->xpath->query( "//title" )->item( 0 );
        $this->image_node = $this->xpath->query( "//image" )->item( 0 );
        $this->sites_node = $this->xpath->query( "//sites" )->item( 0 );
        $this->lastchecktime = (int) $this->lastchecktime_node->textContent;
        $this->title = $this->title_node->textContent;
        $this->image = $this->image_node->textContent;
        $this->title_changed = false;
        $this->image_changed = false;
    }

    /**
     * Stores a new sales rank for $domain, creating the domain entry when needed.
     *
     * @param string $domain     value of the cc attribute identifying the site
     * @param mixed  $sales_rank sales-rank markup; converted to a string
     * @return array|null rank nodes as returned by queryRankNodesFromXML();
     *                    empty or null when nothing could be produced
     */
    public function updateXML( $domain, $sales_rank ) {
        $site = $this->siteExistsInXML( $domain );
        if ($site) {
            return $this->updateSiteInXML( $site, $sales_rank );
        }
        return $this->appendSiteInXML( $domain, $sales_rank );
    }

    /**
     * Finds the <domain> element whose cc attribute equals $domain.
     *
     * @param string $domain
     * @return DOMElement|false the element, or false when there is none
     */
    public function siteExistsInXML( $domain ) {
        foreach ($this->xpath->query( "//domain" ) as $site) {
            // hasAttribute() guards against <domain> elements without cc
            if ($site->hasAttribute( "cc" ) && $site->getAttribute( "cc" ) === (string) $domain) {
                return $site;
            }
        }
        return false;
    }

    /**
     * Updates the ranks of an existing <domain> element.
     *
     * The element located by siteExistsInXML() is reused; walking the
     * childNodes of <sites> would also visit non-element nodes, which have
     * no attributes.
     */
    private function updateSiteInXML( $site, $sales_rank ) {
        try {
            return $this->updateDomainRank( $site, $sales_rank );
        } catch (Exception $e) {
            // reported through PHP's error handling rather than dumped into the page
            trigger_error( 'Could not update rank: '.$e->getMessage(), E_USER_WARNING );
        }
        return null;
    }

    /**
     * Appends a new <domain cc="..."> element whose best and worst rank both
     * start at the parsed rank (-1 when no rank could be parsed).
     */
    private function appendSiteInXML( $domain, $sales_rank ) {
        $site = $this->dom->createElement( "domain" );
        $site->setAttribute( "cc", $domain );
        $this->sites_node->appendChild( $site );
        $this->createAndAppend( $site, "current_rank", $sales_rank );
        $exact_rank = Scraper::parseRankFromDescription( $sales_rank );
        $this->createAndAppend( $site, "best_rank", $exact_rank );
        $this->createAndAppend( $site, "worst_rank", $exact_rank );
        return $this->queryRankNodesFromXML( $site );
    }

    /**
     * Creates <$child_node_name> holding $value as text and appends it to $parent.
     *
     * The value goes through a text node because createElement() does not
     * escape its value argument, so an "&" in it is read as the start of an
     * entity reference.
     */
    private function createAndAppend( $parent, $child_node_name, $value ) {
        $child = $this->dom->createElement( $child_node_name );
        $child->appendChild( $this->dom->createTextNode( (string) $value ) );
        $parent->appendChild( $child );
        return $child;
    }

    /**
     * Replaces the first child of $node with a text node holding $value, or
     * appends one when the node is empty. Does nothing when $node is null.
     */
    private function updateNode( $node, $value ) {
        if (!$node) {
            return;
        }
        $text = $this->dom->createTextNode( (string) $value );
        if ($node->firstChild) {
            $node->replaceChild( $text, $node->firstChild );
        } else {
            $node->appendChild( $text );
        }
    }

    /**
     * Collects the rank nodes of one <domain> element.
     *
     * @param DOMElement|false $site_node
     * @return array keys 'current', 'best', 'worst' (nodes) and 'exact' (int
     *               parsed from the current rank text, -1 when unparsable);
     *               empty when $site_node is falsy or lacks one of the nodes
     */
    public function queryRankNodesFromXML( $site_node ) {
        if (!$site_node) {
            return array();
        }
        $current = $this->xpath->query( "current_rank", $site_node )->item( 0 );
        $best = $this->xpath->query( "best_rank", $site_node )->item( 0 );
        $worst = $this->xpath->query( "worst_rank", $site_node )->item( 0 );
        if (!$current || !$best || !$worst) {
            // incomplete entry: callers treat an empty array as "nothing to show"
            return array();
        }
        return array('current' => $current,
            'exact' => Scraper::parseRankFromDescription( $current->textContent ),
            'best' => $best,
            'worst' => $worst);
    }

    /**
     * Writes the new current rank and widens the best/worst range when the
     * parsed rank lies outside of it.
     */
    private function updateDomainRank( $site, $sales_rank ) {
        $this->updateNode( $this->xpath->query( "current_rank", $site )->item( 0 ), $sales_rank );
        $ranks = $this->queryRankNodesFromXML( $site );
        if (empty( $ranks ) || $ranks[ 'exact' ] == -1) {
            return $ranks;
        }
        $exact = $ranks[ 'exact' ];
        $best = intval( $ranks[ 'best' ]->textContent );
        $worst = intval( $ranks[ 'worst' ]->textContent );
        // A stored value below 1 (-1 from a failed first parse, 0 from an
        // empty node) is a placeholder, not a rank. Without this test a -1
        // best rank could never be replaced because no rank is smaller.
        if ($best < 1 || $exact < $best) {
            $this->updateNode( $ranks[ 'best' ], $exact );
        }
        if ($worst < 1 || $exact > $worst) {
            $this->updateNode( $ranks[ 'worst' ], $exact );
        }
        return $ranks;
    }

    /**
     * Stamps the current time, stores a changed title/image entity-encoded
     * and writes the document back to its file.
     */
    public function updateAndWriteFile() {
        try {
            $this->updateNode( $this->lastchecktime_node, time() );
            // Unchanged values came from the file already encoded; encoding
            // them again would turn "&lt;" into "&amp;lt;" on every write.
            if ($this->title_changed) {
                $this->updateNode( $this->title_node, htmlentities( (string) $this->title ) );
            }
            if ($this->image_changed) {
                $this->updateNode( $this->image_node, htmlentities( (string) $this->image ) );
            }
            // The document declares UTF-8, so saveXML() output is written as is;
            // utf8_encode() would re-encode it as if it were ISO-8859-1.
            file_put_contents( $this->filename, $this->dom->saveXML() );
        } catch (Exception $e) {
            trigger_error( 'Could not write cache file: '.$e->getMessage(), E_USER_WARNING );
        }
    }

    /** @return int time of the last check as read by loadXML(); 0 when never checked */
    public function getLastCheckTime() {
        return $this->lastchecktime;
    }

    /** @return mixed title as read from the file, or as last passed to setTitle() */
    public function getTitle() {
        return $this->title;
    }

    /** @return mixed image as read from the file, or as last passed to setImage() */
    public function getImage() {
        return $this->image;
    }

    /** Sets the title to be stored by the next updateAndWriteFile(). */
    public function setTitle( $title ) {
        $this->title = $title;
        $this->title_changed = true;
    }

    /** Sets the image to be stored by the next updateAndWriteFile(). */
    public function setImage( $image ) {
        $this->image = $image;
        $this->image_changed = true;
    }
}
?>
Our view.php file
<?php
/**
 * Collects HTML fragments in a buffer and prints them in one go.
 */
class View {
    /** HTML accumulated so far. */
    private $body;

    public function __construct() {
        $this->body = "";
    }

    /**
     * Appends $input to the buffer unchanged.
     */
    public function appendToBody( $input ) {
        $this->body .= $input;
    }

    /**
     * Builds the product box: the title as a heading followed by the image markup.
     *
     * Neither argument is escaped; both are inserted as given.
     *
     * @return string
     */
    public function productInformationAsHTML( $title, $image ) {
        $html = '<div style="float: left; width: 240px; margin-right: 20px;"><h2>'.$title."</h2>";
        $html .= $image;
        $html .= '</div>';
        return $html;
    }

    /**
     * Builds the box for one domain: a link to the product page followed by
     * the current, best and worst rank.
     *
     * @param string $asin     product id used in the link
     * @param string $domain   domain suffix used in the link and as its label
     * @param array  $ranks    array with 'current', 'best' and 'worst' entries
     *                         exposing textContent; an empty array yields an
     *                         empty container
     * @param bool   $entities when true the current rank text is passed
     *                         through htmlentities(), otherwise inserted as is
     * @return string
     */
    public function domainRankAsHTML( $asin, $domain, $ranks, $entities = true ) {
        $html = "<div class='domainContainer'>";
        if (!empty( $ranks )) {
            $html .= "<span class='domainName'>";
            // The anchor must be left open here: written as <a ... /> it is
            // self-closed and the </a> after the label has nothing to close.
            $html .= "<a href='".AMAZON_URL_PREFIX.$domain.AMAZON_URL_SUFFIX.$asin."' rel='external'>";
            $html .= $domain."</a>";
            $html .= "</span><br/>";
            $html .= "<div class='domainRank'>";
            if ($entities) {
                $html .= htmlentities( $ranks[ 'current' ]->textContent, ENT_QUOTES, 'UTF-8' );
            } else {
                $html .= $ranks[ 'current' ]->textContent;
            }
            $html .= "<strong>Best Rank:</strong> ".$ranks[ 'best' ]->textContent."<br/>";
            $html .= "<strong>Worst Rank:</strong> ".$ranks[ 'worst' ]->textContent."<br/><br/>";
            $html .= "</div>";
        }
        $html .= "</div>";
        return $html;
    }

    /**
     * Prints the buffered HTML. $id is accepted but not used.
     */
    public function display( $id ) {
        print $this->body;
    }
}
?>
Our style.css file
/* Page background */
body {
    background: #F7F7F7;
}

/* Box around the ranks of one domain */
.domainContainer {
    border-radius: 5px;
    border: 1px solid #CCCCCC;
    background: #EEEEEE;
    padding: 10px 20px;
    margin: 10px 0px;
}

/* Label carrying the domain link */
.domainName {
    background: orange;
    padding: 5px 10px;
    border-radius: 3px;
    font-weight: bold;
}

/* Rank text below the label */
.domainRank {
    margin-top: 10px;
}

All Articles
Comments
TzcgqHToWZtcZUD
gmMueZRcscZoSSjSj
zWrbdrljYlMddSds
CfMjfORlgDJtl
cLWYxcvHYZSIswZ
Add new comment