I'm under no illusion that this is at all the best way or even a good way of doing this. It's simply a working solution. Tips, opinions, suggestions, insults, death threats, let's hear it.
<?php
require_once '../include/include.php';
include FUNCTIONS.DS.'functions.php';
$getPage = CurlPage($URL);
$getPage1httpCode = $getPage['httpCode'];
if ($getPage1httpCode != 200)
{
sleep(rand(5, 10));
echo '<h3 style="color:white;">Attempt #2</h3>';
$getPage2 = CurlPage($URL);
$getPage1httpCode2 = $getPage2['httpCode'];
if ($getPage1httpCode2 != 200)
{
sleep(rand(5, 10));
echo '<h3 style="color:white;">Attempt #3</h3>';
$getPage3 = CurlPage($URL);
$getPage1httpCode3 = $getPage3['httpCode'];
if ($getPage1httpCode3 != 200)
{
sleep(rand(5, 10));
echo '<h3 style="color:white;">Attempt #4</h3>';
$getPage4 = CurlPage($URL);
$getPage1httpCode4 = $getPage4['httpCode'];
if ($getPage1httpCode4 != 200)
{
sleep(rand(5, 10));
echo '<h3 style="color:white;">Attempt #5</h3>';
$getPage5 = CurlPage($URL);
$getPage1httpCode5 = $getPage5['httpCode'];
if ($getPage1httpCode5 != 200)
{
sleep(rand(5, 10));
echo '<h3 style="color:white;">Failed.</h3>';
var_dump('Fatal Error');
die();
}
}
}
}
}
if ($getPage == 200 || $getPage1httpCode == 200 || $getPage1httpCode2 == 200 || $getPage1httpCode3 == 200 || $getPage1httpCode4 == 200 || $getPage1httpCode5 == 200) {
if ($getPage == 200) { $HTML = $getPage['Data']; echo '<h3 style="color:green;>httpCode1 Success: 200</h3>'; }
elseif ($getPage1httpCode == 200) { $HTML = $getPage1httpCode['Data']; echo '<h3 style="color:green;>httpCode2 Success: 200</h3>'; }
elseif ($getPage1httpCode2 == 200) { $HTML = $getPage1httpCode2['Data']; echo '<h3 style="color:green;>httpCode3 Success: 200</h3>'; }
elseif ($getPage1httpCode3 == 200) { $HTML = $getPage1httpCode3['Data']; echo '<h3 style="color:green;>httpCode4 Success: 200</h3>'; }
elseif ($getPage1httpCode4 == 200) { $HTML = $getPage1httpCode4['Data']; echo '<h3 style="color:green;>httpCode5 Success: 200</h3>'; }
elseif ($getPage1httpCode5 == 200) { $HTML = $getPage1httpCode5['Data']; echo '<h3 style="color:green;>httpCode6 Success: 200</h3>'; }
else { var_dump('Fatal Error'); die(); }
if (isHTML($HTML)) {
$saveHTMLfileName = ROOT_DIR.DS.'pages'.DS.date('m-d-Y_hia').'.html';
if (!file_exists($saveHTMLfileName)) { file_put_contents($saveHTMLfileName, $HTML); }
}
TruncateCookieFile();
$conn = NULL;
Associated Functions // include FUNCTIONS.DS.'functions.php';
function CurlPage($URL)
{
$Cookie = ROOT_DIR . DS . '_misc' . DS . 'cookie.txt';
if (!file_exists($Cookie))
{
$OpenCookieFile = fopen($Cookie, "w");
fclose($OpenCookieFile);
}
$Host = 'example.com';
$acceptLanguage = 'en-US';
$Curl = curl_init();
curl_setopt($Curl, CURLOPT_URL, $URL);
curl_setopt($Curl, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($Curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($Curl, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($Curl, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($Curl, CURLOPT_SSL_VERIFYHOST, false);
curl_setopt($Curl, CURLOPT_ENCODING, 1);
curl_setopt($Curl, CURLOPT_COOKIESESSION, 1);
curl_setopt($Curl, CURLOPT_COOKIEJAR, $Cookie);
curl_setopt($Curl, CURLOPT_COOKIEFILE, $Cookie);
curl_setopt($Curl, CURLOPT_HTTPHEADER, array(
'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'Accept-Language:' . $acceptLanguage . ',en;q=0.9',
'Cache-Control: max-age=0',
'Connection: keep-alive',
'User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:28.0) Gecko/20100101 Firefox/28.0',
'Host: ' . $Host
));
$Content = curl_exec($Curl);
$httpCode = curl_getinfo($Curl, CURLINFO_HTTP_CODE);
curl_close($curl);
if ($httpCode == 200)
{
return array(
'httpCode' => $httpCode,
'Data' => $Content
);
}
elseif ($httpCode != 200)
{
return array(
'httpCode' => 0,
'Data' => 0
);
}
}
function TruncateCookieFile() {
$Cookie = ROOT_DIR . DS . '_misc' . DS . 'cookie.txt';
$openCookieFile = @fopen($Cookie, "r+");
if ($openCookieFile !== false)
{
ftruncate($openCookieFile, 0);
fclose($openCookieFile);
}
}
function isHTML( $str ) { return preg_match( "/\/[a-z]*>/i", $str ) != 0; }
I'm especially turned off by my loop-from-hell HTTP Code 200 Validator. I know I can add a validator on the HTML being returned, matching content-size to file-size. Of course add a more detailed and verbose error logging system. But these are just new features, irrelevant to the question.
I want to know how much my code sucks and how it can be improved.