Wednesday, February 29, 2012

CodeIgniter: Memcache sessions, anti-bot filtering, and timestamped cache

Here are three different hacks that we can use in CI to make our lives easier. Note, however, that all of them are based on my local-box implementation; these patches still need to be pushed live.

1. Memcache session handling is available with CodeIgniter, so the first thing to do was to filter out the bots that frequently visit my site and deny them sessions; a cookie will do just fine for them.



/**
 * Classify the current request as coming from a bot or from a normal visitor.
 *
 * The request's user-agent string is lower-cased and searched for each known
 * marker substring; the first marker found classifies the request as a bot.
 *
 * @return string "bot" when a marker is found in the user agent, "normal" otherwise.
 */
function __detectVisit() {
       $this->CI->load->library('user_agent');
       // Cast defensively in case no string is returned when the header is
       // absent (assumption -- confirm against the Input class in use).
       $agent = strtolower((string) $this->CI->input->user_agent());

       // Every marker MUST be lower-case: $agent is lower-cased above and
       // strpos() is case-sensitive, so an upper-case marker would never match.
       $bot_strings = array(
           "google", "bot", "yahoo", "spider", "archiver", "curl",
           "python", "nambu", "twitt", "perl", "sphere", "pear",
           "java", "wordpress", "radian", "crawl", "yandex", "eventbox",
           "monitor", "mechanize", "facebookexternal", "bingbot"
       );

       foreach($bot_strings as $bot) {
               // Strict comparison: a match at offset 0 is still a match.
               if(strpos($agent, $bot) !== false) {
                       return "bot";
               }
       }

       return "normal";
}

The next step was to build namespaces that are aligned with some of CodeIgniter’s built-in session-handling mechanisms.


/**
 * Append the session id and, optionally, client fingerprints to $this->namespace.
 *
 * Note that the value is appended to (not assigned over) the existing
 * $this->namespace, so whatever the property already holds acts as a prefix.
 *
 * @param string     $sess_id    Session identifier; always appended.
 * @param string|int $ip_addr    Client IP address; appended after '#' only when
 *                               sess_match_ip is enabled and the value is non-empty.
 * @param string     $user_agent Client user agent; its md5 is appended after '#'
 *                               only when sess_match_useragent is enabled and the
 *                               value is non-empty.
 * @return void
 */
function __build_namespace($sess_id, $ip_addr = 0, $user_agent = '') {
    $this->namespace .= $sess_id;

    // Test for "an address was supplied" explicitly. Comparing an address
    // string to an integer with ">" gives results that differ between PHP
    // versions and depend on the leading characters of the address.
    if ($this->sess_match_ip == TRUE && ! empty($ip_addr)) {
        // ip2long() only understands dotted IPv4 and returns FALSE for
        // anything else (e.g. IPv6), which would otherwise concatenate as an
        // empty string and give every such client the same suffix. Fall back
        // to a hash of the raw address so the suffix still identifies it.
        $ip_long = ip2long((string) $ip_addr);
        $this->namespace .= '#'.($ip_long !== FALSE ? $ip_long : md5((string) $ip_addr));
    }

    if ($this->sess_match_useragent == TRUE && $user_agent != '') {
        $this->namespace .= '#'.md5($user_agent);
    }
}


2. The second hack bypasses CI to serve cached HTML pages. Here is the code; it's very easy to understand.



<?php  if ( ! defined('BASEPATH')) exit('No direct script access allowed');


/**
 * Output extension that, in addition to sending the page to the browser,
 * stores a copy of pages rendered for non-logged-in visitors in Memcache,
 * keyed by host and request URI.
 */
class MY_Output extends CI_Output {

    /**
     * Finalise the output, optionally cache it, and send it to the browser.
     *
     * Replaces the {elapsed_time}, {memory_usage} and {elapsed_time_cc}
     * placeholders, writes a copy to Memcache for anonymous visitors, then
     * hands the result to the controller's _output() method when it defines
     * one, or echoes it otherwise.
     *
     * @param string $output The rendered page to finalise and send.
     * @return void
     */
    function _display($output = '') {
        // $BM is the framework's global Benchmark instance; it has to be
        // imported into the method scope before it can be used.
        global $BM;

        $elapsed = $BM->elapsed_time('total_execution_time_start', 'total_execution_time_end');
        $output = str_replace('{elapsed_time}', $elapsed, $output);

        $memory = ( ! function_exists('memory_get_usage')) ? '0' : round(memory_get_usage()/1024/1024, 2).'MB';
        $output = str_replace('{memory_usage}', $memory, $output);

        // Grab the super object.  We'll need it in a moment...
        $CI =& get_instance();

        // TRAP OUTPUT: only non-empty pages for visitors that are not logged
        // in are cached, and only when the Memcache extension is available.
        if ( ! empty($output) && ! $CI->access->logged_in() && class_exists('Memcache')) {
            $output .= '<!-- DYNAMICALLY GENERATED -->';
            $ns = "outputcachememc#".$_SERVER['HTTP_HOST'] . $_SERVER['REQUEST_URI'];
            $m = new Memcache;
            $m->addServer('127.0.0.1', 11211, true);
            // The cached copy is stored compressed for five minutes, with the
            // {elapsed_time_cc} placeholder still unreplaced: the substitution
            // below happens only after the copy has been written.
            $m->set($ns, $output, MEMCACHE_COMPRESSED, 60*5);
        }

        // One replacement covers both the cached and the uncached path.
        $output = str_replace('{elapsed_time_cc}', $elapsed, $output);

        // Let the controller post-process the output if it wants to.
        if (method_exists($CI, '_output')) {
            $CI->_output($output);
        } else {
            echo $output;
        }

        log_message('debug', "Final output sent to browser");
        log_message('debug', "Total execution time: ".$elapsed);
    }

}


/* End of file Output.php */
/* Location: ./system/libraries/Output.php */


3.  The third and last hack is a get API that pulls content from memcached.



/**
 * Fetch a value from the cache, making concurrent requests wait while a
 * single request holds the regeneration lock for a missing key.
 *
 * On a miss, the first request to add the "::lock::<key>" entry returns the
 * miss result immediately. Every other request polls until the lock entry
 * disappears or the poll budget runs out, then reads the key once more.
 *
 * @param string $key Cache key to look up.
 * @return mixed Whatever the underlying cache returns for $key.
 */
public function get($key)
    {
        // Longest time a request may be held on a miss, in case the request
        // holding the lock dies (e.g. mysql went away). Also passed as the
        // third argument when adding the lock -- presumably its expiry.
        $lockTtl = 10;
        // Number of polls per waiting request: CPU vs. response-time tradeoff.
        $maxPolls = 50;
        // Pause between polls, in microseconds.
        $pause = ($lockTtl / $maxPolls) * 1000000;

        $value = $this->_cache->get($key);
        if ($this->isSuccess()) {
            return $value;
        }

        // Of all the requests slipping through varnish, only the one that
        // manages to add the lock passes straight through with the miss.
        $lockKey = "::lock::$key";
        if ($this->_cache->add($lockKey, 1, $lockTtl)) {
            return $value;
        }

        // Everyone else blocks here until the lock is gone or the poll
        // budget is used up.
        $polls = 0;
        do {
            usleep($pause);
            $polls++;
        } while ($this->_cache->get($lockKey) && $polls != $maxPolls);

        // Hopefully a hit by now. Deliberately a single re-read rather than
        // a recursive $this->get($key), which the author considered unsafe.
        return $this->_cache->get($key);
    }


Let me know if you have any questions or concerns.


Happy Coding !!!



No comments:

Post a Comment