Pluf Framework

Pluf Framework Git Source Tree


Root/src/Pluf/AB.php

<?php
/* -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
# ***** BEGIN LICENSE BLOCK *****
# This file is part of Plume Framework, a simple PHP Application Framework.
# Copyright (C) 2001-2010 Loic d'Anterroches and contributors.
#
# Plume Framework is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
#
# Plume Framework is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
#
# ***** END LICENSE BLOCK ***** */

/**
 * Core A/B testing component.
 *
 * The two important methods are `test` and `convert`.
 *
 * For performance reasons, the A/B testing component requires you to
 * setup a cache (APC or Memcached) and use the MongoDB database. The
 * amount of data in the MongoDB should not be that big for most of
 * the websites and as such it is fine if you are using the 32bit
 * version of MongoDB.
 *
 * For the moment the storage is not abstracted to use another database.
 *
 * All the configuration variables for the component start with
 * `pluf_ab_`. You need to add 'Pluf_AB' to your list of middleware.
 *
 */
class Pluf_AB
{
    /**
     * MongoDB database handler.
     */
    public static $db = null;

    /**
     * Returns an alternative for a given test.
     *
     * The test is stored in the `tests` collection the first time it
     * is seen. A visitor is assigned once to an alternative (stored in
     * the `intest` collection) and gets the same alternative on the
     * next calls. A bot always gets the first alternative and is not
     * counted. An inactive test returns its recorded winner, or the
     * first alternative when no winner is recorded.
     *
     * If the configuration key `pluf_ab_allow_force` is true, the
     * alternative can be forced with a GET parameter named after the
     * test and containing the index of the wanted alternative. A
     * value which is not an index of $alts is ignored and the normal
     * selection applies.
     *
     * The uid of the visitor is read from $request->pabuid. When it
     * is not set, it is computed with getUid() and stored on the
     * request.
     *
     * @param $test string Unique name of the test
     * @param $request Pluf_HTTP_Request
     * @param $alts array Alternatives to pick from (array(true,false))
     * @param $weights array Weights for the alternatives (null)
     * @param $desc string Optional description of the test ('')
     * @return mixed One value from $alts
     */
    public static function test($test, &$request, $alts=array(true,false), 
                                $weights=null, $desc='')
    {
        if (Pluf::f('pluf_ab_allow_force', false) and 
            isset($request->GET[$test])) {
            // The index comes straight from the query string: it can
            // be an array or an unknown key, only use it when it
            // really designates one of the alternatives.
            $forced = $request->GET[$test];
            if ((is_string($forced) or is_int($forced)) 
                and array_key_exists($forced, $alts)) {
                return $alts[$forced];
            }
        }
        $db = self::getDb();
        // Get or set test
        $dtest = $db->tests->findOne(array('_id' => $test), 
                                     array('_id', 'active', 'winner'));
        if ($dtest == null) {
            $dtest = array('_id' => $test,
                           'creation_dtime' => gmdate('Y-m-d H:i:s', 
                                                      $request->time),
                           'desc' => $desc,
                           'alts' => $alts,
                           'exp' => 0,
                           'conv' => 0,
                           'active' => true);
            // One exposure and one conversion counter per alternative
            for ($i=0;$i<count($alts);$i++) {
                $dtest['expalt_'.$i] = 0;
                $dtest['convalt_'.$i] = 0;
            }
            $db->tests->update(array('_id'=>$test), $dtest,
                               array('upsert' => true));
        } elseif (!$dtest['active']) {
            // If test closed with given alternative, returns alternative
            return (isset($dtest['winner'])) ? $alts[$dtest['winner']] : $alts[0];
        }
        if (!isset($request->pabuid)) {
            $request->pabuid = self::getUid($request);
        }
        if ($request->pabuid == 'bot') {
            return $alts[0];
        }
        // If $request->pabuid in test, returns corresponding alternative
        $intest = $db->intest->findOne(array('_id' => $test.'##'.$request->pabuid), 
                                      array('_id', 'alt'));
        if ($intest) {
            return $alts[$intest['alt']];
        }
        // Else find alternative, store and return it
        if ($weights == null) {
            // No weights given, all the alternatives have the same weight
            $weights = array_fill(0, count($alts), 1.0/count($alts));
        }
        $alt = self::weightedRand($weights);
        $intest = array('_id' => $test.'##'.$request->pabuid, 
                        'test' => $test,
                        'pabuid' => $request->pabuid, 
                        'first_dtime' => gmdate('Y-m-d H:i:s', 
                                                $request->time),
                        'alt' => $alt);
        $db->intest->update(array('_id' => $test.'##'.$request->pabuid),
                            $intest, array('upsert' => true));
        // Update the counts of the test
        $db->tests->update(array('_id' => $test), 
                           array('$inc' => array('exp' => 1, 
                                                 'expalt_'.$alt => 1)));
        return $alts[$alt];
    }

    /**
     * Record the conversion of the current visitor for a test.
     *
     * Nothing is recorded when the request carries no uid, when the
     * visitor is a bot, when the visitor was never exposed to the
     * test, when the visitor is already converted or when the test is
     * unknown or not active anymore.
     *
     * @param $test string Unique name of the test
     * @param $request Pluf_HTTP_Request
     */
    public static function convert($test, $request)
    {
        $tracked = (isset($request->pabuid) and $request->pabuid != 'bot');
        if (!$tracked) {
            return;
        }
        $mongo = self::getDb();
        // Same key as the one used to store the exposure of the visitor
        $key = $test.'##'.$request->pabuid;
        $exposure = $mongo->intest->findOne(array('_id' => $key), 
                                            array('_id', 'alt'));
        if (!$exposure) {
            return; // Never exposed to the test
        }
        if ($mongo->convert->findOne(array('_id' => $key))) {
            return; // Converted once already
        }
        $definition = $mongo->tests->findOne(array('_id' => $test)); 
        if (!$definition or !$definition['active']) {
            return; // Unknown or closed test
        }
        $mongo->convert->update(array('_id' => $key), 
                                array('_id' => $key, 'test' => $test), 
                                array('upsert' => true));
        // Global counter and counter of the alternative seen by the visitor
        $counters = array('conv' => 1, 
                          'convalt_'.$exposure['alt'] => 1);
        $mongo->tests->update(array('_id' => $test), 
                              array('$inc' => $counters));
    }

    /**
     * Register a property set for the user.
     *
     * This allows you to segment your users with these properties.
     * The properties are merged into $request->pabprops, a property
     * given in $props replaces a property already stored under the
     * same key. Nothing is stored for a bot.
     *
     * The uid is read from $request->pabuid or computed with getUid()
     * and is stored back on the request.
     *
     * @param $request Pluf_HTTP_Request
     * @param $props array Properties
     */
    public static function register($request, $props) 
    {
        $pabuid = (isset($request->pabuid)) ? 
            $request->pabuid : 
            self::getUid($request);
        if ($pabuid == 'bot') {
            return;
        }
        $request->pabuid = $pabuid;
        // The properties are normally initialised by process_request,
        // start from an empty set when they are missing instead of
        // passing an undefined value to array_merge.
        $current = (isset($request->pabprops) 
                    and is_array($request->pabprops)) ? 
            $request->pabprops : 
            array();
        $request->pabprops = array_merge($current, $props);
    }

    /**
     * Track a funnel.
     *
     * The array of properties can be used to track different A/B
     * testing cases.
     *
     * The list of properties must be the same at all the steps of the
     * funnel, you cannot pass array('gender' => 'M') at step 1 and
     * array('age' => 32) at step 2. You need to pass both of them at
     * all steps.
     *
     * Nothing is logged for a bot. A given step of a funnel is logged
     * at most once per visitor within 60 seconds, the guard is stored
     * in the cache. The log entry is inserted in the `funnellogs`
     * collection with the properties of the request merged with
     * $props.
     *
     * @param $funnel string Name of the funnel
     * @param $step int Step in the funnel, from 1 to n
     * @param $stepname string Readable name for the step
     * @param $request Pluf_HTTP_Request Request object
     * @param $props array Array of properties associated with the funnel (array())
     */
    public static function trackFunnel($funnel, $step, $stepname, $request, $props=array())
    {
        $pabuid = (isset($request->pabuid)) ? 
            $request->pabuid : 
            self::getUid($request);
        if ($pabuid == 'bot') {
            return;
        }
        $request->pabuid = $pabuid;
        $cache = Pluf_Cache::factory();
        $key = 'pluf_ab_funnel_'.crc32($funnel.'#'.$step.'#'.$pabuid);
        if ($cache->get($key, false)) {
            return; // The key is valid 60s not to track 2 steps within 60s
        }
        $cache->set($key, '1', 60);
        // The properties are normally initialised by process_request,
        // start from an empty set when they are missing instead of
        // passing an undefined value to array_merge.
        $current = (isset($request->pabprops) 
                    and is_array($request->pabprops)) ? 
            $request->pabprops : 
            array();
        $what = array(
                      'f' => $funnel,
                      's' => $step,
                      'sn' => $stepname,
                      // Day of the request as an integer, like 20100131
                      't' => (int) gmdate('Ymd', $request->time),
                      'u' => $pabuid,
                      'p' => array_merge($current, $props),
                      );
        $db = self::getDb();
        $db->funnellogs->insert($what);
    }

    /**
     * Process the response of a view.
     *
     * If the request has no `pabuid` cookie and the request has a
     * pabuid, set the cookie in the response. If properties are
     * registered on the request, they are signed with Pluf_Sign and
     * set in the `pabprops` cookie. No cookie is set for a bot.
     *
     * @param Pluf_HTTP_Request The request
     * @param Pluf_HTTP_Response The response
     * @return Pluf_HTTP_Response The response
     */
    public function process_response($request, $response)
    {
        // The uid is not always available on the request, test it
        // once here without reading an undefined property.
        $isbot = (isset($request->pabuid) and $request->pabuid == 'bot');
        if (!$isbot and isset($request->pabuid) 
            and !isset($request->COOKIE['pabuid'])) {
            $response->cookies['pabuid'] = $request->pabuid;
        }
        if (!$isbot and isset($request->pabprops) 
            and count($request->pabprops)) {
            $response->cookies['pabprops'] = Pluf_Sign::dumps($request->pabprops, null, true);
        }
        return $response;
    }

    /**
     * Process the request.
     *
     * Copy the uid of the `pabuid` cookie in $request->pabuid when
     * the cookie passes check_uid. $request->pabprops is always set:
     * to the content of the `pabprops` cookie when Pluf_Sign can load
     * it, to an empty array otherwise.
     *
     * @param Pluf_HTTP_Request The request
     * @return bool False
     */
    function process_request($request)
    {
        $hasuid = isset($request->COOKIE['pabuid']);
        if ($hasuid and self::check_uid($request->COOKIE['pabuid'])) {
            $request->pabuid = $request->COOKIE['pabuid'];
        }
        $request->pabprops = array();
        if (!isset($request->COOKIE['pabprops'])) {
            return false;
        }
        try {
            $request->pabprops = Pluf_Sign::loads($request->COOKIE['pabprops']);
        } catch (Exception $e) {
            // The cookie cannot be loaded, keep the empty properties.
        }
        return false;
    }

    /**
     * Get a MongoDB database handle.
     *
     * The handle is created on the first call and kept in self::$db,
     * the next calls return the stored handle.
     *
     * The configuration keys used, with their default values, are:
     *
     * `pluf_ab_mongo_server`: 'mongodb://localhost:27017'
     * `pluf_ab_mongo_options`: array('connect' => true, 
     *                                'persist' => 'pluf_ab_mongo')
     * `pluf_ab_mongo_db`: 'pluf_ab'
     *
     * If you have a default installation of MongoDB, it should work
     * out of the box. 
     *
     * @return mixed Database handle returned by Mongo::selectDB
     */
    public static function getDb()
    {
        if (self::$db === null) {
            $defaults = array('connect' => true, 
                              'persist' => 'pluf_ab_mongo');
            $server = Pluf::f('pluf_ab_mongo_server', 
                              'mongodb://localhost:27017');
            $options = Pluf::f('pluf_ab_mongo_options', $defaults);
            $conn = new Mongo($server, $options); 
            $name = Pluf::f('pluf_ab_mongo_db', 'pluf_ab');
            self::$db = $conn->selectDB($name);
        }
        return self::$db;
    }
    
    /**
     * Get the uid of a given request.
     *
     * The uid is, in this order:
     *
     * - the value of the `pabuid` cookie when it passes check_uid;
     * - 'bot' when there is no user agent or when isBot matches it;
     * - the uid stored in the cache for the same remote address and
     *   user agent;
     * - a new uid built with make_uid.
     *
     * In the last two cases the uid is stored in the cache for one
     * hour.
     *
     * @param $request Pluf_HTTP_Request
     * @return string The uid or 'bot'
     */
    public static function getUid($request)
    {
        $fromcookie = (isset($request->COOKIE['pabuid']) and 
                       self::check_uid($request->COOKIE['pabuid']));
        if ($fromcookie) {
            return $request->COOKIE['pabuid'];
        }
        $human = (isset($request->SERVER['HTTP_USER_AGENT']) and
                  !self::isBot($request->SERVER['HTTP_USER_AGENT']));
        if (!$human) {
            return 'bot';
        }
        // A visitor without cookie seen from the same ip/agent within
        // the last hour gets the same uid, it is kept in the cache.
        $cache = Pluf_Cache::factory();
        $key = 'pluf_ab_'.crc32($request->remote_addr.'#'.$request->SERVER['HTTP_USER_AGENT']);
        $uid = $cache->get($key, null);
        if (!$uid) {
            $uid = self::make_uid($request);
        }
        // Storing again the uid restarts the hour
        $cache->set($key, $uid, 3600);
        return $uid;
    }

    /**
     * Check if a given user agent is a bot.
     *
     * A user agent is considered as a bot when it contains one of the
     * known keywords (case insensitive), when it contains 'bot'
     * preceded or followed by a separator, or when it contains no
     * opening parenthesis at all.
     *
     * @param $user_agent string User agent string
     * @return bool True if the user agent is a bot
     */
    public static function isBot($user_agent)
    {
        static $keywords = array('robot', 'checker', 'crawl', 'discovery', 
                                 'hunter', 'scanner', 'spider', 'sucker', 'larbin',
                                 'slurp', 'libwww', 'lwp', 'yandex', 'netcraft',
                                 'wget', 'twiceler');
        static $patterns = array('/bot[\s_+:,\.\;\/\\\-]/i', 
                                 '/[\s_+:,\.\;\/\\\-]bot/i');
        foreach ($keywords as $keyword) {
            if (stristr($user_agent, $keyword) !== false) {
                return true;
            }
        }
        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $user_agent)) {
                return true;
            }
        }
        // Without opening parenthesis, the agent is flagged as a bot
        return (strpos($user_agent, '(') === false);
    }

    /**
     * Returns a random weighted alternative.
     *
     * Given a series of weighted alternative in the format:
     * 
     * <pre>
     * array('alt1' => 0.2,
     *       'alt2' => 0.3,
     *       'alt3' => 0.5);
     * </pre>
     *
     * Returns the key of the selected alternative. In the example,
     * the alternative 3 (alt3) has a 50% chance to be selected, if
     * selected the result would be 'alt3'.
     *
     * The weights are expected to sum to 1. When they sum to less
     * than 1, or when the rounding of the floats leaves a gap at the
     * end of the range, the last alternative is returned instead of
     * no alternative at all.
     *
     * @link http://20bits.com/downloads/w_rand.phps
     *
     * @param $weights array Weighted alternatives
     * @return mixed Key of the selected $weights array, null if
     *               $weights is empty
     */
    public static function weightedRand($weights) 
    {
        $r = mt_rand(1,10000);
        $offset = 0;
        $last = null;
        foreach ($weights as $k => $w) {
            $offset += $w*10000;
            if ($r <= $offset) {
                return $k;
            }
            $last = $k;
        }
        // $r is above the cumulated weights, fall back on the last
        // alternative not to return null.
        return $last;
    }

    /**
     * Given a request, make a corresponding A/B test UID.
     *
     * The UID starts with the time of the request, the CRC32 of the
     * remote address and a random number, each of them formatted as
     * a zero padded 8 digit uppercase hexadecimal value. It ends with
     * the CRC32, in the same format, of these first characters
     * concatenated with the MD5 of the `secret_key` configuration
     * value. This way check_uid can control an UID without a
     * database hit.
     *
     * @param $request Pluf_HTTP_Request
     * @return string UID
     */
    public static function make_uid($request)
    {
        $addr = sprintf('%u', crc32($request->remote_addr));
        $base = sprintf('%08X%08X%08X', $request->time, $addr, rand());
        $salt = md5(Pluf::f('secret_key'));
        // %u to get the checksum as an unsigned value
        $sign = sprintf('%u', crc32($base.$salt));
        return sprintf('%s%08X', $base, $sign);
    }

    /**
     * Validate the uid in the cookie. 
     *
     * The uid must be a string of 32 characters, the last 8
     * characters being the checksum of the first 24 characters as
     * generated in make_uid.
     *
     * @see self::make_uid
     *
     * @param $uid string The UID
     * @return bool True if the UID is valid
     */
    public static function check_uid($uid)
    {
        // The value comes from a cookie, it can be an array
        if (!is_string($uid) or strlen($uid) != 32) {
            return false;
        }
        $check = sprintf('%08X', sprintf('%u', crc32(substr($uid, 0, 24).md5(Pluf::f('secret_key')))));
        // Strict comparison: with == two strings looking like numbers
        // are compared as numbers, '00001E03' would equal '00001000'.
        return ($check === substr($uid, -8));
    }

    /* ------------------------------------------------------------
     *
     *                  Statistics Functions
     *
     * Note: I am not a statistician, use at your own risk!
     *
     * ------------------------------------------------------------ */

    /**
     * Recommended sample sizes for a given conversion rate.
     *
     * The sizes are the ones needed to be significant at 95% when
     * the conversion varies, with respect to the other alternative,
     * of 25%, 15% and 5%.
     *
     * @param $conv Conversion rate ]0.0;1.0]
     * @return array The 3 sample sizes for 25%, 15% and 5%
     */
    public static function ssize($conv)
    {
        // Squares of the variations: 0.25^2, 0.15^2 and 0.05^2
        $squares = array(0.0625, 0.0225, 0.0025);
        $chi = 3.84145882689; // pow(inverse_ncdf(1-(1-0.95)/2),2)
        $sizes = array();
        for ($i=0;$i<3;$i++) {
            $sizes[$i] = (int) ((1-$conv)*$chi/($squares[$i]*$conv));
        }
        return $sizes;
    }


    /**
     * Given a test, returns the corresponding stats.
     *
     * One entry is returned per alternative, the entries are sorted
     * by decreasing conversion rate. The keys of an entry are:
     *
     * - `alt`: index of the alternative in $test['alts'];
     * - `exp` and `conv`: number of exposures and conversions;
     * - `convr`: conversion rate, null if it cannot be computed;
     * - `nconvr`: conversion rate as a percentage string or 'N/A';
     * - `ssize`: recommended sample sizes, empty array if the
     *   conversion rate is null or zero;
     * - `comp`, `zscore` and `conf`: variation, Z-score and
     *   confidence against the base case as formatted strings;
     * - `better`: true if the alternative converts better than the
     *   base case.
     *
     * The base case is the second best alternative. The last four
     * keys are false for the base case itself, for an alternative
     * without a conversion rate and when there is no base case with
     * a conversion rate above zero to compare to.
     *
     * @param $test array Test definition and results
     * @return array Statistics for the test
     */
    public static function getTestStats($test)
    {
        $stats = array(); // Will store the stats
        $n = count($test['alts']);
        $aconvr = array(); // All the conversion rates to sort the alternatives
        for ($i=0;$i<$n;$i++) {
            $conv = (isset($test['convalt_'.$i])) ? $test['convalt_'.$i] : 0;
            $exp = (isset($test['expalt_'.$i])) ? $test['expalt_'.$i] : 0;
            $convr = self::cr(array($exp, $conv));
            $nconvr =  ($convr !== null) ?
                sprintf('%01.2f%%', $convr*100.0) : 'N/A';
            $ssize = ($convr !== null and $convr > 0) ?
                self::ssize($convr) : array();
            $stats[] = array('alt' => $i,
                             'convr' => $convr,
                             'conv' => $conv,
                             'exp' => $exp,
                             'nconvr' => $nconvr,
                             'ssize' => $ssize);
            $aconvr[] = ($convr === null) ? 0 : $convr;
        }
        array_multisort($aconvr, SORT_DESC, $stats);
        // We want the best to be significantly better than the second
        // best, which is at index 1 now that the stats are sorted.
        // With a single alternative or a second best which never
        // converted, there is nothing to compare to.
        $hasbase = ($n > 1 and $stats[1]['convr'] > 0);
        for ($i=0;$i<$n;$i++) {
            $convr = $stats[$i]['convr'];
            $exp = $stats[$i]['exp'];
            $conv = $stats[$i]['conv'];
            $comp = false;
            $zscore = false;
            $conf = false;
            $better = false;
            // An alternative without conversion rate was not exposed,
            // its Z-score would be a division by zero.
            if ($hasbase and $i != 1 and $convr !== null) {
                // Compare with base case and get confidence/Z-score
                $comp = 100.0 * (float) ($convr - $stats[1]['convr'])/ (float) ($stats[1]['convr']);
                if ($comp > 0) $better = true;
                $comp = sprintf('%01.2f%%', $comp);                
                $zscore = self::zscore(array($stats[1]['exp'], $stats[1]['conv']), 
                                       array($exp, $conv));
                $conf = sprintf('%01.2f%%', self::cumnormdist($zscore)*100.0);
                $zscore = sprintf('%01.2f', $zscore);
            }
            $stats[$i]['comp'] = $comp;
            $stats[$i]['zscore'] = $zscore;
            $stats[$i]['conf'] = $conf;
            $stats[$i]['better'] = $better;
        }
        return $stats;
    }

    /**
     * Conversion rate of a sample.
     *
     * @param $t array Sample as array(exposures, conversions)
     * @return mixed Conversions divided by exposures, null when the
     *               conversions are negative or when the exposures
     *               are not strictly positive
     */
    public static function cr($t) 
    { 
        if ($t[1] < 0 or $t[0] <= 0) {
            return null;
        }
        return $t[1]/$t[0]; 
    }

    /**
     * Z-score of the difference of conversion rate of two samples.
     *
     * A sample is an array(exposures, conversions). The score is
     * positive when $t converts better than $c.
     *
     * The special cases are handled without division by zero:
     *
     * - if one of the samples has no conversion rate (see cr), the
     *   score is 0.0, as nothing can be said;
     * - if the variance is not strictly positive, for example when
     *   the two rates are 0 or 1, the score is 0.0 when the rates
     *   are equal, else INF or -INF.
     *
     * @param $c array Control sample
     * @param $t array Tested sample
     * @return float Z-score
     */
    public static function zscore($c, $t) 
    {
        $crt = self::cr($t);
        $crc = self::cr($c);
        if ($crt === null or $crc === null) {
            return 0.0;
        }
        $z = $crt-$crc;
        $s = ($crt*(1-$crt))/$t[0] 
            + ($crc*(1-$crc))/$c[0];
        if ($s <= 0) {
            if ($z > 0) return INF;
            if ($z < 0) return -INF;
            return 0.0;
        }
        return $z/sqrt($s);
    }

    /**
     * Approximation of the cumulative normal distribution.
     *
     * The tail of the distribution is computed for the absolute
     * value of $x with a polynomial of degree 5, the result is the
     * tail for a negative $x and 1 minus the tail otherwise.
     *
     * @param $x float Value, a Z-score for example
     * @return float Approximated probability for a value below $x
     */
    public static function cumnormdist($x)
    {
        // Coefficients of the polynomial, from degree 5 to degree 1
        $coeffs = array(1.330274429, -1.821255978, 1.781477937,
                        -0.356563782, 0.319381530);
        $p = 0.2316419;
        $c = 0.39894228;

        $positive = ($x >= 0.0);
        $ax = ($positive) ? $x : -$x;
        $t = 1.0 / ( 1.0 + $p * $ax );
        // Horner evaluation, the constant term of the polynomial is 0
        $poly = $coeffs[0];
        for ($i=1;$i<5;$i++) {
            $poly = $t * $poly + $coeffs[$i];
        }
        $tail = $c * exp( -$ax * $ax / 2.0 ) * $t * $poly;
        return ($positive) ? (1.0 - $tail) : $tail;
    }
}

Archive Download this file

Branches

Tags

Number of commits:
Page rendered in 0.10035s using 11 queries.