<?php␊ |
/* -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */␊ |
/*␊ |
# ***** BEGIN LICENSE BLOCK *****␊ |
# This file is part of Plume Framework, a simple PHP Application Framework.␊ |
# Copyright (C) 2001-2010 Loic d'Anterroches and contributors.␊ |
#␊ |
# Plume Framework is free software; you can redistribute it and/or modify␊ |
# it under the terms of the GNU Lesser General Public License as published by␊ |
# the Free Software Foundation; either version 2.1 of the License, or␊ |
# (at your option) any later version.␊ |
#␊ |
# Plume Framework is distributed in the hope that it will be useful,␊ |
# but WITHOUT ANY WARRANTY; without even the implied warranty of␊ |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the␊ |
# GNU Lesser General Public License for more details.␊ |
#␊ |
# You should have received a copy of the GNU Lesser General Public License␊ |
# along with this program; if not, write to the Free Software␊ |
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA␊ |
#␊ |
# ***** END LICENSE BLOCK ***** */␊ |
␊ |
/**␊ |
* Core A/B testing component.␊ |
*␊ |
* The two importants methods are `test` and `convert`.␊ |
*␊ |
* For performance reasons, the A/B testing component requires you to␊ |
* setup a cache (APC or Memcached) and use the MongoDB database. The␊ |
* amount of data in the MongoDB should not be that big for most of␊ |
* the websites and as such it is fine if you are using the 32bit␊ |
* version of MongoDB.␊ |
*␊ |
* For the moment the storage is not abstracted to use another database.␊ |
*␊ |
* All the configuration variables for the component start with␊ |
* `pluf_ab_`. You need to add 'Pluf_AB' to your list of middleware.␊ |
*␊ |
*/␊ |
class Pluf_AB␊ |
{␊ |
/**␊ |
* MongoDB database handler.␊ |
*/␊ |
public static $db = null;␊ |
␊ |
/**␊ |
* Returns an alternative for a given test.␊ |
* ␊ |
* The middleware is already storing the uid of the user and makes␊ |
* it available as $request->pabuid.␊ |
*␊ |
* @param $test string Unique name of the test␊ |
* @param $request Pluf_HTTP_Request␊ |
* @param $alts array Alternatives to pick from (array(true,false))␊ |
* @param $weights array Weights for the alternatives (null)␊ |
* @param $desc string Optional description of the test ('')␊ |
* @return mixed One value from $alts␊ |
*/␊ |
public static function test($test, &$request, $alts=array(true,false), ␊ |
$weights=null, $desc='')␊ |
{␊ |
if (Pluf::f('pluf_ab_allow_force', false) and ␊ |
isset($request->GET[$test])) {␊ |
return $alts[$request->GET[$test]];␊ |
}␊ |
$db = self::getDb();␊ |
// Get or set test␊ |
$dtest = $db->tests->findOne(array('_id' => $test), ␊ |
array('_id', 'active', 'winner'));␊ |
if ($dtest == null) {␊ |
$dtest = array('_id' => $test,␊ |
'creation_dtime' => gmdate('Y-m-d H:i:s', ␊ |
$request->time),␊ |
'desc' => $desc,␊ |
'alts' => $alts,␊ |
'exp' => 0,␊ |
'conv' => 0,␊ |
'active' => true);␊ |
for ($i=0;$i<count($alts);$i++) {␊ |
$dtest['expalt_'.$i] = 0;␊ |
$dtest['convalt_'.$i] = 0;␊ |
}␊ |
$db->tests->update(array('_id'=>$test), $dtest,␊ |
array('upsert' => true));␊ |
} elseif (!$dtest['active']) {␊ |
// If test closed with given alternative, returns alternative␊ |
return (isset($dtest['winner'])) ? $alts[$dtest['winner']] : $alts[0];␊ |
}␊ |
if (!isset($request->pabuid)) {␊ |
$request->pabuid = self::getUid($request);␊ |
}␊ |
if ($request->pabuid == 'bot') {␊ |
return $alts[0];␊ |
}␊ |
// If $request->pabuid in test, returns corresponding alternative␊ |
$intest = $db->intest->findOne(array('_id' => $test.'##'.$request->pabuid), ␊ |
array('_id', 'alt'));␊ |
if ($intest) {␊ |
return $alts[$intest['alt']];␊ |
}␊ |
// Else find alternative, store and return it␊ |
if ($weights == null) {␊ |
$weights = array_fill(0, count($alts), 1.0/count($alts));␊ |
}␊ |
$alt = self::weightedRand($weights);␊ |
$intest = array('_id' => $test.'##'.$request->pabuid, ␊ |
'test' => $test,␊ |
'pabuid' => $request->pabuid, ␊ |
'first_dtime' => gmdate('Y-m-d H:i:s', ␊ |
$request->time),␊ |
'alt' => $alt);␊ |
$db->intest->update(array('_id' => $test.'##'.$request->pabuid),␊ |
$intest, array('upsert' => true));␊ |
// Update the counts of the test␊ |
$db->tests->update(array('_id' => $test), ␊ |
array('$inc' => array('exp' => 1, ␊ |
'expalt_'.$alt => 1)));␊ |
return $alts[$alt];␊ |
}␊ |
␊ |
/**␊ |
* Mark a test as converted.␊ |
*␊ |
* A user which was not exposed to the test or a bot is not marked␊ |
* as converted as it is not significant.␊ |
*␊ |
* @param $test string Test␊ |
* @param $request Pluf_HTTP_Request␊ |
*/␊ |
public static function convert($test, $request)␊ |
{␊ |
if (!isset($request->pabuid) or $request->pabuid == 'bot') {␊ |
return;␊ |
}␊ |
$db = self::getDb();␊ |
$id = $test.'##'.$request->pabuid;␊ |
$intest = $db->intest->findOne(array('_id' => $id), ␊ |
array('_id', 'alt'));␊ |
if (!$intest) {␊ |
// Not tested␊ |
return;␊ |
}␊ |
$conv = $db->convert->findOne(array('_id' => $id)); ␊ |
if ($conv) {␊ |
// Already converted␊ |
return;␊ |
}␊ |
$dtest = $db->tests->findOne(array('_id' => $test)); ␊ |
if (!$dtest or !$dtest['active']) {␊ |
return;␊ |
}␊ |
$conv = array(␊ |
'_id' => $id,␊ |
'test' => $test,␊ |
);␊ |
$db->convert->update(array('_id' => $id), $conv, ␊ |
array('upsert' => true));␊ |
// increment the test counters␊ |
$db->tests->update(array('_id' => $test), ␊ |
array('$inc' => array('conv' => 1, ␊ |
'convalt_'.$intest['alt'] => 1)));␊ |
}␊ |
␊ |
/**␊ |
* Process the response of a view.␊ |
*␊ |
* If the request has no cookie and the request has a pabuid, set␊ |
* the cookie in the response. ␊ |
*␊ |
* @param Pluf_HTTP_Request The request␊ |
* @param Pluf_HTTP_Response The response␊ |
* @return Pluf_HTTP_Response The response␊ |
*/␊ |
function process_response($request, $response)␊ |
{␊ |
if (!isset($request->COOKIE['pabuid']) and isset($request->pabuid)␊ |
and $request->pabuid != 'bot') {␊ |
$response->cookies['pabuid'] = $request->pabuid;␊ |
}␊ |
return $response;␊ |
}␊ |
␊ |
/**␊ |
* Process the request.␊ |
*␊ |
* If the request has the A/B test cookie, set $request->pabuid.␊ |
*␊ |
* @param Pluf_HTTP_Request The request␊ |
* @return bool False␊ |
*/␊ |
function process_request($request)␊ |
{␊ |
if (isset($request->COOKIE['pabuid']) and␊ |
self::check_uid($request->COOKIE['pabuid'])) {␊ |
$request->pabuid = $request->COOKIE['pabuid'];␊ |
}␊ |
return false;␊ |
}␊ |
␊ |
/**␊ |
* Get a MongoDB database handle.␊ |
*␊ |
* It opens only one connection per request and tries to keep a␊ |
* persistent connection between the requests.␊ |
*␊ |
* The configuration keys used are:␊ |
*␊ |
* `pluf_ab_mongo_server`: 'mongodb://localhost:27017'␊ |
* `pluf_ab_mongo_options`: array('connect' => true, ␊ |
* 'persist' => 'pluf_ab_mongo')␊ |
* `pluf_ab_mongo_db`: 'pluf_ab'␊ |
*␊ |
* If you have a default installation of MongoDB, it should work␊ |
* out of the box. ␊ |
*␊ |
*/␊ |
public static function getDb()␊ |
{␊ |
if (self::$db !== null) {␊ |
return self::$db;␊ |
}␊ |
$server = Pluf::f('pluf_ab_mongo_server', 'mongodb://localhost:27017');␊ |
$options = Pluf::f('pluf_ab_mongo_options', ␊ |
array('connect' => true, 'persist' => 'pluf_ab_mongo'));␊ |
$conn = new Mongo($server, $options); ␊ |
self::$db = $conn->selectDB(Pluf::f('pluf_ab_mongo_db', 'pluf_ab'));␊ |
return self::$db;␊ |
}␊ |
␊ |
/**␊ |
* Get the uid of a given request.␊ |
*␊ |
* @param $request Pluf_HTTP_Request␊ |
*/␊ |
public static function getUid($request)␊ |
{␊ |
if (isset($request->COOKIE['pabuid']) and ␊ |
self::check_uid($request->COOKIE['pabuid'])) {␊ |
return $request->COOKIE['pabuid'];␊ |
}␊ |
if (!isset($request->SERVER['HTTP_USER_AGENT']) or␊ |
self::isBot($request->SERVER['HTTP_USER_AGENT'])) {␊ |
return 'bot';␊ |
}␊ |
// Here we need to make an uid, first check if a user with␊ |
// same ip/agent exists and was last seen within the last 1h.␊ |
// We get that from MemcacheDB␊ |
$cache = Pluf_Cache::factory();␊ |
$key = 'pluf_ab_'.crc32($request->remote_addr.'#'.$request->SERVER['HTTP_USER_AGENT']);␊ |
if ($uid=$cache->get($key, null)) {␊ |
$cache->set($key, $uid, 3600);␊ |
return $uid;␊ |
}␊ |
$uid = self::make_uid($request);␊ |
$cache->set($key, $uid, 3600);␊ |
return $uid;␊ |
}␊ |
␊ |
/**␊ |
* Check if a given user agent is a bot.␊ |
*␊ |
* @param $user_agent string User agent string␊ |
* @return bool True if the user agent is a bot␊ |
*/␊ |
public static function isBot($user_agent)␊ |
{␊ |
static $bots = array('robot', 'checker', 'crawl', 'discovery', ␊ |
'hunter', 'scanner', 'spider', 'sucker', 'larbin',␊ |
'slurp', 'libwww', 'lwp', 'yandex', 'netcraft',␊ |
'wget', 'twiceler');␊ |
static $pbots = array('/bot[\s_+:,\.\;\/\\\-]/i', ␊ |
'/[\s_+:,\.\;\/\\\-]bot/i');␊ |
foreach ($bots as $r) {␊ |
if (false !== stristr($user_agent, $r)) {␊ |
return true;␊ |
}␊ |
}␊ |
foreach ($pbots as $p) {␊ |
if (preg_match($p, $user_agent)) {␊ |
return true;␊ |
}␊ |
}␊ |
if (false === strpos($user_agent, '(')) {␊ |
return true;␊ |
}␊ |
return false;␊ |
}␊ |
␊ |
/**␊ |
* Returns a random weighted alternative.␊ |
*␊ |
* Given a series of weighted alternative in the format:␊ |
* ␊ |
* <pre>␊ |
* array('alt1' => 0.2,␊ |
* 'alt2' => 0.3,␊ |
* 'alt3' => 0.5);␊ |
* </pre>␊ |
*␊ |
* Returns the key of the selected alternative. In the following␊ |
* example, the alternative 3 (alt3) has a 50% chance to be␊ |
* selected, if the selected the results would be 'alt3'.␊ |
␊ |
* @link: http://20bits.com/downloads/w_rand.phps␊ |
*␊ |
* @param $weights array Weighted alternatives␊ |
* @return mixed Key of the selected $weights array␊ |
*/␊ |
public static function weightedRand($weights) ␊ |
{␊ |
$r = mt_rand(1,10000);␊ |
$offset = 0;␊ |
foreach ($weights as $k => $w) {␊ |
$offset += $w*10000;␊ |
if ($r <= $offset) {␊ |
return $k;␊ |
}␊ |
}␊ |
}␊ |
␊ |
/**␊ |
* Given a request, make a corresponding A/B test UID.␊ |
*␊ |
* The UID is based on the time, the remote address, a random␊ |
* component and is hashed to ensure the integrity and avoid the␊ |
* need of a database hit when controlled.␊ |
*␊ |
* @param $request Pluf_HTTP_Request␊ |
* @return string UID␊ |
*/␊ |
public static function make_uid($request)␊ |
{␊ |
$base = sprintf('%08X%08X%08X', $request->time, ␊ |
sprintf('%u', crc32($request->remote_addr)), ␊ |
rand());␊ |
return sprintf('%s%08X', $base, sprintf('%u', crc32($base.md5(Pluf::f('secret_key')))));␊ |
}␊ |
␊ |
/**␊ |
* Validate the uid in the cookie. ␊ |
*␊ |
* @see self::make_uid␊ |
*␊ |
* @param $uid string The UID␊ |
* @return bool True if the UID is valid␊ |
*/␊ |
public static function check_uid($uid)␊ |
{␊ |
if (strlen($uid) != 32) {␊ |
return false;␊ |
}␊ |
$check = sprintf('%08X', sprintf('%u', crc32(substr($uid, 0, 24).md5(Pluf::f('secret_key')))));␊ |
return ($check == substr($uid, -8));␊ |
}␊ |
␊ |
/* ------------------------------------------------------------␊ |
*␊ |
* Statistics Functions␊ |
*␊ |
* Note: I am not a statistician, use at your own risk!␊ |
*␊ |
* ------------------------------------------------------------ */␊ |
␊ |
/**␊ |
* Given a conversion rate calculate the recommended sample sizes.␊ |
*␊ |
* The sample sizes is calculated to be significant at 95% in the␊ |
* case of a variation of conversion with respect to the other␊ |
* alternative of 25%, 15% and 5%.␊ |
*␊ |
* @param $conv Conversion rate ]0.0;1.0]␊ |
* @return array The 3 sample sizes for 25%, 15% and 5%␊ |
*/␊ |
public static function ssize($conv)␊ |
{␊ |
$a = 3.84145882689; // $a = pow(inverse_ncdf(1-(1-0.95)/2),2)␊ |
$res = array();␊ |
$bs = array(0.0625, 0.0225, 0.0025);␊ |
foreach ($bs as $b) {␊ |
$res[] = (int) ((1-$conv)*$a/($b*$conv));␊ |
}␊ |
return $res;␊ |
}␊ |
␊ |
␊ |
/**␊ |
* Given a test, returns the corresponding stats.␊ |
*␊ |
* @param $test array Test definition and results␊ |
* @return array Statistics for the test␊ |
*/␊ |
public static function getTestStats($test)␊ |
{␊ |
$stats = array(); // Will store the stats␊ |
$n = count($test['alts']);␊ |
$aconvr = array(); // All the conversion rates to sort the alternatives␊ |
for ($i=0;$i<$n;$i++) {␊ |
$conv = (isset($test['convalt_'.$i])) ? $test['convalt_'.$i] : 0;␊ |
$exp = (isset($test['expalt_'.$i])) ? $test['expalt_'.$i] : 0;␊ |
$convr = self::cr(array($exp, $conv));␊ |
$nconvr = ($convr !== null) ?␊ |
sprintf('%01.2f%%', $convr*100.0) : 'N/A';␊ |
$ssize = ($convr !== null and $convr > 0) ?␊ |
self::ssize($convr) : array();␊ |
$stats[] = array('alt' => $i,␊ |
'convr' => $convr,␊ |
'conv' => $conv,␊ |
'exp' => $exp,␊ |
'nconvr' => $nconvr,␊ |
'ssize' => $ssize);␊ |
$aconvr[] = ($convr === null) ? 0 : $convr;␊ |
}␊ |
array_multisort($aconvr, SORT_DESC, $stats);␊ |
// We want the best to be significantly better than the second best.␊ |
for ($i=0;$i<$n;$i++) {␊ |
$convr = $stats[$i]['convr'];␊ |
$exp = $stats[$i]['exp'];␊ |
$conv = $stats[$i]['conv'];␊ |
$comp = false;␊ |
$zscore = false;␊ |
$conf = false;␊ |
$better = false;␊ |
if ($i != 1 and $stats[1]['convr'] > 0) {␊ |
// Compare with base case and get confidence/Z-score␊ |
$comp = 100.0 * (float) ($convr - $stats[1]['convr'])/ (float) ($stats[1]['convr']);␊ |
if ($comp > 0) $better = true;␊ |
$comp = sprintf('%01.2f%%', $comp); ␊ |
$zscore = self::zscore(array($stats[1]['exp'], $stats[1]['conv']), ␊ |
array($exp, $conv));␊ |
$conf = sprintf('%01.2f%%', self::cumnormdist($zscore)*100.0);␊ |
$zscore = sprintf('%01.2f', $zscore);␊ |
}␊ |
$stats[$i]['comp'] = $comp;␊ |
$stats[$i]['zscore'] = $zscore;␊ |
$stats[$i]['conf'] = $conf;␊ |
$stats[$i]['better'] = $better;␊ |
}␊ |
return $stats;␊ |
}␊ |
␊ |
public static function cr($t) ␊ |
{ ␊ |
if ($t[1] < 0) return null;␊ |
if ($t[0] <= 0) return null;␊ |
return $t[1]/$t[0]; ␊ |
}␊ |
␊ |
public static function zscore($c, $t) ␊ |
{␊ |
$z = self::cr($t)-self::cr($c);␊ |
$s = (self::cr($t)*(1-self::cr($t)))/$t[0] ␊ |
+ (self::cr($c)*(1-self::cr($c)))/$c[0];␊ |
return $z/sqrt($s);␊ |
}␊ |
␊ |
/**␊ |
* Approximation of the cumulative normal distribution.␊ |
*/␊ |
public static function cumnormdist($x)␊ |
{␊ |
$b1 = 0.319381530;␊ |
$b2 = -0.356563782;␊ |
$b3 = 1.781477937;␊ |
$b4 = -1.821255978;␊ |
$b5 = 1.330274429;␊ |
$p = 0.2316419;␊ |
$c = 0.39894228;␊ |
␊ |
if($x >= 0.0) {␊ |
$t = 1.0 / ( 1.0 + $p * $x );␊ |
return (1.0 - $c * exp( -$x * $x / 2.0 ) * $t *␊ |
( $t *( $t * ( $t * ( $t * $b5 + $b4 ) + $b3 ) + $b2 ) + $b1 ));␊ |
} else {␊ |
$t = 1.0 / ( 1.0 - $p * $x );␊ |
return ( $c * exp( -$x * $x / 2.0 ) * $t *␊ |
( $t *( $t * ( $t * ( $t * $b5 + $b4 ) + $b3 ) + $b2 ) + $b1 ));␊ |
}␊ |
}␊ |
}␊ |