| <?php␊ |
| /* -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */␊ |
| /*␊ |
| # ***** BEGIN LICENSE BLOCK *****␊ |
| # This file is part of Plume Framework, a simple PHP Application Framework.␊ |
| # Copyright (C) 2001-2010 Loic d'Anterroches and contributors.␊ |
| #␊ |
| # Plume Framework is free software; you can redistribute it and/or modify␊ |
| # it under the terms of the GNU Lesser General Public License as published by␊ |
| # the Free Software Foundation; either version 2.1 of the License, or␊ |
| # (at your option) any later version.␊ |
| #␊ |
| # Plume Framework is distributed in the hope that it will be useful,␊ |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of␊ |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the␊ |
| # GNU Lesser General Public License for more details.␊ |
| #␊ |
| # You should have received a copy of the GNU Lesser General Public License␊ |
| # along with this program; if not, write to the Free Software␊ |
| # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA␊ |
| #␊ |
| # ***** END LICENSE BLOCK ***** */␊ |
| ␊ |
/**
 * Core A/B testing component.
 *
 * The two important methods are `test` and `convert`:
 *
 * - `test` assigns the visitor to one alternative of a test (or
 *   returns the alternative already assigned) and counts the exposure;
 * - `convert` records that the visitor converted for a test.
 *
 * For performance reasons, the A/B testing component requires you to
 * set up a cache (APC or Memcached) and use the MongoDB database. The
 * amount of data in the MongoDB should not be that big for most of
 * the websites and as such it is fine if you are using the 32bit
 * version of MongoDB.
 *
 * For the moment the storage is not abstracted to use another database.
 *
 * All the configuration variables for the component start with
 * `pluf_ab_`. You need to add 'Pluf_AB' to your list of middleware.
 *
 */
class Pluf_AB
{
    /**
     * MongoDB database handler, lazily created by self::getDb().
     */
    public static $db = null;

    /**
     * Returns an alternative for a given test.
     *
     * The middleware is already storing the uid of the user and makes
     * it available as $request->pabuid.
     *
     * When the configuration key `pluf_ab_allow_force` is true, a GET
     * parameter named after the test can force an alternative by
     * giving its key in $alts. A value which is not a key of $alts
     * is ignored and the normal flow is used.
     *
     * @param $test string Unique name of the test
     * @param $request Pluf_HTTP_Request
     * @param $alts array Alternatives to pick from (array(true,false))
     * @param $weights array Weights for the alternatives (null)
     * @param $desc string Optional description of the test ('')
     * @return mixed One value from $alts
     */
    public static function test($test, &$request, $alts=array(true,false),
                                $weights=null, $desc='')
    {
        if (Pluf::f('pluf_ab_allow_force', false)
            && isset($request->GET[$test])) {
            $forced = $request->GET[$test];
            // The value comes from the query string: only accept a
            // scalar which is really a key of the alternatives.
            if ((is_string($forced) || is_int($forced))
                && array_key_exists($forced, $alts)) {
                return $alts[$forced];
            }
        }
        $db = self::getDb();
        // Get or set test
        $dtest = $db->tests->findOne(array('_id' => $test),
                                     array('_id', 'active', 'winner'));
        if (!$dtest) {
            $dtest = array('_id' => $test,
                           'creation_dtime' => gmdate('Y-m-d H:i:s',
                                                      $request->time),
                           'desc' => $desc,
                           'alts' => $alts,
                           'exp' => 0,
                           'conv' => 0,
                           'active' => true);
            $nalts = count($alts);
            for ($i=0; $i<$nalts; $i++) {
                $dtest['expalt_'.$i] = 0;
                $dtest['convalt_'.$i] = 0;
            }
            $db->tests->update(array('_id' => $test), $dtest,
                               array('upsert' => true));
        } elseif (!$dtest['active']) {
            // If test closed with given alternative, returns alternative
            return (isset($dtest['winner']))
                ? $alts[$dtest['winner']]
                : $alts[0];
        }
        if (!isset($request->pabuid)) {
            $request->pabuid = self::getUid($request);
        }
        if ($request->pabuid == 'bot') {
            // Bots are never enrolled, they always see the first one.
            return $alts[0];
        }
        // If $request->pabuid in test, returns corresponding alternative
        $id = $test.'##'.$request->pabuid;
        $intest = $db->intest->findOne(array('_id' => $id),
                                       array('_id', 'alt'));
        if ($intest) {
            return $alts[$intest['alt']];
        }
        // Else find alternative, store and return it
        if ($weights == null) {
            $weights = array_fill(0, count($alts), 1.0/count($alts));
        }
        $alt = self::weightedRand($weights);
        $intest = array('_id' => $id,
                        'test' => $test,
                        'pabuid' => $request->pabuid,
                        'first_dtime' => gmdate('Y-m-d H:i:s',
                                                $request->time),
                        'alt' => $alt);
        $db->intest->update(array('_id' => $id), $intest,
                            array('upsert' => true));
        // Update the counts of the test
        $db->tests->update(array('_id' => $test),
                           array('$inc' => array('exp' => 1,
                                                 'expalt_'.$alt => 1)));
        return $alts[$alt];
    }

    /**
     * Mark a test as converted.
     *
     * A user which was not exposed to the test or a bot is not marked
     * as converted as it is not significant. A user is counted at
     * most once per test and nothing is recorded for a test which is
     * unknown or not active.
     *
     * @param $test string Test
     * @param $request Pluf_HTTP_Request
     */
    public static function convert($test, $request)
    {
        if (!isset($request->pabuid) || $request->pabuid == 'bot') {
            return;
        }
        $db = self::getDb();
        $id = $test.'##'.$request->pabuid;
        $intest = $db->intest->findOne(array('_id' => $id),
                                       array('_id', 'alt'));
        if (!$intest) {
            // Not tested
            return;
        }
        if ($db->convert->findOne(array('_id' => $id))) {
            // Already converted
            return;
        }
        $dtest = $db->tests->findOne(array('_id' => $test));
        if (!$dtest || !$dtest['active']) {
            return;
        }
        $conv = array(
                      '_id' => $id,
                      'test' => $test,
                      );
        $db->convert->update(array('_id' => $id), $conv,
                             array('upsert' => true));
        // increment the test counters
        $db->tests->update(array('_id' => $test),
                           array('$inc' => array('conv' => 1,
                                                 'convalt_'.$intest['alt'] => 1)));
    }

    /**
     * Process the response of a view.
     *
     * If the request has no cookie and the request has a pabuid
     * which is not the one of a bot, set the cookie in the response.
     *
     * @param Pluf_HTTP_Request The request
     * @param Pluf_HTTP_Response The response
     * @return Pluf_HTTP_Response The response
     */
    public function process_response($request, $response)
    {
        if (!isset($request->COOKIE['pabuid'])
            && isset($request->pabuid)
            && $request->pabuid != 'bot') {
            $response->cookies['pabuid'] = $request->pabuid;
        }
        return $response;
    }

    /**
     * Process the request.
     *
     * If the request has a valid A/B test cookie, set $request->pabuid.
     *
     * @param Pluf_HTTP_Request The request
     * @return bool False
     */
    public function process_request($request)
    {
        if (isset($request->COOKIE['pabuid'])
            && self::check_uid($request->COOKIE['pabuid'])) {
            $request->pabuid = $request->COOKIE['pabuid'];
        }
        return false;
    }

    /**
     * Get a MongoDB database handle.
     *
     * It opens only one connection per request and tries to keep a
     * persistent connection between the requests.
     *
     * The configuration keys used are:
     *
     * `pluf_ab_mongo_server`: 'mongodb://localhost:27017'
     * `pluf_ab_mongo_options`: array('connect' => true,
     *                                'persist' => 'pluf_ab_mongo')
     * `pluf_ab_mongo_db`: 'pluf_ab'
     *
     * If you have a default installation of MongoDB, it should work
     * out of the box.
     *
     * @return object Database handle, also stored in self::$db
     */
    public static function getDb()
    {
        if (self::$db === null) {
            $server = Pluf::f('pluf_ab_mongo_server',
                              'mongodb://localhost:27017');
            $options = Pluf::f('pluf_ab_mongo_options',
                               array('connect' => true,
                                     'persist' => 'pluf_ab_mongo'));
            $conn = new Mongo($server, $options);
            self::$db = $conn->selectDB(Pluf::f('pluf_ab_mongo_db',
                                                'pluf_ab'));
        }
        return self::$db;
    }

    /**
     * Get the uid of a given request.
     *
     * The uid is, in order: the valid uid of the cookie, 'bot' if the
     * user agent is missing or is the one of a bot, the uid cached
     * for the same ip/agent within the last hour, or a new uid.
     *
     * @param $request Pluf_HTTP_Request
     * @return string UID or 'bot'
     */
    public static function getUid($request)
    {
        if (isset($request->COOKIE['pabuid'])
            && self::check_uid($request->COOKIE['pabuid'])) {
            return $request->COOKIE['pabuid'];
        }
        if (!isset($request->SERVER['HTTP_USER_AGENT'])
            || self::isBot($request->SERVER['HTTP_USER_AGENT'])) {
            return 'bot';
        }
        // Here we need to make an uid, first check if a user with
        // same ip/agent exists and was last seen within the last 1h.
        // We get that from the configured cache.
        $cache = Pluf_Cache::factory();
        // md5 and not crc32 for the key: with only 32 bits, two
        // different visitors can easily collide and would then
        // silently share the same uid.
        $key = 'pluf_ab_'.md5($request->remote_addr.'#'
                              .$request->SERVER['HTTP_USER_AGENT']);
        $uid = $cache->get($key, null);
        if (!$uid) {
            $uid = self::make_uid($request);
        }
        // Set in both cases to refresh the 1h sliding window.
        $cache->set($key, $uid, 3600);
        return $uid;
    }

    /**
     * Check if a given user agent is a bot.
     *
     * A user agent is considered as a bot if it contains one of the
     * known bot keywords, the word 'bot' next to a separator or if it
     * has no opening parenthesis at all.
     *
     * @param $user_agent string User agent string
     * @return bool True if the user agent is a bot
     */
    public static function isBot($user_agent)
    {
        static $bots = array('robot', 'checker', 'crawl', 'discovery',
                             'hunter', 'scanner', 'spider', 'sucker', 'larbin',
                             'slurp', 'libwww', 'lwp', 'yandex', 'netcraft',
                             'wget', 'twiceler');
        static $pbots = array('/bot[\s_+:,\.\;\/\\\-]/i',
                              '/[\s_+:,\.\;\/\\\-]bot/i');
        foreach ($bots as $word) {
            if (false !== stripos($user_agent, $word)) {
                return true;
            }
        }
        foreach ($pbots as $pattern) {
            if (preg_match($pattern, $user_agent)) {
                return true;
            }
        }
        return (false === strpos($user_agent, '('));
    }

    /**
     * Returns a random weighted alternative.
     *
     * Given a series of weighted alternative in the format:
     *
     * <pre>
     * array('alt1' => 0.2,
     *       'alt2' => 0.3,
     *       'alt3' => 0.5);
     * </pre>
     *
     * Returns the key of the selected alternative. In the example
     * above, the alternative 3 (alt3) has a 50% chance to be
     * selected, if selected the result would be 'alt3'.
     *
     * The weights are expected to sum to 1.0. If, because of the
     * rounding of the floats or of the given weights, the random
     * draw is above the cumulated weights, the last key is returned
     * so that a key is always returned for a non empty array.
     *
     * @link: http://20bits.com/downloads/w_rand.phps
     *
     * @param $weights array Weighted alternatives
     * @return mixed Key of the selected $weights array (null if empty)
     */
    public static function weightedRand($weights)
    {
        $r = mt_rand(1, 10000);
        $offset = 0;
        $last = null;
        foreach ($weights as $k => $w) {
            $offset += $w*10000;
            if ($r <= $offset) {
                return $k;
            }
            $last = $k;
        }
        // 3 weights of 1/3 can sum to a bit less than 10000, without
        // the fallback a draw of 10000 would select nothing.
        return $last;
    }

    /**
     * Given a request, make a corresponding A/B test UID.
     *
     * The UID is based on the time, the remote address, a random
     * component and is hashed to ensure the integrity and avoid the
     * need of a database hit when controlled.
     *
     * The format is 3 blocks of 8 uppercase hexadecimal digits
     * followed by the checksum on 8 digits.
     *
     * @param $request Pluf_HTTP_Request
     * @return string UID
     */
    public static function make_uid($request)
    {
        $base = sprintf('%08X%08X%08X', $request->time,
                        sprintf('%u', crc32($request->remote_addr)),
                        mt_rand());
        return $base.self::uidChecksum($base);
    }

    /**
     * Validate the uid in the cookie.
     *
     * @see self::make_uid
     *
     * @param $uid string The UID
     * @return bool True if the UID is valid
     */
    public static function check_uid($uid)
    {
        // The value is coming from a cookie, it can be an array.
        if (!is_string($uid) || strlen($uid) != 32) {
            return false;
        }
        // Strict comparison: with ==, two different strings looking
        // like numbers ('0001E005' and '00100000') are equal.
        return (self::uidChecksum(substr($uid, 0, 24)) === substr($uid, -8));
    }

    /**
     * Checksum of the base of a UID, keyed with the `secret_key`.
     *
     * Shared by make_uid and check_uid to keep them in sync.
     *
     * @param $base string First 24 characters of the UID
     * @return string Checksum as uppercase hexadecimal digits
     */
    private static function uidChecksum($base)
    {
        return sprintf('%08X',
                       sprintf('%u', crc32($base.md5(Pluf::f('secret_key')))));
    }

    /* ------------------------------------------------------------
     *
     *                  Statistics Functions
     *
     * Note: I am not a statistician, use at your own risk!
     *
     * ------------------------------------------------------------ */

    /**
     * Given a conversion rate calculate the recommended sample sizes.
     *
     * The sample sizes is calculated to be significant at 95% in the
     * case of a variation of conversion with respect to the other
     * alternative of 25%, 15% and 5%.
     *
     * @param $conv Conversion rate ]0.0;1.0]
     * @return array The 3 sample sizes for 25%, 15% and 5%, empty
     *               array if the conversion rate is not positive
     */
    public static function ssize($conv)
    {
        if ($conv <= 0) {
            // Out of the domain, would be a division by zero.
            return array();
        }
        $a = 3.84145882689; // $a = pow(inverse_ncdf(1-(1-0.95)/2),2)
        $res = array();
        // Squares of the variations: 0.25^2, 0.15^2 and 0.05^2
        foreach (array(0.0625, 0.0225, 0.0025) as $b) {
            $res[] = (int) ((1-$conv)*$a/($b*$conv));
        }
        return $res;
    }


    /**
     * Given a test, returns the corresponding stats.
     *
     * One entry per alternative is returned, sorted by decreasing
     * conversion rate, with the keys:
     *
     * - 'alt': index of the alternative;
     * - 'exp', 'conv': number of exposures and conversions;
     * - 'convr', 'nconvr': conversion rate, raw (or null) and
     *   formatted (or 'N/A');
     * - 'ssize': recommended sample sizes, see self::ssize;
     * - 'comp', 'zscore', 'conf': formatted comparison against the
     *   second best alternative, false when it cannot be computed;
     * - 'better': true if better than the second best alternative.
     *
     * @param $test array Test definition and results
     * @return array Statistics for the test
     */
    public static function getTestStats($test)
    {
        $stats = array(); // Will store the stats
        $n = count($test['alts']);
        $aconvr = array(); // All the conversion rates to sort the alternatives
        for ($i=0; $i<$n; $i++) {
            $conv = (isset($test['convalt_'.$i])) ? $test['convalt_'.$i] : 0;
            $exp = (isset($test['expalt_'.$i])) ? $test['expalt_'.$i] : 0;
            $convr = self::cr(array($exp, $conv));
            $stats[] = array('alt' => $i,
                             'convr' => $convr,
                             'conv' => $conv,
                             'exp' => $exp,
                             'nconvr' => ($convr !== null)
                                 ? sprintf('%01.2f%%', $convr*100.0)
                                 : 'N/A',
                             'ssize' => ($convr !== null && $convr > 0)
                                 ? self::ssize($convr)
                                 : array());
            $aconvr[] = ($convr === null) ? 0 : $convr;
        }
        array_multisort($aconvr, SORT_DESC, $stats);
        // We want the best to be significantly better than the second
        // best, so the second best is the base case. It exists only
        // if we have at least 2 alternatives.
        $base = ($n > 1) ? $stats[1] : null;
        for ($i=0; $i<$n; $i++) {
            $convr = $stats[$i]['convr'];
            $comp = false;
            $zscore = false;
            $conf = false;
            $better = false;
            // An alternative without exposure has no conversion rate
            // and cannot be compared.
            if ($base !== null && $i != 1 && $convr !== null
                && $base['convr'] > 0) {
                // Compare with base case and get confidence/Z-score
                $comp = 100.0 * (float) ($convr - $base['convr'])
                    / (float) ($base['convr']);
                $better = ($comp > 0);
                $comp = sprintf('%01.2f%%', $comp);
                $z = self::zscore(array($base['exp'], $base['conv']),
                                  array($stats[$i]['exp'],
                                        $stats[$i]['conv']));
                if ($z !== null) {
                    $conf = sprintf('%01.2f%%', self::cumnormdist($z)*100.0);
                    $zscore = sprintf('%01.2f', $z);
                }
            }
            $stats[$i]['comp'] = $comp;
            $stats[$i]['zscore'] = $zscore;
            $stats[$i]['conf'] = $conf;
            $stats[$i]['better'] = $better;
        }
        return $stats;
    }

    /**
     * Conversion rate.
     *
     * @param $t array array(exposures, conversions)
     * @return mixed Conversions divided by exposures, null if the
     *               conversions are negative or without exposures
     */
    public static function cr($t)
    {
        if ($t[1] < 0 || $t[0] <= 0) {
            return null;
        }
        return $t[1]/$t[0];
    }

    /**
     * Z-score of an alternative against a base case.
     *
     * @param $c array Base case as array(exposures, conversions)
     * @param $t array Alternative as array(exposures, conversions)
     * @return mixed Z-score as float, null if it cannot be computed
     *               (no conversion rate for a sample or no variance)
     */
    public static function zscore($c, $t)
    {
        $crc = self::cr($c);
        $crt = self::cr($t);
        if ($crc === null || $crt === null) {
            return null;
        }
        $s = ($crt*(1-$crt))/$t[0] + ($crc*(1-$crc))/$c[0];
        if ($s <= 0) {
            // Rates of exactly 0 or 1 on both sides, no variance.
            return null;
        }
        return ($crt-$crc)/sqrt($s);
    }

    /**
     * Approximation of the cumulative normal distribution.
     *
     * Polynomial approximation in t = 1/(1 + p|x|), for a negative
     * $x the symmetric 1 - N(-x) is returned.
     *
     * @param $x float Value
     * @return float Approximation of the probability N($x)
     */
    public static function cumnormdist($x)
    {
        $b1 =  0.319381530;
        $b2 = -0.356563782;
        $b3 =  1.781477937;
        $b4 = -1.821255978;
        $b5 =  1.330274429;
        $p  =  0.2316419;
        $c  =  0.39894228;

        $t = 1.0 / (1.0 + $p * abs($x));
        $poly = $t * ($t * ($t * ($t * $b5 + $b4) + $b3) + $b2) + $b1;
        $tail = $c * exp(-$x * $x / 2.0) * $t * $poly;
        return ($x >= 0.0) ? (1.0 - $tail) : $tail;
    }
}