Root/
<?php
/* -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
# ***** BEGIN LICENSE BLOCK *****
# This file is part of Plume Framework, a simple PHP Application Framework.
# Copyright (C) 2001-2010 Loic d'Anterroches and contributors.
#
# Plume Framework is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
#
# Plume Framework is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
#
# ***** END LICENSE BLOCK ***** */

/**
 * Core A/B testing component.
 *
 * The two important methods are `test` and `convert`.
 *
 * For performance reasons, the A/B testing component requires you to
 * setup a cache (APC or Memcached) and use the MongoDB database. The
 * amount of data in the MongoDB should not be that big for most of
 * the websites and as such it is fine if you are using the 32bit
 * version of MongoDB.
 *
 * For the moment the storage is not abstracted to use another database.
 *
 * All the configuration variables for the component start with
 * `pluf_ab_`. You need to add 'Pluf_AB' to your list of middleware.
 *
 */
class Pluf_AB
{
    /**
     * MongoDB database handler.
     *
     * Lazily initialised by getDb() and reused for the rest of the
     * request.
     */
    public static $db = null;

    /**
     * Returns an alternative for a given test.
     *
     * The middleware is already storing the uid of the user and makes
     * it available as $request->pabuid. When it is missing, the uid
     * is computed with getUid() and stored on the request.
     *
     * Steps, in order:
     *
     * - if `pluf_ab_allow_force` is set and the test name is a GET
     *   parameter, the alternative at that index is returned;
     * - the test is created in the `tests` collection if unknown;
     * - an inactive test returns its winner (or the first alternative);
     * - bots always get the first alternative and are not counted;
     * - a user already in the test gets the same alternative again;
     * - else an alternative is drawn, stored in the `intest`
     *   collection and the exposure counters are incremented.
     *
     * @param $test string Unique name of the test
     * @param $request Pluf_HTTP_Request
     * @param $alts array Alternatives to pick from (array(true,false))
     * @param $weights array Weights for the alternatives (null)
     * @param $desc string Optional description of the test ('')
     * @return mixed One value from $alts
     */
    public static function test($test, &$request, $alts=array(true,false),
                                $weights=null, $desc='')
    {
        if (Pluf::f('pluf_ab_allow_force', false)
            and isset($request->GET[$test])) {
            return $alts[$request->GET[$test]];
        }
        $db = self::getDb();
        // Get or set test
        $dtest = $db->tests->findOne(array('_id' => $test),
                                     array('_id', 'active', 'winner'));
        if ($dtest == null) {
            $dtest = array('_id' => $test,
                           'creation_dtime' => gmdate('Y-m-d H:i:s',
                                                      $request->time),
                           'desc' => $desc,
                           'alts' => $alts,
                           'exp' => 0,
                           'conv' => 0,
                           'active' => true);
            // One exposure and one conversion counter per alternative.
            $nalts = count($alts);
            for ($i=0; $i<$nalts; $i++) {
                $dtest['expalt_'.$i] = 0;
                $dtest['convalt_'.$i] = 0;
            }
            $db->tests->update(array('_id' => $test), $dtest,
                               array('upsert' => true));
        } elseif (!$dtest['active']) {
            // If test closed with given alternative, returns alternative
            return (isset($dtest['winner']))
                ? $alts[$dtest['winner']]
                : $alts[0];
        }
        if (!isset($request->pabuid)) {
            $request->pabuid = self::getUid($request);
        }
        if ($request->pabuid == 'bot') {
            return $alts[0];
        }
        // If $request->pabuid in test, returns corresponding alternative
        $id = $test.'##'.$request->pabuid;
        $intest = $db->intest->findOne(array('_id' => $id),
                                       array('_id', 'alt'));
        if ($intest) {
            return $alts[$intest['alt']];
        }
        // Else find alternative, store and return it
        if ($weights == null) {
            // No weights given: every alternative is equally likely.
            $weights = array_fill(0, count($alts), 1.0/count($alts));
        }
        $alt = self::weightedRand($weights);
        $intest = array('_id' => $id,
                        'test' => $test,
                        'pabuid' => $request->pabuid,
                        'first_dtime' => gmdate('Y-m-d H:i:s',
                                                $request->time),
                        'alt' => $alt);
        $db->intest->update(array('_id' => $id), $intest,
                            array('upsert' => true));
        // Update the counts of the test
        $db->tests->update(array('_id' => $test),
                           array('$inc' => array('exp' => 1,
                                                 'expalt_'.$alt => 1)));
        return $alts[$alt];
    }

    /**
     * Mark a test as converted.
     *
     * A user which was not exposed to the test or a bot is not marked
     * as converted as it is not significant. A user is counted at
     * most once per test and only while the test is active.
     *
     * @param $test string Test
     * @param $request Pluf_HTTP_Request
     */
    public static function convert($test, $request)
    {
        if (!isset($request->pabuid) or $request->pabuid == 'bot') {
            return;
        }
        $db = self::getDb();
        $id = $test.'##'.$request->pabuid;
        $intest = $db->intest->findOne(array('_id' => $id),
                                       array('_id', 'alt'));
        if (!$intest) {
            // Not tested
            return;
        }
        $conv = $db->convert->findOne(array('_id' => $id));
        if ($conv) {
            // Already converted
            return;
        }
        $dtest = $db->tests->findOne(array('_id' => $test));
        if (!$dtest or !$dtest['active']) {
            return;
        }
        $conv = array(
                      '_id' => $id,
                      'test' => $test,
                      );
        $db->convert->update(array('_id' => $id), $conv,
                             array('upsert' => true));
        // increment the test counters
        $db->tests->update(array('_id' => $test),
                           array('$inc' => array('conv' => 1,
                                                 'convalt_'.$intest['alt'] => 1)));
    }

    /**
     * Register a property set for the user.
     *
     * This allows you to segment your users with these properties.
     * The properties are merged into $request->pabprops, which
     * process_request() initialises. Nothing is stored for bots.
     *
     * @param $request Pluf_HTTP_Request
     * @param $props array Properties
     */
    public static function register($request, $props)
    {
        $pabuid = (isset($request->pabuid))
            ? $request->pabuid
            : self::getUid($request);
        if ($pabuid == 'bot') {
            return;
        }
        $request->pabuid = $pabuid;
        $request->pabprops = array_merge($request->pabprops, $props);
    }

    /**
     * Track a funnel.
     *
     * The array of properties can be used to track different A/B
     * testing cases.
     *
     * The list of properties must be the same at all the steps of the
     * funnel, you cannot pass array('gender' => 'M') at step 1 and
     * array('age' => 32) at step 2. You need to pass both of them at
     * all steps.
     *
     * A cache key per funnel/step/user is kept for 60 seconds so the
     * same step is logged at most once in that period. Bots are not
     * tracked.
     *
     * @param $funnel string Name of the funnel
     * @param $step int Step in the funnel, from 1 to n
     * @param $stepname string Readable name for the step
     * @param $request Pluf_HTTP_Request Request object
     * @param $props array Array of properties associated with the funnel (array())
     */
    public static function trackFunnel($funnel, $step, $stepname, $request,
                                       $props=array())
    {
        $pabuid = (isset($request->pabuid))
            ? $request->pabuid
            : self::getUid($request);
        if ($pabuid == 'bot') {
            return;
        }
        $request->pabuid = $pabuid;
        $cache = Pluf_Cache::factory();
        $key = 'pluf_ab_funnel_'.crc32($funnel.'#'.$step.'#'.$pabuid);
        if ($cache->get($key, false)) {
            return; // The key is valid 60s not to track 2 steps within 60s
        }
        $cache->set($key, '1', 60);
        $what = array(
                      'f' => $funnel,
                      's' => $step,
                      'sn' => $stepname,
                      't' => (int) gmdate('Ymd', $request->time),
                      'u' => $pabuid,
                      'p' => array_merge($request->pabprops, $props),
                      );
        $db = self::getDb();
        $db->funnellogs->insert($what);
    }

    /**
     * Process the response of a view.
     *
     * If the request has no cookie and the request has a pabuid, set
     * the cookie in the response. If properties were registered, they
     * are signed with Pluf_Sign::dumps and sent as the `pabprops`
     * cookie.
     *
     * @param Pluf_HTTP_Request The request
     * @param Pluf_HTTP_Response The response
     * @return Pluf_HTTP_Response The response
     */
    function process_response($request, $response)
    {
        // A missing pabuid is, as before, treated as "not a bot"; the
        // isset() only avoids reading an undefined property.
        $notbot = (!isset($request->pabuid) or $request->pabuid != 'bot');
        if (!isset($request->COOKIE['pabuid'])
            and isset($request->pabuid)
            and $notbot) {
            $response->cookies['pabuid'] = $request->pabuid;
        }
        if (isset($request->pabprops)
            and count($request->pabprops)
            and $notbot) {
            $response->cookies['pabprops'] = Pluf_Sign::dumps($request->pabprops, null, true);
        }
        return $response;
    }

    /**
     * Process the request.
     *
     * If the request has a valid A/B test cookie, set
     * $request->pabuid. $request->pabprops is always initialised,
     * from the `pabprops` cookie when it can be loaded, else as an
     * empty array.
     *
     * @param Pluf_HTTP_Request The request
     * @return bool False
     */
    function process_request($request)
    {
        if (isset($request->COOKIE['pabuid'])
            and self::check_uid($request->COOKIE['pabuid'])) {
            $request->pabuid = $request->COOKIE['pabuid'];
        }
        $request->pabprops = array();
        if (isset($request->COOKIE['pabprops'])) {
            try {
                $request->pabprops = Pluf_Sign::loads($request->COOKIE['pabprops']);
            } catch (Exception $e) {
                // Best effort: a cookie which cannot be loaded is
                // ignored and the properties stay empty.
            }
        }
        return false;
    }

    /**
     * Get a MongoDB database handle.
     *
     * It opens only one connection per request and tries to keep a
     * persistent connection between the requests.
     *
     * The configuration keys used are:
     *
     * `pluf_ab_mongo_server`: 'mongodb://localhost:27017'
     * `pluf_ab_mongo_options`: array('connect' => true,
     *                                'persist' => 'pluf_ab_mongo')
     * `pluf_ab_mongo_db`: 'pluf_ab'
     *
     * If you have a default installation of MongoDB, it should work
     * out of the box.
     *
     * @return mixed Database handle returned by Mongo::selectDB
     */
    public static function getDb()
    {
        if (self::$db !== null) {
            return self::$db;
        }
        // The server was previously never read from the configuration
        // and an undefined variable was passed to the driver.
        $server = Pluf::f('pluf_ab_mongo_server', 'mongodb://localhost:27017');
        $options = Pluf::f('pluf_ab_mongo_options',
                           array('connect' => true,
                                 'persist' => 'pluf_ab_mongo'));
        $conn = new Mongo($server, $options);
        self::$db = $conn->selectDB(Pluf::f('pluf_ab_mongo_db', 'pluf_ab'));
        return self::$db;
    }

    /**
     * Get the uid of a given request.
     *
     * In order: a valid `pabuid` cookie is reused; a request without
     * user agent or with a bot user agent gets 'bot'; a uid cached
     * for the same remote address/user agent is reused and its
     * lifetime extended by one hour; else a new uid is made and
     * cached for one hour.
     *
     * @param $request Pluf_HTTP_Request
     * @return string The uid or 'bot'
     */
    public static function getUid($request)
    {
        if (isset($request->COOKIE['pabuid'])
            and self::check_uid($request->COOKIE['pabuid'])) {
            return $request->COOKIE['pabuid'];
        }
        if (!isset($request->SERVER['HTTP_USER_AGENT'])
            or self::isBot($request->SERVER['HTTP_USER_AGENT'])) {
            return 'bot';
        }
        // Here we need to make an uid, first check if a user with
        // same ip/agent exists and was last seen within the last 1h.
        // We get that from MemcacheDB
        $cache = Pluf_Cache::factory();
        $key = 'pluf_ab_'.crc32($request->remote_addr.'#'.$request->SERVER['HTTP_USER_AGENT']);
        if ($uid = $cache->get($key, null)) {
            $cache->set($key, $uid, 3600);
            return $uid;
        }
        $uid = self::make_uid($request);
        $cache->set($key, $uid, 3600);
        return $uid;
    }

    /**
     * Check if a given user agent is a bot.
     *
     * A user agent is considered a bot when it contains one of the
     * known keywords (case insensitive), when "bot" is preceded or
     * followed by a separator character, or when it contains no
     * opening parenthesis at all.
     *
     * @param $user_agent string User agent string
     * @return bool True if the user agent is a bot
     */
    public static function isBot($user_agent)
    {
        static $bots = array('robot', 'checker', 'crawl', 'discovery',
                             'hunter', 'scanner', 'spider', 'sucker', 'larbin',
                             'slurp', 'libwww', 'lwp', 'yandex', 'netcraft',
                             'wget', 'twiceler');
        static $pbots = array('/bot[\s_+:,\.\;\/\\\-]/i',
                              '/[\s_+:,\.\;\/\\\-]bot/i');
        foreach ($bots as $r) {
            if (false !== stristr($user_agent, $r)) {
                return true;
            }
        }
        foreach ($pbots as $p) {
            if (preg_match($p, $user_agent)) {
                return true;
            }
        }
        if (false === strpos($user_agent, '(')) {
            return true;
        }
        return false;
    }

    /**
     * Returns a random weighted alternative.
     *
     * Given a series of weighted alternative in the format:
     *
     * <pre>
     * array('alt1' => 0.2,
     *       'alt2' => 0.3,
     *       'alt3' => 0.5);
     * </pre>
     *
     * Returns the key of the selected alternative. In the following
     * example, the alternative 3 (alt3) has a 50% chance to be
     * selected, if the selected the results would be 'alt3'.
     *
     * If the draw ends above the cumulated weights (floating point
     * rounding, or weights with a sum below 1.0), the last key is
     * returned instead of nothing, so that the caller always gets a
     * valid key. Null is returned only for an empty array.
     *
     * @param $weights array Weighted alternatives
     * @return mixed Key of the selected $weights array
     */
    public static function weightedRand($weights)
    {
        $r = mt_rand(1, 10000);
        $offset = 0;
        $last = null;
        foreach ($weights as $k => $w) {
            $offset += $w*10000;
            if ($r <= $offset) {
                return $k;
            }
            $last = $k;
        }
        return $last;
    }

    /**
     * Given a request, make a corresponding A/B test UID.
     *
     * The UID is based on the time, the remote address, a random
     * component and is hashed to ensure the integrity and avoid the
     * need of a database hit when controlled.
     *
     * The format is 3 blocks of 8 hexadecimal characters followed by
     * the 8 characters of the checksum verified by check_uid().
     *
     * @param $request Pluf_HTTP_Request
     * @return string UID
     */
    public static function make_uid($request)
    {
        $base = sprintf('%08X%08X%08X', $request->time,
                        sprintf('%u', crc32($request->remote_addr)),
                        rand());
        return sprintf('%s%08X', $base,
                       sprintf('%u', crc32($base.md5(Pluf::f('secret_key')))));
    }

    /**
     * Validate the uid in the cookie.
     *
     * @see self::make_uid
     *
     * @param $uid string The UID
     * @return bool True if the UID is valid
     */
    public static function check_uid($uid)
    {
        // The cookie is user input, it can be an array.
        if (!is_string($uid) or strlen($uid) != 32) {
            return false;
        }
        $check = sprintf('%08X',
                         sprintf('%u', crc32(substr($uid, 0, 24).md5(Pluf::f('secret_key')))));
        // Strict comparison: with == two different checksums looking
        // like numbers (for example '0E123456' and '0E654321') are
        // compared as numbers and can be equal.
        return ($check === substr($uid, -8));
    }

    /* ------------------------------------------------------------
     *
     *                  Statistics Functions
     *
     * Note: I am not a statistician, use at your own risk!
     *
     * ------------------------------------------------------------ */

    /**
     * Given a conversion rate calculate the recommended sample sizes.
     *
     * The sample sizes is calculated to be significant at 95% in the
     * case of a variation of conversion with respect to the other
     * alternative of 25%, 15% and 5%.
     *
     * @param $conv Conversion rate ]0.0;1.0]
     * @return array The 3 sample sizes for 25%, 15% and 5%, empty
     *               array if the conversion rate is not positive
     */
    public static function ssize($conv)
    {
        if ($conv <= 0) {
            // Out of the valid range, would be a division by zero.
            return array();
        }
        $a = 3.84145882689; // $a = pow(inverse_ncdf(1-(1-0.95)/2),2)
        $res = array();
        $bs = array(0.0625, 0.0225, 0.0025); // 0.25^2, 0.15^2, 0.05^2
        foreach ($bs as $b) {
            $res[] = (int) ((1-$conv)*$a/($b*$conv));
        }
        return $res;
    }

    /**
     * Given a test, returns the corresponding stats.
     *
     * One entry is returned per alternative, sorted by decreasing
     * conversion rate, with the keys: alt, convr, conv, exp, nconvr,
     * ssize, comp, zscore, conf and better.
     *
     * The second best alternative is the base case. The keys comp,
     * zscore, conf and better are false for the base case itself,
     * when there is no base case with a positive conversion rate and
     * for an alternative without conversion rate (no exposure). The
     * keys zscore and conf also stay false when the Z-score cannot
     * be computed.
     *
     * @param $test array Test definition and results
     * @return array Statistics for the test
     */
    public static function getTestStats($test)
    {
        $stats = array(); // Will store the stats
        $n = count($test['alts']);
        $aconvr = array(); // All the conversion rates to sort the alternatives
        for ($i=0; $i<$n; $i++) {
            $conv = (isset($test['convalt_'.$i])) ? $test['convalt_'.$i] : 0;
            $exp = (isset($test['expalt_'.$i])) ? $test['expalt_'.$i] : 0;
            $convr = self::cr(array($exp, $conv));
            $nconvr = ($convr !== null)
                ? sprintf('%01.2f%%', $convr*100.0)
                : 'N/A';
            $ssize = ($convr !== null and $convr > 0)
                ? self::ssize($convr)
                : array();
            $stats[] = array('alt' => $i,
                             'convr' => $convr,
                             'conv' => $conv,
                             'exp' => $exp,
                             'nconvr' => $nconvr,
                             'ssize' => $ssize);
            $aconvr[] = ($convr === null) ? 0 : $convr;
        }
        array_multisort($aconvr, SORT_DESC, $stats);
        // We want the best to be significantly better than the second best.
        // With a single alternative there is no second best.
        $base = (isset($stats[1])) ? $stats[1] : null;
        for ($i=0; $i<$n; $i++) {
            $convr = $stats[$i]['convr'];
            $exp = $stats[$i]['exp'];
            $conv = $stats[$i]['conv'];
            $comp = false;
            $zscore = false;
            $conf = false;
            $better = false;
            if ($i != 1 and $base !== null and $base['convr'] > 0
                and $convr !== null) {
                // Compare with base case and get confidence/Z-score
                $comp = 100.0 * (float) ($convr - $base['convr'])
                    / (float) ($base['convr']);
                if ($comp > 0) {
                    $better = true;
                }
                $comp = sprintf('%01.2f%%', $comp);
                $z = self::zscore(array($base['exp'], $base['conv']),
                                  array($exp, $conv));
                if ($z !== null) {
                    $conf = sprintf('%01.2f%%', self::cumnormdist($z)*100.0);
                    $zscore = sprintf('%01.2f', $z);
                }
            }
            $stats[$i]['comp'] = $comp;
            $stats[$i]['zscore'] = $zscore;
            $stats[$i]['conf'] = $conf;
            $stats[$i]['better'] = $better;
        }
        return $stats;
    }

    /**
     * Conversion rate of an (exposures, conversions) pair.
     *
     * @param $t array array(exposures, conversions)
     * @return mixed Conversions divided by exposures, null if the
     *               conversions are negative or the exposures are
     *               not positive
     */
    public static function cr($t)
    {
        if ($t[1] < 0) return null;
        if ($t[0] <= 0) return null;
        return $t[1]/$t[0];
    }

    /**
     * Z-score of a tested alternative against a control alternative.
     *
     * It is the difference of the conversion rates divided by the
     * square root of the sum of cr*(1-cr)/exposures of both sides.
     *
     * @param $c array Control as array(exposures, conversions)
     * @param $t array Tested as array(exposures, conversions)
     * @return mixed Z-score, null if it cannot be computed (one side
     *               has no conversion rate or the sum is not positive)
     */
    public static function zscore($c, $t)
    {
        $crt = self::cr($t);
        $crc = self::cr($c);
        if ($crt === null or $crc === null) {
            // No exposure on one side, would be a division by zero.
            return null;
        }
        $s = ($crt*(1-$crt))/$t[0] + ($crc*(1-$crc))/$c[0];
        if ($s <= 0) {
            // For example both rates at 0 or at 1.
            return null;
        }
        return ($crt-$crc)/sqrt($s);
    }

    /**
     * Approximation of the cumulative normal distribution.
     *
     * The same polynomial in $t = 1/(1 + $p*abs($x)) is used for both
     * signs, for a negative $x the result is not subtracted from 1.0.
     *
     * @param $x float Value, for example a Z-score
     * @return float Approximated cumulated value for $x
     */
    public static function cumnormdist($x)
    {
        $b1 =  0.319381530;
        $b2 = -0.356563782;
        $b3 =  1.781477937;
        $b4 = -1.821255978;
        $b5 =  1.330274429;
        $p  =  0.2316419;
        $c  =  0.39894228;

        if ($x >= 0.0) {
            $t = 1.0 / ( 1.0 + $p * $x );
            return (1.0 - $c * exp( -$x * $x / 2.0 ) * $t *
                    ( $t *( $t * ( $t * ( $t * $b5 + $b4 ) + $b3 ) + $b2 ) + $b1 ));
        } else {
            $t = 1.0 / ( 1.0 - $p * $x );
            return ( $c * exp( -$x * $x / 2.0 ) * $t *
                     ( $t *( $t * ( $t * ( $t * $b5 + $b4 ) + $b3 ) + $b2 ) + $b1 ));
        }
    }
}