<?php␊ |
/* -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */␊ |
/*␊ |
# ***** BEGIN LICENSE BLOCK *****␊ |
# This file is part of InDefero, an open source project management application.␊ |
# Copyright (C) 2008 Céondo Ltd and contributors.␊ |
#␊ |
# InDefero is free software; you can redistribute it and/or modify␊ |
# it under the terms of the GNU General Public License as published by␊ |
# the Free Software Foundation; either version 2 of the License, or␊ |
# (at your option) any later version.␊ |
#␊ |
# InDefero is distributed in the hope that it will be useful,␊ |
# but WITHOUT ANY WARRANTY; without even the implied warranty of␊ |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the␊ |
# GNU General Public License for more details.␊ |
#␊ |
# You should have received a copy of the GNU General Public License␊ |
# along with this program; if not, write to the Free Software␊ |
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA␊ |
#␊ |
# Based on work under GNU LGPL copyright, from the Pluf Framework␊ |
# Copyright (C) 2001-2007 Loic d'Anterroches and contributors.␊ |
#␊ |
# ***** END LICENSE BLOCK ***** */␊ |
␊ |
/**
 * Class implementing the search engine.
 *
 * It is a modified version of the Pluf_Search class to be able to
 * cluster the results by project.
 */
class IDF_Search extends Pluf_Search
{
    /**
     * Search.
     *
     * Returns an array of array with model_class, model_id and
     * score. The list is already sorted by score descending.
     *
     * You can then filter the list as you wish with another set of
     * weights.
     *
     * An empty list is returned when the query yields no word at all
     * or when at least one of its words is unknown to the index.
     *
     * @param string Query string.
     * @param object Project used to limit the results; its "id"
     *               property is read. Null to search everywhere (null)
     * @param string Stemmer class.
     * @return array Results.
     */
    public static function mySearch($query, $project=null, $stemmer='Pluf_Text_Stemmer_Porter')
    {
        $query = Pluf_Text::cleanString(html_entity_decode($query, ENT_QUOTES, 'UTF-8'));
        $words = Pluf_Text::tokenize($query);
        if ($stemmer != null) {
            $words = self::stem($words, $stemmer);
        }
        // Only the words themselves are needed here, not their counts.
        $words_flat = array();
        foreach ($words as $word => $c) {
            $words_flat[] = $word;
        }
        $word_ids = self::getWordIds($words_flat);
        // Nothing to look for, or one of the words is not indexed:
        // as every word must match, no document can match. The
        // strict flag mirrors the "=== null" test done in index().
        if (count($word_ids) == 0 || in_array(null, $word_ids, true)) {
            return array();
        }
        return self::mySearchDocuments($word_ids, $project);
    }

    /**
     * Search documents.
     *
     * Only the total of the ponderated occurrences is used to sort the
     * results. A document is returned only if it has one occurrence
     * row for each of the given word ids.
     *
     * @param array Ids.
     * @param IDF_Project Project to limit the search, null for no limit.
     * @return array Sorted by score, returns model_class, model_id and score.
     */
    public static function mySearchDocuments($wids, $project)
    {
        // Without any word id the WHERE clause would be empty and the
        // generated SQL invalid.
        if (empty($wids)) {
            return array();
        }
        $db =& Pluf::db();
        $gocc = new IDF_Search_Occ();
        $where = array();
        foreach ($wids as $id) {
            $where[] = $db->qn('word').'='.(int)$id;
        }
        $prj = (is_null($project)) ? '' : ' AND project='.(int)$project->id;
        // The OR group is parenthesized because AND binds tighter
        // than OR: without the parentheses the project restriction
        // would apply to the last word only.
        $select = 'SELECT model_class, model_id, SUM(pondocc) AS score FROM '.$gocc->getSqlTable()
            .' WHERE ('.implode(' OR ', $where).')'.$prj
            .' GROUP BY model_class, model_id'
            .' HAVING COUNT(*)='.count($wids)
            .' ORDER BY score DESC';
        return $db->select($select);
    }

    /**
     * Index a document.
     *
     * See Pluf_Search for the disclaimer and information.
     *
     * The previous occurrences of the document are dropped and
     * replaced by the new ones, then the indexation counter of the
     * document is created or incremented.
     *
     * @param Pluf_Model Document to index. It must provide the
     *                   _toIndex() and get_project() methods.
     * @param Stemmer used. ('Pluf_Text_Stemmer_Porter')
     * @return array Statistics: 'total' is the sum of the word
     *               occurrences, 'new' the number of words added to
     *               the dictionary and 'unique' the number of
     *               distinct words of the document.
     */
    public static function index($doc, $stemmer='Pluf_Text_Stemmer_Porter')
    {
        $words = Pluf_Text::tokenize($doc->_toIndex());
        if ($stemmer != null) {
            $words = self::stem($words, $stemmer);
        }
        // Get the total number of words.
        $total = 0.0;
        $words_flat = array();
        foreach ($words as $word => $occ) {
            $total += (float) $occ;
            $words_flat[] = $word;
        }
        // Drop the last indexation.
        $gocc = new IDF_Search_Occ();
        $sql = new Pluf_SQL('DELETE FROM '.$gocc->getSqlTable().' WHERE model_class=%s AND model_id=%s', array($doc->_model, $doc->id));
        $db =& Pluf::db();
        $db->execute($sql->gen());
        // Get the ids for each word.
        $ids = self::getWordIds($words_flat);
        // Insert a new word for the missing words and add the occ.
        $n = count($ids);
        $new_words = 0;
        // Word ids already stored for this document, to never create
        // two occurrence rows for the same word id.
        $done = array();
        for ($i=0; $i<$n; $i++) {
            if ($ids[$i] === null) {
                $word = new Pluf_Search_Word();
                $word->word = $words_flat[$i];
                $word->create();
                $ids[$i] = $word->id;
                $new_words++;
            }
            if (isset($done[$ids[$i]])) {
                continue;
            }
            $done[$ids[$i]] = true;
            $occ = new IDF_Search_Occ();
            $occ->word = new Pluf_Search_Word($ids[$i]);
            $occ->model_class = $doc->_model;
            $occ->model_id = $doc->id;
            $occ->project = $doc->get_project();
            $occ->occ = $words[$words_flat[$i]];
            // Ponderated occurrence: share of this word in the
            // document. $total cannot be zero here as the loop only
            // runs when there is at least one word.
            $occ->pondocc = $words[$words_flat[$i]]/$total;
            $occ->create();
        }
        // update the stats
        $sql = new Pluf_SQL('model_class=%s AND model_id=%s',
                            array($doc->_model, $doc->id));
        $last_index = Pluf::factory('Pluf_Search_Stats')->getList(array('filter' => $sql->gen()));
        if ($last_index->count() == 0) {
            $stats = new Pluf_Search_Stats();
            $stats->model_class = $doc->_model;
            $stats->model_id = $doc->id;
            $stats->indexations = 1;
            $stats->create();
        } else {
            $last_index[0]->indexations += 1;
            $last_index[0]->update();
        }
        return array('total' => $total, 'new' => $new_words, 'unique' => $n);
    }
}