| <?php␊ |
| /* -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */␊ |
| /*␊ |
| # ***** BEGIN LICENSE BLOCK *****␊ |
| # This file is part of InDefero, an open source project management application.␊ |
| # Copyright (C) 2008 Céondo Ltd and contributors.␊ |
| #␊ |
| # InDefero is free software; you can redistribute it and/or modify␊ |
| # it under the terms of the GNU General Public License as published by␊ |
| # the Free Software Foundation; either version 2 of the License, or␊ |
| # (at your option) any later version.␊ |
| #␊ |
| # InDefero is distributed in the hope that it will be useful,␊ |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of␊ |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the␊ |
| # GNU General Public License for more details.␊ |
| #␊ |
| # You should have received a copy of the GNU General Public License␊ |
| # along with this program; if not, write to the Free Software␊ |
| # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA␊ |
| #␊ |
| # Based on work under GNU LGPL copyright, from the Pluf Framework␊ |
| # Copyright (C) 2001-2007 Loic d'Anterroches and contributors.␊ |
| #␊ |
| # ***** END LICENSE BLOCK ***** */␊ |
| ␊ |
| /**␊ |
| * Class implementing the search engine␊ |
| *␊ |
| * It is a modified version of the Pluf_Search class to be able to␊ |
| * cluster the results by project.␊ |
| */␊ |
class IDF_Search extends Pluf_Search
{
    /**
     * Search.
     *
     * The query is HTML-entity decoded, cleaned, tokenized and, unless
     * the stemmer is disabled, stemmed. Each resulting word is then
     * resolved to its id and the lookup is delegated to
     * mySearchDocuments().
     *
     * Returns an array of array with model_class, model_id and
     * score. The list is already sorted by score descending.
     *
     * An empty array is returned when the query contains no word or
     * when at least one of its words is not in the index (every word
     * must match, so an unknown word means no document can match).
     *
     * You can then filter the list as you wish with another set of
     * weights.
     *
     * @param string Query string.
     * @param IDF_Project Project to limit the results, only its id is
     *                    used (null)
     * @param string Stemmer class, null to disable the stemming.
     * @return array Results.
     */
    public static function mySearch($query, $project=null, $stemmer='Pluf_Text_Stemmer_Porter')
    {
        $query = Pluf_Text::cleanString(html_entity_decode($query, ENT_QUOTES, 'UTF-8'));
        $words = Pluf_Text::tokenize($query);
        if ($stemmer != null) {
            $words = self::stem($words, $stemmer);
        }
        // The words are the keys, the values are the occurrence counts.
        $words_flat = array_keys($words);
        if (count($words_flat) == 0) {
            // Nothing to look for, do not hit the database.
            return array();
        }
        $word_ids = self::getWordIds($words_flat);
        // Strict check, same as in index(): only a real null marks an
        // unknown word.
        if (in_array(null, $word_ids, true)) {
            return array();
        }
        return self::mySearchDocuments($word_ids, $project);
    }

    /**
     * Search documents.
     *
     * Only the total of the ponderated occurrences is used to sort the
     * results. A document is returned only if the number of matching
     * occurrence rows is equal to the number of given word ids.
     *
     * @param array Ids of the words.
     * @param IDF_Project Project to limit the search, null for no limit.
     * @return array Sorted by score, returns model_class, model_id and score.
     */
    public static function mySearchDocuments($wids, $project)
    {
        if (count($wids) == 0) {
            // Without ids the WHERE clause would be empty and the
            // query invalid.
            return array();
        }
        $db =& Pluf::db();
        $gocc = new IDF_Search_Occ();
        $where = array();
        foreach ($wids as $id) {
            $where[] = $db->qn('word').'='.(int)$id;
        }
        $prj = (is_null($project)) ? '' : ' AND project='.(int)$project->id;
        // The OR group must be in parentheses: AND binds tighter than
        // OR, so without them the project condition would only apply
        // to the last word.
        $select = 'SELECT model_class, model_id, SUM(pondocc) AS score FROM '.$gocc->getSqlTable()
            .' WHERE ('.implode(' OR ', $where).')'.$prj
            .' GROUP BY model_class, model_id HAVING COUNT(*)='.count($wids)
            .' ORDER BY score DESC';
        return $db->select($select);
    }

    /**
     * Index a document.
     *
     * See Pluf_Search for the disclaimer and information.
     *
     * The previous occurrences of the document are deleted, then one
     * occurrence is created per word id with the project of the
     * document. Finally, the indexation counter of the document is
     * created or incremented.
     *
     * @param Pluf_Model Document to index. It must provide _toIndex()
     *                   and get_project().
     * @param Stemmer used, null to disable. ('Pluf_Text_Stemmer_Porter')
     * @return array Statistics: 'total' number of words, 'new' words
     *               added to the index and 'unique' number of words.
     */
    public static function index($doc, $stemmer='Pluf_Text_Stemmer_Porter')
    {
        $words = Pluf_Text::tokenize($doc->_toIndex());
        if ($stemmer != null) {
            $words = self::stem($words, $stemmer);
        }
        // Get the total number of words.
        $total = 0.0;
        $words_flat = array();
        foreach ($words as $word => $occ) {
            $total += (float) $occ;
            $words_flat[] = $word;
        }
        // Drop the last indexation.
        $gocc = new IDF_Search_Occ();
        $sql = new Pluf_SQL('DELETE FROM '.$gocc->getSqlTable().' WHERE model_class=%s AND model_id=%s', array($doc->_model, $doc->id));
        $db =& Pluf::db();
        $db->execute($sql->gen());
        // Get the ids for each word, null marks a word not yet known.
        $ids = self::getWordIds($words_flat);
        // Insert a new word for the missing words and add the occ.
        $n = count($ids);
        $new_words = 0;
        // Ids already stored for this document, to create at most one
        // occurrence per word id.
        $done = array();
        for ($i=0;$i<$n;$i++) {
            if ($ids[$i] === null) {
                $word = new Pluf_Search_Word();
                $word->word = $words_flat[$i];
                $word->create();
                $ids[$i] = $word->id;
                $new_words++;
            }
            if (isset($done[$ids[$i]])) {
                continue;
            }
            $done[$ids[$i]] = true;
            $occ = new IDF_Search_Occ();
            $occ->word = new Pluf_Search_Word($ids[$i]);
            $occ->model_class = $doc->_model;
            $occ->model_id = $doc->id;
            $occ->project = $doc->get_project();
            $occ->occ = $words[$words_flat[$i]];
            // Ponderated occurrence: share of this word in the document.
            // $total cannot be zero here, the loop only runs when the
            // document has at least one word.
            $occ->pondocc = $words[$words_flat[$i]]/$total;
            $occ->create();
        }
        // Update the stats.
        $sql = new Pluf_SQL('model_class=%s AND model_id=%s',
                            array($doc->_model, $doc->id));
        $last_index = Pluf::factory('Pluf_Search_Stats')->getList(array('filter' => $sql->gen()));
        if ($last_index->count() == 0) {
            // First indexation of this document.
            $stats = new Pluf_Search_Stats();
            $stats->model_class = $doc->_model;
            $stats->model_id = $doc->id;
            $stats->indexations = 1;
            $stats->create();
        } else {
            $last_index[0]->indexations += 1;
            $last_index[0]->update();
        }
        return array('total' => $total, 'new' => $new_words, 'unique'=>$n);
    }
}