search
[ class tree: search ] [ index: search ] [ all elements ]

Source for file search_lib.php

Documentation is available at search_lib.php

  1. <?php
  2. /**
  3.  * $Header$
  4.  *
  5.  * @copyright (c) 2004 bitweaver.org
  6.  *  Copyright (c) 2003 tikwiki.org
  7.  *  Copyright (c) 2002-2003, Luis Argerich, Garland Foster, Eduardo Polidor, et. al.
  8.  *  All Rights Reserved. See below for details and a complete list of authors.
  9.  *  Licensed under the GNU LESSER GENERAL PUBLIC LICENSE. See http://www.gnu.org/copyleft/lesser.html for details
  10.  *
  11.  *  $Id$
  12.  * @author  Luis Argerich (lrargerich@yahoo.com)
  13.  * @package search
  14.  */
  15.  
  16. /**
  17.  * @package search
  18.  */
  19.  
  /**
   * Full-text search helper built on the search_index, search_words,
   * search_syllable and search_stats tables.
   *
   * Entry point is find(); it tokenises the query, optionally expands it with
   * partial-word ("syllable") matches and delegates to find_exact_generic(),
   * which runs either an OR search or an AND search over the allowed content
   * types.
   */
  class SearchLib extends BitBase {
      /**
       * In-memory cache of word lists already fetched from search_words,
       * keyed by syllable.
       *
       * @var array
       */
      public $wordlist_cache = array();

      function __construct() {
          parent::__construct();
          $this->wordlist_cache = array(); // for caching queries to the LRU-cache-list.
      }

      /**
       * Record every word of a search phrase in the search_stats table,
       * incrementing `hits` for known terms and inserting new terms with 1 hit.
       *
       * @param string $words raw search phrase
       */
      function register_search($words) {
          $words = strtolower($words);
          // NOTE(review): the queries below use bound parameters, so this
          // addslashes() stores terms with literal backslashes. Kept because
          // existing rows were written this way - confirm before removing.
          $words = addslashes($words);
          // Split on whitespace runs and drop empty pieces so that repeated
          // spaces do not register an empty term.
          $words = preg_split('/\s+/', $words, -1, PREG_SPLIT_NO_EMPTY);
          foreach ($words as $word) {
              $word = trim($word);
              if ($word === '') {
                  continue;
              }
              $cant = $this->mDb->getOne("SELECT COUNT(*) FROM `" . BIT_DB_PREFIX .
                  "search_stats` WHERE `term`=?", array($word));
              if ($cant) {
                  $query = "UPDATE `" . BIT_DB_PREFIX . "search_stats` SET `hits`= `hits` + 1 WHERE `term`=?";
              } else {
                  $query = "INSERT INTO `" . BIT_DB_PREFIX . "search_stats` (`term`,`hits`) VALUES (?,1)";
              }
              $this->mDb->query($query, array($word));
          }
      }

      /**
       * Run a search.
       *
       * Reads `words` (string) from $pParamHash and replaces it with the list
       * of lower-cased word tokens. When the `$plUsePart` flag is set the list
       * is extended with words that contain the tokens. The remaining keys
       * (content_type_guid, useAnd, offset, max_records) are consumed by
       * find_exact_generic() and the find_with_* methods; `cant` is written
       * back with the total number of matches.
       *
       * @param array $pParamHash search parameters, modified in place
       * @return array matching content rows
       */
      function find( &$pParamHash ) { // $where, $words, $offset, $max_records, $plUsePart = false) {
          $phrase = isset($pParamHash['words']) ? $pParamHash['words'] : '';
          $pParamHash['words'] = preg_split('/[\W]+/', strtolower($phrase), -1, PREG_SPLIT_NO_EMPTY);
          // NOTE(review): the key really is the literal string '$plUsePart'
          // (single quotes, no interpolation). Kept for compatibility with
          // existing callers - confirm whether 'plUsePart' was intended.
          if ( isset($pParamHash['$plUsePart']) && $pParamHash['$plUsePart'] ) {
              $wordList = $this->get_wordlist_from_syllables( $pParamHash['words'] );
              if ( is_array( $wordList ) ) {
                  $pParamHash['words'] = array_merge( $pParamHash['words'], $wordList );
              }
          }
          return $this->find_exact_generic( $pParamHash );
      }

      /**
       * Expand a list of syllables into the words that contain them.
       *
       * For each syllable the search_syllable table is checked to see how old
       * the stored word list is. If it is too old or does not exist, the list
       * in search_words is rebuilt via refresh_lru_wordlist(). The words are
       * then read through get_lru_wordlist() and the syllable's `last_used`
       * timestamp is updated.
       *
       * @param array $syllables word fragments to expand
       * @return array words collected for all syllables
       */
      function get_wordlist_from_syllables($syllables) {
          global $gBitSystem;
          // NOTE(review): SEARCH_PKG_NAME is passed as the second argument of
          // getConfig(); confirm whether that parameter is a default value,
          // in which case a numeric default is needed here.
          $search_syll_age = $gBitSystem->getConfig( 'search_syll_age', SEARCH_PKG_NAME );
          $ret = array();
          foreach ($syllables as $syllable) {
              $lastUpdated = $this->mDb->getOne(
                  "select `last_updated` from `" . BIT_DB_PREFIX . "search_syllable` where `syllable`=?",
                  array($syllable));
              $age = time() - $lastUpdated;
              if (!$age || $age > ($search_syll_age * 3600)) { // older than search_syll_age hours
                  $this->refresh_lru_wordlist($syllable);
              }
              $lruList = $this->get_lru_wordlist($syllable);
              if (is_array($lruList)) {
                  $ret = array_merge($ret, $lruList);
              }
              // update lru last used value (Used to purge oldest last used records)
              $now = time();
              $this->mDb->query("update `" . BIT_DB_PREFIX . "search_syllable` set `last_used`=? where `syllable`=?",
                  array((int) $now, $syllable));
          }
          return $ret;
      }

      /**
       * Return the words stored in search_words for a syllable.
       *
       * Results are kept in $this->wordlist_cache so that each syllable is
       * queried at most once per instance; a cache hit returns the cached
       * list (previously a hit returned an empty array).
       *
       * @param string $syllable
       * @return array list of words, empty when none are stored
       */
      function get_lru_wordlist($syllable) {
          if (isset($this->wordlist_cache[$syllable])) {
              return $this->wordlist_cache[$syllable];
          }
          $ret = array();
          $query  = "select `searchword` from `" . BIT_DB_PREFIX . "search_words` where `syllable`=?";
          $result = $this->mDb->query($query, array($syllable));
          if ($result->RecordCount() > 0) {
              while ($res = $result->fetchRow()) {
                  $this->wordlist_cache[$syllable][] = $res["searchword"];
              }
              $ret = $this->wordlist_cache[$syllable];
          }
          return $ret;
      }

      /**
       * Rebuild the stored word list for a syllable.
       *
       * Deletes the syllable's rows from search_words and search_syllable,
       * re-reads the most frequent matching words from search_index, stores
       * them again and, at a random rate, purges the least recently used
       * syllables when the list has grown beyond search_lru_length.
       *
       * @param string $syllable
       * @return array the words now stored for the syllable
       */
      function refresh_lru_wordlist($syllable) {
          global $gBitSystem;
          $search_max_syllwords = $gBitSystem->getConfig( 'search_max_syllwords', SEARCH_PKG_NAME );
          $search_lru_length = $gBitSystem->getConfig( 'search_lru_length', SEARCH_PKG_NAME );
          $search_lru_purge_rate = $gBitSystem->getConfig( 'search_lru_purge_rate', SEARCH_PKG_NAME );
          $ret = array();

          // delete from wordlist and lru list
          $this->mDb->query("delete from `" . BIT_DB_PREFIX . "search_words` where `syllable`=?", array($syllable), -1, -1);
          $this->mDb->query("delete from `" . BIT_DB_PREFIX . "search_syllable` where `syllable`=?", array($syllable), -1, -1);
          // The stored list is about to change, so drop the in-memory copy.
          unset($this->wordlist_cache[$syllable]);
          if (!isset($search_max_syllwords)) {
              $search_max_syllwords = 100;
          }
          $query  = "SELECT `searchword`, SUM(`i_count`) AS `cnt` FROM `" . BIT_DB_PREFIX .
                      "search_index` WHERE `searchword` LIKE ? GROUP BY `searchword` ORDER BY 2 desc";
          // search_max_syllwords: how many different search words containing
          // the syllable are taken into account, sorted by number of occurrences.
          $result = $this->mDb->query($query, array('%' . $syllable . '%'), $search_max_syllwords);
          while ($res = $result->fetchRow()) {
              $ret[] = $res["searchword"];
          }
          // cache this long running query
          foreach ($ret as $searchword) {
              $this->mDb->query("INSERT INTO `" . BIT_DB_PREFIX .
                  "search_words` (`syllable`,`searchword`) VALUES (?,?)",
                  array($syllable, $searchword), -1, -1);
          }
          // set lru list parameters
          $now = time();
          $this->mDb->query("INSERT INTO `" . BIT_DB_PREFIX .
              "search_syllable`(`syllable`,`last_used`,`last_updated`) values (?,?,?)",
              array($syllable, (int) $now, (int) $now));

          // at random rate: check length of lru list and purge these that
          // have not been used for long time. This is what a lru list
          // basically does.
          // The generator is seeded automatically, so it is no longer
          // re-seeded here (that also reset the global RNG state for
          // unrelated code).
          if (rand(1, $search_lru_purge_rate) == 1) {
              $lrulength = $this->mDb->getOne("SELECT COUNT(*) FROM `" . BIT_DB_PREFIX .
                  "search_syllable`", array());
              if ($lrulength > $search_lru_length) { // only purge if lru list is too long.
                  // purge oldest
                  $oldwords = array();
                  $diff   = $lrulength - $search_lru_length;
                  $query  = "select `syllable` from `" . BIT_DB_PREFIX . "search_syllable` ORDER BY `last_used` asc";
                  $result = $this->mDb->query($query, array(), $diff);
                  while ($res = $result->fetchRow()) {
                      $oldwords[] = $res["syllable"];
                  }
                  foreach ($oldwords as $oldword) {
                      $this->mDb->query("delete from `" . BIT_DB_PREFIX .
                          "search_words`    where `syllable`=?", array($oldword), -1, -1);
                      $this->mDb->query("delete from `" . BIT_DB_PREFIX .
                          "search_syllable` where `syllable`=?", array($oldword), -1, -1);
                      unset($this->wordlist_cache[$oldword]);
                  }
              }
          }
          return $ret;
      }

      /**
       * OR search: content matching any of the words, paged in the database.
       *
       * Relevancy is the sum of `i_count` over all matching words. Writes the
       * total number of matches to $pParamHash['cant'].
       *
       * @param array  $allowed     content type guids that may be returned
       * @param string $selectSql   extra select SQL, extended by content services
       * @param string $joinSql     extra join SQL, extended by content services
       * @param string $whereSql    extra where SQL, extended by content services
       * @param array  $bindVars    ignored; rebuilt from words and $allowed
       * @param array  $pParamHash  reads words, max_records, offset; writes cant
       * @return array list of rows, each with an added `href`
       */
      function find_with_or($allowed, $selectSql, $joinSql, $whereSql, $bindVars, &$pParamHash) {
          // Putting in the below hack because mssql cannot select distinct on a text blob column.
          $qPlaceHolders1 = implode(',', array_fill(0, count($pParamHash['words']), '?'));
          $bindVars = array_merge( $pParamHash['words'], $allowed );
          LibertyContent::getServicesSql( 'content_list_sql_function', $selectSql, $joinSql, $whereSql, $bindVars );
          $ret = array();
          // ORDER BY 9, 5 = relevancy, hits (positions in the select list below).
          $query = "SELECT
                          lc.`content_id`,
                          lc.`title`,
                          lc.`format_guid`,
                          lc.`content_type_guid`,
                          COALESCE(lch.`hits`,0) AS hits,
                          lc.`created`,
                          lc.`last_modified`,
                          lc.`data`,
                          COALESCE((
                              SELECT SUM(i_count)
                              FROM `" . BIT_DB_PREFIX . "search_index` si
                              WHERE si.`content_id`=lc.`content_id` AND si.`searchword` IN (" . $qPlaceHolders1 . ")
                          ),0) AS relevancy
                          $selectSql
                      FROM `" . BIT_DB_PREFIX . "liberty_content` lc
                      LEFT OUTER JOIN `" . BIT_DB_PREFIX . "liberty_content_hits` lch ON (lc.`content_id` = lch.`content_id`)
                      $joinSql
                      WHERE (
                          SELECT SUM(i_count)
                          FROM `" . BIT_DB_PREFIX . "search_index` si
                          WHERE si.`content_id`=lc.`content_id`
                          AND si.`searchword` IN (" . $qPlaceHolders1 . ")
                          GROUP BY
                          si.`content_id`
                          )>0 $whereSql
                      ORDER BY 9 DESC, 5 DESC
                      ";
          $querycant = "SELECT
                      COUNT(*)
                      FROM `" . BIT_DB_PREFIX . "liberty_content` lc
                      LEFT OUTER JOIN `" . BIT_DB_PREFIX . "liberty_content_hits` lch ON (lc.`content_id` = lch.`content_id`)
                      $joinSql
                      WHERE (
                          SELECT SUM(i_count)
                          FROM `" . BIT_DB_PREFIX . "search_index` si
                          WHERE si.`content_id`=lc.`content_id`
                          AND si.`searchword` IN (" . $qPlaceHolders1 . ")
                          GROUP BY
                          si.`content_id`
                          )>0 $whereSql";
          // The main query has the word list twice (select and where), the
          // count query only once, hence the extra merge for the first.
          $result = $this->mDb->query( $query, array_merge( $pParamHash['words'], $bindVars ), $pParamHash['max_records'], $pParamHash['offset'] );
          $pParamHash['cant'] = $this->mDb->getOne( $querycant, $bindVars );
          while ($res = $result->fetchRow()) {
              $res['href'] = BIT_ROOT_URL . "index.php?content_id=" . $res['content_id'];
              $ret[] = $res;
          }
          return $ret;
      }

      /**
       * AND search: content matching every word.
       *
       * Runs one query per word, intersects the results in PHP (summing the
       * relevancy), then sorts and pages the intersection. Writes the total
       * number of matches to $pParamHash['cant'].
       *
       * @param array  $allowed     content type guids that may be returned
       * @param string $selectSql   extra select SQL, extended by content services
       * @param string $joinSql     extra join SQL, extended by content services
       * @param string $whereSql    extra where SQL, extended by content services
       * @param array  $bindVars    bind variables; slot 0 is reserved for the word
       * @param array  $pParamHash  reads words, offset, max_records; writes cant
       * @return array page of rows, each with an added `href`
       */
      function find_with_and($allowed, $selectSql, $joinSql, $whereSql, $bindVars, &$pParamHash) {
          // Make a slot for the search word.
          $bindVars[0] = NULL;
          $bindVars = array_merge( $bindVars, $allowed );
          LibertyContent::getServicesSql( 'content_list_sql_function', $selectSql, $joinSql, $whereSql, $bindVars );

          // The query text does not depend on the word, so build it once.
          // hash_key shifts every column by one compared with find_with_or():
          // relevancy is column 10 and hits column 6 (the previous "9, 5"
          // ordered by `data` and `content_type_guid`).
          $query = "SELECT lc.`content_id` AS hash_key,
                          lc.`content_id`,
                          lc.`title`,
                          lc.`format_guid`,
                          lc.`content_type_guid`,
                          COALESCE(lch.`hits`,0) AS hits,
                          lc.`created`,
                          lc.`last_modified`,
                          lc.`data`,
                          si.`i_count` AS relevancy
                          $selectSql
                      FROM `" . BIT_DB_PREFIX . "liberty_content` lc
                      LEFT OUTER JOIN `" . BIT_DB_PREFIX . "liberty_content_hits` lch ON (lc.`content_id` = lch.`content_id`)
                      $joinSql
                      INNER JOIN `" . BIT_DB_PREFIX . "search_index` si ON (si.`content_id`=lc.`content_id` AND si.`searchword` = ? )
                      WHERE `i_count` > 0 $whereSql
                      ORDER BY 10 DESC, 6 DESC
                      ";

          $ret = array();
          $first = true;
          foreach ($pParamHash['words'] as $word) {
              $bindVars[0] = $word;
              $result = $this->mDb->getAssoc( $query, $bindVars );
              if (!is_array($result)) {
                  // count()/uasort() below need an array even if the query
                  // yields nothing usable.
                  $result = array();
              }
              if ($first) {
                  $ret = $result;
                  $first = false;
              } else {
                  $this->mergeResults($ret, $result);
              }
              if (empty($ret)) {
                  // The intersection is already empty; further words cannot add rows.
                  break;
              }
          }

          /* count it */
          $pParamHash['cant'] = count($ret);

          /* Sort it */
          uasort($ret, 'search_relevance_sort');

          /* slice it: the third argument of array_slice() is a length, not an end index */
          $ret = array_slice($ret, $pParamHash['offset'], $pParamHash['max_records']);

          /* Set the hrefs. */
          foreach ($ret as $content_id => $data) {
              $ret[$content_id]['href'] = BIT_ROOT_URL . "index.php?content_id=" . $data['content_id'];
          }

          return $ret;
      }

      /**
       * Work out which content types may be searched and dispatch to
       * find_with_and() or find_with_or().
       *
       * A content type is allowed when it matches the requested
       * `content_type_guid` (or none was requested), the user passes
       * has_permission() for it, and either `search_restrict_types` is off
       * or `search_pkg_<guid>` is set.
       *
       * @param array $pParamHash reads content_type_guid, words, useAnd; `cant` is written
       * @return array matching content rows, empty when nothing can be searched
       */
      function find_exact_generic( &$pParamHash ) {
          global $gBitSystem, $gLibertySystem;
          $requestedType = isset($pParamHash['content_type_guid']) ? $pParamHash['content_type_guid'] : '';
          $allowed = array();
          foreach ( $gLibertySystem->mContentTypes as $contentType ) {
              $guid = $contentType["content_type_guid"];
              if ( ( $requestedType == $guid or $requestedType == "" ) // pages ?
              and $this->has_permission($guid)
              and ( ! $gBitSystem->getConfig('search_restrict_types') ||
                    $gBitSystem->getConfig('search_pkg_' . $guid) ) ) {
                  $allowed[] = $guid;
              }
          }

          if (count($allowed) && count($pParamHash['words']) > 0) {
              $selectSql = '';
              $joinSql = '';
              $whereSql = " AND  lc.`content_type_guid` IN (" . implode(',', array_fill(0, count($allowed), '?')) . ") ";
              $bindVars = array();

              if (isset($pParamHash['useAnd']) && $pParamHash['useAnd']) {
                  return $this->find_with_and($allowed, $selectSql, $joinSql, $whereSql, $bindVars, $pParamHash);
              }
              return $this->find_with_or($allowed, $selectSql, $joinSql, $whereSql, $bindVars, $pParamHash);
          }

          $pParamHash['cant'] = 0;
          return array();
      }

      /**
       * Intersect $ret with $result in place: rows missing from $result are
       * removed, rows present in both get their relevancy summed.
       *
       * @param array $ret    accumulated results keyed by content id, modified in place
       * @param array $result results for the next word, keyed by content id
       */
      function mergeResults(&$ret, $result) {
          // Remove those that don't overlap or update relevance
          foreach ($ret as $content_id => $data) {
              if (!isset($result[$content_id])) {
                  unset($ret[$content_id]);
              } else {
                  $ret[$content_id]['relevancy'] += $result[$content_id]['relevancy'];
              }
          }
      }

      /**
       * Basic view-permission check for a content type.
       *
       * @param string|null $pContentType content type guid
       * @return bool result of hasViewPermission() on a generic object of that
       *              type; FALSE when no type is given or no object is obtained
       */
      public static function has_permission($pContentType = NULL) {
          if ( ! empty( $pContentType ) ) {
              $object = LibertyBase::getLibertyObject(1, $pContentType, FALSE);
              if ( ! empty( $object ) ) {
                  // Note that we can't do verify access here because
                  // we are using a generic object but we can at least get a
                  // basic permission check here.
                  return $object->hasViewPermission(FALSE);
              }
          }

          return FALSE;
      }

  } # class SearchLib
  329.  
  // function_exists() is the correct guard here: defined() checks constants,
  // so it never prevented a redeclaration when this file was loaded twice.
  if (!function_exists('search_relevance_sort')) {
      /**
       * Sort callback: highest relevancy first, ties broken by highest hits.
       *
       * Returns -1, 0 or 1 rather than the raw difference, because sort
       * functions truncate the callback result to an integer and would treat
       * fractional differences (e.g. 0.4) as "equal".
       *
       * @param array $a row with 'relevancy' and 'hits'
       * @param array $b row with 'relevancy' and 'hits'
       * @return int positive when $b ranks before $a, negative when $a ranks
       *             before $b, 0 when they tie
       */
      function search_relevance_sort($a, $b) {
          if ($a['relevancy'] != $b['relevancy']) {
              return ($b['relevancy'] > $a['relevancy']) ? 1 : -1;
          }
          if ($a['hits'] != $b['hits']) {
              return ($b['hits'] > $a['hits']) ? 1 : -1;
          }
          return 0;
      }
  }
  339.  
  340. ?>

Documentation generated on Wed, 29 Jul 2015 13:57:27 +0000 by phpDocumentor 1.5.0-lsces