2005-11-18
werner
In my wiki test installation I have changed the function idx_lookup to do a generic (substring) search.
This routine also finds the search word inside other words (for example, "install" will be found in the word "testinstallation").
The code looks like this:
/**
 * Look up words in the fulltext index, matching generically (by substring).
 *
 * A search word longer than two bytes matches every indexed word that
 * contains it (e.g. "install" matches "testinstallation"). Shorter words
 * must match an indexed word exactly. The documents of all matching index
 * words are merged per search word and their values are added up.
 *
 * Word IDs are line numbers in word.idx; line N of index.idx holds the
 * documents for word ID N.
 *
 * @param array $words search words
 * @return array|false array mapping each search word to an array of
 *                     documents (keys and values as delivered by
 *                     idx_parseIndexLine(), values summed over all matching
 *                     index words; empty array when nothing matched), or
 *                     false when index.idx cannot be opened
 */
function idx_lookup($words){
    global $conf;

    $result  = array(); // search word => merged documents
    $resultg = array(); // search word => IDs of all matching index words

    // load known words and documents
    $page_idx = file($conf['cachedir'].'/page.idx');
    $word_idx = file($conf['cachedir'].'/word.idx');
    if(!is_array($word_idx)) $word_idx = array(); // unreadable word list: nothing can match

    // get word IDs
    $wids = array();
    foreach($words as $word){
        $matches = array();
        if(strlen($word) > 2){
            // generic search for words of three or more bytes: every index
            // word containing the search word counts
            foreach($word_idx as $wid => $iword){
                // a match at offset 0 is still a match, so compare against false
                if(strpos($iword,$word) !== false){
                    $matches[] = $wid;
                }
            }
        }else{
            // exact word search; lines returned by file() keep their newline
            $wid = array_search("$word\n",$word_idx,true);
            if(is_int($wid)){
                $matches[] = $wid;
            }
        }
        $result[$word]  = array();
        $resultg[$word] = $matches;
        foreach($matches as $wid){
            $wids[] = $wid;
        }
    }
    $wids = array_unique($wids);
    sort($wids); // the index is read sequentially, so IDs must be ascending

    // Open index
    $idx = fopen($conf['cachedir'].'/index.idx','r');
    if(!$idx){
        msg("Failed to open index files",-1);
        return false;
    }

    // Walk the index until all wanted lines are found
    $docs = array();            // word ID => parsed index line
    $lno  = 0;
    $line = '';
    $srch = array_shift($wids); // which word ID do we look for? (null if none)
    while($srch !== null && !feof($idx)){
        // read full line; long lines arrive in several chunks
        $chunk = fgets($idx, 4096);
        if($chunk === false) break;
        $line .= $chunk;
        // keep reading until the line is complete; an unterminated last
        // line is complete once the end of the file is reached
        if(substr($line,-1) != "\n" && !feof($idx)) continue;

        if($lno > $srch) break; // shouldn't happen

        // do we want this line?
        if($lno == $srch){
            $docs[$srch] = idx_parseIndexLine($page_idx,$line);
            $srch = array_shift($wids); // next word ID, null when done
        }

        $line = ''; // reset line buffer
        $lno++;     // increase line counter
    }
    fclose($idx);

    // merge found pages into result array
    foreach($resultg as $word => $matches){ // for each word of the query
        foreach($matches as $wid){          // for each matching index word
            // the index may be shorter than the word list
            if(!isset($docs[$wid]) || !is_array($docs[$wid])) continue;
            foreach($docs[$wid] as $dockey => $count){
                if(isset($result[$word][$dockey])){
                    $result[$word][$dockey] += $count;
                }else{
                    $result[$word][$dockey] = $count;
                }
            }
        }
    }
    return $result;
}