<?php

namespace dokuwiki\Search\Index;

/**
 * Provides operations on tuple records used in our indexes
 *
 * A record is a colon separated list of tuples. Each tuple consists of a key
 * (typically a RID from another Index) and a number (usually a count), written
 * as "key*count". A count of 1 is written as just "key".
 * Used to store page <-> word counts for example.
 */
class TupleOps
{
    /**
     * Insert or replace a tuple in a line
     *
     * Any existing tuple for $key is removed first. When $count is truthy the new
     * tuple is prepended to the remaining record, otherwise the key is only removed.
     *
     * @param string $record This is the current row value to be modified
     * @param int|string $key The foreign rid or identifier
     * @param int $count The count to store, 0 removes the tuple
     * @return string A new row value
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public static function updateTuple($record, $key, $count)
    {
        if ($record !== '') {
            // Remove any current version of the tuple (with or without explicit count).
            // The lookahead anchors the match at the end of the tuple, otherwise the
            // key "1" would also eat the first character of a tuple like "12*3".
            $pattern = '/(^|:)' . preg_quote((string)$key, '/') . '(?:\*\d*)?(?=:|$)/';
            $record = preg_replace($pattern, '', $record);
        }
        // removing a tuple may leave a dangling separator at either end
        $record = trim($record, ':');

        if (!$count) {
            return $record;
        }

        // Write tuples with frequency=1 without the asterisk
        $tuple = ($count == 1) ? $key : "{$key}*{$count}";
        if ($record !== '') {
            return "{$tuple}:" . $record;
        }
        return $tuple;
    }

    /**
     * Sum the counts in a list of tuples
     *
     * Tuples can be in format "key*count" or just "key" (implicit count of 1)
     *
     * @param string $record The row value to parse
     * @return int sum of all counts
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public static function aggregateTupleCounts($record)
    {
        $freq = 0;
        foreach (explode(':', $record) as $tuple) {
            // empty segments come from empty records or doubled separators
            if ($tuple === '') continue;

            if (strpos($tuple, '*') !== false) {
                // only the count is of interest here, the key is skipped
                [, $cnt] = explode('*', $tuple);
                $freq += (int)$cnt;
            } else {
                // No explicit count means count of 1
                $freq += 1;
            }
        }
        return $freq;
    }

    /**
     * Split a line into an array of tuples
     *
     * The given key of the given $filtermap defines which tuples to extract, the value
     * gives the name in the output array. This basically allows to map RIDs to their
     * respective real values. The result will contain the counts associated with the
     * mapped keys.
     *
     * If no $filtermap is given (null), all tuples are returned keeping their original keys
     *
     * Tuples can be in format "key*count" or just "key" (implicit count of 1).
     * Tuples with an explicit count that is empty or zero are skipped.
     *
     * @param string $record The row value to parse
     * @param array|null $filtermap Associative array of ($key => $mapping), null for all tuples
     * @return array mapped counts
     * @author Andreas Gohr <andi@splitbrain.org>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public static function parseTuples($record, $filtermap = null)
    {
        $result = [];
        if ($record === '' || $record === null) return $result;

        foreach (explode(':', $record) as $tuple) {
            if ($tuple === '') continue;

            // Handle both "key*count" and "key" formats
            if (strpos($tuple, '*') !== false) {
                [$key, $cnt] = explode('*', $tuple);
                if (!$cnt) continue;
            } else {
                // No explicit count means count of 1
                $key = $tuple;
                $cnt = 1;
            }

            if (is_array($filtermap)) {
                // keys missing from the filter map are not wanted in the result
                if (!isset($filtermap[$key])) continue;
                $mapped = $filtermap[$key];
            } else {
                $mapped = $key;
            }
            $result[$mapped] = (int)$cnt;
        }
        return $result;
    }
}