<?php

namespace dokuwiki\Search\Index;

/**
 * Provides operations on tuple records used in our indexes
 *
 * A record is a colon separated list of tuples. Each tuple consists of a key (typically a RID
 * from another Index) and a number (usually a count), written as "key*count". A tuple with a
 * count of 1 is written as just "key".
 *
 * Used to store page <-> word counts for example
 */
class TupleOps
{
    /**
     * Insert or replace a tuple in a line
     *
     * Any existing tuple for the given key is removed first. If the count is non-zero, the new
     * tuple is prepended to the remaining record. A count of zero only removes the tuple.
     *
     * @param string $record This is the current row value to be modified
     * @param int|string $key The foreign rid or identifier
     * @param int $count The count to store
     * @return string A new row value
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public static function updateTuple(string $record, int|string $key, int $count): string
    {
        $key = (string)$key;

        if ($record !== '') {
            // Remove any current version of the tuple (with or without explicit count).
            // The lookahead makes sure the whole tuple is matched: because the count suffix is
            // optional, key "1" would otherwise also match the beginning of the tuple "12*3".
            $pattern = '/(^|:)' . preg_quote($key, '/') . '(\*\d*)?(?=:|$)/';
            $record = preg_replace($pattern, '', $record);
        }

        // removing the first or last tuple leaves a dangling separator behind
        $record = trim($record, ':');

        if (!$count) {
            // a zero count means the tuple is only removed
            return $record;
        }

        // Write tuples with frequency=1 without the asterisk
        $tuple = ($count == 1) ? $key : "$key*$count";

        if ($record === '') {
            return $tuple;
        }
        return "$tuple:" . $record;
    }

    /**
     * Sum the counts in a list of tuples
     *
     * Tuples can be in format "key*count" or just "key" (implicit count of 1)
     *
     * @param string $record The row value to parse
     * @return int sum of all counts
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public static function aggregateTupleCounts(string $record): int
    {
        $freq = 0;
        foreach (explode(':', $record) as $tuple) {
            // empty parts come from empty records or stray separators
            if ($tuple === '') continue;

            if (str_contains($tuple, '*')) {
                [/* $key */, $cnt] = explode('*', $tuple);
                $freq += (int)$cnt;
            } else {
                // No explicit count means count of 1
                $freq += 1;
            }
        }
        return $freq;
    }

    /**
     * Split a line into an array of tuples
     *
     * The given key of the given $filtermap defines which tuples to extract, the value
     * gives the name in the output array. This basically allows to map RIDs to their
     * respective real values. The result will contain the counts associated with the
     * mapped keys.
     *
     * If no $filtermap is given (null), all tuples are returned keeping their original keys
     *
     * Tuples can be in format "key*count" or just "key" (implicit count of 1). Tuples with
     * an explicit count that is empty or zero are skipped.
     *
     * @param string $record The row value to parse
     * @param array|null $filtermap Associative array of ($key => $mapping), null for all tuples
     * @return array mapped counts
     * @author Andreas Gohr <andi@splitbrain.org>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public static function parseTuples(string $record, ?array $filtermap = null): array
    {
        $result = [];
        if ($record === '') return $result;

        foreach (explode(':', $record) as $tuple) {
            if ($tuple === '') continue;

            // Handle both "key*count" and "key" formats
            if (str_contains($tuple, '*')) {
                [$key, $cnt] = explode('*', $tuple);
                if (!$cnt) continue;
            } else {
                // No explicit count means count of 1
                $key = $tuple;
                $cnt = 1;
            }

            if (is_array($filtermap)) {
                // only tuples listed in the filter map are returned, under their mapped name
                if (!isset($filtermap[$key])) continue;
                $mapped = $filtermap[$key];
            } else {
                $mapped = $key;
            }
            $result[$mapped] = (int)$cnt;
        }
        return $result;
    }
}