<?php

namespace dokuwiki\Search\Index;

/**
 * Provides operations on tuple records used in our indexes
 *
 * Tuples consist of a key (typically a RID from another Index) and a number (usually a count).
 * Used to store page <-> word counts for example
 *
 * A record is a colon separated list of tuples. Each tuple is written either as "key*count"
 * or, when the count is 1, as just "key".
 */
class TupleOps
{
    /**
     * Insert or replace a tuple in a line
     *
     * Any existing tuple for the given key is removed first (with or without an explicit count).
     * If the count is non-zero the new tuple is prepended to the remaining record, otherwise the
     * key is simply dropped from the record.
     *
     * @param string $record This is the current row value to be modified
     * @param int|string $key The foreign rid or identifier
     * @param int $count The count to store, 0 removes the tuple
     * @return string A new row value
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public static function updateTuple(string $record, int|string $key, int $count): string
    {
        // work on a string key so quoting and the string return value need no implicit coercion
        $key = (string)$key;

        if ($record !== '') {
            // Remove any current version of the tuple (with or without explicit count).
            // The lookahead anchors the match at the end of the tuple; without it, a key
            // like "1" would also eat the beginning of a longer key such as "10".
            $pattern = '/(^|:)' . preg_quote($key, '/') . '(\*\d+)?(?=:|$)/';
            $record = preg_replace($pattern, '', $record);
        }
        // removing the first tuple leaves a dangling separator behind
        $record = trim($record, ':');

        if (!$count) {
            return $record;
        }

        // Write tuples with frequency=1 without the asterisk
        $tuple = ($count === 1) ? $key : "$key*$count";

        return ($record !== '') ? "$tuple:" . $record : $tuple;
    }

    /**
     * Sum the counts in a list of tuples
     *
     * Tuples can be in format "key*count" or just "key" (implicit count of 1).
     * Empty tuples (e.g. caused by consecutive separators) are ignored.
     *
     * @param string $record The row value to parse
     * @return int sum of all counts
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public static function aggregateTupleCounts(string $record): int
    {
        $freq = 0;
        $parts = explode(':', $record);
        foreach ($parts as $tuple) {
            if ($tuple === '') {
                continue;
            }
            if (str_contains($tuple, '*')) {
                [/* $key */, $cnt] = explode('*', $tuple);
                $freq += (int)$cnt;
            } else {
                // No explicit count means count of 1
                $freq += 1;
            }
        }
        return $freq;
    }

    /**
     * Split a line into an array of tuples
     *
     * The given key of the given $filtermap defines which tuples to extract, the value
     * gives the name in the output array. This basically allows to map RIDs to their
     * respective real values. The result will contain the counts associated with the
     * mapped keys.
     *
     * If no $filtermap is given (null), all tuples are returned keeping their original keys
     *
     * Tuples can be in format "key*count" or just "key" (implicit count of 1).
     * Tuples with an empty or zero count are skipped.
     *
     * @param string $record The row value to parse
     * @param array|null $filtermap Associative array of ($key => $mapping), null for all tuples
     * @return array mapped counts
     * @author Andreas Gohr <andi@splitbrain.org>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public static function parseTuples(string $record, ?array $filtermap = null): array
    {
        $result = [];
        if ($record === '') {
            return $result;
        }

        $parts = explode(':', $record);
        foreach ($parts as $tuple) {
            if ($tuple === '') {
                continue;
            }

            // Handle both "key*count" and "key" formats
            if (str_contains($tuple, '*')) {
                [$key, $cnt] = explode('*', $tuple);
                if (!$cnt) {
                    continue;
                }
            } else {
                // No explicit count means count of 1
                $key = $tuple;
                $cnt = 1;
            }

            if (is_array($filtermap)) {
                // only tuples listed in the filter map are returned, under their mapped name
                if (!isset($filtermap[$key])) {
                    continue;
                }
                $mapped = $filtermap[$key];
            } else {
                $mapped = $key;
            }
            $result[$mapped] = (int)$cnt;
        }
        return $result;
    }
}