<?php

namespace dokuwiki\Search\Index;

/**
 * Provides operations on tuple records used in our indexes
 *
 * Tuples consist of a key (typically a RID from another Index) and a number (usually a count).
 * Used to store page <-> word counts for example
 *
 * A record is a colon separated list of tuples. A tuple is written as "key*count", or as just
 * "key" when the count is 1.
 */
class TupleOps
{
    /**
     * Insert or replace a tuple in a line
     *
     * Any existing tuple for $key is removed from the record first. If $count is truthy, the new
     * tuple is prepended to the remaining record; otherwise the key is simply dropped.
     *
     * @param string $record This is the current row value to be modified
     * @param int|string $key The foreign rid or identifier
     * @param int $count The count to store
     * @return string A new row value
     * @author Tom N Harris <tnharris@whoopdedo.org>
     *
     */
    public static function updateTuple($record, $key, $count)
    {
        if ($record != '') {
            // Remove any current version of the tuple (with or without explicit count).
            // The lookahead anchors the match to the end of the tuple: without it, key "1"
            // would also match the beginning of "12" or "12*3" and corrupt that tuple.
            $pattern = '/(^|:)' . preg_quote((string)$key, '/') . '(\*\d+)?(?=:|$)/';
            $record = preg_replace($pattern, '', $record);
        }
        // removing the first tuple leaves a dangling separator behind
        $record = trim((string)$record, ':');

        if (!$count) {
            // a count of zero means the tuple is only removed
            return $record;
        }

        // Write tuples with frequency=1 without the asterisk
        $tuple = ($count == 1) ? $key : "{$key}*{$count}";
        if ($record !== '') {
            return "{$tuple}:" . $record;
        }
        return $tuple;
    }

    /**
     * Sum the counts in a list of tuples
     *
     * Tuples can be in format "key*count" or just "key" (implicit count of 1)
     *
     * @param string $record The row value to parse
     * @return int sum of all counts
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public static function aggregateTupleCounts($record)
    {
        $freq = 0;
        $parts = explode(':', $record);
        foreach ($parts as $tuple) {
            // skip empty segments (empty record, doubled or dangling separators)
            if ($tuple === '') continue;

            if (strpos($tuple, '*') !== false) {
                [/* $key */, $cnt] = explode('*', $tuple);
                $freq += (int)$cnt;
            } else {
                // No explicit count means count of 1
                $freq += 1;
            }
        }
        return $freq;
    }

    /**
     * Split a line into an array of tuples
     *
     * The given key of the given $filtermap defines which tuples to extract, the value
     * gives the name in the output array. This basically allows to map RIDs to their
     * respective real values. The result will contain the counts associated with the
     * mapped keys.
     *
     * If no $filtermap is given (null), all tuples are returned keeping their original keys
     *
     * Tuples can be in format "key*count" or just "key" (implicit count of 1).
     * Tuples with an explicit count that is empty or zero are omitted from the result.
     *
     * @param string $record The row value to parse
     * @param array|null $filtermap Associative array of ($key => $mapping), null for all tuples
     * @return array mapped counts
     * @author Andreas Gohr <andi@splitbrain.org>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public static function parseTuples($record, $filtermap = null)
    {
        $result = [];
        if ($record == '') return $result;

        $parts = explode(':', $record);
        foreach ($parts as $tuple) {
            if ($tuple === '') continue;

            // Handle both "key*count" and "key" formats
            if (strpos($tuple, '*') !== false) {
                [$key, $cnt] = explode('*', $tuple);
                if (!$cnt) continue;
            } else {
                // No explicit count means count of 1
                $key = $tuple;
                $cnt = 1;
            }

            if (is_array($filtermap)) {
                // only tuples listed in the filter map are returned, under their mapped name
                if (!isset($filtermap[$key])) continue;
                $mapped = $filtermap[$key];
            } else {
                $mapped = $key;
            }
            $result[$mapped] = (int)$cnt;
        }
        return $result;
    }
}