xref: /dokuwiki/inc/Search/Index/TupleOps.php (revision 9369b4a991666bc911474806b106d8958e79f4c1)
19bd7d62fSAndreas Gohr<?php
29bd7d62fSAndreas Gohr
39bd7d62fSAndreas Gohrnamespace dokuwiki\Search\Index;
49bd7d62fSAndreas Gohr
/**
 * Provides operations on tuple records used in our indexes
 *
 * Tuples consist of a key (typically a RID from another Index) and a number (usually a count).
 * Used to store page <-> word counts for example.
 *
 * A record is a colon separated list of tuples. A tuple is written as "key*count"; a tuple
 * with a count of 1 may be written as just "key".
 */
class TupleOps
{
    /**
     * Insert or replace a tuple in a line
     *
     * Any existing tuple for the given key is removed first. If $count is non-zero the new
     * tuple is prepended to the remaining record, a zero $count simply deletes the tuple.
     *
     * @param string $record This is the current row value to be modified
     * @param int|string $key The foreign rid or identifier
     * @param int $count The count to store
     * @return string A new row value
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public static function updateTuple(string $record, int|string $key, int $count): string
    {
        // work on a string key throughout, so neither preg_quote() nor the string
        // return type depend on implicit int -> string coercion
        $key = (string)$key;

        if ($record !== '') {
            // Remove any current version of the tuple (with or without explicit count).
            // The lookahead pins the end of the key to a tuple boundary: without it the
            // key "1" would also strip the leading digit of an unrelated tuple like "11*3".
            $record = preg_replace(
                '/(^|:)' . preg_quote($key, '/') . '(\*\d+)?(?=:|$)/',
                '',
                $record
            );
        }
        // removing the first or last tuple leaves a dangling separator behind
        $record = trim($record, ':');

        if ($count) {
            // Write tuples with frequency=1 without the asterisk
            $tuple = ($count === 1) ? $key : "$key*$count";
            if ($record !== '') {
                return "$tuple:" . $record;
            }
            return $tuple;
        }
        return $record;
    }

    /**
     * Sum the counts in a list of tuples
     *
     * Tuples can be in format "key*count" or just "key" (implicit count of 1)
     *
     * @param string $record The row value to parse
     * @return int sum of all counts
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public static function aggregateTupleCounts(string $record): int
    {
        $freq = 0;
        foreach (explode(':', $record) as $tuple) {
            // empty segments (empty record, doubled separators) carry no count
            if ($tuple === '') continue;

            if (str_contains($tuple, '*')) {
                [/* $key */, $cnt] = explode('*', $tuple);
                $freq += (int)$cnt;
            } else {
                // No explicit count means count of 1
                ++$freq;
            }
        }
        return $freq;
    }

    /**
     * Split a line into an array of tuples
     *
     * The given key of the given $filtermap defines which tuples to extract, the value
     * gives the name in the output array. This basically allows to map RIDs to their
     * respective real values. The result will contain the counts associated with the
     * mapped keys.
     *
     * If no $filtermap is given (null), all tuples are returned keeping their original keys
     *
     * Tuples can be in format "key*count" or just "key" (implicit count of 1).
     * Tuples with an explicit empty or zero count are skipped.
     *
     * @param string $record The row value to parse
     * @param array|null $filtermap Associative array of ($key => $mapping), null for all tuples
     * @return array mapped counts
     * @author Andreas Gohr <andi@splitbrain.org>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public static function parseTuples(string $record, ?array $filtermap = null): array
    {
        $result = [];
        if ($record === '') return $result;

        foreach (explode(':', $record) as $tuple) {
            if ($tuple === '') continue;

            // Handle both "key*count" and "key" formats
            if (str_contains($tuple, '*')) {
                [$key, $cnt] = explode('*', $tuple);
                // "key*" and "key*0" do not represent an occurrence
                if (!$cnt) continue;
            } else {
                // No explicit count means count of 1
                $key = $tuple;
                $cnt = 1;
            }

            if (is_array($filtermap)) {
                // only tuples listed in the filter map are of interest
                if (!isset($filtermap[$key])) continue;
                $mapped = $filtermap[$key];
            } else {
                $mapped = $key;
            }
            $result[$mapped] = (int)$cnt;
        }
        return $result;
    }
}
114