xref: /dokuwiki/inc/Search/Index/TupleOps.php (revision 06053dca2fac9a1da4eb1accf8c2488942da5d2a)
1<?php
2
3namespace dokuwiki\Search\Index;
4
5/**
6 * Provides operations on tuple records used in our indexes
7 *
8 * Tuples consist of a key (typically a RID from another Index) and a number (usually a count).
9 * Used to store page <-> word counts for example
10 */
class TupleOps
{
    /**
     * Insert or replace a tuple in a line
     *
     * Any existing tuple for $key is removed first, no matter whether it is stored with an
     * explicit count ("key*count") or in the short form ("key", implicit count of 1). The new
     * tuple is then prepended to the remaining record. A $count of 0 only removes the tuple.
     *
     * @param string $record This is the current row value to be modified
     * @param int|string $key The foreign rid or identifier
     * @param int $count The count to store
     * @return string A new row value
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public static function updateTuple(string $record, int|string $key, int $count): string
    {
        // work on a string representation, preg_quote() and the interpolation below expect one
        $id = (string)$key;

        if ($record !== '') {
            // Remove any current version of the tuple (with or without explicit count).
            // The tuple has to be anchored on both sides: the leading (^|:) and the trailing
            // lookahead make sure that key "1" never matches the beginning of "12" or "12*3".
            $pattern = '/(^|:)' . preg_quote($id, '/') . '(\*\d*)?(?=:|$)/';
            // preg_replace() returns null on a PCRE error, keep the record untouched then
            $record = preg_replace($pattern, '', $record) ?? $record;
        }
        // removing the first or last tuple may leave a dangling separator behind
        $record = trim($record, ':');

        // a count of zero means the tuple is deleted, nothing to add
        if ($count === 0) return $record;

        // Write tuples with frequency=1 without the asterisk
        $tuple = ($count === 1) ? $id : "$id*$count";

        return ($record === '') ? $tuple : "$tuple:$record";
    }

    /**
     * Sum the counts in a list of tuples
     *
     * Tuples can be in format "key*count" or just "key" (implicit count of 1)
     *
     * @param string $record The row value to parse
     * @return int sum of all counts
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public static function aggregateTupleCounts(string $record): int
    {
        $freq = 0;
        foreach (explode(':', $record) as $tuple) {
            // empty parts result from an empty record or stray separators
            if ($tuple === '') continue;

            if (str_contains($tuple, '*')) {
                // only the count is of interest here, the key is ignored
                [, $cnt] = explode('*', $tuple);
                $freq += (int)$cnt;
            } else {
                // No explicit count means count of 1
                ++$freq;
            }
        }
        return $freq;
    }

    /**
     * Split a line into an array of tuples
     *
     * The given key of the given $filtermap defines which tuples to extract, the value
     * gives the name in the output array. This basically allows to map RIDs to their
     * respective real values. The result will contain the counts associated with the
     * mapped keys.
     *
     * If no $filtermap is given (null), all tuples are returned keeping their original keys
     *
     * Tuples can be in format "key*count" or just "key" (implicit count of 1). Tuples with
     * an explicit count of zero are skipped.
     *
     * @param string $record The row value to parse
     * @param array|null $filtermap Associative array of ($key => $mapping), null for all tuples
     * @return array mapped counts
     * @author Andreas Gohr <andi@splitbrain.org>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public static function parseTuples(string $record, ?array $filtermap = null): array
    {
        $result = [];
        if ($record === '') return $result;

        foreach (explode(':', $record) as $tuple) {
            if ($tuple === '') continue;

            // Handle both "key*count" and "key" formats
            if (str_contains($tuple, '*')) {
                [$key, $cnt] = explode('*', $tuple);
                // compare numerically, so "", "0" and "00" are all treated as a zero count
                $cnt = (int)$cnt;
                if ($cnt === 0) continue;
            } else {
                // No explicit count means count of 1
                $key = $tuple;
                $cnt = 1;
            }

            if ($filtermap !== null) {
                // tuples that are not part of the map are filtered out
                if (!isset($filtermap[$key])) continue;
                $mapped = $filtermap[$key];
            } else {
                $mapped = $key;
            }
            $result[$mapped] = $cnt;
        }
        return $result;
    }
}
113}
114