xref: /dokuwiki/inc/Search/Index/TupleOps.php (revision db8be586414d0dc05ca5131baddfa84f08c55520)
1<?php
2
3namespace dokuwiki\Search\Index;
4
5/**
6 * Provides operations on tuple records used in our indexes
7 *
8 * Tuples consist of a key (typically a RID from another Index) and a number (usually a count).
9 * Used to store page <-> word counts for example
10 */
class TupleOps
{
    /**
     * Insert or replace a tuple in a line
     *
     * Any tuple currently stored for $key is removed first, whether it was written as "key"
     * or as "key*count". Only complete tuples are matched, so updating key "1" never touches
     * the tuple of key "12". When $count is non-zero the new tuple is prepended to what is
     * left of the record; a zero $count therefore just deletes the tuple.
     *
     * @param string $record This is the current row value to be modified
     * @param int|string $key The foreign rid or identifier
     * @param int $count The count to store
     * @return string A new row value
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public static function updateTuple($record, $key, $count)
    {
        // normalise to strings so the documented string return type always holds,
        // even for an integer key written into an empty record
        $record = (string)$record;
        $key = (string)$key;

        if ($record !== '') {
            // remove any current version of the tuple (with or without explicit count).
            // The lookahead requires the tuple to end at a separator or at the end of the
            // record; without it a key that is only a prefix of another key ("1" vs "12")
            // would be cut out of the longer key and corrupt that tuple.
            $pattern = '/(^|:)' . preg_quote($key, '/') . '(\*\d*)?(?=:|$)/';
            $record = preg_replace($pattern, '', $record);
        }

        // removing the first or last tuple leaves a dangling separator behind
        $record = trim($record, ':');

        if (!$count) {
            // a zero count means the tuple is dropped and not written back
            return $record;
        }

        // Write tuples with frequency=1 without the asterisk
        $tuple = ($count == 1) ? $key : "{$key}*{$count}";

        return ($record !== '') ? "{$tuple}:" . $record : $tuple;
    }

    /**
     * Sum the counts in a list of tuples
     *
     * Tuples can be in format "key*count" or just "key" (implicit count of 1).
     * Empty segments (e.g. from an empty record or doubled separators) are ignored.
     *
     * @param string $record The row value to parse
     * @return int sum of all counts
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public static function aggregateTupleCounts($record)
    {
        $freq = 0;
        foreach (explode(':', $record) as $tuple) {
            if ($tuple === '') continue;

            if (strpos($tuple, '*') === false) {
                // No explicit count means count of 1
                $freq += 1;
                continue;
            }

            // only the count part is of interest here, the key is discarded
            [, $cnt] = explode('*', $tuple);
            $freq += (int)$cnt;
        }
        return $freq;
    }

    /**
     * Split a line into an array of tuples
     *
     * The given key of the given $filtermap defines which tuples to extract, the value
     * gives the name in the output array. This basically allows to map RIDs to their
     * respective real values. The result will contain the counts associated with the
     * mapped keys.
     *
     * If no $filtermap is given (null), all tuples are returned keeping their original keys
     *
     * Tuples can be in format "key*count" or just "key" (implicit count of 1). Tuples with
     * an explicit but empty or zero count are skipped.
     *
     * @param string $record The row value to parse
     * @param array|null $filtermap Associative array of ($key => $mapping), null for all tuples
     * @return array mapped counts, as ($mapping => int count)
     * @author Andreas Gohr <andi@splitbrain.org>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public static function parseTuples($record, $filtermap = null)
    {
        $result = [];
        if ($record == '') return $result;

        foreach (explode(':', $record) as $tuple) {
            if ($tuple === '') continue;

            // Handle both "key*count" and "key" formats
            if (strpos($tuple, '*') !== false) {
                [$key, $cnt] = explode('*', $tuple);
                if (!$cnt) continue;
            } else {
                // No explicit count means count of 1
                $key = $tuple;
                $cnt = 1;
            }

            if (is_array($filtermap)) {
                // with a filter map only the listed keys are kept, renamed to their mapping
                if (!isset($filtermap[$key])) continue;
                $mapped = $filtermap[$key];
            } else {
                $mapped = $key;
            }
            $result[$mapped] = (int)$cnt;
        }
        return $result;
    }
}
115