xref: /dokuwiki/inc/Search/Index/TupleOps.php (revision 9bd7d62f47cb0e2a7651fefd7106f6ac10625281)
1*9bd7d62fSAndreas Gohr<?php
2*9bd7d62fSAndreas Gohr
3*9bd7d62fSAndreas Gohrnamespace dokuwiki\Search\Index;
4*9bd7d62fSAndreas Gohr
/**
 * Provides operations on tuple records used in our indexes
 *
 * A record is a colon separated list of tuples, each tuple being written as "key*count".
 * Tuples consist of a key (typically a RID from another Index) and a number (usually a count).
 * Used to store page <-> word counts for example
 */
class TupleOps
{
    /**
     * Insert or replace a tuple in a line
     *
     * Any tuple already stored for $key is removed first. When $count is truthy the new
     * tuple is prepended to what is left of the record; otherwise the key is only removed.
     *
     * @param string $record This is the current row value to be modified
     * @param int|string $key The foreign rid or identifier
     * @param int $count The count to store
     * @return string A new row value
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public static function updateTuple($record, $key, $count)
    {
        $record = (string)$record;
        $key = (string)$key;

        if ($record !== '') {
            // remove any current version of the tuple; anchoring on the record start or a
            // separator ensures that key "3" does not match the tail of key "13"
            $pattern = '/(^|:)' . preg_quote($key, '/') . '\*\d*/';
            $record = (string)preg_replace($pattern, '', $record);
        }
        // removing the first tuple leaves its separator behind
        $record = trim($record, ':');

        if (!$count) {
            return $record;
        }

        $tuple = "{$key}*{$count}";
        // compare against '' explicitly so a remaining record of "0" is not discarded
        return $record === '' ? $tuple : $tuple . ':' . $record;
    }

    /**
     * Sum the counts in a list of tuples
     *
     * Empty tuples and tuples without a count part are ignored.
     *
     * @param string $record The row value to parse
     * @return int sum of all counts
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public static function aggregateTupleCounts($record)
    {
        $freq = 0;
        foreach (explode(':', (string)$record) as $tuple) {
            if ($tuple === '') continue;
            $fields = explode('*', $tuple);
            // a tuple lacking the "*count" part contributes nothing instead of raising a warning
            $freq += (int)($fields[1] ?? 0);
        }
        return $freq;
    }

    /**
     * Split a line into an array of tuples
     *
     * The given key of the given $filtermap defines which tuples to extract, the value
     * gives the name in the output array. This basically allows to map RIDs to their
     * respective real values. The result will contain the counts associated with the
     * mapped keys.
     *
     * Tuples with an empty or zero count, tuples whose key is not in $filtermap and tuples
     * mapped to an empty name are skipped. A mapped name of "0" (or 0) is kept.
     *
     * @param string $record The row value to parse
     * @param array $filtermap Associative array of ($key => $mapping)
     * @return array mapped counts ($mapping => count as found in the record)
     * @author Andreas Gohr <andi@splitbrain.org>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public static function parseTuples($record, $filtermap)
    {
        $result = [];
        $record = (string)$record;
        if ($record === '') return $result;

        foreach (explode(':', $record) as $tuple) {
            if ($tuple === '') continue;

            $fields = explode('*', $tuple);
            $key = $fields[0];
            // malformed tuples without a count are treated like a zero count
            $cnt = $fields[1] ?? '';
            if (!$cnt) continue;

            $mapped = $filtermap[$key] ?? null;
            // empty() alone would also reject "0"; only unset/null/false/'' and other
            // empty values mean that the tuple is not wanted
            if (empty($mapped) && $mapped !== '0' && $mapped !== 0) continue;

            $result[$mapped] = $cnt;
        }
        return $result;
    }
}
90