xref: /dokuwiki/inc/Search/Index/TupleOps.php (revision bc997a9db2138e341357040f232af36eb18589e7)
19bd7d62fSAndreas Gohr<?php
29bd7d62fSAndreas Gohr
39bd7d62fSAndreas Gohrnamespace dokuwiki\Search\Index;
49bd7d62fSAndreas Gohr
59bd7d62fSAndreas Gohr/**
69bd7d62fSAndreas Gohr * Provides operations on tuple records used in our indexes
79bd7d62fSAndreas Gohr *
89bd7d62fSAndreas Gohr * Tuples consist of a key (typically a RID from another Index) and a number (usually a count).
99bd7d62fSAndreas Gohr * Used to store page <-> word counts for example
109bd7d62fSAndreas Gohr */
class TupleOps
{
    /**
     * Insert or replace a tuple in a line
     *
     * Any existing tuple for $key is removed from the record first (regardless of whether it
     * was stored as "key" or "key*count"). If $count is non-zero the new tuple is prepended
     * to the remaining record; a $count of 0 therefore simply deletes the tuple.
     *
     * @param string $record This is the current row value to be modified
     * @param int|string $key The foreign rid or identifier
     * @param int $count The count to store, 0 removes the tuple
     * @return string A new row value
     * @author Tom N Harris <tnharris@whoopdedo.org>
     *
     */
    public static function updateTuple($record, $key, $count)
    {
        $record = (string)$record;
        if ($record !== '') {
            // Remove any current version of the tuple (with or without explicit count).
            // The lookahead anchors the match at the end of the tuple: without it the key "1"
            // would also match the beginning of the tuple "10" and corrupt that entry.
            $pattern = '/(^|:)' . preg_quote((string)$key, '/') . '(?:\*\d*)?(?=:|$)/';
            $record = preg_replace($pattern, '', $record);
        }
        // removing the first tuple leaves a dangling separator behind
        $record = trim($record, ':');

        if (!$count) {
            // nothing to store, the tuple was only removed
            return $record;
        }

        // Write tuples with frequency=1 without the asterisk
        $tuple = ($count == 1) ? (string)$key : "{$key}*{$count}";

        return ($record !== '') ? "{$tuple}:" . $record : $tuple;
    }

    /**
     * Sum the counts in a list of tuples
     *
     * Tuples can be in format "key*count" or just "key" (implicit count of 1).
     * Empty tuples (e.g. caused by doubled separators) are ignored.
     *
     * @param string $record The row value to parse
     * @return int sum of all counts
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public static function aggregateTupleCounts($record)
    {
        $freq = 0;
        $parts = explode(':', (string)$record);
        foreach ($parts as $tuple) {
            if ($tuple === '') continue;
            if (strpos($tuple, '*') !== false) {
                [/* $key */, $cnt] = explode('*', $tuple);
                $freq += (int)$cnt;
            } else {
                // No explicit count means count of 1
                $freq += 1;
            }
        }
        return $freq;
    }

    /**
     * Split a line into an array of tuples
     *
     * The given key of the given $filtermap defines which tuples to extract, the value
     * gives the name in the output array. This basically allows to map RIDs to their
     * respective real values. The result will contain the counts associated with the
     * mapped keys.
     *
     * If no $filtermap is given (null), all tuples are returned keeping their original keys
     *
     * Tuples can be in format "key*count" or just "key" (implicit count of 1). Counts are
     * always returned as integers, tuples with an empty or zero count are skipped.
     *
     * @param string $record The row value to parse
     * @param array|null $filtermap Associative array of ($key => $mapping), null for all tuples
     * @return array mapped counts as ($mapping => int $count)
     * @author Andreas Gohr <andi@splitbrain.org>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public static function parseTuples($record, $filtermap = null)
    {
        $result = [];
        if ($record == '') return $result;
        $parts = explode(':', $record);
        foreach ($parts as $tuple) {
            if ($tuple === '') continue;

            // Handle both "key*count" and "key" formats
            if (strpos($tuple, '*') !== false) {
                [$key, $cnt] = explode('*', $tuple);
                // normalize to int so explicit and implicit counts have the same type
                $cnt = (int)$cnt;
                if ($cnt === 0) continue;
            } else {
                // No explicit count means count of 1
                $key = $tuple;
                $cnt = 1;
            }

            if (is_array($filtermap)) {
                // only tuples named in the filter map are of interest
                if (!isset($filtermap[$key])) continue;
                $mapped = $filtermap[$key];
            } else {
                $mapped = $key;
            }
            $result[$mapped] = $cnt;
        }
        return $result;
    }
}
115