<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\Extension\Plugin;
use dokuwiki\plugin\aichat\AIChat;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\plugin\aichat\Embeddings;
use dokuwiki\plugin\aichat\Model\ChatInterface;
use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
use dokuwiki\plugin\aichat\ModelFactory;
use dokuwiki\plugin\aichat\Storage\AbstractStorage;

/**
 * DokuWiki Plugin aichat (Helper Component)
 *
 * Central entry point of the plugin: gives access to the configured models,
 * the embeddings and the storage backend, and implements the question
 * answering workflow (rephrasing, context lookup, prompt building).
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class helper_plugin_aichat extends Plugin
{
    /** @var ModelFactory */
    public $factory;

    /** @var CLIPlugin $logger */
    protected $logger;

    /** @var Embeddings */
    protected $embeddings;
    /** @var AbstractStorage */
    protected $storage;

    /** @var string path of the file where meta data on the last run is stored */
    protected $runDataFile;


    /**
     * Constructor. Initializes vendor autoloader
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
        $this->loadConfig();
        $this->factory = new ModelFactory($this->conf);
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is handed on to the embeddings and storage objects when they are created.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        // no auth backend or no restriction configured: everybody may use the plugin
        if (!$auth) return true;
        if (!$this->getConf('restrict')) return true;
        // a restriction is configured but there is no user info to check against
        if (!isset($USERINFO)) return false;

        return auth_isMember($this->getConf('restrict'), $INPUT->server->str('REMOTE_USER'), $USERINFO['grps']);
    }

    /**
     * Access the Chat Model
     *
     * @return ChatInterface
     */
    public function getChatModel()
    {
        return $this->factory->getChatModel();
    }

    /**
     * Access the model used for rephrasing questions
     *
     * @return ChatInterface
     */
    public function getRephraseModel()
    {
        return $this->factory->getRephraseModel();
    }

    /**
     * Access the Embedding Model
     *
     * @return EmbeddingInterface
     */
    public function getEmbeddingModel()
    {
        return $this->factory->getEmbeddingModel();
    }

    /**
     * Access the Embeddings interface
     *
     * The object is created on first access and reused afterwards.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if ($this->embeddings instanceof Embeddings) {
            return $this->embeddings;
        }

        $this->embeddings = new Embeddings(
            $this->getChatModel(),
            $this->getEmbeddingModel(),
            $this->getStorage(),
            $this->conf
        );
        if ($this->logger) {
            $this->embeddings->setLogger($this->logger);
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The storage class is derived from the 'storage' config setting. The object is
     * created on first access and reused afterwards.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if ($this->storage instanceof AbstractStorage) {
            return $this->storage;
        }

        $class = '\\dokuwiki\\plugin\\aichat\\Storage\\' . $this->getConf('storage') . 'Storage';
        $this->storage = new $class($this->conf);

        if ($this->logger) {
            $this->storage->setLogger($this->logger);
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * When a history is given and rephrasing is enabled (rephraseHistory > 0), the question
     * is first turned into a standalone question before it is answered.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if ($history && $this->getConf('rephraseHistory') > 0) {
            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
        } else {
            $standaloneQuestion = $question;
        }
        return $this->askQuestion($standaloneQuestion, $history);
    }

    /**
     * Ask a single standalone question
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $history = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            // each chunk is passed as its own fenced block
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
                'question' => $question,
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer', [
                'question' => $question,
            ]);
            // no matching sources were found, so the history is not sent along either
            $history = [];
        }

        $messages = $this->prepareMessages(
            $this->getChatModel(),
            $prompt,
            $history,
            (int)$this->getConf('chatHistory') // config values are not guaranteed to be integers
        );
        $answer = $this->getChatModel()->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        $prompt = $this->getPrompt('rephrase', [
            'question' => $question,
        ]);
        $messages = $this->prepareMessages(
            $this->getRephraseModel(),
            $prompt,
            $history,
            (int)$this->getConf('rephraseHistory') // config values are not guaranteed to be integers
        );
        return $this->getRephraseModel()->getAnswer($messages);
    }

    /**
     * Prepare the messages for the AI
     *
     * @param ChatInterface $model The used model
     * @param string $promptedQuestion The user question embedded in a prompt
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $historySize The maximum number of history entries (question/answer pairs) to use
     * @return array An OpenAI compatible array of messages
     */
    protected function prepareMessages(
        ChatInterface $model,
        string $promptedQuestion,
        array $history,
        int $historySize
    ): array {
        // calculate the space for context
        $remainingContext = $model->getMaxInputTokenLength();
        $remainingContext -= $this->countTokens($promptedQuestion);
        $safetyMargin = $remainingContext * 0.05; // 5% safety margin
        // truncate explicitly: passing a fractional float to an int parameter is deprecated since PHP 8.1
        $remainingContext = (int)($remainingContext - $safetyMargin);
        // FIXME we may want to also have an upper limit for the history and not always use the full context

        $messages = $this->historyMessages($history, $remainingContext, $historySize);
        $messages[] = [
            'role' => 'user',
            'content' => $promptedQuestion
        ];
        return $messages;
    }

    /**
     * Create an array of OpenAI compatible messages from the given history
     *
     * Only as many messages are used as fit into the token limit
     *
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $tokenLimit The maximum number of tokens to use
     * @param int $sizeLimit The maximum number of history entries (question/answer pairs) to use
     * @return array
     */
    protected function historyMessages(array $history, int $tokenLimit, int $sizeLimit): array
    {
        $remainingContext = $tokenLimit;

        $messages = [];
        // walk the history from the newest entry backwards, so the most recent entries are kept
        $history = array_reverse($history);
        $history = array_slice($history, 0, $sizeLimit);
        foreach ($history as $row) {
            $length = $this->countTokens($row[0] . $row[1]);
            if ($length > $remainingContext) {
                break;
            }
            $remainingContext -= $length;

            // added in reverse (answer before question), the final array_reverse restores the order
            $messages[] = [
                'role' => 'assistant',
                'content' => $row[1]
            ];
            $messages[] = [
                'role' => 'user',
                'content' => $row[0]
            ];
        }
        return array_reverse($messages);
    }

    /**
     * Get an approximation of the token count for the given text
     *
     * @param string $text
     * @return int
     */
    protected function countTokens($text)
    {
        return count($this->getEmbeddings()->getTokenEncoder()->encode($text));
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * Each variable replaces the upper cased {{KEY}} placeholder in the template. The
     * 'language' variable is always set from getLanguagePrompt().
     *
     * @param string $type
     * @param string[] $vars
     * @return string
     */
    protected function getPrompt($type, $vars = [])
    {
        $template = file_get_contents($this->localFN($type, 'prompt'));
        $vars['language'] = $this->getLanguagePrompt();

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * If the preferUIlanguage setting is above AIChat::LANG_AUTO_ALL and the name of the
     * configured wiki language is known, answers are forced to that language. Otherwise the
     * model is told to follow the user's language, with the wiki language (or English when
     * its name is unknown) as fallback.
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;
        $isoLangnames = include(__DIR__ . '/lang/languages.php');

        $currentLang = $isoLangnames[$conf['lang']] ?? 'English';

        if ($this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
            if (isset($isoLangnames[$conf['lang']])) {
                return 'Always answer in ' . $isoLangnames[$conf['lang']] . '.';
            }
        }

        return 'Always answer in the user\'s language. ' .
            "If you are unsure about the language, speak $currentLang.";
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        } else {
            return '';
        }
    }

    /**
     * Store info about the last run
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
    }

    /**
     * Get info about the last run
     *
     * @return array The stored data, empty if there is none or it can not be read
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        // an unreadable file or invalid JSON must not leak false/null to callers expecting an array
        $data = json_decode((string)file_get_contents($this->runDataFile), true);
        return is_array($data) ? $data : [];
    }
}