From 3ef8459b4f68a20266f5dce04fee7b71f602060a Mon Sep 17 00:00:00 2001 From: Jonas Cuyvers <jonas.cuyvers@iodigital.com> Date: Fri, 28 Mar 2025 16:35:44 +0100 Subject: [PATCH 1/3] #3516046: Add extra tags for moderation --- .../src/Plugin/EmbeddingStrategy/EmbeddingBase.php | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/modules/ai_search/src/Plugin/EmbeddingStrategy/EmbeddingBase.php b/modules/ai_search/src/Plugin/EmbeddingStrategy/EmbeddingBase.php index 785958758..02563c162 100644 --- a/modules/ai_search/src/Plugin/EmbeddingStrategy/EmbeddingBase.php +++ b/modules/ai_search/src/Plugin/EmbeddingStrategy/EmbeddingBase.php @@ -46,7 +46,7 @@ class EmbeddingBase extends EmbeddingStrategyPluginBase implements EmbeddingStra [$title, $contextual_content, $main_content] = $this->groupFieldData($fields, $index); $chunks = $this->getChunks($title, $main_content, $contextual_content); $metadata = $this->buildBaseMetadata($fields, $index); - $raw_embeddings = $this->getRawEmbeddings($chunks); + $raw_embeddings = $this->getRawEmbeddings($chunks, $search_api_item); $embeddings = []; foreach ($chunks as $key => $chunk) { if (!isset($raw_embeddings[$key])) { @@ -69,11 +69,13 @@ class EmbeddingBase extends EmbeddingStrategyPluginBase implements EmbeddingStra * * @param array $chunks * The text chunks. + * @param \Drupal\search_api\Item\ItemInterface $item + * The Search API Item * * @return array * The raw embeddings. 
*/ - protected function getRawEmbeddings(array $chunks): array { + protected function getRawEmbeddings(array $chunks, ItemInterface $item): array { $raw_embeddings = []; /** @var \Drupal\ai\OperationType\Embeddings\EmbeddingsInterface $embedding_llm */ @@ -121,7 +123,7 @@ class EmbeddingBase extends EmbeddingStrategyPluginBase implements EmbeddingStra $raw_embeddings[] = $embedding_llm->embeddings( $input, $this->modelId, - ['ai_search'], + ['ai_search', $item->getId()], )->getNormalized(); } } -- GitLab From 302ebc2aee3d31b66078113e4b27194ac5b08e77 Mon Sep 17 00:00:00 2001 From: Jonas Cuyvers <jonas.cuyvers@iodigital.com> Date: Wed, 2 Apr 2025 16:54:50 +0200 Subject: [PATCH 2/3] #3516046: Fix phpcs and phpstan errors --- .../Plugin/EmbeddingStrategy/AveragePoolEmbeddingStrategy.php | 2 +- .../ai_search/src/Plugin/EmbeddingStrategy/EmbeddingBase.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/ai_search/src/Plugin/EmbeddingStrategy/AveragePoolEmbeddingStrategy.php b/modules/ai_search/src/Plugin/EmbeddingStrategy/AveragePoolEmbeddingStrategy.php index 16c67feaf..1acfeb537 100644 --- a/modules/ai_search/src/Plugin/EmbeddingStrategy/AveragePoolEmbeddingStrategy.php +++ b/modules/ai_search/src/Plugin/EmbeddingStrategy/AveragePoolEmbeddingStrategy.php @@ -38,7 +38,7 @@ class AveragePoolEmbeddingStrategy extends EmbeddingBase { $chunks = $this->getChunks($title, $main_content, $contextual_content); // Embed and average. - if ($raw_embeddings = $this->getRawEmbeddings($chunks)) { + if ($raw_embeddings = $this->getRawEmbeddings($chunks, $search_api_item)) { $embedding = $this->averagePooling($raw_embeddings); $content = $title . $main_content . 
$contextual_content; $metadata = $this->buildBaseMetadata($fields, $index); diff --git a/modules/ai_search/src/Plugin/EmbeddingStrategy/EmbeddingBase.php b/modules/ai_search/src/Plugin/EmbeddingStrategy/EmbeddingBase.php index 02563c162..c7e00ba39 100644 --- a/modules/ai_search/src/Plugin/EmbeddingStrategy/EmbeddingBase.php +++ b/modules/ai_search/src/Plugin/EmbeddingStrategy/EmbeddingBase.php @@ -70,7 +70,7 @@ class EmbeddingBase extends EmbeddingStrategyPluginBase implements EmbeddingStra * @param array $chunks * The text chunks. * @param \Drupal\search_api\Item\ItemInterface $item - * The Search API Item + * The Search API Item. * * @return array * The raw embeddings. -- GitLab From a2c162725900f18cdace5dc64c8c08adaf18acd2 Mon Sep 17 00:00:00 2001 From: Marcus Johansson <me@marcusmailbox.com> Date: Thu, 15 May 2025 09:50:31 +0200 Subject: [PATCH 3/3] Issue #3516046: Add search item id to tags --- .../AveragePoolEmbeddingStrategy.php | 3 ++- .../EmbeddingStrategy/EmbeddingBase.php | 20 ++++++++++++++----- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/modules/ai_search/src/Plugin/EmbeddingStrategy/AveragePoolEmbeddingStrategy.php b/modules/ai_search/src/Plugin/EmbeddingStrategy/AveragePoolEmbeddingStrategy.php index 1acfeb537..3cbfdd4b2 100644 --- a/modules/ai_search/src/Plugin/EmbeddingStrategy/AveragePoolEmbeddingStrategy.php +++ b/modules/ai_search/src/Plugin/EmbeddingStrategy/AveragePoolEmbeddingStrategy.php @@ -33,12 +33,13 @@ class AveragePoolEmbeddingStrategy extends EmbeddingBase { ItemInterface $search_api_item, IndexInterface $index, ): array { + $this->searchApiItem = $search_api_item; $this->init($embedding_engine, $chat_model, $configuration); [$title, $contextual_content, $main_content] = $this->groupFieldData($fields, $index); $chunks = $this->getChunks($title, $main_content, $contextual_content); // Embed and average. 
- if ($raw_embeddings = $this->getRawEmbeddings($chunks, $search_api_item)) { + if ($raw_embeddings = $this->getRawEmbeddings($chunks)) { $embedding = $this->averagePooling($raw_embeddings); $content = $title . $main_content . $contextual_content; $metadata = $this->buildBaseMetadata($fields, $index); diff --git a/modules/ai_search/src/Plugin/EmbeddingStrategy/EmbeddingBase.php b/modules/ai_search/src/Plugin/EmbeddingStrategy/EmbeddingBase.php index c7e00ba39..8f3f500f5 100644 --- a/modules/ai_search/src/Plugin/EmbeddingStrategy/EmbeddingBase.php +++ b/modules/ai_search/src/Plugin/EmbeddingStrategy/EmbeddingBase.php @@ -31,6 +31,13 @@ class EmbeddingBase extends EmbeddingStrategyPluginBase implements EmbeddingStra */ protected int $contextualContentMaxPercentage = 30; + /** + * The search_api_item. + * + * @var \Drupal\search_api\Item\ItemInterface + */ + protected ItemInterface $searchApiItem; + /** * {@inheritDoc} */ @@ -42,11 +49,12 @@ class EmbeddingBase extends EmbeddingStrategyPluginBase implements EmbeddingStra ItemInterface $search_api_item, IndexInterface $index, ): array { + $this->searchApiItem = $search_api_item; $this->init($embedding_engine, $chat_model, $configuration); [$title, $contextual_content, $main_content] = $this->groupFieldData($fields, $index); $chunks = $this->getChunks($title, $main_content, $contextual_content); $metadata = $this->buildBaseMetadata($fields, $index); - $raw_embeddings = $this->getRawEmbeddings($chunks, $search_api_item); + $raw_embeddings = $this->getRawEmbeddings($chunks); $embeddings = []; foreach ($chunks as $key => $chunk) { if (!isset($raw_embeddings[$key])) { @@ -69,13 +77,11 @@ class EmbeddingBase extends EmbeddingStrategyPluginBase implements EmbeddingStra * * @param array $chunks * The text chunks. - * @param \Drupal\search_api\Item\ItemInterface $item - * The Search API Item. * * @return array * The raw embeddings. 
*/ - protected function getRawEmbeddings(array $chunks, ItemInterface $item): array { + protected function getRawEmbeddings(array $chunks): array { $raw_embeddings = []; /** @var \Drupal\ai\OperationType\Embeddings\EmbeddingsInterface $embedding_llm */ @@ -120,10 +126,14 @@ class EmbeddingBase extends EmbeddingStrategyPluginBase implements EmbeddingStra if ($chunk) { // Normalize the chunk before embedding it. $input = new EmbeddingsInput($chunk); + $tags = ['ai_search']; + if (isset($this->searchApiItem)) { + $tags[] = $this->searchApiItem->getId(); + } $raw_embeddings[] = $embedding_llm->embeddings( $input, $this->modelId, - ['ai_search', $item->getId()], + $tags, )->getNormalized(); } } -- GitLab