Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
A
ai
Manage
Activity
Members
Labels
Plan
Wiki
Custom issue tracker
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Model registry
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
project
ai
Merge requests
!148
Chunk checker on index page
Code
Review changes
Check out branch
Download
Patches
Plain diff
Merged
Chunk checker on index page
issue/ai-3479229:3479229-add-chunker-checker
into
1.0.x
Overview
0
Commits
3
Pipelines
5
Changes
4
Merged
Scott Euser
requested to merge
issue/ai-3479229:3479229-add-chunker-checker
into
1.0.x
7 months ago
Overview
0
Commits
3
Pipelines
5
Changes
4
Expand
Closes
#3479229
0
0
Merge request reports
Compare
1.0.x
version 3
babb0259
7 months ago
version 2
2edcb012
7 months ago
version 1
ac32ac64
7 months ago
1.0.x (base)
and
latest version
latest version
babb0259
3 commits,
7 months ago
version 3
babb0259
3 commits,
7 months ago
version 2
2edcb012
2 commits,
7 months ago
version 1
ac32ac64
1 commit,
7 months ago
4 files
+
227
−
2
Inline
Compare changes
Side-by-side
Inline
Show whitespace changes
Show one file at a time
Files
4
Search (e.g. *.vue) (Ctrl+P)
modules/ai_search/src/Form/AiSearchIndexFieldsForm.php
+
223
−
0
Options
@@ -2,10 +2,15 @@
namespace
Drupal\ai_search\Form
;
use
Drupal\Core\Entity\EntityInterface
;
use
Drupal\Core\Form\FormStateInterface
;
use
Drupal\Core\TypedData\ComplexDataInterface
;
use
Drupal\ai\Enum\EmbeddingStrategyCapability
;
use
Drupal\ai\Enum\EmbeddingStrategyIndexingOptions
;
use
Drupal\search_api
\Datasource\DatasourceInterface
;
use
Drupal\search_api
\Form\IndexFieldsForm
;
use
Drupal\search_api
\Item\ItemInterface
;
use
League\CommonMark\CommonMarkConverter
;
/**
* Override the Search API Index Fields Form.
@@ -153,9 +158,227 @@ class AiSearchIndexFieldsForm extends IndexFieldsForm {
}
}
// Chunk checker form.
if
(
$data_sources
=
$this
->
entity
->
getDatasources
())
{
$form
[
'checker'
]
=
[
'#type'
=>
'details'
,
'#title'
=>
$this
->
t
(
'Check configured chunking'
),
'#description'
=>
$this
->
t
(
'Resave your configuration then use this form to check the chunking of a specific item.'
),
'#open'
=>
FALSE
,
'#attributes'
=>
[
'id'
=>
'checker-wrapper'
],
];
// Entity type.
$current_type
=
FALSE
;
$current_data_source
=
FALSE
;
$current_bundles
=
[];
$form
[
'checker'
][
'data_source'
]
=
[
'#title'
=>
$this
->
t
(
'Data source'
),
'#type'
=>
'select'
,
'#options'
=>
[],
'#ajax'
=>
[
'callback'
=>
[
$this
,
'updateChecker'
],
'event'
=>
'change'
,
'method'
=>
'replaceWith'
,
'wrapper'
=>
'checker-wrapper'
,
],
];
foreach
(
$data_sources
as
$key
=>
$data_source
)
{
if
(
!
isset
(
$form
[
'checker'
][
'data_source'
][
'#default_value'
])
||
$form_state
->
getValue
([
'checker'
,
'data_source'
])
===
$key
)
{
$form
[
'checker'
][
'data_source'
][
'#default_value'
]
=
$key
;
$current_type_parts
=
explode
(
':'
,
$key
);
$current_type
=
end
(
$current_type_parts
);
$current_data_source
=
$data_source
;
$configuration
=
$data_source
->
getConfiguration
();
// Ignore static Drupal Service call: we do this to make it easier to
// keep this compatible with Search API as changes are expected here.
// @phpstan-ignore-next-line
$all_bundles
=
array_keys
(
\Drupal
::
service
(
'entity_type.bundle.info'
)
->
getBundleInfo
(
$current_type
));
if
(
$configuration
[
'bundles'
][
'default'
])
{
// All selections are exclusions.
$current_bundles
=
$all_bundles
;
if
(
!
empty
(
$configuration
[
'bundles'
][
'selected'
]))
{
$exclude_bundles
=
array_values
(
$configuration
[
'bundles'
][
'selected'
]);
$current_bundles
=
array_diff
(
$all_bundles
,
$exclude_bundles
);
}
}
else
{
// All selections are inclusions.
if
(
!
empty
(
$configuration
[
'bundles'
][
'selected'
]))
{
$current_bundles
=
array_values
(
$configuration
[
'bundles'
][
'selected'
]);
}
}
}
$form
[
'checker'
][
'data_source'
][
'#options'
][
$key
]
=
$data_source
->
label
();
}
if
(
$current_type
&&
$current_bundles
)
{
$form
[
'checker'
][
'entity'
]
=
[
'#type'
=>
'entity_autocomplete'
,
'#title'
=>
$this
->
t
(
'Search for an item by title'
),
'#target_type'
=>
$current_type
,
'#selection_handler'
=>
'default'
,
'#selection_settings'
=>
[
'target_bundles'
=>
$current_bundles
,
],
'#ajax'
=>
[
'callback'
=>
[
$this
,
'updateChecker'
],
'event'
=>
'autocompleteclose'
,
'method'
=>
'replaceWith'
,
'wrapper'
=>
'checker-wrapper'
,
],
];
$entity_id
=
$form_state
->
getValue
([
'checker'
,
'entity'
]);
if
(
$entity_id
)
{
$form
[
'checker'
][
'#open'
]
=
TRUE
;
$check_entity
=
$this
->
entityTypeManager
->
getStorage
(
$current_type
)
->
load
(
$entity_id
);
if
(
$check_entity
instanceof
EntityInterface
)
{
$embeddings
=
$this
->
getCheckerEmbeddings
(
$current_data_source
,
$check_entity
,
);
$form
[
'checker'
][
'embeddings_count'
]
=
[
'#type'
=>
'html_tag'
,
'#tag'
=>
'h3'
,
'#value'
=>
$this
->
t
(
'Total chunks for this content: @count'
,
[
'@count'
=>
count
(
$embeddings
),
]),
];
foreach
(
array_values
(
$embeddings
)
as
$number
=>
$embedding
)
{
$form
=
$this
->
buildCheckerChunkTable
(
$form
,
$number
,
$embedding
);
}
}
}
}
}
return
$form
;
}
/**
 * Get the embeddings for the given entity.
 *
 * Wraps the entity in a Search API item for this index and hands it to the
 * embedding strategy configured on the backend of the index's server.
 *
 * @param \Drupal\search_api\Datasource\DatasourceInterface $current_data_source
 *   The index data source.
 * @param \Drupal\Core\Entity\EntityInterface $check_entity
 *   The entity to check.
 *
 * @return array
 *   The embeddings. Empty when the index has no server, when no embedding
 *   strategy is configured on the backend, or when the entity could not be
 *   turned into a Search API item.
 */
protected function getCheckerEmbeddings(
  DatasourceInterface $current_data_source,
  EntityInterface $check_entity,
): array {
  // An index is not guaranteed to be attached to a server; without one there
  // is no backend configuration to chunk with.
  $server = $this->entity->getServerInstance();
  if ($server === NULL) {
    return [];
  }
  $backend_config = $server->getBackendConfig();

  // Asking the plugin manager for an empty plugin ID would fail, so treat an
  // unconfigured strategy as "nothing to show".
  if (empty($backend_config['embedding_strategy'])) {
    return [];
  }

  // Ignore static Drupal Service call: we do this to make it easier to keep
  // this compatible with Search API as changes are expected here.
  /** @var \Drupal\ai_search\EmbeddingStrategyPluginManager $embedding_strategy_provider */
  // @phpstan-ignore-next-line
  $embedding_strategy_provider = \Drupal::service('ai_search.embedding_strategy');
  /** @var \Drupal\ai_search\EmbeddingStrategyInterface $embedding_strategy */
  $embedding_strategy = $embedding_strategy_provider->createInstance($backend_config['embedding_strategy']);

  // Resolve the entity to the item the data source would hand to the index.
  $item_id = $current_data_source->getItemId($check_entity->getTypedData());
  $item = $current_data_source->load($item_id);
  if (!$item instanceof ComplexDataInterface) {
    return [];
  }

  // @phpstan-ignore-next-line
  $search_item = \Drupal::getContainer()
    ->get('search_api.fields_helper')
    ->createItemFromObject($this->entity, $item, $item_id, $current_data_source);
  if (!$search_item instanceof ItemInterface) {
    return [];
  }

  return $embedding_strategy->getEmbedding(
    $backend_config['embeddings_engine'],
    $backend_config['chat_model'],
    $backend_config['embedding_strategy_configuration'],
    $search_item->getFields(),
    $search_item,
    $this->entity,
  );
}
/**
 * Add a two-column table describing one embedding chunk to the checker.
 *
 * @param array $form
 *   The form being built.
 * @param int $number
 *   The chunk number; used in the element key and in the ID row label.
 * @param array $embedding
 *   The embedding chunk. Its 'id', 'values' and 'metadata' entries are read.
 *
 * @return array
 *   The form with an 'embeddings_<number>' table added under 'checker'.
 */
protected function buildCheckerChunkTable(array $form, int $number, array $embedding): array {
  // Fixed rows first: the chunk identifier and the size of its vector.
  $rows = [
    [
      'property' => $this->t('ID for chunk @chunk', ['@chunk' => $number]),
      'content' => $embedding['id'],
    ],
    [
      'property' => $this->t('Dimensions'),
      'content' => count($embedding['values']),
    ],
  ];

  // Raw HTML in the chunk text is stripped and unsafe links are disallowed
  // when the Markdown is converted.
  $markdown = new CommonMarkConverter([
    'html_input' => 'strip',
    'allow_unsafe_links' => FALSE,
  ]);

  // One row per metadata entry.
  foreach ($embedding['metadata'] as $name => $value) {
    if (is_array($value)) {
      // Array values are flattened to a comma separated string and labelled.
      $rows[] = [
        'property' => $name,
        'content' => implode(', ', $value) . ' (' . $this->t('Imploded array') . ')',
      ];
      continue;
    }

    // Only the 'content' entry is run through the Markdown converter.
    $markup = $name === 'content' ? $markdown->convert($value) : $value;
    $rows[] = [
      'property' => $name,
      'content' => [
        'data' => [
          '#markup' => $markup,
        ],
      ],
    ];
  }

  $form['checker']['embeddings_' . $number] = [
    '#type' => 'table',
    '#header' => [
      ['data' => $this->t('Property')],
      ['data' => $this->t('Content')],
    ],
    '#rows' => $rows,
    '#empty' => $this->t('No chunks were generated for the given entity.'),
  ];

  return $form;
}
/**
 * AJAX callback to update the checker.
 *
 * @param array $form
 *   The form structure.
 * @param \Drupal\Core\Form\FormStateInterface $form_state
 *   The current form state; not read by this callback.
 *
 * @return array
 *   The 'checker' element of the form, or an empty array when it is not set.
 */
public function updateChecker(array &$form, FormStateInterface $form_state): array {
  if (isset($form['checker'])) {
    return $form['checker'];
  }

  return [];
}
/**
* {@inheritdoc}
*/
Loading