Skip to content
Snippets Groups Projects
Commit 8c1d02ba authored by Nia Kathoni's avatar Nia Kathoni Committed by Daniel Cothran
Browse files

Issue #3494340 by nikathone: Support grouping by and aggregating on same column

parent 2ee2ea40
Branches
Tags
1 merge request: !21 Support grouping by and aggregation on same column
Pipeline #382313 passed with warnings
......@@ -2,6 +2,7 @@
namespace Drupal\views_csv_source\Plugin\views;
use Drupal\Component\Utility\Html;
use Drupal\views_csv_source\Plugin\views\query\ViewsCsvQuery;
/**
......@@ -52,4 +53,19 @@ trait ColumnSelectorTrait {
return $form;
}
/**
 * Builds the query alias for this handler's CSV column.
 *
 * The alias combines the lower-cased column key, passed through
 * Html::cleanCssIdentifier() with a filter that maps spaces and hyphens to
 * underscores, with the handler ID, separated by three underscores. Including
 * the handler ID lets the same column be added more than once under distinct
 * aliases.
 *
 * @return string
 *   The column alias.
 */
public function getColumnAlias(): string {
  // Custom filter handed to cleanCssIdentifier(): spaces and hyphens become
  // underscores, existing underscores are kept as they are.
  $replacements = [
    ' ' => '_',
    '_' => '_',
    '-' => '_',
  ];
  $lowercase_key = strtolower($this->options['key']);
  $clean_key = Html::cleanCssIdentifier($lowercase_key, $replacements);

  return $clean_key . '___' . $this->options['id'];
}
}
......@@ -74,7 +74,7 @@ class ViewsCsvField extends FieldPluginBase {
$this->field_alias = $this->query->addField(
$this->tableAlias,
$this->options['key'],
'',
$this->getColumnAlias(),
$this->options + $params,
);
}
......
......@@ -50,21 +50,21 @@ class ViewsCsvQuery extends QueryPluginBase {
*
* @var array
*/
public $fields = [];
public array $fields = [];
/**
* A simple array of order by clauses.
*
* @var array
*/
public $orderby = [];
public array $orderby = [];
/**
* A simple array of group by clauses.
*
* @var array
*/
public $groupby = [];
public array $groupby = [];
/**
* Not actually used.
......@@ -185,7 +185,7 @@ class ViewsCsvQuery extends QueryPluginBase {
$query->addTag($tag);
}
// Assemble the groupby clause, if any.
// Assemble the group by clause, if any.
$this->hasAggregate = FALSE;
$non_aggregates = $this->getNonAggregates();
if (!$this->hasAggregate) {
......@@ -200,10 +200,10 @@ class ViewsCsvQuery extends QueryPluginBase {
// Add all fields to the query.
$this->compileFields($query);
// Add group by.
// Add a group by.
if ($group_by) {
foreach ($group_by as $field) {
$query->groupBy($field);
foreach ($group_by as $alias => $field) {
$query->groupBy($field, $alias);
}
}
......@@ -292,8 +292,9 @@ class ViewsCsvQuery extends QueryPluginBase {
$records = $query->execute();
$result = [];
$index = 0;
$column_aliases = $this->getColumnAliasesAsKeyValues();
foreach ($records as $row) {
$new_row = new ResultRow($row);
$new_row = $this->buildRowFromResultRow($row, $column_aliases);
$new_row->index = $index++;
$result[] = $new_row;
}
......@@ -420,18 +421,18 @@ class ViewsCsvQuery extends QueryPluginBase {
* The field.
*/
public function addField(string $table, mixed $field, string $alias = '', array $params = []): mixed {
$alias = $field;
$alias = empty($alias) ? $field : $alias;
// Add field info array.
if (empty($this->fields[$field])) {
$this->fields[$field] = [
// Add a field info array.
if (empty($this->fields[$alias])) {
$this->fields[$alias] = [
'field' => $field,
'table' => $table,
'alias' => $alias,
] + $params;
}
return $field;
return $alias;
}
/**
......@@ -572,7 +573,7 @@ class ViewsCsvQuery extends QueryPluginBase {
$form_state->setValueForElement($element, $uri);
// If getUserEnteredStringAsUri() mapped the entered value to an 'internal:'
// URI , ensure the raw value begins with '/'.
// URI, ensure the raw value begins with '/'.
if (parse_url($uri, PHP_URL_SCHEME) === 'internal' && $element['#value'][0] !== '/') {
$form_state->setError($element, new TranslatableMarkup('Manually entered paths should start with "/".'));
}
......@@ -665,7 +666,7 @@ class ViewsCsvQuery extends QueryPluginBase {
*/
protected function getNonAggregates(): array {
$non_aggregates = [];
foreach ($this->fields as $field) {
foreach ($this->fields as $alias => $field) {
$string = '';
$string .= $field['field'];
$fieldname = $string;
......@@ -678,7 +679,7 @@ class ViewsCsvQuery extends QueryPluginBase {
$this->hasAggregate = TRUE;
}
elseif (empty($field['aggregate'])) {
$non_aggregates[] = $fieldname;
$non_aggregates[$alias] = $fieldname;
}
}
return $non_aggregates;
......@@ -700,12 +701,12 @@ class ViewsCsvQuery extends QueryPluginBase {
if (!empty($field['function'])) {
$info = $this->getAggregationInfo();
if (!empty($info[$field['function']]['method'])) {
$query->addField($field['field'], $field['function']);
$query->addField($field['field'], $field['function'], $field['alias']);
}
$this->hasAggregate = TRUE;
}
elseif (empty($field['aggregate'])) {
$query->addField($field['field']);
$query->addField($field['field'], '', $field['alias']);
}
if ($this->getCountOptimized) {
......@@ -720,7 +721,7 @@ class ViewsCsvQuery extends QueryPluginBase {
*
* As views has to wrap the conditions from arguments with AND, a special
* group is wrapped around all conditions. This special group has the ID 0.
* There is other code in filters which makes sure that the group IDs are
* There is other code in filters that makes sure that the group IDs are
* higher than zero.
*
* @param string $where
......@@ -808,4 +809,37 @@ class ViewsCsvQuery extends QueryPluginBase {
return $query_options;
}
/**
 * Builds a views result row keyed by column alias.
 *
 * For every alias the value stored under that alias in the record is used
 * when the record has such a key; otherwise the value stored under the
 * source column name is used.
 *
 * @param array $row
 *   A record returned by the executed CSV query.
 * @param array $column_aliases
 *   Source column names keyed by their column alias.
 *
 * @return \Drupal\views\ResultRow
 *   The result row holding one value per alias.
 */
protected function buildRowFromResultRow(array $row, array $column_aliases): ResultRow {
  $values = [];
  foreach ($column_aliases as $alias => $column) {
    // Test for the key rather than using empty(): aggregated values such as
    // 0 or '0' are legitimate and must not trigger the column fallback. The
    // fallback itself is null-coalesced so a record that has neither key
    // yields NULL instead of an "undefined array key" warning.
    $values[$alias] = array_key_exists($alias, $row)
      ? $row[$alias]
      : ($row[$column] ?? NULL);
  }

  return new ResultRow($values);
}
/**
 * Maps every registered field alias to its source column.
 *
 * @return array
 *   The 'field' value of each entry in $this->fields, keyed by the same key
 *   that entry has in $this->fields.
 */
protected function getColumnAliasesAsKeyValues(): array {
  $columns_by_alias = [];
  foreach ($this->fields as $alias => $definition) {
    $columns_by_alias[$alias] = $definition['field'];
  }

  return $columns_by_alias;
}
}
......@@ -227,7 +227,8 @@ class Connection {
try {
$csv = Reader::createFromString($this->fetchContent($uri, $options));
$this->csvHeader[$uri] = $csv->nth(0) ?? [];
$headers = $csv->nth(0) ?? [];
$this->csvHeader[$uri] = $headers ? array_filter($headers) : [];
return $this->csvHeader[$uri];
}
catch (\Exception $e) {
......
......@@ -51,6 +51,11 @@ class Select {
*/
protected array $groupedAndAggregatedRecords = [];
/**
* The selected column headers.
*/
protected array $selectedColumnHeaders = [];
/**
* Flag for whether execute() was already called for this query.
*
......@@ -159,16 +164,29 @@ class Select {
* The field to add.
* @param string $function
* The function to apply to the field.
* @param string $alias
* The field alias.
*
* @return $this
* The current object.
*/
public function addField(string $field, string $function = ''): static {
public function addField(string $field, string $function = '', string $alias = ''): static {
$alias = $alias ?: $field;
$column = new \stdClass();
$column->name = $field;
$column->function = $function;
$column->alias = $alias;
$this->addColumn($column);
$this->removeConditionColumn($column->name);
// Remove the added field from a condition and orderBy columns array if it
// was added in "condition_order_by_columns" object item.
$this->removeConditionOrderByColumn($column->name);
// Ensure that a field with group_by function is also added to the group by
// columns.
if ($function === 'group_by') {
$this->groupBy($field, $alias);
}
return $this;
}
......@@ -177,14 +195,17 @@ class Select {
*
* @param string $field
* The field to group by.
* @param string $alias
* The field alias.
*
* @return $this
* The current object.
*/
public function groupBy(string $field): static {
public function groupBy(string $field, string $alias = ''): static {
$group_by = $this->getObjectItem('group_by');
if (!in_array($field, $group_by)) {
$group_by[] = $field;
$alias = $alias ?: $field;
if (!isset($group_by[$alias])) {
$group_by[$alias] = $field;
}
return $this->setObjectItem('group_by', $group_by);
}
......@@ -206,6 +227,11 @@ class Select {
$order_by->column = $column;
$order_by->direction = $direction;
$data[] = $order_by;
// Ensure that the orderBy column is added to columns so that it can be
// selected when choosing the columns to work with.
$this->addConditionOrderByColumn($column);
return $this->setObjectItem('order_by', $data);
}
......@@ -401,10 +427,12 @@ class Select {
* If the CSV reader cannot be built.
*/
private function getRecords(): array|\Iterator {
$field_keys = array_keys($this->getSelectedFields());
$field_keys = array_unique(
array_map(fn($field) => $field['name'], $this->getSelectedFields())
);
// Add the condition columns as part of fields so that filters can be
// applied on them.
$field_keys = array_merge($field_keys, $this->getConditionColumns());
$field_keys = array_merge(array_values($field_keys), $this->getConditionOrderByColumns());
if (!$field_keys) {
return [];
}
......@@ -416,18 +444,36 @@ class Select {
}
$csv = Reader::createFromString($csv_content);
// Process CSV Headers.
// @todo maybe find a way to remove duplicates? Right now only removing
// empty columns.
$all_headers = $csv->nth(0);
if (!$all_headers) {
return [];
}
$this->selectedColumnHeaders = array_intersect(array_filter($all_headers), $field_keys);
$csv->mapHeader($this->selectedColumnHeaders);
// Initialize other reader options.
$csv = $this->initializeCsvOptions($csv);
$stmt = Statement::create();
// 1. Adding filter clause(s) to the statement.
$stmt = $this->applyFilters($stmt);
// Only select the necessary fields.
$csv = $csv->select(...$field_keys);
// 2. Apply grouping here.
// 2. Apply Sort.
if (!$this->isCountQuery()) {
// Only applying sorting for a non-counting query.
$stmt = $this->applyOrderBy($stmt);
}
// 3. Apply grouping here.
if ($group_by = $this->getObjectItem('group_by')) {
// Processing filtered csv to reduce the number of records to be grouped
// if necessary.
$result_set = $stmt->process($csv);
$result_set = $stmt->process($csv, $this->selectedColumnHeaders);
if ($result_set->count() === 0) {
return [];
}
......@@ -438,27 +484,20 @@ class Select {
$result_set = $this->buildFilteredResultSet($result_set->getHeader(), $result_set->getRecords());
// Applying a group by and aggregation callback.
$selected_aggregation_fields = array_filter($this->getSelectedFields(), fn(string $function) => $function && $function !== 'group_by');
$result_set = $this->applyGroupBy($result_set, $group_by, $selected_aggregation_fields);
$result_set = $this->applyGroupBy($result_set, $group_by, $this->getSelectedAggregatedFields());
// Initialize a new statement that doesn't have any of the filtering
// closures.
$group_by_stmt = Statement::create();
// Process the grouping.
$group_by_stmt->process($result_set);
$group_by_stmt->process($result_set, $this->selectedColumnHeaders);
}
// 3. Flatten the group by and aggregation array to build a new csv reader.
// 4. Flatten the group by and aggregation array to build a new csv reader.
if ($this->groupedAndAggregatedRecords) {
$csv = $this->buildCsvReader();
}
// 4. Apply Sort.
if (!$this->isCountQuery()) {
// Only applying sorting for a non-counting query.
$stmt = $this->applyOrderBy($stmt);
}
// 5. Apply range.
$offset = $this->getOffset();
$limit = $this->getLimit();
......@@ -466,7 +505,7 @@ class Select {
$stmt = $stmt->limit($limit)->offset($offset);
}
$result_set = $stmt->process($csv);
$result_set = $stmt->process($csv, $this->selectedColumnHeaders);
$this->records = $result_set->getRecords();
return $this->records;
}
......@@ -566,7 +605,7 @@ class Select {
$fields = $this->getObjectItem('fields');
$field_added = FALSE;
foreach ($fields as &$existing_field) {
if ($existing_field->name === $column->name && !$existing_field->function !== $column->function) {
if ($existing_field->alias === $column->alias && !$existing_field->function !== $column->function) {
$existing_field->function = $column->function;
$field_added = TRUE;
break;
......@@ -588,19 +627,20 @@ class Select {
* @return $this
* The current object.
*/
protected function addConditionColumn(string $column): static {
protected function addConditionOrderByColumn(string $column): static {
// Only adding the column to condition fields if it's not part of selected
// fields.
$selected_fields = $this->getSelectedFields();
if (isset($selected_fields[$column])) {
return $this;
foreach ($this->getSelectedFields() as $field) {
if ($column === $field['name']) {
return $this;
}
}
$columns = $this->getConditionColumns();
$columns = $this->getConditionOrderByColumns();
if (!in_array($column, $columns)) {
$columns[] = $column;
}
return $this->setObjectItem('condition_columns', $columns);
return $this->setObjectItem('condition_order_by_columns', $columns);
}
/**
......@@ -612,19 +652,19 @@ class Select {
* @return $this
* The current object.
*/
protected function removeConditionOrderByColumn(string $column): static {
  $columns = $this->getConditionOrderByColumns();

  // array_search() returns the matching key, which is 0 for the first entry.
  // Compare against FALSE explicitly so that the first stored column can be
  // removed too, and search strictly so only an identical name matches.
  $column_index = array_search($column, $columns, TRUE);
  if ($column_index !== FALSE) {
    unset($columns[$column_index]);
  }

  // Re-index so the stored list stays sequential after a removal.
  return $this->setObjectItem('condition_order_by_columns', array_values($columns));
}
/**
* Gets the current stored condition columns.
*/
protected function getConditionColumns(): array {
return $this->getObjectItem('condition_columns');
protected function getConditionOrderByColumns(): array {
return $this->getObjectItem('condition_order_by_columns');
}
/**
......@@ -681,7 +721,7 @@ class Select {
}
}
else {
$this->addConditionColumn($column);
$this->addConditionOrderByColumn($column);
$new_condition['column'] = $column;
$new_condition['value'] = $condition['value'];
$new_condition['operator'] = $condition['operator'];
......@@ -789,11 +829,9 @@ class Select {
* The values of the grouped columns.
*/
protected static function extractGroupedColumnsValuesFromRecord(array $record, array $grouped_columns): array {
$values = [];
foreach ($grouped_columns as $column) {
$values[] = $record[$column];
}
return $values;
return array_map(function ($column) use ($record) {
return $record[$column];
}, $grouped_columns);
}
/**
......@@ -806,7 +844,7 @@ class Select {
$fields = [];
foreach ($this->getObjectItem('fields') as $field) {
if (!empty($field->name)) {
$fields[$field->name] = $field->function;
$fields[$field->alias] = (array) $field;
}
}
return $fields;
......@@ -819,7 +857,7 @@ class Select {
* The CSV reader.
*
* @throws \League\Csv\Exception
* If the CSV reader cannot be built.
* @throws \League\Csv\InvalidArgument
*/
protected function buildCsvReader(): Reader {
$records = [];
......@@ -831,6 +869,8 @@ class Select {
$records[] = array_values($record);
}
$this->selectedColumnHeaders = $headers;
return $this->buildNewCsvFromRecords($headers, $records);
}
......@@ -850,37 +890,40 @@ class Select {
$key = implode('_____', $group_by_values);
// Adding the grouping fields as part of the array before any aggregation.
if (!isset($this->groupedAndAggregatedRecords[$key])) {
$this->groupedAndAggregatedRecords[$key] = array_combine($group_by_columns, $group_by_values);
$group_by_column_aliases = array_keys($group_by_columns);
$this->groupedAndAggregatedRecords[$key] = array_combine($group_by_column_aliases, $group_by_values);
}
// Calculate aggregation if any.
foreach ($aggregated_columns as $column => $function) {
foreach ($aggregated_columns as $column_alias => $field) {
$function = $field['function'];
$column = $field['name'];
if ($function === 'count') {
$counter = $this->groupedAndAggregatedRecords[$key][$column] ?? 0;
$this->groupedAndAggregatedRecords[$key][$column] = $counter + (isset($row[$column]) ? 1 : 0);
$counter = $this->groupedAndAggregatedRecords[$key][$column_alias] ?? 0;
$this->groupedAndAggregatedRecords[$key][$column_alias] = $counter + (isset($row[$column]) ? 1 : 0);
}
elseif ($function === 'sum' && is_numeric($row[$column])) {
$sum = $this->groupedAndAggregatedRecords[$key][$column] ?? 0;
$this->groupedAndAggregatedRecords[$key][$column] = $sum + $row[$column];
$sum = $this->groupedAndAggregatedRecords[$key][$column_alias] ?? 0;
$this->groupedAndAggregatedRecords[$key][$column_alias] = $sum + $row[$column];
}
elseif ($function === 'min' && is_numeric($row[$column])) {
$min = $this->groupedAndAggregatedRecords[$key][$column] ?? $row[$column];
$this->groupedAndAggregatedRecords[$key][$column] = min($min, $row[$column]);
$min = $this->groupedAndAggregatedRecords[$key][$column_alias] ?? $row[$column];
$this->groupedAndAggregatedRecords[$key][$column_alias] = min($min, $row[$column]);
}
elseif ($function === 'max' && is_numeric($row[$column])) {
$max = $this->groupedAndAggregatedRecords[$key][$column] ?? $row[$column];
$this->groupedAndAggregatedRecords[$key][$column] = max($max, $row[$column]);
$max = $this->groupedAndAggregatedRecords[$key][$column_alias] ?? $row[$column];
$this->groupedAndAggregatedRecords[$key][$column_alias] = max($max, $row[$column]);
}
elseif ($function === 'avg' && is_numeric($row[$column])) {
$index_sum = $column . '_sum';
$index_count = $column . '_count';
$index_sum = $column_alias . '_sum';
$index_count = $column_alias . '_count';
$sum = $this->groupedAndAggregatedRecords[$key][$index_sum] ?? 0;
$count = $this->groupedAndAggregatedRecords[$key][$index_count] ?? 0;
$sum += $row[$column];
$count += 1;
$this->groupedAndAggregatedRecords[$key][$index_sum] = $sum;
$this->groupedAndAggregatedRecords[$key][$index_count] = $count;
$this->groupedAndAggregatedRecords[$key][$column] = $sum / $count;
$this->groupedAndAggregatedRecords[$key][$column_alias] = $sum / $count;
}
}
}
......@@ -921,7 +964,9 @@ class Select {
* The result set.
*
* @throws \League\Csv\Exception
* @throws \League\Csv\InvalidArgument
* @throws \League\Csv\SyntaxError
* @throws \ReflectionException
*/
protected function buildFilteredResultSet(array $header, \Iterator $records): ResultSet {
$csv = $this->buildNewCsvFromRecords($header, $records);
......@@ -990,4 +1035,20 @@ class Select {
return $checked;
}
/**
 * Collects the selected fields that carry an aggregation function.
 *
 * Fields with an empty function and fields whose function is 'group_by' are
 * left out.
 *
 * @return array
 *   The matching field definitions keyed by their 'alias' value.
 */
protected function getSelectedAggregatedFields(): array {
  $aggregated = [];
  foreach ($this->getSelectedFields() as $definition) {
    // Skip plain selections and pure grouping columns.
    if (empty($definition['function']) || $definition['function'] === 'group_by') {
      continue;
    }
    $aggregated[$definition['alias']] = $definition;
  }

  return $aggregated;
}
}
Geography name,Occupation Name,Total count,Keywords,,,
Alabama,Counselors,34,Counselors,,,
Alabama,Massage Therapists,67,Therapists,,,
Alaska,Dental Hygienists,8989,Hygienists,,,
Alaska,Emergency Medical Technicians and Paramedics,56,"Technicians, Paramedics",,,
Hawaii,Health Practitioner Support Technologists and Technicians,543,"Technologists, Technicians",,,
Illinois,Dental Assistants,76,Assistants,,,
Kansas,Emergency Medical Technicians and Paramedics,76,"Technicians, Paramedics",,,
......@@ -202,10 +202,30 @@ class SelectTest extends UnitTestCase {
$records = $select->execute();
$records = array_values(iterator_to_array($records));
$this->assertCount(7, $records);
$this->assertCount(5, $records);
$this->assertEquals(['Geography name' => $expected_first], $records[0]);
$this->assertEquals(['Geography name' => $expected_last], $records[6]);
$this->assertEquals(['Geography name' => $expected_last], $records[4]);
}
/**
 * Tests ordering by a column that was not added as a field.
 *
 * @covers ::orderBy
 * @covers ::execute
 * @covers ::applyOrderBy
 * @covers ::getRecords
 */
public function testOrderByColumnNotAddedAsField() {
  $query = $this->getSelectQuery()
    ->addField('Geography name')
    ->orderBy('Occupation Name', Select::DIRECTION_DESC);

  $rows = array_values(iterator_to_array($query->execute()));

  // The order-by column is expected in the record although only
  // 'Geography name' was added as a field.
  $expected_first_row = [
    'Geography name' => 'Alabama',
    'Occupation Name' => 'Massage Therapists',
  ];
  $this->assertEquals($expected_first_row, $rows[0]);
}
/**
......@@ -297,8 +317,8 @@ class SelectTest extends UnitTestCase {
'Total count' => '543',
],
[
'Geography name' => 'Alabama',
'Total count' => '101',
'Geography name' => 'Illinois',
'Total count' => '76',
],
], $records);
}
......@@ -342,6 +362,55 @@ class SelectTest extends UnitTestCase {
], $records);
}
/**
 * Tests grouping by and counting the same column through distinct aliases.
 *
 * @covers ::addField
 * @covers ::groupBy
 * @covers ::getRecords
 * @covers ::applyOrderBy
 *
 * @throws \League\Csv\Exception
 */
public function testGroupByAndAggregationOnSameColumnWithAlias() {
  $query = $this->getSelectQuery()
    ->addField('Geography name', 'count', 'geography_name_count')
    ->addField('Geography name', 'group_by', 'geography_name_value')
    ->orderBy('Geography name', Select::DIRECTION_DESC);

  $rows = array_values(iterator_to_array($query->execute()));

  $this->assertCount(5, $rows);

  // With descending order on the grouped column the first record is the
  // Kansas group; both aliases must be present in it.
  $expected_first_row = [
    'geography_name_value' => 'Kansas',
    'geography_name_count' => '1',
  ];
  $this->assertEquals($expected_first_row, $rows[0]);
}
/**
 * Tests selecting and grouping on a CSV that has empty header columns.
 *
 * @covers ::getRecords
 * @covers ::getSelectedAggregatedFields
 *
 * @throws \League\Csv\Exception
 */
public function testCsvWithEmptyColumns() {
  $this->csvUri = $this->retrieveResource('/views_csv_data_with_empty_columns_test.csv');

  try {
    // Testing regular field select.
    $select = $this->getSelectQuery()
      ->addField('Geography name')
      ->addField('Occupation Name');
    $records = $select->execute();
    $this->assertCount(7, array_values(iterator_to_array($records)));

    // Testing if group by is also still working because the csv is rebuilt.
    $select = $this->getSelectQuery()
      ->addField('Geography name', 'group_by');
    $records = $select->execute();
    $this->assertCount(5, array_values(iterator_to_array($records)));
  }
  finally {
    // Restore the proper csv uri even when an assertion above fails, so the
    // special fixture cannot leak into whatever reads $this->csvUri next.
    $this->csvUri = $this->retrieveResource();
  }
}
/**
* Data provider for ::testExecuteWithFieldAndCondition().
*/
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment