Issue #2577417: Add an entity iterator to load entities in chunks
Merge request reports
Activity
84 yield from $this->entityStorage->loadMultiple($ids_chunk); 85 // We clear all memory cache as we want to remove all referenced entities 86 // as well, like for example the owner of an entity. 87 $this->memoryCache->deleteAll(); 88 } 89 } 90 91 /** 92 * Chunks the entity IDs, whether its an array of an iterable. 93 * 94 * @return iterable 95 */ 96 protected function chunkIds() : iterable { 97 // Optimize for the entity IDs be in the form of an array already. 98 if (is_array($this->entityIds)) { 99 foreach (array_chunk(array_values($this->entityIds), $this->chunkSize) as $chunk) { 61 * @param int $chunk_size 62 * The size per chunk we want to load in parallel. 63 */ 64 public function __construct(EntityStorageInterface $entity_storage, MemoryCacheInterface $memory_cache, iterable $ids, $chunk_size = 50) { 65 $this->entityStorage = $entity_storage; 66 $this->memoryCache = $memory_cache; 67 // Make sure we don't use a keyed array. 68 $this->entityIds = $ids; 69 $this->chunkSize = (int) $chunk_size; 70 } 71 72 /** 73 * @inheritdoc 74 */ 75 public function count() { 76 return count($this->entityIds); 6 7 /** 8 * Provides an Iterator class for dealing with large amounts of entities. 9 * 10 * Common usecases for this iterator is in a hook_post_update() hook if you need 11 * to load all entities of a type, or in some command line utility. 12 * 13 * Example: 14 * @code 15 * $iterator = new ChunkedIterator($entity_storage, \Drupal::service('entity.memory_cache'), $all_ids); 16 * foreach ($iterator as $entity) { 17 * // Process the entity 18 * } 19 * @endcode 20 */ 21 class ChunkedIterator implements \IteratorAggregate, \Countable { I don't like using "*Iterator" name for something that itself is not an iterator. To me "Iterator" implies that it implements the
\Iterator
interface. In the past I would have called these something like "ChunkedEntityIA", with IA for "IteratorAggregate"; in fact, I had zoos of different *IA classes to iterate over different things. But I don't like it very much: first because it's an abbreviation, and second because "Aggregate" never felt very explanatory to me.
Perhaps
ChunkedEntitiesIterable
? Just to have a word that is different from iterators.
74 */ 75 public function count() { 76 return count($this->entityIds); 77 } 78 79 /** 80 * @inheritdoc 81 */ 82 public function getIterator() { 83 foreach ($this->chunkIds() as $ids_chunk) { 84 yield from $this->entityStorage->loadMultiple($ids_chunk); 85 // We clear all memory cache as we want to remove all referenced entities 86 // as well, like for example the owner of an entity. 87 $this->memoryCache->deleteAll(); 88 } 89 } - Comment on lines +64 to +89
A more interesting solution to avoid the caching could be to have a version of entity storage that does not cache the entities.
39 public function __construct(EntityStorageInterface $entity_storage, MemoryCacheInterface $memory_cache, iterable $ids, $chunk_size = 50) { 40 $this->entityStorage = $entity_storage; 41 $this->memoryCache = $memory_cache; 42 // Make sure we don't use a keyed array. 43 $this->entityIds = $ids; 44 $this->chunkSize = (int) $chunk_size; 45 } 46 47 /** 48 * @inheritdoc 49 */ 50 public function count() { 51 return count($this->entityIds); 52 } 53 54 /** 55 * @inheritdoc 56 */ 57 public function getIterator() { 58 foreach ($this->chunkIds() as $ids_chunk) { 59 yield from $this->entityStorage->loadMultiple($ids_chunk); 60 // We clear all memory cache as we want to remove all referenced entities 61 // as well, like for example the owner of an entity. 62 $this->memoryCache->deleteAll(); 63 } 64 } 39 public function __construct(EntityStorageInterface $entity_storage, MemoryCacheInterface $memory_cache, iterable $ids, $chunk_size = 50) { 40 // New method in entity storage needed. 41 $this->entityStorage = $entity_storage->withoutCache(); 42 // Make sure we don't use a keyed array. 43 $this->entityIds = $ids; 44 $this->chunkSize = (int) $chunk_size; 45 } 46 47 /** 48 * @inheritdoc 49 */ 50 public function count() { 51 return count($this->entityIds); 52 } 53 54 /** 55 * @inheritdoc 56 */ 57 public function getIterator() { 58 foreach ($this->chunkIds() as $ids_chunk) { 59 yield from $this->entityStorage->loadMultiple($ids_chunk); 60 } 61 } Edited by Andreas Hennings
81 */ 82 public function getIterator() { 83 foreach ($this->chunkIds() as $ids_chunk) { 84 yield from $this->entityStorage->loadMultiple($ids_chunk); 85 // We clear all memory cache as we want to remove all referenced entities 86 // as well, like for example the owner of an entity. 87 $this->memoryCache->deleteAll(); 88 } 89 } 90 91 /** 92 * Chunks the entity IDs, whether its an array of an iterable. 93 * 94 * @return iterable 95 */ 96 protected function chunkIds() : iterable { The logic in this method could be moved to a static method outside the class.
/**
 * Static helpers for working with iterables.
 */
class IteratorUtil {

  /**
   * Splits an iterable into consecutive chunks of at most $chunk_size values.
   *
   * Arrays are handed to array_chunk(). Any other iterable is consumed
   * lazily and the chunks are built up one at a time.
   *
   * @param iterable $it
   *   The values to split into chunks.
   * @param int $chunk_size
   *   Maximum number of values per chunk. Must be at least 1.
   * @param bool $preserve_keys
   *   TRUE to keep the source keys inside each chunk, FALSE to index each
   *   chunk from 0. With TRUE and a non-array source, a key that repeats
   *   within one chunk overwrites the earlier value.
   *
   * @return \Iterator
   *   A generator yielding one array per chunk. The last chunk may hold fewer
   *   than $chunk_size values.
   *
   * @throws \ValueError
   *   If $chunk_size is lower than 1. As this method is a generator, the error
   *   is raised when iteration starts, not when the method is called.
   */
  public static function readChunks(iterable $it, int $chunk_size = 10, bool $preserve_keys = FALSE): \Iterator {
    // array_chunk() already rejects sizes below 1; check up front so that
    // non-array sources fail the same way instead of silently collecting
    // every value into a single unbounded chunk.
    if ($chunk_size < 1) {
      throw new \ValueError('Chunk size must be greater than 0.');
    }

    // Fast path: the source is already an array.
    if (is_array($it)) {
      yield from array_chunk($it, $chunk_size, $preserve_keys);
      return;
    }

    $chunk = [];
    foreach ($it as $k => $v) {
      if ($preserve_keys) {
        // Overwrite existing values - this is a consequence of $preserve_keys.
        $chunk[$k] = $v;
      }
      else {
        $chunk[] = $v;
      }
      if (count($chunk) === $chunk_size) {
        yield $chunk;
        $chunk = [];
      }
    }

    // Emit the trailing, partially filled chunk.
    if ($chunk) {
      yield $chunk;
    }
  }

}
Or alternatively it could be its own class implementing
\IteratorAggregate
// This should really be called just ChunkedIterable, but this already exists in this PR.
/**
 * Exposes an iterable as a sequence of fixed-size chunks.
 *
 * Iterating over an instance yields arrays of at most $chunkSize values taken
 * from the wrapped source, in source order.
 */
class ChunkedValueIterable implements \IteratorAggregate {

  /**
   * Constructs a chunked view of an iterable.
   *
   * @param iterable $source
   *   The values to split into chunks.
   * @param int $chunkSize
   *   Maximum number of values per chunk. Must be at least 1; this is checked
   *   when iteration starts.
   * @param bool $preserveKeys
   *   TRUE to keep the source keys inside each chunk, FALSE to index each
   *   chunk from 0.
   */
  public function __construct(
    private iterable $source,
    private int $chunkSize = 10,
    private bool $preserveKeys = FALSE,
  ) {}

  /**
   * Yields the source values grouped into chunks.
   *
   * @return \Iterator
   *   A generator yielding one array per chunk. The last chunk may hold fewer
   *   than the configured number of values.
   *
   * @throws \ValueError
   *   If the configured chunk size is lower than 1.
   */
  public function getIterator(): \Iterator {
    // array_chunk() already rejects sizes below 1; check up front so that
    // non-array sources fail the same way instead of silently collecting
    // every value into a single unbounded chunk.
    if ($this->chunkSize < 1) {
      throw new \ValueError('Chunk size must be greater than 0.');
    }

    // Fast path: the source is already an array.
    if (is_array($this->source)) {
      yield from array_chunk($this->source, $this->chunkSize, $this->preserveKeys);
      return;
    }

    $chunk = [];
    foreach ($this->source as $k => $v) {
      if ($this->preserveKeys) {
        // Overwrite existing values - this is a consequence of $preserve_keys.
        $chunk[$k] = $v;
      }
      else {
        $chunk[] = $v;
      }
      if (count($chunk) === $this->chunkSize) {
        yield $chunk;
        $chunk = [];
      }
    }

    // Emit the trailing, partially filled chunk.
    if ($chunk) {
      yield $chunk;
    }
  }

}
68 $this->entityIds = $ids; 69 $this->chunkSize = (int) $chunk_size; 70 } 71 72 /** 73 * @inheritdoc 74 */ 75 public function count() { 76 return count($this->entityIds); 77 } 78 79 /** 80 * @inheritdoc 81 */ 82 public function getIterator() { 83 foreach ($this->chunkIds() as $ids_chunk) { 54 * @param \Drupal\Core\Entity\EntityStorageInterface $entity_storage 55 * The entity storage. 56 * @param \Drupal\Core\Cache\MemoryCache\MemoryCacheInterface $memory_cache 57 * The memory cache service. 58 * @param iterable $ids 59 * An iterable of entity IDs. Common examples would be an array or a 60 * generator. 61 * @param int $chunk_size 62 * The size per chunk we want to load in parallel. 63 */ 64 public function __construct(EntityStorageInterface $entity_storage, MemoryCacheInterface $memory_cache, iterable $ids, $chunk_size = 50) { 65 $this->entityStorage = $entity_storage; 66 $this->memoryCache = $memory_cache; 67 // Make sure we don't use a keyed array. 68 $this->entityIds = $ids; 69 $this->chunkSize = (int) $chunk_size; changed this line in version 11 of the diff
49 protected $memoryCache; 50 51 /** 52 * Constructs an entity iterator object. 53 * 54 * @param \Drupal\Core\Entity\EntityStorageInterface $entity_storage 55 * The entity storage. 56 * @param \Drupal\Core\Cache\MemoryCache\MemoryCacheInterface $memory_cache 57 * The memory cache service. 58 * @param iterable $ids 59 * An iterable of entity IDs. Common examples would be an array or a 60 * generator. 61 * @param int $chunk_size 62 * The size per chunk we want to load in parallel. 63 */ 64 public function __construct(EntityStorageInterface $entity_storage, MemoryCacheInterface $memory_cache, iterable $ids, $chunk_size = 50) { changed this line in version 11 of the diff
added 1 commit
added 5669 commits
-
d61b9b5a...f5012758 - 5668 commits from branch
project:11.x
- 23223082 - Merge branch '11.x' into '2577417-add-an-entity'
-
d61b9b5a...f5012758 - 5668 commits from branch