123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194 |
- <?php
- /**
- * CakePHP(tm) : Rapid Development Framework (https://cakephp.org)
- * Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
- *
- * Licensed under The MIT License
- * For full copyright and license information, please see the LICENSE.txt
- * Redistributions of files must retain the above copyright notice.
- *
- * @copyright Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
- * @link https://cakephp.org CakePHP(tm) Project
- * @license https://opensource.org/licenses/mit-license.php MIT License
- */
- namespace Cake\Collection\Iterator;
- use ArrayIterator;
- use IteratorAggregate;
- use LogicException;
- use Traversable;
- /**
- * Implements a simplistic version of the popular Map-Reduce algorithm. Acts
- * like an iterator for the original passed data after each result has been
- * processed, thus offering a transparent wrapper for results coming from any
- * source.
- */
- class MapReduce implements IteratorAggregate
- {
- /**
- * Holds the shuffled results that were emitted from the map
- * phase
- *
- * @var array
- */
- protected $_intermediate = [];
- /**
- * Holds the results as emitted during the reduce phase
- *
- * @var array
- */
- protected $_result = [];
- /**
- * Whether the Map-Reduce routine has been executed already on the data
- *
- * @var bool
- */
- protected $_executed = false;
- /**
- * Holds the original data that needs to be processed
- *
- * @var \Traversable|null
- */
- protected $_data;
- /**
- * A callable that will be executed for each record in the original data
- *
- * @var callable
- */
- protected $_mapper;
- /**
- * A callable that will be executed for each intermediate record emitted during
- * the Map phase
- *
- * @var callable|null
- */
- protected $_reducer;
- /**
- * Count of elements emitted during the Reduce phase
- *
- * @var int
- */
- protected $_counter = 0;
- /**
- * Constructor
- *
- * ### Example:
- *
- * Separate all unique odd and even numbers in an array
- *
- * ```
- * $data = new \ArrayObject([1, 2, 3, 4, 5, 3]);
- * $mapper = function ($value, $key, $mr) {
- * $type = ($value % 2 === 0) ? 'even' : 'odd';
- * $mr->emitIntermediate($value, $type);
- * };
- *
- * $reducer = function ($numbers, $type, $mr) {
- * $mr->emit(array_unique($numbers), $type);
- * };
- * $results = new MapReduce($data, $mapper, $reducer);
- * ```
- *
- * Previous example will generate the following result:
- *
- * ```
- * ['odd' => [1, 3, 5], 'even' => [2, 4]]
- * ```
- *
- * @param \Traversable $data the original data to be processed
- * @param callable $mapper the mapper callback. This function will receive 3 arguments.
- * The first one is the current value, second the current results key and third is
- * this class instance so you can call the result emitters.
- * @param callable|null $reducer the reducer callback. This function will receive 3 arguments.
- * The first one is the list of values inside a bucket, second one is the name
- * of the bucket that was created during the mapping phase and third one is an
- * instance of this class.
- */
- public function __construct(Traversable $data, callable $mapper, callable $reducer = null)
- {
- $this->_data = $data;
- $this->_mapper = $mapper;
- $this->_reducer = $reducer;
- }
- /**
- * Returns an iterator with the end result of running the Map and Reduce
- * phases on the original data
- *
- * @return \ArrayIterator
- */
- public function getIterator()
- {
- if (!$this->_executed) {
- $this->_execute();
- }
- return new ArrayIterator($this->_result);
- }
- /**
- * Appends a new record to the bucket labelled with $key, usually as a result
- * of mapping a single record from the original data.
- *
- * @param mixed $val The record itself to store in the bucket
- * @param string $bucket the name of the bucket where to put the record
- * @return void
- */
- public function emitIntermediate($val, $bucket)
- {
- $this->_intermediate[$bucket][] = $val;
- }
- /**
- * Appends a new record to the final list of results and optionally assign a key
- * for this record.
- *
- * @param mixed $val The value to be appended to the final list of results
- * @param string|null $key and optional key to assign to the value
- * @return void
- */
- public function emit($val, $key = null)
- {
- $this->_result[$key === null ? $this->_counter : $key] = $val;
- $this->_counter++;
- }
- /**
- * Runs the actual Map-Reduce algorithm. This is iterate the original data
- * and call the mapper function for each , then for each intermediate
- * bucket created during the Map phase call the reduce function.
- *
- * @return void
- * @throws \LogicException if emitIntermediate was called but no reducer function
- * was provided
- */
- protected function _execute()
- {
- $mapper = $this->_mapper;
- foreach ($this->_data as $key => $val) {
- $mapper($val, $key, $this);
- }
- $this->_data = null;
- if (!empty($this->_intermediate) && empty($this->_reducer)) {
- throw new LogicException('No reducer function was provided');
- }
- $reducer = $this->_reducer;
- foreach ($this->_intermediate as $key => $list) {
- $reducer($list, $key, $this);
- }
- $this->_intermediate = [];
- $this->_executed = true;
- }
- }
|