diff options
Diffstat (limited to 'engine/classes/ElggBatch.php')
-rw-r--r-- | engine/classes/ElggBatch.php | 392 |
1 files changed, 392 insertions, 0 deletions
diff --git a/engine/classes/ElggBatch.php b/engine/classes/ElggBatch.php new file mode 100644 index 000000000..c1a77a0d9 --- /dev/null +++ b/engine/classes/ElggBatch.php @@ -0,0 +1,392 @@ +<?php +/** + * Efficiently run operations on batches of results for any function + * that supports an options array. + * + * This is usually used with elgg_get_entities() and friends, + * elgg_get_annotations(), and elgg_get_metadata(). + * + * If you pass a valid PHP callback, all results will be run through that + * callback. You can still foreach() through the result set after. Valid + * PHP callbacks can be a string, an array, or a closure. + * {@link http://php.net/manual/en/language.pseudo-types.php} + * + * The callback function must accept 3 arguments: an entity, the getter + * used, and the options used. + * + * Results from the callback are stored in callbackResult. If the callback + * returns only booleans, callbackResults will be the combined result of + * all calls. If no entities are processed, callbackResults will be null. + * + * If the callback returns anything else, callbackresult will be an indexed + * array of whatever the callback returns. If returning error handling + * information, you should include enough information to determine which + * result you're referring to. + * + * Don't combine returning bools and returning something else. + * + * Note that returning false will not stop the foreach. + * + * @warning If your callback or foreach loop deletes or disable entities + * you MUST call setIncrementOffset(false) or set that when instantiating. + * This forces the offset to stay what it was in the $options array. + * + * @example + * <code> + * // using foreach + * $batch = new ElggBatch('elgg_get_entities', array()); + * $batch->setIncrementOffset(false); + * + * foreach ($batch as $entity) { + * $entity->disable(); + * } + * + * // using both a callback + * $callback = function($result, $getter, $options) { + * var_dump("Looking at annotation id: $result->id"); + * return true; + * } + * + * $batch = new ElggBatch('elgg_get_annotations', array('guid' => 2), $callback); + * </code> + * + * @package Elgg.Core + * @subpackage DataModel + * @link http://docs.elgg.org/DataModel/ElggBatch + * @since 1.8 + */ +class ElggBatch + implements Iterator { + + /** + * The objects to interator over. + * + * @var array + */ + private $results = array(); + + /** + * The function used to get results. + * + * @var mixed A string, array, or closure, or lamda function + */ + private $getter = null; + + /** + * The number of results to grab at a time. + * + * @var int + */ + private $chunkSize = 25; + + /** + * A callback function to pass results through. + * + * @var mixed A string, array, or closure, or lamda function + */ + private $callback = null; + + /** + * Start after this many results. + * + * @var int + */ + private $offset = 0; + + /** + * Stop after this many results. + * + * @var int + */ + private $limit = 0; + + /** + * Number of processed results. + * + * @var int + */ + private $retrievedResults = 0; + + /** + * The index of the current result within the current chunk + * + * @var int + */ + private $resultIndex = 0; + + /** + * The index of the current chunk + * + * @var int + */ + private $chunkIndex = 0; + + /** + * The number of results iterated through + * + * @var int + */ + private $processedResults = 0; + + /** + * Is the getter a valid callback + * + * @var bool + */ + private $validGetter = null; + + /** + * The result of running all entities through the callback function. + * + * @var mixed + */ + public $callbackResult = null; + + /** + * If false, offset will not be incremented. This is used for callbacks/loops that delete. + * + * @var bool + */ + private $incrementOffset = true; + + /** + * Batches operations on any elgg_get_*() or compatible function that supports + * an options array. + * + * Instead of returning all objects in memory, it goes through $chunk_size + * objects, then requests more from the server. This avoids OOM errors. + * + * @param string $getter The function used to get objects. Usually + * an elgg_get_*() function, but can be any valid PHP callback. + * @param array $options The options array to pass to the getter function. If limit is + * not set, 10 is used as the default. In most cases that is not + * what you want. + * @param mixed $callback An optional callback function that all results will be passed + * to upon load. The callback needs to accept $result, $getter, + * $options. + * @param int $chunk_size The number of entities to pull in before requesting more. + * You have to balance this between running out of memory in PHP + * and hitting the db server too often. + * @param bool $inc_offset Increment the offset on each fetch. This must be false for + * callbacks that delete rows. You can set this after the + * object is created with {@see ElggBatch::setIncrementOffset()}. + */ + public function __construct($getter, $options, $callback = null, $chunk_size = 25, + $inc_offset = true) { + + $this->getter = $getter; + $this->options = $options; + $this->callback = $callback; + $this->chunkSize = $chunk_size; + $this->setIncrementOffset($inc_offset); + + if ($this->chunkSize <= 0) { + $this->chunkSize = 25; + } + + // store these so we can compare later + $this->offset = elgg_extract('offset', $options, 0); + $this->limit = elgg_extract('limit', $options, 10); + + // if passed a callback, create a new ElggBatch with the same options + // and pass each to the callback. + if ($callback && is_callable($callback)) { + $batch = new ElggBatch($getter, $options, null, $chunk_size, $inc_offset); + + $all_results = null; + + foreach ($batch as $result) { + if (is_string($callback)) { + $result = $callback($result, $getter, $options); + } else { + $result = call_user_func_array($callback, array($result, $getter, $options)); + } + + if (!isset($all_results)) { + if ($result === true || $result === false || $result === null) { + $all_results = $result; + } else { + $all_results = array(); + } + } + + if (($result === true || $result === false || $result === null) && !is_array($all_results)) { + $all_results = $result && $all_results; + } else { + $all_results[] = $result; + } + } + + $this->callbackResult = $all_results; + } + } + + /** + * Fetches the next chunk of results + * + * @return bool + */ + private function getNextResultsChunk() { + // reset memory caches after first chunk load + if ($this->chunkIndex > 0) { + global $DB_QUERY_CACHE, $ENTITY_CACHE; + $DB_QUERY_CACHE = $ENTITY_CACHE = array(); + } + + // always reset results. + $this->results = array(); + + if (!isset($this->validGetter)) { + $this->validGetter = is_callable($this->getter); + } + + if (!$this->validGetter) { + return false; + } + + $limit = $this->chunkSize; + + // if someone passed limit = 0 they want everything. + if ($this->limit != 0) { + if ($this->retrievedResults >= $this->limit) { + return false; + } + + // if original limit < chunk size, set limit to original limit + // else if the number of results we'll fetch if greater than the original limit + if ($this->limit < $this->chunkSize) { + $limit = $this->limit; + } elseif ($this->retrievedResults + $this->chunkSize > $this->limit) { + // set the limit to the number of results remaining in the original limit + $limit = $this->limit - $this->retrievedResults; + } + } + + if ($this->incrementOffset) { + $offset = $this->offset + $this->retrievedResults; + } else { + $offset = $this->offset; + } + + $current_options = array( + 'limit' => $limit, + 'offset' => $offset + ); + + $options = array_merge($this->options, $current_options); + $getter = $this->getter; + + if (is_string($getter)) { + $this->results = $getter($options); + } else { + $this->results = call_user_func_array($getter, array($options)); + } + + if ($this->results) { + $this->chunkIndex++; + $this->resultIndex = 0; + $this->retrievedResults += count($this->results); + return true; + } else { + return false; + } + } + + /** + * Increment the offset from the original options array? Setting to + * false is required for callbacks that delete rows. + * + * @param bool $increment + */ + public function setIncrementOffset($increment = true) { + $this->incrementOffset = (bool) $increment; + } + + /** + * Implements Iterator + */ + + /** + * PHP Iterator Interface + * + * @see Iterator::rewind() + * @return void + */ + public function rewind() { + $this->resultIndex = 0; + $this->retrievedResults = 0; + $this->processedResults = 0; + + // only grab results if we haven't yet or we're crossing chunks + if ($this->chunkIndex == 0 || $this->limit > $this->chunkSize) { + $this->chunkIndex = 0; + $this->getNextResultsChunk(); + } + } + + /** + * PHP Iterator Interface + * + * @see Iterator::current() + * @return mixed + */ + public function current() { + return current($this->results); + } + + /** + * PHP Iterator Interface + * + * @see Iterator::key() + * @return int + */ + public function key() { + return $this->processedResults; + } + + /** + * PHP Iterator Interface + * + * @see Iterator::next() + * @return mixed + */ + public function next() { + // if we'll be at the end. + if (($this->processedResults + 1) >= $this->limit && $this->limit > 0) { + $this->results = array(); + return false; + } + + // if we'll need new results. + if (($this->resultIndex + 1) >= $this->chunkSize) { + if (!$this->getNextResultsChunk()) { + $this->results = array(); + return false; + } + + $result = current($this->results); + } else { + // the function above resets the indexes, so only inc if not + // getting new set + $this->resultIndex++; + $result = next($this->results); + } + + $this->processedResults++; + return $result; + } + + /** + * PHP Iterator Interface + * + * @see Iterator::valid() + * @return bool + */ + public function valid() { + if (!is_array($this->results)) { + return false; + } + $key = key($this->results); + return ($key !== NULL && $key !== FALSE); + } +} |