<?php
/**
 * Efficiently run operations on batches of results for any function
 * that supports an options array.
 *
 * This is usually used with elgg_get_entities() and friends,
 * elgg_get_annotations(), and elgg_get_metadata().
 *
 * If you pass a valid PHP callback, all results will be run through that
 * callback. You can still foreach() through the result set after.  Valid
 * PHP callbacks can be a string, an array, or a closure.
 * {@link http://php.net/manual/en/language.pseudo-types.php}
 *
 * The callback function must accept 3 arguments: an entity, the getter
 * used, and the options used.
 *
 * Results from the callback are stored in callbackResult. If the callback
 * returns only booleans, callbackResult will be the combined result of
 * all calls. If no entities are processed, callbackResult will be null.
 *
 * If the callback returns anything else, callbackResult will be an indexed
 * array of whatever the callback returns.  If returning error handling
 * information, you should include enough information to determine which
 * result you're referring to.
 *
 * Don't combine returning bools and returning something else.
 *
 * Note that returning false will not stop the foreach.
 *
 * @warning If your callback or foreach loop deletes or disables entities
 * you MUST call setIncrementOffset(false) or set that when instantiating.
 * This forces the offset to stay what it was in the $options array.
 *
 * @example
 * <code>
 * // using foreach
 * $batch = new ElggBatch('elgg_get_entities', array());
 * $batch->setIncrementOffset(false);
 *
 * foreach ($batch as $entity) {
 * 	$entity->disable();
 * }
 *
 * // using a callback
 * $callback = function($result, $getter, $options) {
 * 	var_dump("Looking at annotation id: $result->id");
 * 	return true;
 * };
 *
 * $batch = new ElggBatch('elgg_get_annotations', array('guid' => 2), $callback);
 * </code>
 *
 * @package    Elgg.Core
 * @subpackage DataModel
 * @link       http://docs.elgg.org/DataModel/ElggBatch
 * @since      1.8
 */
class ElggBatch
	implements Iterator {

	/**
	 * The results of the chunk currently being iterated over.
	 *
	 * @var array
	 */
	private $results = array();

	/**
	 * The options array handed to the getter. 'limit' and 'offset' are
	 * overridden for every chunk that is fetched.
	 *
	 * @var array
	 */
	private $options = array();

	/**
	 * The function used to get results.
	 *
	 * @var mixed A string, array, closure, or lambda function
	 */
	private $getter = null;

	/**
	 * The number of results to grab at a time.
	 *
	 * @var int
	 */
	private $chunkSize = 25;

	/**
	 * A callback function to pass results through.
	 *
	 * @var mixed A string, array, closure, or lambda function
	 */
	private $callback = null;

	/**
	 * Start after this many results.
	 *
	 * @var int
	 */
	private $offset = 0;

	/**
	 * Stop after this many results. 0 means no limit.
	 *
	 * @var int
	 */
	private $limit = 0;

	/**
	 * Number of results fetched from the getter so far.
	 *
	 * @var int
	 */
	private $retrievedResults = 0;

	/**
	 * The index of the current result within the current chunk
	 *
	 * @var int
	 */
	private $resultIndex = 0;

	/**
	 * The number of chunks fetched since the last rewind
	 *
	 * @var int
	 */
	private $chunkIndex = 0;

	/**
	 * The number of results iterated through
	 *
	 * @var int
	 */
	private $processedResults = 0;

	/**
	 * Is the getter a valid callback. Null until first checked.
	 *
	 * @var bool
	 */
	private $validGetter = null;

	/**
	 * The result of running all entities through the callback function.
	 *
	 * @var mixed
	 */
	public $callbackResult = null;

	/**
	 * If false, offset will not be incremented. This is used for callbacks/loops that delete.
	 *
	 * @var bool
	 */
	private $incrementOffset = true;

	/**
	 * Batches operations on any elgg_get_*() or compatible function that supports
	 * an options array.
	 *
	 * Instead of returning all objects in memory, it goes through $chunk_size
	 * objects, then requests more from the server.  This avoids OOM errors.
	 *
	 * If a callable $callback is given, every result is passed through it
	 * immediately (using a second, internal ElggBatch) and the outcome is
	 * stored in $callbackResult: the logical AND of all return values when
	 * the first return value is a boolean or null, otherwise an indexed
	 * array of the return values. It stays null if nothing was processed.
	 *
	 * @param mixed $getter     The function used to get objects.  Usually
	 *                          an elgg_get_*() function, but can be any valid PHP callback.
	 * @param array $options    The options array to pass to the getter function. If limit is
	 *                          not set, 10 is used as the default. In most cases that is not
	 *                          what you want.
	 * @param mixed $callback   An optional callback function that all results will be passed
	 *                          to upon load.  The callback needs to accept $result, $getter,
	 *                          $options.
	 * @param int   $chunk_size The number of entities to pull in before requesting more.
	 *                          You have to balance this between running out of memory in PHP
	 *                          and hitting the db server too often. Values <= 0 fall back
	 *                          to 25.
	 * @param bool  $inc_offset Increment the offset on each fetch. This must be false for
	 *                          callbacks that delete rows. You can set this after the
	 *                          object is created with {@see ElggBatch::setIncrementOffset()}.
	 */
	public function __construct($getter, $options, $callback = null, $chunk_size = 25,
			$inc_offset = true) {

		$this->getter = $getter;
		$this->options = $options;
		$this->callback = $callback;
		$this->chunkSize = $chunk_size;
		$this->setIncrementOffset($inc_offset);

		if ($this->chunkSize <= 0) {
			$this->chunkSize = 25;
		}

		// store these so we can compare later
		$this->offset = elgg_extract('offset', $options, 0);
		$this->limit = elgg_extract('limit', $options, 10);

		// if passed a callback, create a new ElggBatch with the same options
		// and pass each to the callback.
		if ($callback && is_callable($callback)) {
			$batch = new ElggBatch($getter, $options, null, $chunk_size, $inc_offset);

			$all_results = null;

			foreach ($batch as $result) {
				if (is_string($callback)) {
					$result = $callback($result, $getter, $options);
				} else {
					$result = call_user_func_array($callback, array($result, $getter, $options));
				}

				$is_bool_like = ($result === true || $result === false || $result === null);

				// the first return value decides whether we aggregate
				// into a single boolean or collect into an array
				if (!isset($all_results)) {
					$all_results = $is_bool_like ? $result : array();
				}

				if ($is_bool_like && !is_array($all_results)) {
					$all_results = $result && $all_results;
				} else {
					$all_results[] = $result;
				}
			}

			$this->callbackResult = $all_results;
		}
	}

	/**
	 * Fetches the next chunk of results
	 *
	 * Calls the getter with the stored options, overriding 'limit' and
	 * 'offset' for the chunk about to be loaded. On success the chunk
	 * replaces $results and the counters are updated.
	 *
	 * @return bool True if a non-empty array of results was loaded, false
	 *              if the getter is not callable, the limit has been
	 *              reached, or the getter returned nothing usable.
	 */
	private function getNextResultsChunk() {
		// reset memory caches after first chunk load
		if ($this->chunkIndex > 0) {
			global $DB_QUERY_CACHE, $ENTITY_CACHE;
			$DB_QUERY_CACHE = $ENTITY_CACHE = array();
		}

		// always reset results.
		$this->results = array();

		if (!isset($this->validGetter)) {
			$this->validGetter = is_callable($this->getter);
		}

		if (!$this->validGetter) {
			return false;
		}

		$limit = $this->chunkSize;

		// if someone passed limit = 0 they want everything.
		if ($this->limit != 0) {
			if ($this->retrievedResults >= $this->limit) {
				return false;
			}

			// if original limit < chunk size, set limit to original limit
			// else if the number of results we'll fetch is greater than the original limit
			if ($this->limit < $this->chunkSize) {
				$limit = $this->limit;
			} elseif ($this->retrievedResults + $this->chunkSize > $this->limit) {
				// set the limit to the number of results remaining in the original limit
				$limit = $this->limit - $this->retrievedResults;
			}
		}

		if ($this->incrementOffset) {
			$offset = $this->offset + $this->retrievedResults;
		} else {
			// rows are being removed as we go, so the next chunk is
			// always found at the original offset
			$offset = $this->offset;
		}

		$current_options = array(
			'limit' => $limit,
			'offset' => $offset
		);

		$options = array_merge($this->options, $current_options);
		$getter = $this->getter;

		if (is_string($getter)) {
			$results = $getter($options);
		} else {
			$results = call_user_func_array($getter, array($options));
		}

		// Anything that is not a non-empty array (false, null, a count, ...)
		// cannot be iterated by this class, so treat it as "no results"
		// instead of handing it to count().
		if (!is_array($results) || !$results) {
			return false;
		}

		$this->results = $results;
		// make sure the array pointer agrees with resultIndex
		reset($this->results);

		$this->chunkIndex++;
		$this->resultIndex = 0;
		$this->retrievedResults += count($this->results);
		return true;
	}

	/**
	 * Increment the offset from the original options array? Setting to
	 * false is required for callbacks that delete rows.
	 *
	 * @param bool $increment Whether to advance the offset on each fetch
	 *
	 * @return void
	 */
	public function setIncrementOffset($increment = true) {
		$this->incrementOffset = (bool) $increment;
	}

	/**
	 * Implements Iterator
	 */

	/**
	 * PHP Iterator Interface
	 *
	 * Resets all counters and always fetches the first chunk again, so
	 * that every foreach over the batch starts from the first result.
	 *
	 * @see Iterator::rewind()
	 * @return void
	 */
	#[\ReturnTypeWillChange]
	public function rewind() {
		$this->resultIndex = 0;
		$this->retrievedResults = 0;
		$this->processedResults = 0;

		// Reusing a previously loaded chunk is not safe: after a completed
		// or interrupted loop $results is either empty or its pointer is
		// somewhere in the middle, so start over with a fresh first chunk.
		$this->chunkIndex = 0;
		$this->getNextResultsChunk();
	}

	/**
	 * PHP Iterator Interface
	 *
	 * @see Iterator::current()
	 * @return mixed The result the current chunk's array pointer is on
	 */
	#[\ReturnTypeWillChange]
	public function current() {
		return current($this->results);
	}

	/**
	 * PHP Iterator Interface
	 *
	 * @see Iterator::key()
	 * @return int The number of results iterated through so far
	 */
	#[\ReturnTypeWillChange]
	public function key() {
		return $this->processedResults;
	}

	/**
	 * PHP Iterator Interface
	 *
	 * Advances to the next result, loading the next chunk when the
	 * current one has been used up.
	 *
	 * @see Iterator::next()
	 * @return mixed The new current result, or false if there is none
	 */
	#[\ReturnTypeWillChange]
	public function next() {
		// if we'll be at the end.
		if (($this->processedResults + 1) >= $this->limit && $this->limit > 0) {
			$this->results = array();
			return false;
		}

		// if we'll need new results.
		if (($this->resultIndex + 1) >= $this->chunkSize) {
			if (!$this->getNextResultsChunk()) {
				$this->results = array();
				return false;
			}

			$result = current($this->results);
		} else {
			// the function above resets the indexes, so only inc if not
			// getting new set
			$this->resultIndex++;
			$result = next($this->results);
		}

		$this->processedResults++;
		return $result;
	}

	/**
	 * PHP Iterator Interface
	 *
	 * @see Iterator::valid()
	 * @return bool True while the current chunk's array pointer is on an element
	 */
	#[\ReturnTypeWillChange]
	public function valid() {
		if (!is_array($this->results)) {
			return false;
		}
		$key = key($this->results);
		return ($key !== NULL && $key !== FALSE);
	}
}