From 33c786c1b7838d38e9797b8731d4e45c9ceb1aac Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Mon, 26 Nov 2012 01:17:39 -0500 Subject: Fixes #4929: Optimize elgg_get_entities and add attribute loader --- engine/classes/ElggAttributeLoader.php | 199 +++++++++++++++++++++++++++ engine/classes/ElggGroup.php | 37 ++--- engine/classes/ElggObject.php | 37 ++--- engine/classes/ElggPlugin.php | 58 -------- engine/classes/ElggSite.php | 37 ++--- engine/classes/ElggUser.php | 36 ++--- engine/classes/IncompleteEntityException.php | 10 ++ engine/lib/entities.php | 106 +++++++++++++- engine/lib/plugins.php | 1 + 9 files changed, 349 insertions(+), 172 deletions(-) create mode 100644 engine/classes/ElggAttributeLoader.php create mode 100644 engine/classes/IncompleteEntityException.php diff --git a/engine/classes/ElggAttributeLoader.php b/engine/classes/ElggAttributeLoader.php new file mode 100644 index 000000000..602bb8bae --- /dev/null +++ b/engine/classes/ElggAttributeLoader.php @@ -0,0 +1,199 @@ +class = $class; + + if (!is_string($required_type)) { + throw new InvalidArgumentException('$requiredType must be a system entity type.'); + } + $this->required_type = $required_type; + + $this->initialized_attributes = $initialized_attrs; + unset($initialized_attrs['tables_split'], $initialized_attrs['tables_loaded']); + $all_attr_names = array_keys($initialized_attrs); + $this->secondary_attr_names = array_diff($all_attr_names, self::$primary_attr_names); + } + + protected function isMissingPrimaries($row) { + return array_diff(self::$primary_attr_names, array_keys($row)) !== array(); + } + + protected function isMissingSecondaries($row) { + return array_diff($this->secondary_attr_names, array_keys($row)) !== array(); + } + + protected function checkType($row) { + if ($row['type'] !== $this->required_type) { + $msg = elgg_echo('InvalidClassException:NotValidElggStar', array($row['guid'], $this->class)); + throw new InvalidClassException($msg); + } + } + + /** + * Get all required attributes for the entity, validating any that are passed in. Returns empty array + * if can't be loaded (Check $failure_reason). + * + * This function splits loading between "primary" attributes (those in {prefix}entities table) and + * "secondary" attributes (e.g. those in {prefix}objects_entity), but can load all at once if a + * combined loader is available. + * + * @param mixed $row a row loaded from DB (array or stdClass) or a GUID + * @return array will be empty if failed to load all attributes (access control or entity doesn't exist) + * + * @throws InvalidArgumentException|LogicException|IncompleteEntityException + */ + public function getRequiredAttributes($row) { + if (!is_array($row) && !($row instanceof stdClass)) { + // assume row is the GUID + $row = array('guid' => $row); + } + $row = (array) $row; + if (empty($row['guid'])) { + throw new InvalidArgumentException('$row must be or contain a GUID'); + } + + // these must be present to support isFullyLoaded() + foreach (array('tables_split', 'tables_loaded') as $key) { + if (isset($this->initialized_attributes[$key])) { + $row[$key] = $this->initialized_attributes[$key]; + } + } + + $was_missing_primaries = $this->isMissingPrimaries($row); + $was_missing_secondaries = $this->isMissingSecondaries($row); + + // some types have a function to load all attributes at once, it should be faster + if (($was_missing_primaries || $was_missing_secondaries) && is_callable($this->full_loader)) { + $fetched = (array) call_user_func($this->full_loader, $row['guid']); + if (!$fetched) { + return array(); + } + $row = array_merge($row, $fetched); + $this->checkType($row); + } else { + if ($was_missing_primaries) { + if (!is_callable($this->primary_loader)) { + throw new LogicException('Primary attribute loader must be callable'); + } + if (!$this->requires_access_control) { + $ignoring_access = elgg_set_ignore_access(); + } + $fetched = (array) call_user_func($this->primary_loader, $row['guid']); + if (!$this->requires_access_control) { + elgg_set_ignore_access($ignoring_access); + } + if (!$fetched) { + return array(); + } + $row = array_merge($row, $fetched); + } + + // We must test type before trying to load the secondaries so that InvalidClassException + // gets thrown. Otherwise the secondary loader will fail and return false. + $this->checkType($row); + + if ($was_missing_secondaries) { + if (!is_callable($this->secondary_loader)) { + throw new LogicException('Secondary attribute loader must be callable'); + } + $fetched = (array) call_user_func($this->secondary_loader, $row['guid']); + if (!$fetched) { + if ($row['type'] === 'site') { + // A special case is needed for sites: When vanilla ElggEntities are created and + // saved, these are stored w/ type "site", but with no sites_entity row. These + // are probably only created in the unit tests. + // @todo Don't save vanilla ElggEntities with type "site" + $row['guid'] = (int) $row['guid']; + return $row; + } + throw new IncompleteEntityException("Secondary loader failed to return row for {$row['guid']}"); + } + $row = array_merge($row, $fetched); + } + } + + // loading complete: re-check missing and check type + if (($was_missing_primaries && $this->isMissingPrimaries($row)) + || ($was_missing_secondaries && $this->isMissingSecondaries($row))) { + throw new LogicException('Attribute loaders failed to return proper attributes'); + } + + // guid needs to be an int http://trac.elgg.org/ticket/4111 + $row['guid'] = (int) $row['guid']; + + return $row; + } +} diff --git a/engine/classes/ElggGroup.php b/engine/classes/ElggGroup.php index 121186196..ea257f368 100644 --- a/engine/classes/ElggGroup.php +++ b/engine/classes/ElggGroup.php @@ -324,37 +324,18 @@ class ElggGroup extends ElggEntity * @return bool */ protected function load($guid) { - // Test to see if we have the generic stuff - if (!parent::load($guid)) { - return false; - } + $attr_loader = new ElggAttributeLoader(get_class(), 'group', $this->attributes); + $attr_loader->requires_access_control = !($this instanceof ElggPlugin); + $attr_loader->secondary_loader = 'get_group_entity_as_row'; - // Only work with GUID from here - if ($guid instanceof stdClass) { - $guid = $guid->guid; - } - - // Check the type - if ($this->attributes['type'] != 'group') { - $msg = elgg_echo('InvalidClassException:NotValidElggStar', array($guid, get_class())); - throw new InvalidClassException($msg); - } - - // Load missing data - $row = get_group_entity_as_row($guid); - if (($row) && (!$this->isFullyLoaded())) { - // If $row isn't a cached copy then increment the counter - $this->attributes['tables_loaded']++; - } - - // Now put these into the attributes array as core values - $objarray = (array) $row; - foreach ($objarray as $key => $value) { - $this->attributes[$key] = $value; + $attrs = $attr_loader->getRequiredAttributes($guid); + if (!$attrs) { + return false; } - // guid needs to be an int http://trac.elgg.org/ticket/4111 - $this->attributes['guid'] = (int)$this->attributes['guid']; + $this->attributes = $attrs; + $this->attributes['tables_loaded'] = 2; + cache_entity($this); return true; } diff --git a/engine/classes/ElggObject.php b/engine/classes/ElggObject.php index fa6296c8c..84f6e09c8 100644 --- a/engine/classes/ElggObject.php +++ b/engine/classes/ElggObject.php @@ -99,37 +99,18 @@ class ElggObject extends ElggEntity { * @throws InvalidClassException */ protected function load($guid) { - // Load data from entity table if needed - if (!parent::load($guid)) { - return false; - } + $attr_loader = new ElggAttributeLoader(get_class(), 'object', $this->attributes); + $attr_loader->requires_access_control = !($this instanceof ElggPlugin); + $attr_loader->secondary_loader = 'get_object_entity_as_row'; - // Only work with GUID from here - if ($guid instanceof stdClass) { - $guid = $guid->guid; - } - - // Check the type - if ($this->attributes['type'] != 'object') { - $msg = elgg_echo('InvalidClassException:NotValidElggStar', array($guid, get_class())); - throw new InvalidClassException($msg); - } - - // Load missing data - $row = get_object_entity_as_row($guid); - if (($row) && (!$this->isFullyLoaded())) { - // If $row isn't a cached copy then increment the counter - $this->attributes['tables_loaded']++; - } - - // Now put these into the attributes array as core values - $objarray = (array) $row; - foreach ($objarray as $key => $value) { - $this->attributes[$key] = $value; + $attrs = $attr_loader->getRequiredAttributes($guid); + if (!$attrs) { + return false; } - // guid needs to be an int http://trac.elgg.org/ticket/4111 - $this->attributes['guid'] = (int)$this->attributes['guid']; + $this->attributes = $attrs; + $this->attributes['tables_loaded'] = 2; + cache_entity($this); return true; } diff --git a/engine/classes/ElggPlugin.php b/engine/classes/ElggPlugin.php index 3e43c8e81..33f14ae37 100644 --- a/engine/classes/ElggPlugin.php +++ b/engine/classes/ElggPlugin.php @@ -78,64 +78,6 @@ class ElggPlugin extends ElggObject { } } - /** - * Overridden from ElggEntity and ElggObject::load(). Core always inits plugins with - * a query joined to the objects_entity table, so all the info is there. - * - * @param mixed $guid GUID of an ElggObject or the stdClass object from entities table - * - * @return bool - * @throws InvalidClassException - */ - protected function load($guid) { - - $expected_attributes = $this->attributes; - unset($expected_attributes['tables_split']); - unset($expected_attributes['tables_loaded']); - - // this was loaded with a full join - $needs_loaded = false; - - if ($guid instanceof stdClass) { - $row = (array) $guid; - $missing_attributes = array_diff_key($expected_attributes, $row); - if ($missing_attributes) { - $needs_loaded = true; - $guid = $row['guid']; - } else { - $this->attributes = $row; - } - } else { - $needs_loaded = true; - } - - if ($needs_loaded) { - $entity = (array) get_entity_as_row($guid); - $object = (array) get_object_entity_as_row($guid); - - if (!$entity || !$object) { - return false; - } - - $this->attributes = array_merge($this->attributes, $entity, $object); - } - - $this->attributes['tables_loaded'] = 2; - - // Check the type - if ($this->attributes['type'] != 'object') { - $msg = elgg_echo('InvalidClassException:NotValidElggStar', array($guid, get_class())); - throw new InvalidClassException($msg); - } - - // guid needs to be an int http://trac.elgg.org/ticket/4111 - $this->attributes['guid'] = (int)$this->attributes['guid']; - - cache_entity($this); - - return true; - } - /** * Save the plugin object. Make sure required values exist. * diff --git a/engine/classes/ElggSite.php b/engine/classes/ElggSite.php index 401939005..f7f5b68ea 100644 --- a/engine/classes/ElggSite.php +++ b/engine/classes/ElggSite.php @@ -117,37 +117,18 @@ class ElggSite extends ElggEntity { * @throws InvalidClassException */ protected function load($guid) { - // Test to see if we have the generic stuff - if (!parent::load($guid)) { - return false; - } + $attr_loader = new ElggAttributeLoader(get_class(), 'site', $this->attributes); + $attr_loader->requires_access_control = !($this instanceof ElggPlugin); + $attr_loader->secondary_loader = 'get_site_entity_as_row'; - // Only work with GUID from here - if ($guid instanceof stdClass) { - $guid = $guid->guid; - } - - // Check the type - if ($this->attributes['type'] != 'site') { - $msg = elgg_echo('InvalidClassException:NotValidElggStar', array($guid, get_class())); - throw new InvalidClassException($msg); - } - - // Load missing data - $row = get_site_entity_as_row($guid); - if (($row) && (!$this->isFullyLoaded())) { - // If $row isn't a cached copy then increment the counter - $this->attributes['tables_loaded']++; - } - - // Now put these into the attributes array as core values - $objarray = (array) $row; - foreach ($objarray as $key => $value) { - $this->attributes[$key] = $value; + $attrs = $attr_loader->getRequiredAttributes($guid); + if (!$attrs) { + return false; } - // guid needs to be an int http://trac.elgg.org/ticket/4111 - $this->attributes['guid'] = (int)$this->attributes['guid']; + $this->attributes = $attrs; + $this->attributes['tables_loaded'] = 2; + cache_entity($this); return true; } diff --git a/engine/classes/ElggUser.php b/engine/classes/ElggUser.php index d7bb89265..6c1cdc1de 100644 --- a/engine/classes/ElggUser.php +++ b/engine/classes/ElggUser.php @@ -106,37 +106,17 @@ class ElggUser extends ElggEntity * @return bool */ protected function load($guid) { - // Test to see if we have the generic stuff - if (!parent::load($guid)) { - return false; - } + $attr_loader = new ElggAttributeLoader(get_class(), 'user', $this->attributes); + $attr_loader->secondary_loader = 'get_user_entity_as_row'; - // Only work with GUID from here - if ($guid instanceof stdClass) { - $guid = $guid->guid; - } - - // Check the type - if ($this->attributes['type'] != 'user') { - $msg = elgg_echo('InvalidClassException:NotValidElggStar', array($guid, get_class())); - throw new InvalidClassException($msg); - } - - // Load missing data - $row = get_user_entity_as_row($guid); - if (($row) && (!$this->isFullyLoaded())) { - // If $row isn't a cached copy then increment the counter - $this->attributes['tables_loaded']++; - } - - // Now put these into the attributes array as core values - $objarray = (array) $row; - foreach ($objarray as $key => $value) { - $this->attributes[$key] = $value; + $attrs = $attr_loader->getRequiredAttributes($guid); + if (!$attrs) { + return false; } - // guid needs to be an int http://trac.elgg.org/ticket/4111 - $this->attributes['guid'] = (int)$this->attributes['guid']; + $this->attributes = $attrs; + $this->attributes['tables_loaded'] = 2; + cache_entity($this); return true; } diff --git a/engine/classes/IncompleteEntityException.php b/engine/classes/IncompleteEntityException.php new file mode 100644 index 000000000..8c86edcc6 --- /dev/null +++ b/engine/classes/IncompleteEntityException.php @@ -0,0 +1,10 @@ + 'title', + 'user' => 'password', + 'group' => 'name', + ); + + $rows = get_data($sql); + + // guids to look up in each type + $lookup_types = array(); + // maps GUIDs to the $rows key + $guid_to_key = array(); + + if (isset($rows[0]->type, $rows[0]->subtype) + && $rows[0]->type === 'object' + && $rows[0]->subtype == $plugin_subtype) { + // Likely the entire resultset is plugins, which have already been optimized + // to JOIN the secondary table. In this case we allow retrieving from cache, + // but abandon the extra queries. + $types_to_optimize = array(); + } + + // First pass: use cache where possible, gather GUIDs that we're optimizing + foreach ($rows as $i => $row) { + if (empty($row->guid) || empty($row->type)) { + throw new LogicException('Entity row missing guid or type'); + } + if ($entity = retrieve_cached_entity($row->guid)) { + $rows[$i] = $entity; + continue; + } + if (isset($types_to_optimize[$row->type])) { + // check if row already looks JOINed. + if (isset($row->{$types_to_optimize[$row->type]})) { + // Row probably already contains JOINed secondary table. Don't make another query just + // to pull data that's already there + continue; + } + $lookup_types[$row->type][] = $row->guid; + $guid_to_key[$row->guid] = $i; + } + } + // Do secondary queries and merge rows + if ($lookup_types) { + $dbprefix = elgg_get_config('dbprefix'); + } + foreach ($lookup_types as $type => $guids) { + $set = "(" . implode(',', $guids) . ")"; + $sql = "SELECT * FROM {$dbprefix}{$type}s_entity WHERE guid IN $set"; + $secondary_rows = get_data($sql); + if ($secondary_rows) { + foreach ($secondary_rows as $secondary_row) { + $key = $guid_to_key[$secondary_row->guid]; + // cast to arrays to merge then cast back + $rows[$key] = (object)array_merge((array)$rows[$key], (array)$secondary_row); + } + } + } + // Second pass to finish conversion + foreach ($rows as $i => $row) { + if ($row instanceof ElggEntity) { + continue; + } else { + try { + $rows[$i] = entity_row_to_elggstar($row); + } catch (IncompleteEntityException $e) { + // don't let incomplete entities throw fatal errors + unset($rows[$i]); + } + } + } + return $rows; +} + /** * Returns SQL where clause for type and subtype on main entity table * diff --git a/engine/lib/plugins.php b/engine/lib/plugins.php index d5cd4fe76..cc1b4b9c7 100644 --- a/engine/lib/plugins.php +++ b/engine/lib/plugins.php @@ -190,6 +190,7 @@ function elgg_get_plugin_from_id($plugin_id) { 'type' => 'object', 'subtype' => 'plugin', 'joins' => array("JOIN {$db_prefix}objects_entity oe on oe.guid = e.guid"), + 'selects' => array("oe.title", "oe.description"), 'wheres' => array("oe.title = '$plugin_id'"), 'limit' => 1 ); -- cgit v1.2.3