From 61274b8cec92ee86dec24c99fd6ef180c1681ab2 Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Fri, 1 Jun 2012 17:02:18 -0400 Subject: Fixes #4290: adds volatile metadata cache, unit tests, and pre-loading for fetched entities --- engine/classes/ElggEntity.php | 44 ++-- engine/classes/ElggMetadata.php | 34 ++- engine/classes/ElggPlugin.php | 6 +- engine/classes/ElggVolatileMetadataCache.php | 344 +++++++++++++++++++++++++++ 4 files changed, 400 insertions(+), 28 deletions(-) create mode 100644 engine/classes/ElggVolatileMetadataCache.php (limited to 'engine/classes') diff --git a/engine/classes/ElggEntity.php b/engine/classes/ElggEntity.php index 77c2bbf4d..929abceb2 100644 --- a/engine/classes/ElggEntity.php +++ b/engine/classes/ElggEntity.php @@ -248,7 +248,9 @@ abstract class ElggEntity extends ElggData implements * @return mixed The value, or NULL if not found. */ public function getMetaData($name) { - if ((int) ($this->guid) == 0) { + $guid = $this->getGUID(); + + if (! $guid) { if (isset($this->temp_metadata[$name])) { // md is returned as an array only if more than 1 entry if (count($this->temp_metadata[$name]) == 1) { @@ -261,21 +263,38 @@ abstract class ElggEntity extends ElggData implements } } + // upon first cache miss, just load/cache all the metadata and retry. + // if this works, the rest of this function may not be needed! + $cache = elgg_get_metadata_cache(); + if ($cache->isKnown($guid, $name)) { + return $cache->load($guid, $name); + } else { + $cache->populateFromEntities(array($guid)); + // in case ignore_access was on, we have to check again... 
+ if ($cache->isKnown($guid, $name)) { + return $cache->load($guid, $name); + } + } + $md = elgg_get_metadata(array( - 'guid' => $this->getGUID(), + 'guid' => $guid, 'metadata_name' => $name, 'limit' => 0, )); + $value = null; + if ($md && !is_array($md)) { - return $md->value; + $value = $md->value; } elseif (count($md) == 1) { - return $md[0]->value; + $value = $md[0]->value; } else if ($md && is_array($md)) { - return metadata_array_to_values($md); + $value = metadata_array_to_values($md); } - return null; + $cache->save($guid, $name, $value); + + return $value; } /** @@ -1007,7 +1026,7 @@ abstract class ElggEntity extends ElggData implements /** * Returns the guid. * - * @return int GUID + * @return int|null GUID */ public function getGUID() { return $this->get('guid'); @@ -1245,16 +1264,16 @@ abstract class ElggEntity extends ElggData implements /** * Save an entity. * - * @return bool/int + * @return bool|int * @throws IOException */ public function save() { - $guid = (int) $this->guid; + $guid = $this->getGUID(); if ($guid > 0) { cache_entity($this); return update_entity( - $this->get('guid'), + $guid, $this->get('owner_guid'), $this->get('access_id'), $this->get('container_guid'), @@ -1301,10 +1320,7 @@ abstract class ElggEntity extends ElggData implements $this->attributes['subtype'] = get_subtype_id($this->attributes['type'], $this->attributes['subtype']); - // Cache object handle - if ($this->attributes['guid']) { - cache_entity($this); - } + cache_entity($this); return $this->attributes['guid']; } diff --git a/engine/classes/ElggMetadata.php b/engine/classes/ElggMetadata.php index 634a122e5..7f45dc3ea 100644 --- a/engine/classes/ElggMetadata.php +++ b/engine/classes/ElggMetadata.php @@ -26,8 +26,6 @@ class ElggMetadata extends ElggExtender { * Construct a metadata object * * @param mixed $id ID of metadata or a database row as stdClass object - * - * @return void */ function __construct($id = null) { $this->initializeAttributes(); @@ -54,7 +52,7 @@ 
class ElggMetadata extends ElggExtender { * * @param int $user_guid The GUID of the user (defaults to currently logged in user) * - * @return true|false Depending on permissions + * @return bool Depending on permissions */ function canEdit($user_guid = 0) { if ($entity = get_entity($this->get('entity_guid'))) { @@ -64,9 +62,11 @@ class ElggMetadata extends ElggExtender { } /** - * Save matadata object + * Save metadata object * - * @return int the metadata object id + * @return int|bool the metadata object id or true if updated + * + * @throws IOException */ function save() { if ($this->id > 0) { @@ -89,7 +89,13 @@ class ElggMetadata extends ElggExtender { * @return bool */ function delete() { - return elgg_delete_metastring_based_object_by_id($this->id, 'metadata'); + $success = elgg_delete_metastring_based_object_by_id($this->id, 'metadata'); + if ($success) { + // we mark unknown here because this deletes only one value + // under this name, and there may be others remaining. + elgg_get_metadata_cache()->markUnknown($this->entity_guid, $this->name); + } + return $success; } /** @@ -99,17 +105,27 @@ class ElggMetadata extends ElggExtender { * @since 1.8 */ function disable() { - return elgg_set_metastring_based_object_enabled_by_id($this->id, 'no', 'metadata'); + $success = elgg_set_metastring_based_object_enabled_by_id($this->id, 'no', 'metadata'); + if ($success) { + // we mark unknown here because this disables only one value + // under this name, and there may be others remaining. 
+ elgg_get_metadata_cache()->markUnknown($this->entity_guid, $this->name); + } + return $success; } /** - * Disable the metadata + * Enable the metadata * * @return bool * @since 1.8 */ function enable() { - return elgg_set_metastring_based_object_enabled_by_id($this->id, 'yes', 'metadata'); + $success = elgg_set_metastring_based_object_enabled_by_id($this->id, 'yes', 'metadata'); + if ($success) { + elgg_get_metadata_cache()->markUnknown($this->entity_guid, $this->name); + } + return $success; } /** diff --git a/engine/classes/ElggPlugin.php b/engine/classes/ElggPlugin.php index 8c9093834..3e43c8e81 100644 --- a/engine/classes/ElggPlugin.php +++ b/engine/classes/ElggPlugin.php @@ -101,7 +101,6 @@ class ElggPlugin extends ElggObject { $missing_attributes = array_diff_key($expected_attributes, $row); if ($missing_attributes) { $needs_loaded = true; - $old_guid = $guid; $guid = $row['guid']; } else { $this->attributes = $row; @@ -132,10 +131,7 @@ class ElggPlugin extends ElggObject { // guid needs to be an int http://trac.elgg.org/ticket/4111 $this->attributes['guid'] = (int)$this->attributes['guid']; - // cache the entity - if ($this->attributes['guid']) { - cache_entity($this); - } + cache_entity($this); return true; } diff --git a/engine/classes/ElggVolatileMetadataCache.php b/engine/classes/ElggVolatileMetadataCache.php new file mode 100644 index 000000000..24ae58d42 --- /dev/null +++ b/engine/classes/ElggVolatileMetadataCache.php @@ -0,0 +1,344 @@ +getIgnoreAccess()) { + $this->values[$entity_guid] = $values; + $this->isSynchronized[$entity_guid] = true; + } + } + + /** + * @param int $entity_guid + * + * @return array + */ + public function loadAll($entity_guid) { + if (isset($this->values[$entity_guid])) { + return $this->values[$entity_guid]; + } else { + return array(); + } + } + + /** + * Declare that there may be fetch-able metadata names in storage that this + * cache doesn't know about + * + * @param int $entity_guid + */ + public function 
markOutOfSync($entity_guid) { + unset($this->isSynchronized[$entity_guid]); + } + + /** + * @param int $entity_guid + * + * @return bool + */ + public function isSynchronized($entity_guid) { + return isset($this->isSynchronized[$entity_guid]); + } + + /** + * @param int $entity_guid + * + * @param string $name + * + * @param array|int|string|null $value null means it is known that there is no + * fetch-able metadata under this name + * @param bool $allow_multiple + */ + public function save($entity_guid, $name, $value, $allow_multiple = false) { + if ($this->getIgnoreAccess()) { + // we don't know if what gets saved here will be available to user once + // access control returns, hence it's best to forget :/ + $this->markUnknown($entity_guid, $name); + } else { + if ($allow_multiple) { + if ($this->isKnown($entity_guid, $name)) { + $existing = $this->load($entity_guid, $name); + if ($existing !== null) { + $existing = (array) $existing; + $existing[] = $value; + $value = $existing; + } + } else { + // we don't know whether there are unknown values, so it's + // safest to leave that assumption + $this->markUnknown($entity_guid, $name); + return; + } + } + $this->values[$entity_guid][$name] = $value; + } + } + + /** + * Warning: You should always call isKnown() beforehand to verify that this + * function's return value should be trusted (otherwise a null return value + * is ambiguous). + * + * @param int $entity_guid + * + * @param string $name + * + * @return array|string|int|null null = value does not exist + */ + public function load($entity_guid, $name) { + if (isset($this->values[$entity_guid]) && array_key_exists($name, $this->values[$entity_guid])) { + return $this->values[$entity_guid][$name]; + } else { + return null; + } + } + + /** + * Forget about this metadata entry. 
We don't want to try to guess what the + * next fetch from storage will return + * + * @param int $entity_guid + * + * @param string $name + */ + public function markUnknown($entity_guid, $name) { + unset($this->values[$entity_guid][$name]); + $this->markOutOfSync($entity_guid); + } + + /** + * If true, load() will return an accurate value for this name + * + * @param int $entity_guid + * + * @param string $name + * + * @return bool + */ + public function isKnown($entity_guid, $name) { + if (isset($this->isSynchronized[$entity_guid])) { + return true; + } else { + return (isset($this->values[$entity_guid]) && array_key_exists($name, $this->values[$entity_guid])); + } + + } + + /** + * Declare that metadata under this name is known to be not fetch-able from storage + * + * @param int $entity_guid + * + * @param string $name + * + * @return void + */ + public function markEmpty($entity_guid, $name) { + $this->values[$entity_guid][$name] = null; + } + + /** + * Forget about all metadata for an entity + * + * @param int $entity_guid + */ + public function clear($entity_guid) { + $this->values[$entity_guid] = array(); + $this->markOutOfSync($entity_guid); + } + + /** + * Clear entire cache and mark all entities as out of sync + */ + public function flush() { + $this->values = array(); + $this->isSynchronized = array(); + } + + /** + * Use this value instead of calling elgg_get_ignore_access(). By default that + * function will be called. + * + * This setting makes this component a little more loosely-coupled. + * + * @param bool $ignore + */ + public function setIgnoreAccess($ignore) { + $this->ignoreAccess = (bool) $ignore; + } + + /** + * Tell the cache to call elgg_get_ignore_access() to determine access status. 
+ */ + public function unsetIgnoreAccess() { + $this->ignoreAccess = null; + } + + /** + * @return bool + */ + protected function getIgnoreAccess() { + if (null === $this->ignoreAccess) { + return elgg_get_ignore_access(); + } else { + return $this->ignoreAccess; + } + } + + /** + * Invalidate based on options passed to the global *_metadata functions + * + * @param string $action Action performed on metadata. "delete", "disable", or "enable" + * + * @param array $options Options passed to elgg_(delete|disable|enable)_metadata + * + * "guid" if given, invalidation will be limited to this entity + * + * "metadata_name" if given, invalidation will be limited to metadata with this name + */ + public function invalidateByOptions($action, array $options) { + // remove as little as possible, optimizing for common cases + if (empty($options['guid'])) { + // safest to clear everything unless we want to make this even more complex :( + $this->flush(); + } else { + if (empty($options['metadata_name'])) { + // safest to clear the whole entity + $this->clear($options['guid']); + } else { + switch ($action) { + case 'delete': + $this->markEmpty($options['guid'], $options['metadata_name']); + break; + default: + $this->markUnknown($options['guid'], $options['metadata_name']); + } + } + } + } + + /** + * @param int|array $guids + */ + public function populateFromEntities($guids) { + if (empty($guids)) { + return; + } + if (!is_array($guids)) { + $guids = array($guids); + } + $guids = array_unique($guids); + + // could be useful at some point in future + //$guids = $this->filterMetadataHeavyEntities($guids); + + $db_prefix = elgg_get_config('dbprefix'); + $options = array( + 'guids' => $guids, + 'limit' => 0, + 'callback' => false, + 'joins' => array( + "JOIN {$db_prefix}metastrings v ON n_table.value_id = v.id", + "JOIN {$db_prefix}metastrings n ON n_table.name_id = n.id", + ), + 'selects' => array('n.string AS name', 'v.string AS value'), + 'order_by' => 'n_table.entity_guid, 
n_table.time_created ASC', + ); + $data = elgg_get_metadata($options); + + // build up metadata for each entity, save when GUID changes (or data ends) + $last_guid = null; + $metadata = array(); + $last_row_idx = count($data) - 1; + foreach ($data as $i => $row) { + $name = $row->name; + $value = ($row->value_type === 'text') ? $row->value : (int) $row->value; + $guid = $row->entity_guid; + if ($guid !== $last_guid) { + if ($last_guid) { + $this->saveAll($last_guid, $metadata); + } + $metadata = array(); + } + if (isset($metadata[$name])) { + $metadata[$name] = (array) $metadata[$name]; + $metadata[$name][] = $value; + } else { + $metadata[$name] = $value; + } + if (($i == $last_row_idx)) { + $this->saveAll($guid, $metadata); + } + $last_guid = $guid; + } + } + + /** + * Filter out entities whose concatenated metadata values (INTs cast to strings) + * exceed a threshold in characters. This could be used to avoid overpopulating the + * cache if RAM usage becomes an issue. + * + * @param array $guids GUIDs of entities to examine + * + * @param int $limit Limit in characters of all metadata (with ints cast to strings) + * + * @return array + */ + public function filterMetadataHeavyEntities(array $guids, $limit = 1024000) { + $db_prefix = elgg_get_config('dbprefix'); + + $options = array( + 'guids' => $guids, + 'limit' => 0, + 'callback' => false, + 'joins' => "JOIN {$db_prefix}metastrings v ON n_table.value_id = v.id", + 'selects' => array('SUM(LENGTH(v.string)) AS bytes'), + 'order_by' => 'n_table.entity_guid, n_table.time_created ASC', + 'group_by' => 'n_table.entity_guid', + ); + $data = elgg_get_metadata($options); + // don't cache if metadata for entity is over $limit characters (or rolled INT) + foreach ($data as $row) { + if ($row->bytes > $limit || $row->bytes < 0) { + array_splice($guids, array_search($row->entity_guid, $guids), 1); + } + } + return $guids; + } +} -- cgit v1.2.3