/*
- $Id: dircache.c,v 1.1.2.7 2010-02-11 13:06:54 franklahm Exp $
Copyright (c) 2010 Frank Lahm <franklahm@gmail.com>
This program is free software; you can redistribute it and/or modify
#include <stdlib.h>
#include <errno.h>
#include <assert.h>
+#include <time.h>
#include <atalk/util.h>
#include <atalk/cnid.h>
#include "globals.h"
/*
- * Dircache and indexes
- * ====================
+ * Directory Cache
+ * ===============
+ *
+ * Cache files and directories in a LRU cache.
+ *
+ * The directory cache caches directories and files(!). The main reason for having the cache
+ * is avoiding recursive walks up the path, querying the CNID database each time, when
+ * we have to calculate the location of eg directory with CNID 30, which is located in a dir with
+ * CNID 25, next CNID 20 and then CNID 2 (the volume root as per AFP spec).
+ * If all these dirs where in the cache, each database look up can be avoided. Additionally there's
+ * the element "fullpath" in struct dir, which is used to avoid the recursion in any case. Wheneveer
+ * a struct dir is initialized, the fullpath to the directory is stored there.
+ *
+ * In order to speed up the CNID query for files too, which eg happens when a directory is enumerated,
+ * files are stored too in the dircache. In order to differentiate between files and dirs, we set
+ * the flag DIRF_ISFILE in struct dir.d_flags for files.
+ *
+ * The most frequent codepatch that leads to caching is directory enumeration (cf enumerate.c):
+ * - if a element is a directory:
+ * (1) the cache is searched by dircache_search_by_name()
+ * (2) if it wasn't found a new struct dir is created and cached both from within dir_add()
+ * - for files the caching happens a little bit down the call chain:
+ * (3) first getfilparams() is called, which calls
+ * (4) getmetadata() where the cache is searched with dircache_search_by_name()
+ * (5) if the element is not found
+ * (6) get_id() queries the CNID from the database
+ * (7) then a struct dir is initialized via dir_new() (note the fullpath arg is NULL)
+ * (8) finally added to the cache with dircache_add()
+ * (2) of course does contain the steps 6,7 and 8.
+ *
+ * The dircache is a LRU cache, whenever it fills up we call dircache_evict internally which removes
+ * DIRCACHE_FREE_QUANTUM elements from the cache.
+ *
+ * There is only one cache for all volumes, so of course we use the volume id in hashing calculations.
+ *
+ * In order to avoid cache poisoning, we store the cached entries st_ctime from stat in
+ * struct dir.ctime_dircache. Later when we search the cache we compare the stored
+ * value with the result of a fresh stat. If the times differ, we remove the cached
+ * entry and return "no entry found in cache".
+ * A elements ctime changes when
+ * 1) the element is renamed
+ * (we loose the cached entry here, but it will expire when the cache fills)
+ * 2) its a directory and an object has been created therein
+ * 3) the element is deleted and recreated under the same name
+ * Using ctime leads to cache eviction in case 2) where it wouldn't be necessary, because
+ * the dir itself (name, CNID, ...) hasn't changed, but there's no other way.
+ *
+ * Indexes
+ * =======
+ *
* The maximum dircache size is:
* max(DEFAULT_MAX_DIRCACHE_SIZE, min(size, MAX_POSSIBLE_DIRCACHE_SIZE)).
* It is a hashtable which we use to store "struct dir"s in. If the cache get full, oldest
* entries are evicted in chunks of DIRCACHE_FREE.
+ *
* We have/need two indexes:
* - a DID/name index on the main dircache, another hashtable
* - a queue index on the dircache, for evicting the oldest entries
- * The cache supports locking of struct dir elements through the DIRF_CACHELOCK flag. A dir
- * locked this way wont ever be removed from the cache, so be careful.
+ *
+ * Debugging
+ * =========
+ *
+ * Sending SIGINT to a afpd child causes it to dump the dircache to a file "/tmp/dircache.PID".
*/
/********************************************************
********************************************************/
/*****************************
- * THE dircache */
+ * the dircache */
static hash_t *dircache; /* The actual cache */
static unsigned int dircache_maxsize; /* cache maximum size */
+static struct dircache_stat {
+ unsigned long long lookups;
+ unsigned long long hits;
+ unsigned long long misses;
+ unsigned long long added;
+ unsigned long long removed;
+ unsigned long long expunged;
+ unsigned long long evicted;
+} dircache_stat;
+
/* FNV 1a */
static hash_val_t hash_vid_did(const void *key)
{
/***************************
* queue index on dircache */
-static queue_t *index_queue; /* the index itself */
-static unsigned int queue_count;
-static const int dircache_free_quantum = 256; /* number of entries to free */
+static q_t *index_queue; /* the index itself */
+static unsigned long queue_count;
/*!
* @brief Remove a fixed number of (oldest) entries from the cache and indexes
* The default is to remove the 256 oldest entries from the cache.
* 1. Get the oldest entry
* 2. If it's in use ie open forks reference it or it's curdir requeue it,
- * or it's locked (from catsearch) dont remove it
+ * dont remove it
* 3. Remove the dir from the main cache and the didname index
* 4. Free the struct dir structure and all its members
*/
static void dircache_evict(void)
{
- int i = dircache_free_quantum;
+ int i = DIRCACHE_FREE_QUANTUM;
struct dir *dir;
LOG(log_debug, logtype_afpd, "dircache: {starting cache eviction}");
while (i--) {
if ((dir = (struct dir *)dequeue(index_queue)) == NULL) { /* 1 */
dircache_dump();
- exit(EXITERR_SYS);
+ AFP_PANIC("dircache_evict");
}
queue_count--;
- if (curdir == dir
- || dir->d_ofork
- || (dir->d_flags & DIRF_CACHELOCK)) { /* 2 */
+ if (curdir == dir) { /* 2 */
if ((dir->qidx_node = enqueue(index_queue, dir)) == NULL) {
dircache_dump();
- exit(EXITERR_SYS);
+ AFP_PANIC("dircache_evict");
}
queue_count++;
continue;
dir_free(dir); /* 4 */
}
- AFP_ASSERT(queue_count == dircache->hash_nodecount);
-
+ AFP_ASSERT(queue_count == dircache->hash_nodecount);
+ dircache_stat.evicted += DIRCACHE_FREE_QUANTUM;
LOG(log_debug, logtype_afpd, "dircache: {finished cache eviction}");
}
********************************************************/
/*!
- * @brief Search the dircache via a DID
+ * @brief Search the dircache via a CNID for a directory
*
- * @param vol (r) pointer to struct vol
- * @param did (r) CNID of the directory
+ * Found cache entries are expunged if both the parent directory st_ctime and the objects
+ * st_ctime are modified.
+ * This func builds on the fact, that all our code only ever needs to and does search
+ * the dircache by CNID expecting directories to be returned, but not files.
+ * Thus
+ * (1) if we find a file for a given CNID we
+ * (1a) remove it from the cache
+ * (1b) return NULL indicating nothing found
+ * (2) we can then use d_fullpath to stat the directory
*
- * @returns Pointer to struct dir if found, else NULL
+ * @param vol (r) pointer to struct vol
+ * @param cnid (r) CNID of the directory to search
+ *
+ * @returns Pointer to struct dir if found, else NULL
*/
-struct dir *dircache_search_by_did(const struct vol *vol, cnid_t did)
+struct dir *dircache_search_by_did(const struct vol *vol, cnid_t cnid)
{
struct dir *cdir = NULL;
struct dir key;
+ struct stat st;
hnode_t *hn;
- AFP_ASSERT(vol);
- AFP_ASSERT(ntohl(did) >= CNID_START);
+ AFP_ASSERT(vol);
+ AFP_ASSERT(ntohl(cnid) >= CNID_START);
+ dircache_stat.lookups++;
key.d_vid = vol->v_vid;
- key.d_did = did;
+ key.d_did = cnid;
if ((hn = hash_lookup(dircache, &key)))
cdir = hnode_get(hn);
- if (cdir)
- LOG(log_debug, logtype_afpd, "dircache(did:%u): {cached: path:'%s'}", ntohl(did), cfrombstring(cdir->d_fullpath));
- else
- LOG(log_debug, logtype_afpd, "dircache(did:%u): {not in cache}", ntohl(did));
+ if (cdir) {
+ if (cdir->d_flags & DIRF_ISFILE) { /* (1) */
+ LOG(log_debug, logtype_afpd, "dircache(cnid:%u): {not a directory:\"%s\"}",
+ ntohl(cnid), cfrombstr(cdir->d_u_name));
+ (void)dir_remove(vol, cdir); /* (1a) */
+ dircache_stat.expunged++;
+ return NULL; /* (1b) */
+ }
+ if (lstat(cfrombstr(cdir->d_fullpath), &st) != 0) {
+ LOG(log_debug, logtype_afpd, "dircache(cnid:%u): {missing:\"%s\"}",
+ ntohl(cnid), cfrombstr(cdir->d_fullpath));
+ (void)dir_remove(vol, cdir);
+ dircache_stat.expunged++;
+ return NULL;
+ }
+ if ((cdir->dcache_ctime != st.st_ctime) || (cdir->dcache_ino != st.st_ino)) {
+ LOG(log_debug, logtype_afpd, "dircache(cnid:%u): {modified:\"%s\"}",
+ ntohl(cnid), cfrombstr(cdir->d_u_name));
+ (void)dir_remove(vol, cdir);
+ dircache_stat.expunged++;
+ return NULL;
+ }
+ LOG(log_debug, logtype_afpd, "dircache(cnid:%u): {cached: path:\"%s\"}",
+ ntohl(cnid), cfrombstr(cdir->d_fullpath));
+ dircache_stat.hits++;
+ } else {
+ LOG(log_debug, logtype_afpd, "dircache(cnid:%u): {not in cache}", ntohl(cnid));
+ dircache_stat.misses++;
+ }
+
return cdir;
}
/*!
* @brief Search the cache via did/name hashtable
*
- * @param vol (r) volume
- * @param dir (r) directory
- * @param name (r) name (server side encoding)
- * @parma len (r) strlen of name
+ * Found cache entries are expunged if both the parent directory st_ctime and the objects
+ * st_ctime are modified.
+ *
+ * @param vol (r) volume
+ * @param dir (r) directory
+ * @param name (r) name (server side encoding)
+ * @parma len (r) strlen of name
*
* @returns pointer to struct dir if found in cache, else NULL
*/
-struct dir *dircache_search_by_name(const struct vol *vol, const struct dir *dir, char *name, int len)
+struct dir *dircache_search_by_name(const struct vol *vol,
+ const struct dir *dir,
+ char *name,
+ int len)
{
struct dir *cdir = NULL;
struct dir key;
+ struct stat st;
+
hnode_t *hn;
static_bstring uname = {-1, len, (unsigned char *)name};
- AFP_ASSERT(vol);
- AFP_ASSERT(dir);
- AFP_ASSERT(name);
- AFP_ASSERT(len == strlen(name));
- AFP_ASSERT(len < 256);
+ AFP_ASSERT(vol);
+ AFP_ASSERT(dir);
+ AFP_ASSERT(name);
+ AFP_ASSERT(len == strlen(name));
+ AFP_ASSERT(len < 256);
+
+ dircache_stat.lookups++;
+ LOG(log_debug, logtype_afpd, "dircache_search_by_name(did:%u, \"%s\")",
+ ntohl(dir->d_did), name);
if (dir->d_did != DIRDID_ROOT_PARENT) {
key.d_vid = vol->v_vid;
cdir = hnode_get(hn);
}
- if (cdir)
- LOG(log_debug, logtype_afpd, "dircache(pdid:%u, did:%u, '%s'): {found in cache}",
- ntohl(dir->d_did), ntohl(cdir->d_did), cfrombstring(cdir->d_fullpath));
- else
- LOG(log_debug, logtype_afpd, "dircache(pdid:%u,'%s/%s'): {not in cache}",
- ntohl(dir->d_did), cfrombstring(dir->d_fullpath), name);
+ if (cdir) {
+ if (lstat(cfrombstr(cdir->d_fullpath), &st) != 0) {
+ LOG(log_debug, logtype_afpd, "dircache(did:%u,\"%s\"): {missing:\"%s\"}",
+ ntohl(dir->d_did), name, cfrombstr(cdir->d_fullpath));
+ (void)dir_remove(vol, cdir);
+ dircache_stat.expunged++;
+ return NULL;
+ }
+
+ /* Remove modified directories and files */
+ if ((cdir->dcache_ctime != st.st_ctime) || (cdir->dcache_ino != st.st_ino)) {
+ LOG(log_debug, logtype_afpd, "dircache(did:%u,\"%s\"): {modified}",
+ ntohl(dir->d_did), name);
+ (void)dir_remove(vol, cdir);
+ dircache_stat.expunged++;
+ return NULL;
+ }
+ LOG(log_debug, logtype_afpd, "dircache(did:%u,\"%s\"): {found in cache}",
+ ntohl(dir->d_did), name);
+ dircache_stat.hits++;
+ } else {
+ LOG(log_debug, logtype_afpd, "dircache(did:%u,\"%s\"): {not in cache}",
+ ntohl(dir->d_did), name);
+ dircache_stat.misses++;
+ }
return cdir;
}
*
* @returns 0 on success, -1 on error which should result in an abort
*/
-int dircache_add(struct dir *dir)
+int dircache_add(const struct vol *vol,
+ struct dir *dir)
{
- AFP_ASSERT(dir);
- AFP_ASSERT(ntohl(dir->d_pdid) >= 2);
- AFP_ASSERT(ntohl(dir->d_did) >= CNID_START);
- AFP_ASSERT(dir->d_fullpath);
- AFP_ASSERT(dir->d_u_name);
- AFP_ASSERT(dir->d_vid);
- AFP_ASSERT(dircache->hash_nodecount <= dircache_maxsize);
+ struct dir *cdir = NULL;
+ struct dir key;
+ hnode_t *hn;
+
+ AFP_ASSERT(dir);
+ AFP_ASSERT(ntohl(dir->d_pdid) >= 2);
+ AFP_ASSERT(ntohl(dir->d_did) >= CNID_START);
+ AFP_ASSERT(dir->d_u_name);
+ AFP_ASSERT(dir->d_vid);
+ AFP_ASSERT(dircache->hash_nodecount <= dircache_maxsize);
/* Check if cache is full */
if (dircache->hash_nodecount == dircache_maxsize)
dircache_evict();
+ /*
+ * Make sure we don't add duplicates
+ */
+
+ /* Search primary cache by CNID */
+ key.d_vid = dir->d_vid;
+ key.d_did = dir->d_did;
+ if ((hn = hash_lookup(dircache, &key))) {
+ /* Found an entry with the same CNID, delete it */
+ dir_remove(vol, hnode_get(hn));
+ dircache_stat.expunged++;
+ }
+ key.d_vid = vol->v_vid;
+ key.d_pdid = dir->d_did;
+ key.d_u_name = dir->d_u_name;
+ if ((hn = hash_lookup(index_didname, &key))) {
+ /* Found an entry with the same DID/name, delete it */
+ dir_remove(vol, hnode_get(hn));
+ dircache_stat.expunged++;
+ }
+
/* Add it to the main dircache */
if (hash_alloc_insert(dircache, dir, dir) == 0) {
dircache_dump();
queue_count++;
}
- LOG(log_debug, logtype_afpd, "dircache(did:%u,'%s'): {added}", ntohl(dir->d_did), cfrombstring(dir->d_fullpath));
+ dircache_stat.added++;
+ LOG(log_debug, logtype_afpd, "dircache(did:%u,'%s'): {added}",
+ ntohl(dir->d_did), cfrombstr(dir->d_u_name));
AFP_ASSERT(queue_count == index_didname->hash_nodecount
&& queue_count == dircache->hash_nodecount);
{
hnode_t *hn;
- AFP_ASSERT(dir);
- AFP_ASSERT((flags & ~(QUEUE_INDEX | DIDNAME_INDEX | DIRCACHE)) == 0);
-
- if (dir->d_flags & DIRF_CACHELOCK)
- return;
+ AFP_ASSERT(dir);
+ AFP_ASSERT((flags & ~(QUEUE_INDEX | DIDNAME_INDEX | DIRCACHE)) == 0);
if (flags & QUEUE_INDEX) {
/* remove it from the queue index */
if (flags & DIDNAME_INDEX) {
if ((hn = hash_lookup(index_didname, dir)) == NULL) {
- LOG(log_error, logtype_default, "dircache_remove(%u,%s): not in didname index",
- ntohl(dir->d_did), cfrombstring(dir->d_fullpath));
+ LOG(log_error, logtype_afpd, "dircache_remove(%u,\"%s\"): not in didname index",
+ ntohl(dir->d_did), cfrombstr(dir->d_u_name));
dircache_dump();
- exit(EXITERR_SYS);
+ AFP_PANIC("dircache_remove");
}
- hash_delete(index_didname, hn);
+ hash_delete_free(index_didname, hn);
}
if (flags & DIRCACHE) {
if ((hn = hash_lookup(dircache, dir)) == NULL) {
- LOG(log_error, logtype_default, "dircache_remove(%u,%s): not in dircache",
- ntohl(dir->d_did), cfrombstring(dir->d_fullpath));
+ LOG(log_error, logtype_afpd, "dircache_remove(%u,\"%s\"): not in dircache",
+ ntohl(dir->d_did), cfrombstr(dir->d_u_name));
dircache_dump();
- exit(EXITERR_SYS);
+ AFP_PANIC("dircache_remove");
}
- hash_delete(dircache, hn);
+ hash_delete_free(dircache, hn);
}
- LOG(log_debug, logtype_afpd, "dircache(did:%u,'%s'): {removed}", ntohl(dir->d_did), cfrombstring(dir->d_fullpath));
+ LOG(log_debug, logtype_afpd, "dircache(did:%u,\"%s\"): {removed}",
+ ntohl(dir->d_did), cfrombstr(dir->d_u_name));
- AFP_ASSERT(queue_count == index_didname->hash_nodecount
- && queue_count == dircache->hash_nodecount);
+ dircache_stat.removed++;
+ AFP_ASSERT(queue_count == index_didname->hash_nodecount
+ && queue_count == dircache->hash_nodecount);
}
/*!
else
queue_count = 0;
+ /* Initialize index queue */
+ if ((invalid_dircache_entries = queue_init()) == NULL)
+ return -1;
+
/* As long as directory.c hasn't got its own initializer call, we do it for it */
rootParent.d_did = DIRDID_ROOT_PARENT;
rootParent.d_fullpath = bfromcstr("ROOT_PARENT");
return 0;
}
+/*!
+ * Log dircache statistics
+ */
+void log_dircache_stat(void)
+{
+ LOG(log_info, logtype_afpd, "dircache statistics: "
+ "entries: %lu, lookups: %llu, hits: %llu, misses: %llu, added: %llu, removed: %llu, expunged: %llu, evicted: %llu",
+ queue_count,
+ dircache_stat.lookups,
+ dircache_stat.hits,
+ dircache_stat.misses,
+ dircache_stat.added,
+ dircache_stat.removed,
+ dircache_stat.expunged,
+ dircache_stat.evicted);
+}
+
/*!
* @brief Dump dircache to /tmp/dircache.PID
*/
char tmpnam[64];
FILE *dump;
qnode_t *n = index_queue->next;
+ hnode_t *hn;
+ hscan_t hs;
const struct dir *dir;
+ int i;
LOG(log_warning, logtype_afpd, "Dumping directory cache...");
}
setbuf(dump, NULL);
- fprintf(dump, "Number of cache entries: %u\n", queue_count);
- fprintf(dump, "Configured maximum cache size: %u\n", dircache_maxsize);
- fprintf(dump, "==================================================\n\n");
+ fprintf(dump, "Number of cache entries in LRU queue: %lu\n", queue_count);
+ fprintf(dump, "Configured maximum cache size: %u\n\n", dircache_maxsize);
+
+ fprintf(dump, "Primary CNID index:\n");
+ fprintf(dump, " VID DID CNID STAT PATH\n");
+ fprintf(dump, "====================================================================\n");
+ hash_scan_begin(&hs, dircache);
+ i = 1;
+ while ((hn = hash_scan_next(&hs))) {
+ dir = hnode_get(hn);
+ fprintf(dump, "%05u: %3u %6u %6u %s %s\n",
+ i++,
+ ntohs(dir->d_vid),
+ ntohl(dir->d_pdid),
+ ntohl(dir->d_did),
+ dir->d_flags & DIRF_ISFILE ? "f" : "d",
+ cfrombstr(dir->d_fullpath));
+ }
+
+ fprintf(dump, "\nSecondary DID/name index:\n");
+ fprintf(dump, " VID DID CNID STAT PATH\n");
+ fprintf(dump, "====================================================================\n");
+ hash_scan_begin(&hs, index_didname);
+ i = 1;
+ while ((hn = hash_scan_next(&hs))) {
+ dir = hnode_get(hn);
+ fprintf(dump, "%05u: %3u %6u %6u %s %s\n",
+ i++,
+ ntohs(dir->d_vid),
+ ntohl(dir->d_pdid),
+ ntohl(dir->d_did),
+ dir->d_flags & DIRF_ISFILE ? "f" : "d",
+ cfrombstr(dir->d_fullpath));
+ }
+
+ fprintf(dump, "\nLRU Queue:\n");
+ fprintf(dump, " VID DID CNID STAT PATH\n");
+ fprintf(dump, "====================================================================\n");
- for (int i = 1; i <= queue_count; i++) {
+ for (i = 1; i <= queue_count; i++) {
if (n == index_queue)
break;
dir = (struct dir *)n->data;
- fprintf(dump, "%05u: vid:%u, pdid:%6u, did:%6u, path:%s, locked:%3s, oforks:%s\n",
- i, ntohs(dir->d_vid), ntohl(dir->d_pdid), ntohl(dir->d_did), cfrombstring(dir->d_fullpath),
- (dir->d_flags & DIRF_CACHELOCK) ? "yes" : "no",
- dir->d_ofork ? "yes" : "no");
+ fprintf(dump, "%05u: %3u %6u %6u %s %s\n",
+ i,
+ ntohs(dir->d_vid),
+ ntohl(dir->d_pdid),
+ ntohl(dir->d_did),
+ dir->d_flags & DIRF_ISFILE ? "f" : "d",
+ cfrombstr(dir->d_fullpath));
n = n->next;
}
fprintf(dump, "\n");
+ fflush(dump);
return;
}