arthur.barton.de Git - bup.git/commitdiff
Cache all password (pwd) and group (grp) database lookups.
author Rob Browning <rlb@defaultvalue.org>
Sun, 23 Sep 2012 23:26:48 +0000 (18:26 -0500)
committer Rob Browning <rlb@defaultvalue.org>
Sun, 3 Feb 2013 02:59:55 +0000 (20:59 -0600)
Thanks to Jann Horn <jannhorn@googlemail.com> for determining that
even just caching user names and group names could provide a notable
performance improvement in some cases.

Signed-off-by: Rob Browning <rlb@defaultvalue.org>
Reviewed-by: Zoran Zaric <zz@zoranzaric.de>
lib/bup/helpers.py
lib/bup/metadata.py

index 8b9d0d2f61cc24662112061bf11d86ae8ab28b0d..fad48faa5f5f8db577f3920751a06977a86da218 100644 (file)
@@ -1,7 +1,7 @@
 """Helper functions and classes for bup."""
 
 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
-import heapq, operator, time, platform
+import heapq, operator, time, platform, grp
 from bup import _version, _helpers
 import bup._helpers as _helpers
 
@@ -211,16 +211,82 @@ def is_superuser():
         return os.geteuid() == 0
 
 
+def _cache_key_value(get_value, key, cache):
+    """Return (value, was_cached).  If the cache already holds a value
+    for key (possibly None), return it.  Otherwise call get_value(key),
+    which should raise KeyError when there is no value -- in which case
+    None is both cached and returned.
+    """
+    try: # Do we already have it (or know there wasn't one)?
+        value = cache[key]
+        return value, True
+    except KeyError:
+        pass
+    value = None
+    try:
+        cache[key] = value = get_value(key)
+    except KeyError:
+        cache[key] = None
+    return value, False
+
+
+_uid_to_pwd_cache = {}
+_name_to_pwd_cache = {}
+
+def pwd_from_uid(uid):
+    """Return the password database entry for uid, or None if there is
+    none.  A freshly looked-up entry is also cached under its pw_name.
+    """
+    global _uid_to_pwd_cache, _name_to_pwd_cache
+    entry, cached = _cache_key_value(pwd.getpwuid, uid, _uid_to_pwd_cache)
+    if entry and not cached:
+        _name_to_pwd_cache[entry.pw_name] = entry
+    return entry
+
+
+def pwd_from_name(name):
+    """Return the password database entry for name, or None if there is
+    none.  A freshly looked-up entry is also cached under its pw_uid.
+    """
+    global _uid_to_pwd_cache, _name_to_pwd_cache
+    entry, cached = _cache_key_value(pwd.getpwnam, name, _name_to_pwd_cache)
+    if entry and not cached:
+        _uid_to_pwd_cache[entry.pw_uid] = entry
+    return entry
+
+
+_gid_to_grp_cache = {}
+_name_to_grp_cache = {}
+
+def grp_from_gid(gid):
+    """Return the group database entry for gid, or None if there is
+    none.  A freshly looked-up entry is also cached under its gr_name.
+    """
+    global _gid_to_grp_cache, _name_to_grp_cache
+    entry, cached = _cache_key_value(grp.getgrgid, gid, _gid_to_grp_cache)
+    if entry and not cached:
+        _name_to_grp_cache[entry.gr_name] = entry
+    return entry
+
+
+def grp_from_name(name):
+    """Return the group database entry for name, or None if there is
+    none.  A freshly looked-up entry is also cached under its gr_gid.
+    """
+    global _gid_to_grp_cache, _name_to_grp_cache
+    entry, cached = _cache_key_value(grp.getgrnam, name, _name_to_grp_cache)
+    if entry and not cached:
+        _gid_to_grp_cache[entry.gr_gid] = entry
+    return entry
+
+
 _username = None
 def username():
     """Get the user's login name."""
     global _username
     if not _username:
         uid = os.getuid()
-        try:
-            _username = pwd.getpwuid(uid)[0]
-        except KeyError:
-            _username = 'user%d' % uid
+        _username = pwd_from_uid(uid)[0] or 'user%d' % uid
     return _username
 
 
@@ -230,14 +296,11 @@ def userfullname():
     global _userfullname
     if not _userfullname:
         uid = os.getuid()
-        try:
-            entry = pwd.getpwuid(uid)
+        entry = pwd_from_uid(uid)
+        if entry:
             _userfullname = entry[4].split(',')[0] or entry[0]
-        except KeyError:
-            pass
-        finally:
-            if not _userfullname:
-              _userfullname = 'user%d' % uid
+        if not _userfullname:
+            _userfullname = 'user%d' % uid
     return _userfullname
 
 
index 5ffddd4b2bf11623365f71cdfa09a3f1aa8795ef..dfb7438b8a8efc467d3dd3969ce6ffa45795691e 100644 (file)
@@ -9,6 +9,7 @@ from cStringIO import StringIO
 from bup import vint, xstat
 from bup.drecurse import recursive_dirlist
 from bup.helpers import add_error, mkdirp, log, is_superuser
+from bup.helpers import pwd_from_uid, pwd_from_name, grp_from_gid, grp_from_name
 from bup.xstat import utime, lutime
 
 try:
@@ -199,16 +200,12 @@ class Metadata:
         self.mtime = st.st_mtime
         self.ctime = st.st_ctime
         self.user = self.group = ''
-        # FIXME: should we be caching id -> user/group name mappings?
-        # IIRC, tar uses some trick -- possibly caching the last pair.
-        try:
-            self.user = pwd.getpwuid(st.st_uid)[0]
-        except KeyError, e:
-            pass
-        try:
-            self.group = grp.getgrgid(st.st_gid)[0]
-        except KeyError, e:
-            pass
+        entry = pwd_from_uid(st.st_uid)
+        if entry:
+            self.user = entry.pw_name
+        entry = grp_from_gid(st.st_gid)
+        if entry:
+            self.group = entry.gr_name
         self.mode = st.st_mode
 
     def _same_common(self, other):
@@ -361,24 +358,22 @@ class Metadata:
             gid = self.gid
             if not restore_numeric_ids:
                 if self.uid != 0 and self.user:
-                    try:
-                        uid = pwd.getpwnam(self.user)[2]
-                    except KeyError:
-                        pass # Fall back to self.uid.
+                    entry = pwd_from_name(self.user)
+                    if entry:
+                        uid = entry.pw_uid
                 if self.gid != 0 and self.group:
-                    try:
-                        gid = grp.getgrnam(self.group)[2]
-                    except KeyError:
-                        pass # Fall back to self.gid.
+                    entry = grp_from_name(self.group)
+                    if entry:
+                        gid = entry.gr_gid
         else: # not superuser - only consider changing the group/gid
             user_gids = os.getgroups()
             if self.gid in user_gids:
                 gid = self.gid
             if not restore_numeric_ids and \
                     self.gid != 0 and \
-                    self.group in [grp.getgrgid(x)[0] for x in user_gids]:
+                    self.group in [grp_from_gid(x).gr_name for x in user_gids]:
                 try:
-                    gid = grp.getgrnam(self.group)[2]
+                    gid = grp_from_name(self.group).gr_gid
                 except KeyError:
                     pass # Fall back to gid.