From: Rob Browning
Date: Sun, 20 Oct 2019 17:49:14 +0000 (-0500)
Subject: Move pwd grp functions to pwdgrp module; require binary fields
X-Git-Tag: 0.31~247
X-Git-Url: https://arthur.barton.de/cgi-bin/gitweb.cgi?p=bup.git;a=commitdiff_plain;h=e861537c86a6b25b39a093342408d0e222f32afc

Move pwd grp functions to pwdgrp module; require binary fields

Move the pwd and grp related functions in helpers to a new more
focused pwdgrp module and adjust them (for py3) to require and provide
binary (bytes) values for the appropriate fields: name, passwd, etc.
These fields are arbitrary bytes with no encoding information on
platforms like Linux and *BSD, so must be handled that way.

Create our own Passwd and Group classes to carry the data, and make
them __slots__ based since they're quite a lot smaller, even than
namedtuples:

  https://justmachinelearning.com/2019/07/29/python-consumes-a-lot-of-memory-or-how-to-reduce-the-size-of-objects/

See the DESIGN section on Python 3 strings for additional information.

Signed-off-by: Rob Browning
Tested-by: Rob Browning
---
Review notes (text between "---" and the first "diff" line is not part
of the commit message):

The lib/bup/pwdgrp.py hunk below differs from commit e861537c as
follows; the other hunks only have their line breaks restored.

* getgrgid() and getgrnam() called pwd.getgrgid()/pwd.getgrnam(), which
  do not exist; the group database lives in the grp module.
* grp_from_gid() and grp_from_name() called grp directly, so on py3 they
  returned str fields and handed a bytes name to grp.getgrnam(); they
  now go through the bytes wrappers, like pwd_from_uid()/pwd_from_name().
* getpwnam() and getgrnam() decode the bytes name via _name_for_py()
  before calling the stdlib, which only accepts str on py3.
* username() no longer dereferences a missing passwd entry, so its
  b'user%d' fallback is reachable (as in userfullname()).
* The grp_* docstrings now say "group database entry".

The new file's hunk header is adjusted to match (141 -> 155 lines); its
"index" blob id still names the original blob.

diff --git a/cmd/get-cmd.py b/cmd/get-cmd.py
index efeb78f..f5fe01a 100755
--- a/cmd/get-cmd.py
+++ b/cmd/get-cmd.py
@@ -15,7 +15,8 @@ from bup import git, client, helpers, vfs
 from bup.compat import wrap_main
 from bup.git import get_cat_data, parse_commit, walk_object
 from bup.helpers import add_error, debug1, handle_ctrl_c, log, saved_errors
-from bup.helpers import hostname, shstr, tty_width, userfullname, username
+from bup.helpers import hostname, shstr, tty_width
+from bup.pwdgrp import userfullname, username
 from bup.repo import LocalRepo, RemoteRepo
 
 argspec = (
diff --git a/cmd/save-cmd.py b/cmd/save-cmd.py
index e78796b..04cf55d 100755
--- a/cmd/save-cmd.py
+++ b/cmd/save-cmd.py
@@ -16,7 +16,8 @@ from bup.helpers import (add_error, grafted_path_components, handle_ctrl_c,
                          hostname, istty2, log, parse_date_or_fatal, parse_num,
                          path_components, progress, qprogress, resolve_parent,
                          saved_errors, stripped_path_components,
-                         userfullname, username, valid_save_name)
+                         valid_save_name)
+from bup.pwdgrp import userfullname, username
 
 
 optspec = """
diff --git a/cmd/split-cmd.py b/cmd/split-cmd.py
index 500fb19..31950ec 100755
--- a/cmd/split-cmd.py
+++ b/cmd/split-cmd.py
@@ -11,8 +11,9 @@ import os, sys, time
 from bup import hashsplit, git, options, client
 from bup.helpers import (add_error, handle_ctrl_c, hostname, log, parse_num,
                          qprogress, reprogress, saved_errors,
-                         userfullname, username, valid_save_name,
+                         valid_save_name,
                          parse_date_or_fatal)
+from bup.pwdgrp import userfullname, username
 
 
 optspec = """
diff --git a/lib/bup/git.py b/lib/bup/git.py
index ff48da7..13913a5 100644
--- a/lib/bup/git.py
+++ b/lib/bup/git.py
@@ -19,8 +19,9 @@ from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2,
                          mmap_read, mmap_readwrite,
                          parse_num,
                          progress, qprogress, shstr, stat_if_exists,
-                         unlink, username, userfullname,
+                         unlink,
                          utc_offset_str)
+from bup.pwdgrp import username, userfullname
 
 verbose = 0
 ignore_midx = 0
diff --git a/lib/bup/helpers.py b/lib/bup/helpers.py
index d66b5f1..f6b71cd 100644
--- a/lib/bup/helpers.py
+++ b/lib/bup/helpers.py
@@ -367,7 +367,7 @@ else:
         return os.geteuid() == 0
 
 
-def _cache_key_value(get_value, key, cache):
+def cache_key_value(get_value, key, cache):
     """Return (value, was_cached). If there is a value in the cache
     for key, use that, otherwise, call get_value(key) which should
     throw a KeyError if there is no value -- in which case the cached
@@ -386,80 +386,6 @@ def _cache_key_value(get_value, key, cache):
     return value, False
 
 
-_uid_to_pwd_cache = {}
-_name_to_pwd_cache = {}
-
-def pwd_from_uid(uid):
-    """Return password database entry for uid (may be a cached value).
-    Return None if no entry is found.
-    """
-    global _uid_to_pwd_cache, _name_to_pwd_cache
-    entry, cached = _cache_key_value(pwd.getpwuid, uid, _uid_to_pwd_cache)
-    if entry and not cached:
-        _name_to_pwd_cache[entry.pw_name] = entry
-    return entry
-
-
-def pwd_from_name(name):
-    """Return password database entry for name (may be a cached value).
-    Return None if no entry is found.
-    """
-    global _uid_to_pwd_cache, _name_to_pwd_cache
-    entry, cached = _cache_key_value(pwd.getpwnam, name, _name_to_pwd_cache)
-    if entry and not cached:
-        _uid_to_pwd_cache[entry.pw_uid] = entry
-    return entry
-
-
-_gid_to_grp_cache = {}
-_name_to_grp_cache = {}
-
-def grp_from_gid(gid):
-    """Return password database entry for gid (may be a cached value).
-    Return None if no entry is found.
-    """
-    global _gid_to_grp_cache, _name_to_grp_cache
-    entry, cached = _cache_key_value(grp.getgrgid, gid, _gid_to_grp_cache)
-    if entry and not cached:
-        _name_to_grp_cache[entry.gr_name] = entry
-    return entry
-
-
-def grp_from_name(name):
-    """Return password database entry for name (may be a cached value).
-    Return None if no entry is found.
-    """
-    global _gid_to_grp_cache, _name_to_grp_cache
-    entry, cached = _cache_key_value(grp.getgrnam, name, _name_to_grp_cache)
-    if entry and not cached:
-        _gid_to_grp_cache[entry.gr_gid] = entry
-    return entry
-
-
-_username = None
-def username():
-    """Get the user's login name."""
-    global _username
-    if not _username:
-        uid = os.getuid()
-        _username = pwd_from_uid(uid).pw_name or b'user%d' % uid
-    return _username
-
-
-_userfullname = None
-def userfullname():
-    """Get the user's full name."""
-    global _userfullname
-    if not _userfullname:
-        uid = os.getuid()
-        entry = pwd_from_uid(uid)
-        if entry:
-            _userfullname = entry.pw_gecos.split(b',')[0] or entry.pw_name
-        if not _userfullname:
-            _userfullname = b'user%d' % uid
-    return _userfullname
-
-
 _hostname = None
 def hostname():
     """Get the FQDN of this machine."""
diff --git a/lib/bup/metadata.py b/lib/bup/metadata.py
index 2f3ee06..84b0f2a 100644
--- a/lib/bup/metadata.py
+++ b/lib/bup/metadata.py
@@ -15,7 +15,7 @@ import errno, os, sys, stat, time, pwd, grp, socket, struct
 from bup import vint, xstat
 from bup.drecurse import recursive_dirlist
 from bup.helpers import add_error, mkdirp, log, is_superuser, format_filesize
-from bup.helpers import pwd_from_uid, pwd_from_name, grp_from_gid, grp_from_name
+from bup.pwdgrp import pwd_from_uid, pwd_from_name, grp_from_gid, grp_from_name
 from bup.xstat import utime, lutime
 
 xattr = None
diff --git a/lib/bup/pwdgrp.py b/lib/bup/pwdgrp.py
new file mode 100644
index 0000000..89912ba
--- /dev/null
+++ b/lib/bup/pwdgrp.py
@@ -0,0 +1,155 @@
+
+from __future__ import absolute_import, print_function
+import os, pwd, grp
+
+from bup import compat # to force the LC_CTYPE check
+from bup.compat import py_maj
+from bup.helpers import cache_key_value
+
+
+# Using __slots__ makes these much smaller (even than a namedtuple)
+
+class Passwd:
+    """Drop in replacement for pwd's structure with bytes instead of strings."""
+    __slots__ = ('pw_name', 'pw_passwd', 'pw_uid', 'pw_gid', 'pw_gecos',
+                 'pw_dir', 'pw_shell')
+    def __init__(self, name, passwd, uid, gid, gecos, dir, shell):
+        assert type(name) == bytes
+        assert type(passwd) == bytes
+        assert type(gecos) == bytes
+        assert type(dir) == bytes
+        assert type(shell) == bytes
+        (self.pw_name, self.pw_passwd, self.pw_uid, self.pw_gid,
+         self.pw_gecos, self.pw_dir, self.pw_shell) = \
+             name, passwd, uid, gid, gecos, dir, shell
+
+def _passwd_from_py(py):
+    if py_maj < 3:
+        return py
+    return Passwd(py.pw_name.encode('iso-8859-1'),
+                  py.pw_passwd.encode("iso-8859-1"),
+                  py.pw_uid, py.pw_gid,
+                  py.pw_gecos.encode('iso-8859-1'),
+                  py.pw_dir.encode('iso-8859-1'),
+                  py.pw_shell.encode('iso-8859-1'))
+
+def _name_for_py(name):
+    """Return the bytes name in the form the stdlib lookups accept.
+    Python 3's pwd.getpwnam and grp.getgrnam require str, so decode
+    with the same iso-8859-1 mapping used to encode their results.
+    """
+    assert type(name) == bytes
+    if py_maj < 3:
+        return name
+    return name.decode('iso-8859-1')
+
+def getpwuid(uid):
+    return _passwd_from_py(pwd.getpwuid(uid))
+
+def getpwnam(name):
+    return _passwd_from_py(pwd.getpwnam(_name_for_py(name)))
+
+
+class Group:
+    """Drop in replacement for grp's structure with bytes instead of strings."""
+    __slots__ = 'gr_name', 'gr_passwd', 'gr_gid', 'gr_mem'
+    def __init__(self, name, passwd, gid, mem):
+        assert type(name) == bytes
+        assert type(passwd) == bytes
+        for m in mem:
+            assert type(m) == bytes
+        self.gr_name, self.gr_passwd, self.gr_gid, self.gr_mem = \
+            name, passwd, gid, mem
+
+def _group_from_py(py):
+    if py_maj < 3:
+        return py
+    return Group(py.gr_name.encode('iso-8859-1'),
+                 py.gr_passwd.encode('iso-8859-1'),
+                 py.gr_gid,
+                 tuple(x.encode('iso-8859-1') for x in py.gr_mem))
+
+def getgrgid(uid):
+    return _group_from_py(grp.getgrgid(uid))
+
+def getgrnam(name):
+    return _group_from_py(grp.getgrnam(_name_for_py(name)))
+
+
+_uid_to_pwd_cache = {}
+_name_to_pwd_cache = {}
+
+def pwd_from_uid(uid):
+    """Return password database entry for uid (may be a cached value).
+    Return None if no entry is found.
+    """
+    global _uid_to_pwd_cache, _name_to_pwd_cache
+    entry, cached = cache_key_value(getpwuid, uid, _uid_to_pwd_cache)
+    if entry and not cached:
+        _name_to_pwd_cache[entry.pw_name] = entry
+    return entry
+
+def pwd_from_name(name):
+    """Return password database entry for name (may be a cached value).
+    Return None if no entry is found.
+    """
+    assert type(name) == bytes
+    global _uid_to_pwd_cache, _name_to_pwd_cache
+    entry, cached = cache_key_value(getpwnam, name, _name_to_pwd_cache)
+    if entry and not cached:
+        _uid_to_pwd_cache[entry.pw_uid] = entry
+    return entry
+
+
+_gid_to_grp_cache = {}
+_name_to_grp_cache = {}
+
+def grp_from_gid(gid):
+    """Return group database entry for gid (may be a cached value).
+    Return None if no entry is found.
+    """
+    global _gid_to_grp_cache, _name_to_grp_cache
+    entry, cached = cache_key_value(getgrgid, gid, _gid_to_grp_cache)
+    if entry and not cached:
+        _name_to_grp_cache[entry.gr_name] = entry
+    return entry
+
+
+def grp_from_name(name):
+    """Return group database entry for name (may be a cached value).
+    Return None if no entry is found.
+    """
+    assert type(name) == bytes
+    global _gid_to_grp_cache, _name_to_grp_cache
+    entry, cached = cache_key_value(getgrnam, name, _name_to_grp_cache)
+    if entry and not cached:
+        _gid_to_grp_cache[entry.gr_gid] = entry
+    return entry
+
+
+_username = None
+def username():
+    """Get the user's login name."""
+    global _username
+    if not _username:
+        uid = os.getuid()
+        entry = pwd_from_uid(uid)
+        if entry:
+            _username = entry.pw_name
+        if not _username:
+            _username = b'user%d' % uid
+    return _username
+
+
+_userfullname = None
+def userfullname():
+    """Get the user's full name."""
+    global _userfullname
+    if not _userfullname:
+        uid = os.getuid()
+        entry = pwd_from_uid(uid)
+        if entry:
+            _userfullname = entry.pw_gecos.split(b',')[0] or entry.pw_name
+        if not _userfullname:
+            _userfullname = b'user%d' % uid
+    return _userfullname