In the process of working on Hunter I have found many strange things, just from trying to do a repr on the objects being passed around. Code blowing up with an exception is the least of your concerns. Take a look at this:
class lazy(object):
    def __init__(self, fun, *args, **kwargs):
        self._fun = fun
        self._args = args
        self._kwargs = kwargs

    def __call__(self):
        return self.evaluate()

    def evaluate(self):
        return self._fun(*self._args, **self._kwargs)

    def __repr__(self):
        return repr(self())
Simply doing a repr on that will change the flow of the program, exactly what you don't want a debugging tool to do!
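To make the side effect concrete, here's a minimal demonstration (expensive_computation is my own hypothetical example, not something from Hunter):

def expensive_computation():
    print('evaluating!')  # observable side effect
    return 42

value = lazy(expensive_computation)
repr(value)  # prints 'evaluating!' and returns '42' - the repr forced evaluation

A debugging tool that merely logs its arguments would silently force every lazy value it touches.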
So then I tried something like:
def rudimentary_repr(obj):
    if isinstance(obj, dict):
        ...
    elif isinstance(obj, list):
        ...
    elif ...:  # goes on for a while
        ...
    else:
        # give the not very useful
        # '<Something object at 0x123>'
        return object.__repr__(obj)
Add a simple depth check to deal with deep or infinite recursion and you're good, right? I went for a simple depth check instead of pprint's recursion checker (which stores object ids):
def rudimentary_repr(obj, maxdepth=5):
    if not maxdepth:
        return '...'
    newdepth = maxdepth - 1
    # then pass around newdepth, easy-peasy
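For instance, a self-referential list stays bounded (a minimal sketch with just the list branch filled in):

def rudimentary_repr(obj, maxdepth=5):
    if not maxdepth:
        return '...'
    newdepth = maxdepth - 1
    if isinstance(obj, list):
        return '[%s]' % ', '.join(
            rudimentary_repr(i, newdepth) for i in obj
        )
    return object.__repr__(obj)

circular = []
circular.append(circular)
print(rudimentary_repr(circular))  # '[[[[[...]]]]]' - bounded, no RecursionError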
At this point I thought the only real problems left were reducing the number of branches and figuring out which objects are safe to call repr on (to avoid reimplementing __repr__ for everything interesting).
Then I added this, hoping this would save me lots of typing:
    elif not hasattr(obj, '__dict__'):
        return repr(obj)
No __dict__ doesn't necessarily mean no state, but I hoped no one would do crummy stuff in __repr__ if they have a dict-less object.
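The kind of object I had in mind looks something like this (my own illustration): instances of a class using __slots__ have no __dict__, so they would be trusted with a plain repr:

class Point(object):
    __slots__ = ('x', 'y')  # instances get no __dict__

    def __init__(self, x, y):
        self.x = x
        self.y = y

hasattr(Point(1, 2), '__dict__')  # False - this object would get repr()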
But then I found this little fella:
class ApiModule(ModuleType):
    @property
    def __dict__(self):
        # force all the content of the module
        # to be loaded when __dict__ is read
        ...
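The mere attribute check is enough to set it off. Here's a simplified stand-in for what ApiModule does, with the lazy loading replaced by a print:

class SneakyModule(object):
    @property
    def __dict__(self):
        print('loading everything now...')  # stand-in for the lazy import
        return {}

hasattr(SneakyModule(), '__dict__')  # prints - the check itself ran code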
And I doubled down on the terrible idea of checking for a __dict__ (instead of hasattr(obj, '__dict__') I'd use hasdict(type(obj), obj)):
from collections import deque

def hasdict(obj_type, obj, tolerance=25):
    """
    A contrived mess to check that an object
    doesn't have a __dict__ but avoid checking
    for it if any ancestor is evil enough to
    explicitly define __dict__
    """
    ancestor_types = deque()
    while obj_type is not type and tolerance:
        ancestor_types.appendleft(obj_type)
        obj_type = type(obj_type)
        tolerance -= 1
    for ancestor in ancestor_types:
        __dict__ = getattr(ancestor, '__dict__', None)
        if __dict__ is not None:
            if '__dict__' in __dict__:
                return True
    return hasattr(obj, '__dict__')
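The trick is that it looks for '__dict__' in the class namespace (a mappingproxy), which never triggers the property, and only falls back to hasattr on the instance when nothing suspicious was found. With a SneakyModule-style class:

class Sneaky(object):
    @property
    def __dict__(self):
        print('side effect!')
        return {}

obj = Sneaky()
hasdict(type(obj), obj)  # True, and nothing printed - the property never ran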
I used that for a while until I came to the sad realization that you can't really trust anything. Behold:
class LazyObject(object):
    # Need to pretend to be the wrapped class, for the sake of objects that
    # care about this (especially in equality tests)
    __class__ = property(new_method_proxy(operator.attrgetter("__class__")))
What exactly is going on there? isinstance consults __class__ when the exact type check fails, so even an innocent type check can run arbitrary code. A simplified example to illustrate the problem:
>>> class Surprise(object):
...     @property
...     def __class__(self):
...         print('Boom!')
...
>>> p = Surprise()
>>> isinstance(p, dict)
Boom!
False
At this point it became clear that the hasdict idea wasn't going to fly for long, so I ripped that out as well.
New plan:
- Don't bother showing details for subclasses of builtin types (like dict, list, etc.). Subclasses could do any of the crazy things shown above.
- Use type instead of isinstance. For example: to check if it's an Exception instance, just check if BaseException is in the type's MRO (see the snippet after this list). As I'm typing this I realise someone could stick a descriptor into the args attribute, damn it. Perhaps getattr_static would solve it.
- Use repr only on objects deemed to have a safe builtin type. Start with builtins, io, socket, _socket.
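Unlike isinstance, type() reads the real type slot directly and can't be intercepted, so checks based on it are side-effect free. With the Surprise class from earlier:

>>> p = Surprise()
>>> type(p) is dict  # no 'Boom!' - type() can't be hijacked
False
>>> BaseException in type(p).__mro__  # MRO check, also side-effect free
False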
Here's what I've got now:
def safe_repr(obj, maxdepth=5):
    if not maxdepth:
        return '...'
    obj_type = type(obj)
    obj_type_type = type(obj_type)
    newdepth = maxdepth - 1

    # only represent exact builtins
    # (subclasses can have side-effects due to __class__ being
    # a property, __instancecheck__, __subclasscheck__ etc)
    if obj_type is dict:
        return '{%s}' % ', '.join('%s: %s' % (
            safe_repr(k, maxdepth),
            safe_repr(v, newdepth)
        ) for k, v in obj.items())
    elif obj_type is list:
        return '[%s]' % ', '.join(
            safe_repr(i, newdepth) for i in obj
        )
    elif obj_type is tuple:
        return '(%s%s)' % (
            ', '.join(safe_repr(i, newdepth) for i in obj),
            ',' if len(obj) == 1 else ''
        )
    elif obj_type is set:
        return '{%s}' % ', '.join(
            safe_repr(i, newdepth) for i in obj
        )
    elif obj_type is frozenset:
        return '%s({%s})' % (
            obj_type.__name__,
            ', '.join(safe_repr(i, newdepth) for i in obj)
        )
    elif obj_type is deque:
        return '%s([%s])' % (
            obj_type.__name__,
            ', '.join(safe_repr(i, newdepth) for i in obj)
        )
    elif obj_type in (Counter, OrderedDict, defaultdict):
        return '%s({%s})' % (
            obj_type.__name__,
            ', '.join('%s: %s' % (
                safe_repr(k, maxdepth),
                safe_repr(v, newdepth)
            ) for k, v in obj.items())
        )
    elif obj_type is types.MethodType:  # noqa
        self = obj.__self__
        name = getattr(obj, '__qualname__', None)
        if name is None:
            name = obj.__name__
        return '<%sbound method %s of %s>' % (
            'un' if self is None else '',
            name,
            safe_repr(self, newdepth)
        )
    elif obj_type_type is type and BaseException in obj_type.__mro__:
        return '%s(%s)' % (
            obj_type.__name__,
            ', '.join(safe_repr(i, newdepth) for i in obj.args)
        )
    elif obj_type_type is type and \
            obj_type is not InstanceType and \
            obj_type.__module__ in (builtins.__name__, 'io', 'socket', '_socket'):
        # hardcoded list of safe things. note that isinstance ain't used
        # (and we don't trust subclasses to do the right thing in __repr__)
        return repr(obj)
    else:
        return object.__repr__(obj)
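A quick sanity check of the output (the instance address will vary; Surprise is the class from earlier, and note the lack of a 'Boom!'):

>>> safe_repr({'a': [1, 2], 'b': Surprise()})
"{'a': [1, 2], 'b': <Surprise object at 0x...>}"
>>> safe_repr(Surprise())  # falls through to the last branch, no side effects
'<Surprise object at 0x...>'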
The problematic code examples are taken from popular projects like Celery, pytest and Django, but I don't think it matters who does it. What do you think?