528 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			528 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
	
| """Bag class definitions."""
 | |
| import heapq
 | |
| from operator import itemgetter
 | |
| from collections import Set, MutableSet, Hashable
 | |
| 
 | |
| from . import _compat
 | |
| 
 | |
| 
 | |
| class _basebag(Set):
 | |
| 	"""Base class for bag classes.
 | |
| 
 | |
| 	Base class for bag and frozenbag.	Is not mutable and not hashable, so there's
 | |
| 	no reason to use this instead of either bag or frozenbag.
 | |
| 	"""
 | |
| 
 | |
| 	# Basic object methods
 | |
| 
 | |
| 	def __init__(self, iterable=None):
 | |
| 		"""Create a new basebag.
 | |
| 
 | |
| 		If iterable isn't given, is None or is empty then the bag starts empty.
 | |
| 		Otherwise each element from iterable will be added to the bag
 | |
| 		however many times it appears.
 | |
| 
 | |
| 		This runs in O(len(iterable))
 | |
| 		"""
 | |
| 		self._dict = dict()
 | |
| 		self._size = 0
 | |
| 		if iterable:
 | |
| 			if isinstance(iterable, _basebag):
 | |
| 				for elem, count in iterable._dict.items():
 | |
| 					self._dict[elem] = count
 | |
| 					self._size += count
 | |
| 			else:
 | |
| 				for value in iterable:
 | |
| 					self._dict[value] = self._dict.get(value, 0) + 1
 | |
| 					self._size += 1
 | |
| 
 | |
| 	def __repr__(self):
 | |
| 		if self._size == 0:
 | |
| 			return '{0}()'.format(self.__class__.__name__)
 | |
| 		else:
 | |
| 			repr_format = '{class_name}({values!r})'
 | |
| 			return repr_format.format(
 | |
| 				class_name=self.__class__.__name__,
 | |
| 				values=tuple(self),
 | |
| 				)
 | |
| 
 | |
| 	def __str__(self):
 | |
| 		if self._size == 0:
 | |
| 			return '{class_name}()'.format(class_name=self.__class__.__name__)
 | |
| 		else:
 | |
| 			format_single = '{elem!r}'
 | |
| 			format_mult = '{elem!r}^{mult}'
 | |
| 			strings = []
 | |
| 			for elem, mult in self._dict.items():
 | |
| 				if mult > 1:
 | |
| 					strings.append(format_mult.format(elem=elem, mult=mult))
 | |
| 				else:
 | |
| 					strings.append(format_single.format(elem=elem))
 | |
| 			return '{%s}' % ', '.join(strings)
 | |
| 
 | |
| 	# New public methods (not overriding/implementing anything)
 | |
| 
 | |
| 	def num_unique_elements(self):
 | |
| 		"""Return the number of unique elements.
 | |
| 
 | |
| 		This runs in O(1) time
 | |
| 		"""
 | |
| 		return len(self._dict)
 | |
| 
 | |
| 	def unique_elements(self):
 | |
| 		"""Return a view of unique elements in this bag.
 | |
| 
 | |
| 		In Python 3:
 | |
| 			This runs in O(1) time and returns a view of the unique elements
 | |
| 		In Python 2:
 | |
| 			This runs in O(n) and returns set of the current elements.
 | |
| 		"""
 | |
| 		return _compat.keys_set(self._dict)
 | |
| 
 | |
| 	def count(self, value):
 | |
| 		"""Return the number of value present in this bag.
 | |
| 
 | |
| 		If value is not in the bag no Error is raised, instead 0 is returned.
 | |
| 
 | |
| 		This runs in O(1) time
 | |
| 
 | |
| 		Args:
 | |
| 			value: The element of self to get the count of
 | |
| 		Returns:
 | |
| 			int: The count of value in self
 | |
| 		"""
 | |
| 		return self._dict.get(value, 0)
 | |
| 
 | |
| 	def nlargest(self, n=None):
 | |
| 		"""List the n most common elements and their counts.
 | |
| 
 | |
| 		List is from the most
 | |
| 		common to the least.  If n is None, the list all element counts.
 | |
| 
 | |
| 		Run time should be O(m log m) where m is len(self)
 | |
| 		Args:
 | |
| 			n (int): The number of elements to return
 | |
| 		"""
 | |
| 		if n is None:
 | |
| 			return sorted(self._dict.items(), key=itemgetter(1), reverse=True)
 | |
| 		else:
 | |
| 			return heapq.nlargest(n, self._dict.items(), key=itemgetter(1))
 | |
| 
 | |
| 	@classmethod
 | |
| 	def _from_iterable(cls, it):
 | |
| 		return cls(it)
 | |
| 
 | |
| 	@classmethod
 | |
| 	def from_mapping(cls, mapping):
 | |
| 		"""Create a bag from a dict of elem->count.
 | |
| 
 | |
| 		Each key in the dict is added if the value is > 0.
 | |
| 		"""
 | |
| 		out = cls()
 | |
| 		for elem, count in mapping.items():
 | |
| 			if count > 0:
 | |
| 				out._dict[elem] = count
 | |
| 				out._size += count
 | |
| 		return out
 | |
| 
 | |
| 	def copy(self):
 | |
| 		"""Create a shallow copy of self.
 | |
| 
 | |
| 		This runs in O(len(self.num_unique_elements()))
 | |
| 		"""
 | |
| 		return self.from_mapping(self._dict)
 | |
| 
 | |
| 	# implementing Sized methods
 | |
| 
 | |
| 	def __len__(self):
 | |
| 		"""Return the cardinality of the bag.
 | |
| 
 | |
| 		This runs in O(1)
 | |
| 		"""
 | |
| 		return self._size
 | |
| 
 | |
| 	# implementing Container methods
 | |
| 
 | |
| 	def __contains__(self, value):
 | |
| 		"""Return the multiplicity of the element.
 | |
| 
 | |
| 		This runs in O(1)
 | |
| 		"""
 | |
| 		return self._dict.get(value, 0)
 | |
| 
 | |
| 	# implementing Iterable methods
 | |
| 
 | |
| 	def __iter__(self):
 | |
| 		"""Iterate through all elements.
 | |
| 
 | |
| 		Multiple copies will be returned if they exist.
 | |
| 		"""
 | |
| 		for value, count in self._dict.items():
 | |
| 			for i in range(count):
 | |
| 				yield(value)
 | |
| 
 | |
| 	# Comparison methods
 | |
| 
 | |
| 	def _is_subset(self, other):
 | |
| 		"""Check that every element in self has a count <= in other.
 | |
| 
 | |
| 		Args:
 | |
| 			other (Set)
 | |
| 		"""
 | |
| 		if isinstance(other, _basebag):
 | |
| 			for elem, count in self._dict.items():
 | |
| 				if not count <= other._dict.get(elem, 0):
 | |
| 					return False
 | |
| 		else:
 | |
| 			for elem in self:
 | |
| 				if self._dict.get(elem, 0) > 1 or elem not in other:
 | |
| 					return False
 | |
| 		return True
 | |
| 
 | |
| 	def _is_superset(self, other):
 | |
| 		"""Check that every element in self has a count >= in other.
 | |
| 
 | |
| 		Args:
 | |
| 			other (Set)
 | |
| 		"""
 | |
| 		if isinstance(other, _basebag):
 | |
| 			for elem, count in other._dict.items():
 | |
| 				if not self._dict.get(elem, 0) >= count:
 | |
| 					return False
 | |
| 		else:
 | |
| 			for elem in other:
 | |
| 				if elem not in self:
 | |
| 					return False
 | |
| 		return True
 | |
| 
 | |
| 	def __le__(self, other):
 | |
| 		if not isinstance(other, Set):
 | |
| 			return _compat.handle_rich_comp_not_implemented()
 | |
| 		return len(self) <= len(other) and self._is_subset(other)
 | |
| 
 | |
| 	def __lt__(self, other):
 | |
| 		if not isinstance(other, Set):
 | |
| 			return _compat.handle_rich_comp_not_implemented()
 | |
| 		return len(self) < len(other) and self._is_subset(other)
 | |
| 
 | |
| 	def __gt__(self, other):
 | |
| 		if not isinstance(other, Set):
 | |
| 			return _compat.handle_rich_comp_not_implemented()
 | |
| 		return len(self) > len(other) and self._is_superset(other)
 | |
| 
 | |
| 	def __ge__(self, other):
 | |
| 		if not isinstance(other, Set):
 | |
| 			return _compat.handle_rich_comp_not_implemented()
 | |
| 		return len(self) >= len(other) and self._is_superset(other)
 | |
| 
 | |
| 	def __eq__(self, other):
 | |
| 		if not isinstance(other, Set):
 | |
| 			return False
 | |
| 		if isinstance(other, _basebag):
 | |
| 			return self._dict == other._dict
 | |
| 		if not len(self) == len(other):
 | |
| 			return False
 | |
| 		for elem in other:
 | |
| 			if self._dict.get(elem, 0) != 1:
 | |
| 				return False
 | |
| 		return True
 | |
| 
 | |
| 	def __ne__(self, other):
 | |
| 		return not (self == other)
 | |
| 
 | |
| 	# Operations - &, |, +, -, ^, * and isdisjoint
 | |
| 
 | |
| 	def __and__(self, other):
 | |
| 		"""Intersection is the minimum of corresponding counts.
 | |
| 
 | |
| 		This runs in O(l + n) where:
 | |
| 			n is self.num_unique_elements()
 | |
| 			if other is a bag:
 | |
| 				l = 1
 | |
| 			else:
 | |
| 				l = len(other)
 | |
| 		"""
 | |
| 		if not isinstance(other, _basebag):
 | |
| 			other = self._from_iterable(other)
 | |
| 		values = dict()
 | |
| 		for elem in self._dict:
 | |
| 			values[elem] = min(other._dict.get(elem, 0), self._dict.get(elem, 0))
 | |
| 		return self.from_mapping(values)
 | |
| 
 | |
| 	def isdisjoint(self, other):
 | |
| 		"""Return if this bag is disjoint with the passed collection.
 | |
| 
 | |
| 		This runs in O(len(other))
 | |
| 
 | |
| 		TODO move isdisjoint somewhere more appropriate
 | |
| 		"""
 | |
| 		for value in other:
 | |
| 			if value in self:
 | |
| 				return False
 | |
| 		return True
 | |
| 
 | |
| 	def __or__(self, other):
 | |
| 		"""Union is the maximum of all elements.
 | |
| 
 | |
| 		This runs in O(m + n) where:
 | |
| 			n is self.num_unique_elements()
 | |
| 			if other is a bag:
 | |
| 				m = other.num_unique_elements()
 | |
| 			else:
 | |
| 				m = len(other)
 | |
| 		"""
 | |
| 		if not isinstance(other, _basebag):
 | |
| 			other = self._from_iterable(other)
 | |
| 		values = dict()
 | |
| 		for elem in self.unique_elements() | other.unique_elements():
 | |
| 			values[elem] = max(self._dict.get(elem, 0), other._dict.get(elem, 0))
 | |
| 		return self.from_mapping(values)
 | |
| 
 | |
| 	def __add__(self, other):
 | |
| 		"""Return a new bag also containing all the elements of other.
 | |
| 
 | |
| 		self + other = self & other + self | other
 | |
| 
 | |
| 		This runs in O(m + n) where:
 | |
| 			n is self.num_unique_elements()
 | |
| 			m is len(other)
 | |
| 		Args:
 | |
| 			other (Iterable): elements to add to self
 | |
| 		"""
 | |
| 		out = self.copy()
 | |
| 		for value in other:
 | |
| 			out._dict[value] = out._dict.get(value, 0) + 1
 | |
| 			out._size += 1
 | |
| 		return out
 | |
| 
 | |
| 	def __sub__(self, other):
 | |
| 		"""Difference between the sets.
 | |
| 
 | |
| 		For normal sets this is all x s.t. x in self and x not in other.
 | |
| 		For bags this is count(x) = max(0, self.count(x)-other.count(x))
 | |
| 
 | |
| 		This runs in O(m + n) where:
 | |
| 			n is self.num_unique_elements()
 | |
| 			m is len(other)
 | |
| 		Args:
 | |
| 			other (Iterable): elements to remove
 | |
| 		"""
 | |
| 		out = self.copy()
 | |
| 		for value in other:
 | |
| 			old_count = out._dict.get(value, 0)
 | |
| 			if old_count == 1:
 | |
| 				del out._dict[value]
 | |
| 				out._size -= 1
 | |
| 			elif old_count > 1:
 | |
| 				out._dict[value] = old_count - 1
 | |
| 				out._size -= 1
 | |
| 		return out
 | |
| 
 | |
| 	def __mul__(self, other):
 | |
| 		"""Cartesian product of the two sets.
 | |
| 
 | |
| 		other can be any iterable.
 | |
| 		Both self and other must contain elements that can be added together.
 | |
| 
 | |
| 		This should run in O(m*n+l) where:
 | |
| 			m is the number of unique elements in self
 | |
| 			n is the number of unique elements in other
 | |
| 			if other is a bag:
 | |
| 				l is 0
 | |
| 			else:
 | |
| 				l is the len(other)
 | |
| 		The +l will only really matter when other is an iterable with MANY
 | |
| 		repeated elements.
 | |
| 		For example: {'a'^2} * 'bbbbbbbbbbbbbbbbbbbbbbbbbb'
 | |
| 		The algorithm will be dominated by counting the 'b's
 | |
| 		"""
 | |
| 		if not isinstance(other, _basebag):
 | |
| 			other = self._from_iterable(other)
 | |
| 		values = dict()
 | |
| 		for elem, count in self._dict.items():
 | |
| 			for other_elem, other_count in other._dict.items():
 | |
| 				new_elem = elem + other_elem
 | |
| 				new_count = count * other_count
 | |
| 				values[new_elem] = new_count
 | |
| 		return self.from_mapping(values)
 | |
| 
 | |
| 	def __xor__(self, other):
 | |
| 		"""Symmetric difference between the sets.
 | |
| 
 | |
| 		other can be any iterable.
 | |
| 
 | |
| 		This runs in O(m + n) where:
 | |
| 			m = len(self)
 | |
| 			n = len(other)
 | |
| 		"""
 | |
| 		return (self - other) | (other - self)
 | |
| 
 | |
| 
 | |
| class bag(_basebag, MutableSet):
 | |
| 	"""bag is a mutable unhashable bag."""
 | |
| 
 | |
| 	def pop(self):
 | |
| 		"""Remove and return an element of self."""
 | |
| 		# TODO can this be done more efficiently (no need to create an iterator)?
 | |
| 		it = iter(self)
 | |
| 		try:
 | |
| 			value = next(it)
 | |
| 		except StopIteration:
 | |
| 			raise KeyError
 | |
| 		self.discard(value)
 | |
| 		return value
 | |
| 
 | |
| 	def add(self, elem):
 | |
| 		"""Add elem to self."""
 | |
| 		self._dict[elem] = self._dict.get(elem, 0) + 1
 | |
| 		self._size += 1
 | |
| 
 | |
| 	def discard(self, elem):
 | |
| 		"""Remove elem from this bag, silent if it isn't present."""
 | |
| 		try:
 | |
| 			self.remove(elem)
 | |
| 		except ValueError:
 | |
| 			pass
 | |
| 
 | |
| 	def remove(self, elem):
 | |
| 		"""Remove elem from this bag, raising a ValueError if it isn't present.
 | |
| 
 | |
| 		Args:
 | |
| 			elem: object to remove from self
 | |
| 		Raises:
 | |
| 			ValueError: if the elem isn't present
 | |
| 		"""
 | |
| 		old_count = self._dict.get(elem, 0)
 | |
| 		if old_count == 0:
 | |
| 			raise ValueError
 | |
| 		elif old_count == 1:
 | |
| 			del self._dict[elem]
 | |
| 		else:
 | |
| 			self._dict[elem] -= 1
 | |
| 		self._size -= 1
 | |
| 
 | |
| 	def discard_all(self, other):
 | |
| 		"""Discard all of the elems from other."""
 | |
| 		if not isinstance(other, _basebag):
 | |
| 			other = self._from_iterable(other)
 | |
| 		for elem, other_count in other._dict.items():
 | |
| 			old_count = self._dict.get(elem, 0)
 | |
| 			new_count = old_count - other_count
 | |
| 			if new_count >= 0:
 | |
| 				if new_count == 0:
 | |
| 					if elem in self:
 | |
| 						del self._dict[elem]
 | |
| 				else:
 | |
| 					self._dict[elem] = new_count
 | |
| 				self._size += new_count - old_count
 | |
| 
 | |
| 	def remove_all(self, other):
 | |
| 		"""Remove all of the elems from other.
 | |
| 
 | |
| 		Raises a ValueError if the multiplicity of any elem in other is greater
 | |
| 		than in self.
 | |
| 		"""
 | |
| 		if not self._is_superset(other):
 | |
| 			raise ValueError
 | |
| 		self.discard_all(other)
 | |
| 
 | |
| 	def clear(self):
 | |
| 		"""Remove all elements from this bag."""
 | |
| 		self._dict = dict()
 | |
| 		self._size = 0
 | |
| 
 | |
| 	# In-place operations
 | |
| 
 | |
| 	def __ior__(self, other):
 | |
| 		"""Set multiplicity of each element to the maximum of the two collections.
 | |
| 
 | |
| 		if isinstance(other, _basebag):
 | |
| 			This runs in O(other.num_unique_elements())
 | |
| 		else:
 | |
| 			This runs in O(len(other))
 | |
| 		"""
 | |
| 		if not isinstance(other, _basebag):
 | |
| 			other = self._from_iterable(other)
 | |
| 		for elem, other_count in other._dict.items():
 | |
| 			old_count = self._dict.get(elem, 0)
 | |
| 			new_count = max(other_count, old_count)
 | |
| 			self._dict[elem] = new_count
 | |
| 			self._size += new_count - old_count
 | |
| 		return self
 | |
| 
 | |
| 	def __iand__(self, other):
 | |
| 		"""Set multiplicity of each element to the minimum of the two collections.
 | |
| 
 | |
| 		if isinstance(other, _basebag):
 | |
| 			This runs in O(other.num_unique_elements())
 | |
| 		else:
 | |
| 			This runs in O(len(other))
 | |
| 		"""
 | |
| 		if not isinstance(other, _basebag):
 | |
| 			other = self._from_iterable(other)
 | |
| 		for elem, old_count in set(self._dict.items()):
 | |
| 			other_count = other._dict.get(elem, 0)
 | |
| 			new_count = min(other_count, old_count)
 | |
| 			if new_count == 0:
 | |
| 				del self._dict[elem]
 | |
| 			else:
 | |
| 				self._dict[elem] = new_count
 | |
| 			self._size += new_count - old_count
 | |
| 		return self
 | |
| 
 | |
| 	def __ixor__(self, other):
 | |
| 		"""Set self to the symmetric difference between the sets.
 | |
| 
 | |
| 		if isinstance(other, _basebag):
 | |
| 			This runs in O(other.num_unique_elements())
 | |
| 		else:
 | |
| 			This runs in O(len(other))
 | |
| 		"""
 | |
| 		if not isinstance(other, _basebag):
 | |
| 			other = self._from_iterable(other)
 | |
| 		other_minus_self = other - self
 | |
| 		self -= other
 | |
| 		self |= other_minus_self
 | |
| 		return self
 | |
| 
 | |
| 	def __isub__(self, other):
 | |
| 		"""Discard the elements of other from self.
 | |
| 
 | |
| 		if isinstance(it, _basebag):
 | |
| 			This runs in O(it.num_unique_elements())
 | |
| 		else:
 | |
| 			This runs in O(len(it))
 | |
| 		"""
 | |
| 		self.discard_all(other)
 | |
| 		return self
 | |
| 
 | |
| 	def __iadd__(self, other):
 | |
| 		"""Add all of the elements of other to self.
 | |
| 
 | |
| 		if isinstance(it, _basebag):
 | |
| 			This runs in O(it.num_unique_elements())
 | |
| 		else:
 | |
| 			This runs in O(len(it))
 | |
| 		"""
 | |
| 		if not isinstance(other, _basebag):
 | |
| 			other = self._from_iterable(other)
 | |
| 		for elem, other_count in other._dict.items():
 | |
| 			self._dict[elem] = self._dict.get(elem, 0) + other_count
 | |
| 			self._size += other_count
 | |
| 		return self
 | |
| 
 | |
| 
 | |
| class frozenbag(_basebag, Hashable):
 | |
| 	"""frozenbag is an immutable, hashable bab."""
 | |
| 
 | |
| 	def __hash__(self):
 | |
| 		"""Compute the hash value of a frozenbag.
 | |
| 
 | |
| 		This was copied directly from _collections_abc.Set._hash in Python3 which
 | |
| 		is identical to _abcoll.Set._hash
 | |
| 		We can't call it directly because Python2 raises a TypeError.
 | |
| 		"""
 | |
| 		if not hasattr(self, '_hash_value'):
 | |
| 			self._hash_value = self._hash()
 | |
| 		return self._hash_value
 |