Source code for ebonite.core.objects.dataset_type

import builtins
from abc import abstractmethod
from typing import Dict, List, Sized

from pyjackson import deserialize, serialize
from pyjackson.core import ArgList, Field
from pyjackson.decorators import type_field
from pyjackson.errors import DeserializationError, SerializationError

from ebonite.core.objects.base import EboniteParams
from ebonite.core.objects.requirements import InstallableRequirement, Requirements
from ebonite.core.objects.typing import SizedTypedListType, TypeWithSpec


@type_field('type')
class DatasetType(EboniteParams, TypeWithSpec):
    """
    Base class for dataset type metadata.
    Children of this class must both be pyjackson-serializable and act as a pyjackson
    serializer for their dataset type
    """
    type = None

    @staticmethod
    def _check_type(obj, exp_type, exc_type):
        if not isinstance(obj, exp_type):
            raise exc_type(f'given dataset is of type: {type(obj)}, expected: {exp_type}')

    @property
    @abstractmethod
    def requirements(self) -> Requirements:
        """:return: :class:`.Requirements` needed to work with this dataset type"""

    @abstractmethod
    def get_writer(self):
        """:return: writer object for datasets of this type"""
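A concrete child of DatasetType supplies the serialization spec, the (de)serialization logic, its requirements and a writer. The sketch below is hypothetical and only illustrates that contract: the DecimalDatasetType name and its behaviour are invented for this example and are not part of ebonite.

# Hypothetical subclass, for illustration only -- not part of ebonite.
from decimal import Decimal, InvalidOperation

from pyjackson.core import ArgList, Field
from pyjackson.errors import DeserializationError, SerializationError

from ebonite.core.objects.dataset_type import DatasetType
from ebonite.core.objects.requirements import Requirements


class DecimalDatasetType(DatasetType):
    """Stores Decimal values as their string representation."""
    type = 'decimal'

    def get_spec(self) -> ArgList:
        return [Field(None, str, False)]

    def deserialize(self, obj):
        try:
            return Decimal(obj)
        except (InvalidOperation, TypeError, ValueError):
            raise DeserializationError(f'given object: {obj} could not be converted to Decimal')

    def serialize(self, instance):
        self._check_type(instance, Decimal, SerializationError)
        return str(instance)

    @property
    def requirements(self) -> Requirements:
        return Requirements()  # decimal is in the standard library, nothing to install

    def get_writer(self):
        from ebonite.repository.dataset.artifact import PickleWriter
        return PickleWriter()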
class LibDatasetTypeMixin(DatasetType):
    """
    :class:`.DatasetType` mixin which provides requirements list consisting of
    PIP packages represented by module objects in `libraries` field.
    """
    libraries = None

    @property
    def requirements(self) -> Requirements:
        return Requirements([InstallableRequirement.from_module(lib) for lib in self.libraries])
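As a rough sketch of how the mixin is meant to be used (class name invented, serialization methods and writer omitted for brevity), a library-backed type only lists the modules it depends on; `requirements` then turns each module into an InstallableRequirement:

# Hypothetical sketch -- abstract methods of DatasetType omitted for brevity.
import numpy

from ebonite.core.objects.dataset_type import LibDatasetTypeMixin


class MyNumpyDatasetType(LibDatasetTypeMixin):
    type = 'my_numpy'
    libraries = [numpy]  # requirements will contain a pip requirement for numpy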
PRIMITIVES = {int, str, bool, complex, float}
class PrimitiveDatasetType(DatasetType):
    """
    DatasetType for int, str, bool, complex and float types
    """
    type = 'primitive'

    def __init__(self, ptype: str):
        self.ptype = ptype
    @classmethod
    def from_object(cls, obj):
        if type(obj) not in PRIMITIVES:
            raise ValueError(f'{type(obj)} type is not primitive')
        return PrimitiveDatasetType(type(obj).__name__)
    @property
    def to_type(self):
        return getattr(builtins, self.ptype)

    def get_spec(self) -> ArgList:
        return [Field(None, self.to_type, False)]

    def deserialize(self, obj):
        try:
            return self.to_type(obj)
        except (ValueError, TypeError):
            raise DeserializationError(f'given object: {obj} could not be converted to {self.to_type}')

    def serialize(self, instance):
        self._check_type(instance, self.to_type, SerializationError)
        return instance

    @property
    def requirements(self) -> Requirements:
        return Requirements()

    def get_writer(self):
        from ebonite.repository.dataset.artifact import PrimitiveDatasetWriter
        return PrimitiveDatasetWriter()
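A short usage sketch, assuming ebonite is installed; `ptype` holds the builtin type name so that `to_type` can resolve it via `builtins`:

from ebonite.core.objects.dataset_type import PrimitiveDatasetType

dt = PrimitiveDatasetType.from_object(42)   # a PrimitiveDatasetType for int
assert dt.deserialize('3') == 3             # coerced through builtins.int
assert dt.serialize(3) == 3                 # primitives are passed through as-is
# dt.serialize('3') would raise SerializationError: wrong type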
class ListDatasetType(DatasetType, SizedTypedListType):
    """
    DatasetType for list type
    """
    real_type = None
    type = 'list'

    def __init__(self, dtype: DatasetType, size: int):
        SizedTypedListType.__init__(self, size, dtype)
    def deserialize(self, obj):
        _check_type_and_size(obj, list, self.size, DeserializationError)
        return [deserialize(o, self.dtype) for o in obj]

    def serialize(self, instance: list):
        _check_type_and_size(instance, list, self.size, SerializationError)
        return [serialize(o, self.dtype) for o in instance]

    @property
    def requirements(self) -> Requirements:
        return self.dtype.requirements

    def get_writer(self):
        from ebonite.repository.dataset.artifact import PickleWriter
        return PickleWriter()
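A usage sketch for fixed-size lists, assuming pyjackson dispatches element (de)serialization to the nested DatasetType, as the base class docstring requires:

from ebonite.core.objects.dataset_type import ListDatasetType, PrimitiveDatasetType

dt = ListDatasetType(PrimitiveDatasetType('float'), 3)
payload = dt.serialize([1.0, 2.0, 3.0])   # element-wise serialization, size is checked
restored = dt.deserialize(payload)        # back to a list of floats
# dt.serialize([1.0, 2.0]) would raise SerializationError: wrong length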
class _TupleLikeDatasetType(DatasetType):
    """
    DatasetType for tuple-like collections
    """
    real_type = None

    def __init__(self, items: List[DatasetType]):
        self.items = items

    def get_spec(self) -> ArgList:
        return [Field(str(i), t, False) for i, t in enumerate(self.items)]

    def deserialize(self, obj):
        _check_type_and_size(obj, self.actual_type, len(self.items), DeserializationError)
        return self.actual_type(deserialize(o, t) for t, o in zip(self.items, obj))

    def serialize(self, instance: Sized):
        _check_type_and_size(instance, self.actual_type, len(self.items), SerializationError)
        return self.actual_type(serialize(o, t) for t, o in zip(self.items, instance))

    @property
    def requirements(self) -> Requirements:
        return sum([i.requirements for i in self.items], Requirements())

    def get_writer(self):
        from ebonite.repository.dataset.artifact import PickleWriter
        return PickleWriter()


def _check_type_and_size(obj, dtype, size, exc_type):
    DatasetType._check_type(obj, dtype, exc_type)
    if size != -1 and len(obj) != size:
        raise exc_type(f'given {dtype.__name__} has len: {len(obj)}, expected: {size}')
class TupleLikeListDatasetType(_TupleLikeDatasetType):
    """
    DatasetType for tuple-like list type
    """
    actual_type = list
    type = 'tuple_like_list'


class TupleDatasetType(_TupleLikeDatasetType):
    """
    DatasetType for tuple type
    """
    actual_type = tuple
    type = 'tuple'
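Both tuple-like types validate the collection type and length against `items`, then (de)serialize position by position. A hedged sketch, under the same pyjackson-dispatch assumption as above:

from ebonite.core.objects.dataset_type import PrimitiveDatasetType, TupleDatasetType

dt = TupleDatasetType([PrimitiveDatasetType('int'), PrimitiveDatasetType('str')])
payload = dt.serialize((1, 'a'))     # each position uses its own DatasetType
restored = dt.deserialize(payload)   # reconstructed as a tuple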
class DictDatasetType(DatasetType):
    """
    DatasetType for dict type
    """
    real_type = None
    type = 'dict'

    def __init__(self, item_types: Dict[str, DatasetType]):
        self.item_types = item_types
    def get_spec(self) -> ArgList:
        return [Field(name, t, False) for name, t in self.item_types.items()]

    def deserialize(self, obj):
        self._check_type_and_keys(obj, DeserializationError)
        return {k: deserialize(v, self.item_types[k]) for k, v in obj.items()}

    def serialize(self, instance: dict):
        self._check_type_and_keys(instance, SerializationError)
        return {
            k: serialize(v, self.item_types[k]) for k, v in instance.items()
        }

    def _check_type_and_keys(self, obj, exc_type):
        self._check_type(obj, dict, exc_type)
        if set(obj.keys()) != set(self.item_types.keys()):
            raise exc_type(f'given dict has keys: {set(obj.keys())}, expected: {set(self.item_types.keys())}')

    @property
    def requirements(self) -> Requirements:
        return sum([i.requirements for i in self.item_types.values()], Requirements())

    def get_writer(self):
        from ebonite.repository.dataset.artifact import PickleWriter
        return PickleWriter()
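A usage sketch; note that the keys of the value must match `item_types` exactly:

from ebonite.core.objects.dataset_type import DictDatasetType, PrimitiveDatasetType

dt = DictDatasetType({'age': PrimitiveDatasetType('int'),
                      'name': PrimitiveDatasetType('str')})
payload = dt.serialize({'age': 42, 'name': 'Alice'})
restored = dt.deserialize(payload)
# dt.serialize({'age': 42}) would raise SerializationError: the 'name' key is missing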
class BytesDatasetType(DatasetType):
    """
    DatasetType for bytes objects
    """
    type = 'bytes'
    real_type = None

    def __init__(self):
        pass
    def get_spec(self) -> ArgList:
        return [Field('file', bytes, False)]

    def deserialize(self, obj) -> object:
        return obj

    def serialize(self, instance: bytes) -> bytes:
        return instance

    @property
    def requirements(self) -> Requirements:
        return Requirements()

    def get_writer(self):
        from ebonite.repository.dataset.artifact import PickleWriter
        return PickleWriter()
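Bytes are passed through untouched by serialize/deserialize; persistence is handled by the writer instead. A minimal sketch:

from ebonite.core.objects.dataset_type import BytesDatasetType

dt = BytesDatasetType()
assert dt.serialize(b'raw payload') == b'raw payload'
assert dt.deserialize(b'raw payload') == b'raw payload'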