Module safeserializer.compression

Expand source code
#  Copyright (c) 2023. Davi Pereira dos Santos
#  This file is part of the safeserializer project.
#  Please respect the license - more about this in the section (*) below.
#
#  safeserializer is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  safeserializer is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with safeserializer.  If not, see <http://www.gnu.org/licenses/>.
#
#  (*) Removing authorship by any means, e.g. by distribution of derived
#  works or verbatim, obfuscated, compiled or rewritten versions of any
#  part of this work is illegal and unethical regarding the effort and
#  time spent here.
import pickle
from binascii import hexlify, unhexlify

import bson
from bson import InvalidDocument
from orjson import orjson


def topickle(obj, ensure_determinism):
    """
    Serialize `obj` with pickle (protocol 5), falling back to dill.

    The blob is prefixed with b"05pckl_" or b"05dill_" so that
    `frompickle` can select the matching loader.

    Raises NondeterminismException when pickling fails and
    `ensure_determinism` is True (dill output is not deterministic).

    >>> f = print
    >>> du = topickle({"a": [3, f]}, ensure_determinism=False)
    >>> res = frompickle(du)
    >>> res["a"][1]() is None
    <BLANKLINE>
    True
    >>> frompickle(topickle({"a": [3, None]}, ensure_determinism=True))
    {'a': [3, None]}
    """
    try:
        prefix = b"05pckl_"
        dump = pickle.dumps(obj, protocol=5)
    except Exception as e:  # pragma: no cover
        if ensure_determinism:
            print(e)
            # Chain the original exception so its cause is not lost.
            raise NondeterminismException("Cannot serialize deterministically.") from e
        import dill

        try:
            prefix = b"05dill_"
            dump = dill.dumps(obj, protocol=5)
        except KeyError as ke:
            if str(ke) == "'__getstate__'":
                raise Exception("Unpickable value:", type(obj)) from ke
            # Bare raise preserves the original traceback (``raise ke`` would not).
            raise
    blob = prefix + dump
    return blob


def frompickle(blob):
    """
    Restore an object serialized by `topickle`.

    The first seven bytes select the loader: b"05pckl_" means pickle,
    b"05dill_" means dill. Any other prefix yields None.

    >>> du = frompickle(b'05pckl_\\x80\\x05\\x95#\\x00\\x00\\x00\\x00\\x00\\x00\\x00}\\x94\\x8c\\x01a\\x94]\\x94(K\\x03\\x8c\\x08builtins\\x94\\x8c\\x05print\\x94\\x93\\x94es.')
    >>> du["a"][1]() is None
    <BLANKLINE>
    True
    """
    marker, payload = blob[:7], blob[7:]
    if marker == b"05dill_":  # pragma: no cover
        import dill

        return dill.loads(payload)
    if marker == b"05pckl_":
        return pickle.loads(payload)


def traversal_enc(obj, ensure_determinism, unsafe_fallback):
    """
    TODO: Fix nested tuples being converted to lists by json?
        'tuple' should make orjson/bson raise an exception like it would happen for hditc,
        it would be easy to handle like with other non built-in types.
    >>> unpack(pack(["a", ["3", 4], "b", 4], ensure_determinism=False, unsafe_fallback=False))
    ['a', ['3', 4], 'b', 4]
    >>> unpack(pack([{0: [{"3":4}], "b": b"b"}], ensure_determinism=False, unsafe_fallback=False))
    [{0: [{'3': 4}], 'b': b'b'}]
    >>> du = pack(True, ensure_determinism=False, unsafe_fallback=True, compressed=False)
    >>> du
    b'00json_true'
    >>> unpack(du)
    True
    >>> unpack(pack([True], ensure_determinism=False, unsafe_fallback=True, compressed=False))
    [True]
    >>> unpack(pack([{"a": b"some bytes", "b":print}], ensure_determinism=False, unsafe_fallback=True))[0]["a"]
    b'some bytes'
    >>> unpack(pack(b"some bytes", ensure_determinism=False, unsafe_fallback=True))
    b'some bytes'
    >>> unpack(pack(99999999999999999999999999999999999999999, ensure_determinism=False, unsafe_fallback=True))
    99999999999999999999999999999999999999999
    >>> from pandas import Series as S, DataFrame as DF
    >>> s = S({"a": 5, "b": 6}, name="column")
    >>> a = pack(s, ensure_determinism=True, unsafe_fallback=False)
    >>> a  # doctest: +SKIP
    b'00lz4__\\x04"M\\x18h@^\\x00\\x00\\x00\\x00\\x00\\x00\\x00@\\\\\\x00\\x00\\x00\\xf1\\x0e00bsos_W\\x00\\x00\\x00\\x04i\\x00\\x17\\x00\\x00\\x00\\x020\\x00\\x02\\x00\\x00\\x00a\\x00\\x021\\t\\x00\\xf0\\nb\\x00\\x00\\x05v\\x00"\\x00\\x00\\x00\\x0016\\xc2\\xa71\\xc2\\xa7int64\\xc2\\xa7&\\x00\\x10\\x05\\x17\\x00A\\x00\\x00\\x00\\x06\\x06\\x00\\xf0\\x02\\x00\\x00\\x02n\\x00\\x07\\x00\\x00\\x00column\\x00\\x00\\x00\\x00\\x00\\x00'
    >>> unpack(a)
    a    5
    b    6
    Name: column, dtype: int64
    >>> s = S({"a": "5", "b": "6"})
    >>> b = pack(s, ensure_determinism=True, unsafe_fallback=False)
    >>> b
    b'00lz4__\\x04"M\\x18h@\\x1d\\x08\\x00\\x00\\x00\\x00\\x00\\x00\\xec\\xe2\\x04\\x00\\x00\\xf0\\x1100prqs_PAR1\\x15\\x04\\x15\\x14\\x15\\x18L\\x15\\x04\\x15\\x00\\x12\\x00\\x00\\n$\\x01\\x00\\x00\\x005\\x05\\x00\\xf696\\x15\\x00\\x15\\x12\\x15\\x16,\\x15\\x04\\x15\\x10\\x15\\x06\\x15\\x06\\x1c6\\x00(\\x016\\x18\\x015\\x00\\x00\\x00\\t \\x02\\x00\\x00\\x00\\x04\\x01\\x01\\x03\\x02&\\x88\\x01\\x1c\\x15\\x0c\\x195\\x10\\x00\\x06\\x19\\x18\\x06_none_\\x15\\x02\\x16\\x04\\x16x\\x16\\x80\\x01&<&\\x088\\x00\\x10\\x19L\\x00\\x90\\x00\\x15\\x02\\x00\\x15\\x00\\x15\\x10\\x15B\\x00\\x0f}\\x00\\x01\\x10a}\\x00\\x1fb}\\x00\\x01Kb\\x18\\x01a}\\x00&\\x82\\x03}\\x00\\xf7\\x02\\x11__index_level_0_\\x88\\x00Q\\xb6\\x02&\\x82\\x02\\x8a\\x00\\x01E\\x00\\x0f\\x8a\\x00\\x01\\xf4\\x04\\x19<5\\x00\\x18\\x06schema\\x15\\x04\\x00\\x15\\x0c%\\x02\\xd0\\x00b%\\x00L\\x1c\\x00\\x00\\x13\\x00\\x0ef\\x00\\x03\\x1e\\x00o\\x16\\x04\\x19\\x1c\\x19,\\x0f\\x01*\\x0f\\xcf\\x007\\xf2\\r\\x16\\xf0\\x01\\x16\\x04&\\x08\\x16\\x80\\x02\\x14\\x00\\x00\\x19,\\x18\\x06pandas\\x18\\xfc\\x03{"%\\x01\\xcdcolumns": ["9\\x01R"], ""\\x00\\x02T\\x01\\x11e)\\x00\\xfa\\x07{"name": null, "field_\\x14\\x00\\x02i\\x00@_typ)\\x00\\xf5\\x02"unicode", "numpy\\x19\\x00`object\\x18\\x00\\xf6\\x12metadata": {"encoding": "UTF-8"}}\\x8d\\x00\\n\\x86\\x00\\x12"n\\x02\\x00D\\x00\\t\\x8a\\x00\\x07\\x18\\x00\\x0f\\x8e\\x00*\\x00\\xe6\\x00?}, \\xf6\\x00\\n\\x0f<\\x01\\x00\\x0fw\\x002\\x01\\xf4\\x00areator\\x18\\x01plibrary\\x17\\x01ppyarrow\\xf7\\x00pversion\\x16\\x00\\x8611.0.0"}~\\x00\\x08\\x1d\\x00\\xf2\\x00.5.3"}\\x00\\x18\\x0cARROW:\\xe6\\x02@\\x18\\xe0\\x07/\\x01\\x00\\x82+ACAAAQA\\x01\\x00\\xf1\\x00KAA4ABgAFAAgACg\\x15\\x00)BB \\x00\\x10w\\x15\\x00\\x15E \\x00 DQ@\\x00\\x10E\\x15\\x00\\x02F\\x00\\x01 \\x00\\x10I\\x08\\x00\\x11B\\x08\\x00\\x01E\\x00\\x10I 
\\x00\\x02%\\x00\\xf4FYAAABwYW5kYXMAAPwBAAB7ImluZGV4X2NvbHVtbnMiOiBbIl9faW5kZXhfbGV2ZWxfMF9fIl0sICJjb2x1bW5$\\x00\\xf0\\x01lcyI6IFt7Im5hbWUD\\x00\\xf3\\x15udWxsLCAiZmllbGRfbmFtZSI6IG51bGwsICJ\\x90\\x00aNfdHlw\\x1c\\x00\\xf0\\x0eCJ1bmljb2RlIiwgIm51bXB5X3R5cGX\\x00\\xa2Aib2JqZWN0 \\x00\\xf0\\x0c1ldGFkYXRhIjogeyJlbmNvZGluZ\\x94\\x00\\xd9CJVVEYtOCJ9fV\\xbc\\x00\\x10z0\\x00EW3si\\x98\\x00\\xbfCJfbm9uZV8i\\xb8\\x00\\x02\\x0b \\x00\\x88cGFuZGFz\\x9c\\x00\\xb0dW5pY29kZSI\\xe0\\x00\\xd0udW1weV90eXBlp\\x00\\xa1Im9iamVjdC \\x00\\xa5tZXRhZGF0Y\\x18\\x01@x9LC\\x98\\x01\\x0fH\\x01\\x10TCJfX2\\xc0\\x01\\xc1xldmVsXzBfXy\\\\\\x00\\x0f\\\\\\x01>\\x80bnVsbH1d\\x1c\\x01\\xf0\\nY3JlYXRvciI6IHsibGlicmFye\\xc8\\x00\\xb1CJweWFycm93\\xa4\\x01\\xa1nZlcnNpb24\\xc0\\x01\\xa1MTEuMC4wIn\\x90\\x01\\x06\\xa8\\x00\\x80mVyc2lvbT\\x00\\xb0CIxLjUuMyJ9\\xc4\\x02\\x02\\xcb\\x02 BM\\n\\x00\\x10B\\x05\\x00\\xb1Mz///8AAAEF\\xe0\\x02\\x11CK\\x03\\x01\\x0b\\x00\\x01\\x02\\x00\\x10B\\x0b\\x00\\x1fB \\x01\\x03`wAAAMj@\\x00@QABQ\\x89\\x03`GAAcAD9\\x00\\x17BE\\x00!QUU\\x00\\x10H\\x18\\x00\\x02e\\x03\\x01\\x02\\x00\\x10B[\\x03\\x94F9ub25lXw3\\x00\\x01+\\x00\\x01\\x02\\x00\\xf1\\x00\\x00\\x18 parquet-cpp-9\\x04\\x13 \\x19\\x04\\x12 3\\x04P\\x19,\\x1c\\x00\\x00\\xdf\\x06\\x80\\x03\\x07\\x00\\x00PAR1\\x00\\x00\\x00\\x00'
    >>> unpack(b)
    a    5
    b    6
    dtype: object
    >>> s = S({"a": "5", "b": 6}, name="column")
    >>> unpack(pack(s, ensure_determinism=True, unsafe_fallback=True))
    a    5
    b    6
    Name: column, dtype: object
    >>> df = DF({"a": ["5","6","7"], "b": [1,2,3]}, index=["x","y","z"])
    >>> unpack(pack(df, ensure_determinism=True, unsafe_fallback=False))
       a  b
    x  5  1
    y  6  2
    z  7  3
    >>> df = DF({"a": ["5",6,"7"], "b": ["1","2","3"]}, index=["x","y","z"])
    >>> unpack(pack(df, ensure_determinism=True, unsafe_fallback=True))
       a  b
    x  5  1
    y  6  2
    z  7  3
    """
    error = None
    # Bytes pass through untouched: traversal_dec returns unknown-header
    # bytes verbatim, so no prefix is needed.
    if isinstance(obj, bytes):
        return obj
    if isinstance(obj, tuple):
        # Encode each item individually, then store the tuple of blobs in BSON.
        lst_of_binaries = tuple(traversal_enc(o, ensure_determinism, unsafe_fallback) for o in obj)
        return b"00tupl_" + bson.encode({"_": lst_of_binaries})

    # Fast path: orjson handles plain JSON-compatible values.
    try:
        return b"00json_" + orjson.dumps(obj)
    except TypeError as e:
        error = str(e)
    # BSON additionally handles bytes values nested inside dicts/lists.
    try:
        return b"00bson_" + bson.encode({"_": obj})
    except InvalidDocument as e:
        error = str(e)
    except OverflowError as o:
        # BSON ints are limited to 8 bytes; store big ints as decimal text.
        if "8-byte ints" in str(o) and isinstance(obj, int):
            return b"00bint_" + str(obj).encode()

    if isinstance(obj, list):
        # The list as a whole is not JSON/BSON-able; encode items one by one.
        lst_of_binaries = [traversal_enc(o, ensure_determinism, unsafe_fallback) for o in obj]
        return b"00list_" + bson.encode({"_": lst_of_binaries})
    elif isinstance(obj, dict):
        dic_of_binaries = {}
        # BSON keys must be str; if any key is not, encode+hexlify ALL keys
        # (marked by the b"00dicB_" prefix so decoding can unhexlify them).
        hexfy = any(not isinstance(k, str) for k in obj.keys())
        prefix = b"00dicB_" if hexfy else b"00dict_"
        for k, o in obj.items():
            if hexfy:
                bk = traversal_enc(k, ensure_determinism, unsafe_fallback)
                k = hexlify(bk).decode("utf-8")
            dic_of_binaries[k] = traversal_enc(o, ensure_determinism, unsafe_fallback)
        return prefix + bson.encode(dic_of_binaries)

    # Comparing class names (not isinstance) avoids importing numpy/pandas
    # unless the object actually is one of those types.
    klass = str(obj.__class__)
    if klass in ["<class 'numpy.ndarray'>"]:
        return serialize_numpy(obj, ensure_determinism, unsafe_fallback)
    elif klass == "<class 'pandas.core.series.Series'>":
        try:
            idx = obj.index.values.tolist()
            vals = serialize_numpy(obj.to_numpy(), ensure_determinism, False, b"")
            dic = {"i": idx, "v": vals}
            if obj.name is not None:
                dic["n"] = obj.name
            return b"00bsos_" + bson.encode(dic)
        except Exception as e:
            if str(e).startswith("Please enable 'unsafe_fallback'"):
                # Ill-behaved (e.g. object-dtype) Series: fall back to parquet,
                # with "_none_" marking an unnamed Series for round-tripping.
                try:
                    return b"00prqs_" + obj.to_frame(obj.name or "_none_").to_parquet()  # .convert_dtypes().to_parquet()
                except Exception as e:
                    error = str(e)
    elif klass == "<class 'pandas.core.frame.DataFrame'>":
        try:
            return serialize_numpy(obj.to_numpy(), ensure_determinism, unsafe_fallback=False, prefix=b"00npdf_")
        except Exception as e:
            if str(e).startswith("Please enable 'unsafe_fallback'"):
                try:
                    return b"00prqd_" + obj.to_parquet()
                except Exception as e:
                    error = str(e)
    if unsafe_fallback:
        return topickle(obj, ensure_determinism)
    raise Exception(f"Cannot safely pack {type(obj)}: {error}")  # pragma: no cover
    # TODO: handle hdict?


def traversal_dec(dump):
    """
    Recursively decode a value produced by `traversal_enc`.

    `dump` is either a bytes blob whose bytes 2..6 name the codec
    (e.g. b"00json_", b"00bson_", ...) or a tuple/list/dict whose items
    are themselves decodable. Bytes with an unrecognized header are
    returned verbatim (mirroring the bytes fast path of the encoder).
    """
    if isinstance(dump, bytes):
        # Bytes 0-1 carry a version ("00"/"05"); bytes 2-6 name the codec.
        header = dump[2:7]
        blob = dump[7:]
        if header == b"json_":
            return orjson.loads(blob)
        if header == b"bson_":
            # The encoder wrapped the value in {"_": obj}.
            return bson.decode(blob)["_"]
        if header == b"bint_":
            # Big ints (beyond BSON's 8-byte limit) were stored as decimal text.
            return int(blob.decode())
        if header == b"nmpy_":
            return deserialize_numpy(blob)
        if header == b"prqs_":
            import pandas as pd
            from io import BytesIO

            # Series stored as a single-column parquet; squeeze back to Series.
            obj = pd.read_parquet(BytesIO(blob)).squeeze()
            # "_none_" is the encoder's placeholder for an unnamed Series.
            if obj.name == "_none_":
                obj.rename(None, inplace=True)
            return obj
        if header == b"bsos_":
            from pandas import Series

            # Layout: {"i": index values, "v": serialized ndarray, "n": optional name}.
            dec = bson.decode(blob)
            obj = deserialize_numpy(dec["v"])
            kwargs = {"name": dec["n"]} if "n" in dec else {}
            return Series(obj, dec["i"], **kwargs)
        if header == b"prqd_":
            import pandas as pd
            from io import BytesIO

            return pd.read_parquet(BytesIO(blob))
        if header == b"npdf_":
            from pandas import DataFrame

            return DataFrame(deserialize_numpy(blob))
        if header == b"list_":
            # Items were encoded individually; recurse to decode each.
            return traversal_dec(bson.decode(blob)["_"])
        if header == b"tupl_":
            # BSON arrays decode as lists; restore the tuple before recursing.
            return traversal_dec(tuple(bson.decode(blob)["_"]))
        if header == b"dict_":
            return traversal_dec(bson.decode(blob))
        if header == b"dicB_":
            # Keys were encoded and hexlified by the encoder; reverse both steps.
            decoded = bson.decode(blob).items()
            return {traversal_dec(unhexlify(k.encode("utf-8"))): traversal_dec(v) for k, v in decoded}
        if header in [b"pckl_", b"dill_"]:
            # frompickle expects the full blob including its 7-byte prefix.
            return frompickle(dump)
        return dump
    # if isinstance(dump, (int, str, bool)):
    #     return dump
    if isinstance(dump, tuple):
        return tuple(traversal_dec(d) for d in dump)
    if isinstance(dump, list):
        return [traversal_dec(d) for d in dump]
    if isinstance(dump, dict):
        return {k: traversal_dec(v) for k, v in dump.items()}
    raise Exception(f"Cannot unpack {type(dump)}.")  # pragma: no cover


def pack(obj, ensure_determinism, unsafe_fallback, compressed=True):
    r"""
    Serialize 'obj' to bytes, optionally LZ4-compressed.

    Encoding is delegated to `traversal_enc`, which tries, in order:
        orjson
        bson
        bigints as str
        numpy ndarray as raw bytes
        pandas numeric Series/DataFrame as ndarray raw bytes
        pandas ill-behaved Series/DataFrame as parquet
        pickle when 'unsafe_fallback=True'
        dill when 'ensure_determinism=False'.

    >>> import numpy as np
    >>> d = [[np.array([[1, 2/3], [4, 5]]), {"x": b"dsa"}], [b"asd", 5]]
    >>> blob = pack(d, ensure_determinism=True, unsafe_fallback=False)
    >>> unpack(blob)
    [[array([[1.        , 0.66666667],
           [4.        , 5.        ]]), {'x': b'dsa'}], [b'asd', 5]]
    >>> blob = pack(d, ensure_determinism=True, unsafe_fallback=False, compressed=False)
    >>> unpack(blob)
    [[array([[1.        , 0.66666667],
           [4.        , 5.        ]]), {'x': b'dsa'}], [b'asd', 5]]
    >>> import pandas as pd
    >>> df = pd.DataFrame(np.array([[1, 2/3], [4, 5]]))
    >>> unpack(pack(df, ensure_determinism=True, unsafe_fallback=False))
         0         1
    0  1.0  0.666667
    1  4.0  5.000000

    >>> unpack(pack({"0": 3, "b": print}, ensure_determinism=True, unsafe_fallback=True, compressed=False))
    {'0': 3, 'b': <built-in function print>}
    >>> unpack(pack({"0": 3, "b": b"b"}, ensure_determinism=True, unsafe_fallback=False, compressed=False))
    {'0': 3, 'b': b'b'}
    """
    blob = traversal_enc(obj, ensure_determinism, unsafe_fallback)
    if not compressed:
        return blob
    # lz4 is imported lazily so uncompressed use has no dependency on it.
    import lz4.frame as lz4

    return b"00lz4__" + lz4.compress(blob)


def unpack(blob):
    """
    Deserialize bytes previously produced by `pack`.

    Blobs starting with b"00lz4__" are LZ4-decompressed first; the
    payload is then decoded recursively by `traversal_dec`.

    >>> unpack(pack({"n": 1}, ensure_determinism=True, unsafe_fallback=False, compressed=False))
    {'n': 1}
    """
    marker, payload = blob[:7], blob[7:]
    if marker == b"00lz4__":
        # lz4 is imported lazily, matching `pack`.
        import lz4.frame as lz4

        blob = lz4.decompress(payload)
    return traversal_dec(blob)


class NondeterminismException(Exception):
    """Raised when `ensure_determinism` was requested but the value could only be serialized nondeterministically (e.g. via dill)."""

    pass


def serialize_numpy(obj, ensure_determinism, unsafe_fallback, prefix=b"00nmpy_"):
    """
    Serialize a numpy ndarray as prefix + header + raw buffer bytes.

    Header layout: "<len>§<ndim>§<dtype>§" followed by the shape packed as
    4-byte little-endian ints, where <len> is the decimal byte length of
    everything after it. Object-dtype arrays (and non-ndarray inputs) fall
    back to `topickle` when 'unsafe_fallback' is enabled, otherwise raise.

    >>> from pandas import Series as S, DataFrame as DF, Series as S
    >>> df = DF({"a": ["5","6","7"], "b": ["1","2","3"]}, index=["x","y","z"]).to_numpy()
    >>> serialize_numpy(df, ensure_determinism=True, unsafe_fallback=True)
    b'05pckl_\\x80\\x05\\x95\\xa3\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c\\x15numpy.core.multiarray\\x94\\x8c\\x0c_reconstruct\\x94\\x93\\x94\\x8c\\x05numpy\\x94\\x8c\\x07ndarray\\x94\\x93\\x94K\\x00\\x85\\x94C\\x01b\\x94\\x87\\x94R\\x94(K\\x01K\\x03K\\x02\\x86\\x94h\\x03\\x8c\\x05dtype\\x94\\x93\\x94\\x8c\\x02O8\\x94\\x89\\x88\\x87\\x94R\\x94(K\\x03\\x8c\\x01|\\x94NNNJ\\xff\\xff\\xff\\xffJ\\xff\\xff\\xff\\xffK?t\\x94b\\x88]\\x94(\\x8c\\x015\\x94\\x8c\\x011\\x94\\x8c\\x016\\x94\\x8c\\x012\\x94\\x8c\\x017\\x94\\x8c\\x013\\x94et\\x94b.'
    """
    import numpy as np

    if isinstance(obj, np.ndarray):
        if obj.dtype in [np.dtype(object)]:
            # Object arrays have no stable raw-buffer representation.
            if unsafe_fallback:
                return topickle(obj, ensure_determinism)
            # Fixed: the two adjacent literals previously joined without a space
            # ("types.Cannot"); callers match only the "Please enable..." prefix.
            raise Exception(f"Please enable 'unsafe_fallback' or handle numpy types. Cannot handle this ndarray dtype: '{obj.dtype}'")
        dims = str(len(obj.shape))
        dtype = str(obj.dtype)
        # '§' encodes to two UTF-8 bytes (b"\xc2\xa7"), which is what
        # deserialize_numpy splits the header on.
        rest_of_header = f"§{dims}§{dtype}§".encode() + integers2bytes(obj.shape)
        rest_of_header_len = str(len(rest_of_header)).encode()
        header = rest_of_header_len + rest_of_header
        # return header + lz4.compress(ascontiguousarray(obj).data)
        return prefix + header + obj.data.tobytes()
    if unsafe_fallback:  # pragma: no cover
        return topickle(obj, ensure_determinism)
    raise Exception(f"Please enable 'unsafe_fallback'. Cannot handle this type '{type(obj)}'.")  # pragma: no cover


def deserialize_numpy(blob):
    """
    Rebuild an ndarray from the layout written by `serialize_numpy`:
    "<len>§<ndim>§<dtype>§<shape bytes><raw buffer>", where '§' is the
    two-byte UTF-8 sequence b"\\xc2\\xa7" and <len> is the decimal byte
    length of everything between it and the raw buffer.
    """
    import numpy as np

    # The decimal header length precedes the first '§'; ten bytes is
    # enough to contain that decimal string.
    rest_of_header_len = blob[:10].split(b"\xc2\xa7")[0]
    first_len = len(rest_of_header_len)
    header_len = first_len + int(rest_of_header_len)
    # Skip the 2-byte '§' right after the length, then split the remaining
    # header into ndim, dtype and the packed shape.
    dims, dtype, hw = blob[first_len + 2 : header_len].split(b"\xc2\xa7")
    dims = int(dims.decode())
    dtype = dtype.decode().rstrip()
    # NOTE(review): ljust pads with spaces (0x20), not NUL bytes — presumably
    # the shape bytes are always exactly 4*dims long already (see
    # integers2bytes); confirm before relying on the padding.
    shape = bytes2integers(hw.ljust(4 * dims))

    # memoryview avoids copying the (possibly large) raw buffer.
    dump = memoryview(blob)[header_len:]
    # dump = lz4.decompress(dump)
    m = np.frombuffer(dump, dtype=dtype)
    if dims > 1:
        m = np.reshape(m, newshape=shape)
    return m


def integers2bytes(lst, n=4) -> bytes:
    """Concatenate each int in `lst` as `n` little-endian bytes. max=4294967295 (2**32 - 1) for 4 bytes"""
    return b"".join(d.to_bytes(n, byteorder="little") for d in lst)


def bytes2integers(bytes_content: bytes, n=4):
    """Inverse of integers2bytes: decode consecutive n-byte little-endian groups into a list of ints."""
    result = []
    for start in range(0, len(bytes_content), n):
        chunk = bytes_content[start : start + n]
        result.append(int.from_bytes(chunk, "little"))
    return result


########################################################################################
########################################################################################
########################################################################################
########################################################################################


# def import_dependence(dep):
#     try:
#         return import_module(dep)
#     except ImportError as e:
#         raise Exception(f"Missing {dep} library. Need a complete install\n" "pip install -U safeserializer[full]")


# def custom_orjson_encoder(obj):
#     # E.g., pandas dataframes.
#     typ = str(type(obj))
#     if typ == "<class 'pandas.core.frame.DataFrame'>":
#         return obj.to_numpy()
#     if typ == "<class 'pandas.core.series.Series'>":
#         return obj.to_numpy()
#     # if hasattr(obj, 'to_json'):
#     #     # REMINDER: default_handler=str is to avoid infinite recursion, e.g., on iris.arff
#     #     txt = obj.to_json(force_ascii=False, default_handler=str)
#     #     return {"_type_orjson": str(type(obj)), "_obj.to_json()": txt}
#
#     # Numpy objects generic type and ndarray, keeping dtype.
#     if typ == "<class 'numpy.ndarray'>":
#         print(typ)
#         try:
#             return serialize_numpy(obj,ensure_determinism,unsafe_fallback) is None ???
#         except Exception as e:
#             print(e)
#             exit()
#
#     # try:
#     #     import numpy
#     #     if isinstance(obj, numpy.generic):
#     #         return {"_type_orjson": str(obj.dtype), "_numpy.asscalar(obj)": numpy.asscalar(obj)}
#     #     if isinstance(obj, numpy.ndarray):
#     #         return {"_type_orjson": str(obj.dtype), "_numpy.ndarray.tolist()": obj.tolist()}
#     # except ImportError as e:
#     #     pass
#
#     if isinstance(obj, bytes):
#         return obj.decode()  # nem qq byte vira string!
#     raise TypeError


# def json_object_hook_decoder(dic):
#     if "_type_orjson" in dic:
#         if "_obj.to_json()" in dic:
#             if dic["_type_orjson"] == "<class 'pandas.core.frame.DataFrame'>":
#                 m = import_dependence("pandas")
#                 return m.read_json(dic["_obj.to_json()"])  # , default_handler=str)
#             if dic["_type_orjson"] == "<class 'pandas.core.series.Series'>":
#                 m = import_dependence("pandas")
#                 # default_handler=callable
#                 return m.read_json(dic["_obj.to_json()"], typ=dic["_type_orjson"])
#             else:  # pragma: no cover
#                 raise Exception(f"Cannot desserialize object of type '{dic['_type_orjson']}'")
#         if (c := "_numpy.asscalar(obj)") in dic or (c := "_numpy.ndarray.tolist()") in dic:
#             m = import_dependence("numpy")
#             dtype = "str" if len(dic["_type_orjson"]) > 10 else dic["_type_orjson"]
#             return m.array(dic[c], dtype=dtype)
#     return dic


# def serialize_json(obj):
#     # r"""
#     # >>> import numpy as np
#     # >>> import math
#     # >>> a = np.array([[1/3, 5/4], [1.3**6, "text"]])
#     # >>> a
#     # array([['0.3333333333333333', '1.25'],
#     #        ['4.826809000000001', 'text']], dtype='<U32')
#     # >>> b = np.array([[1/3,5/4], [1.3**6, 4]], dtype = np.int64)
#     # >>> b
#     # array([[0, 1],
#     #        [4, 4]])
#     # >>> c = np.array([[1/3,5/4], [1.3**6, 4]], dtype = np.int8)
#     # >>> c
#     # array([[0, 1],
#     #        [4, 4]], dtype=int8)
#     # >>> serialize_json([math.inf, a, b, c])
#     # b'[null,{"_numpy.ndarray.tolist()":[["0.3333333333333333","1.25"],["4.826809000000001","text"]],"_type_orjson":"<U32"},{"_numpy.ndarray.tolist()":[[0,1],[4,4]],"_type_orjson":"int64"},{"_numpy.ndarray.tolist()":[[0,1],[4,4]],"_type_orjson":"int8"}]'
#     # >>> import pandas as pd
#     # >>> df = pd.DataFrame(
#     # ...     [[1/3, 5/4], [1.3**54, "text"]],
#     # ...     index=["row 1", "row 2"],
#     # ...     columns=["col 1", "col 2"],
#     # ... )
#     # >>> df
#     #               col 1 col 2
#     # row 1  3.333333e-01  1.25
#     # row 2  1.422136e+06  text
#     # >>> serialize_json(df)
#     # b'{"_obj.to_json()":"{\\"col 1\\":{\\"row 1\\":0.3333333333,\\"row 2\\":1422135.6537506874},\\"col 2\\":{\\"row 1\\":1.25,\\"row 2\\":\\"text\\"}}","_type_orjson":"<class \'pandas.core.frame.DataFrame\'>"}'
#     # >>> s = pd.Series(
#     # ...     [1/3, 5/4, (1.3)**54, "text"],
#     # ...     index=["row 1", "row 2", "row 3", "row 4"],
#     # ... )
#     # >>> s
#     # row 1          0.333333
#     # row 2              1.25
#     # row 3    1422135.653751
#     # row 4              text
#     # dtype: object
#     # >>> serialize_json(s)
#     # b'{"_obj.to_json()":"{\\"row 1\\":0.3333333333,\\"row 2\\":1.25,\\"row 3\\":1422135.6537506874,\\"row 4\\":\\"text\\"}","_type_orjson":"<class \'pandas.core.series.Series\'>"}'
#     # """
#     return dumps(obj, default=custom_orjson_encoder, option=OPT_SORT_KEYS)


# def deserialize_json(blob):
#     return json.loads(blob, object_hook=json_object_hook_decoder)

Functions

def bytes2integers(bytes_content: bytes, n=4)

Each 4 bytes become an int.

Expand source code
def bytes2integers(bytes_content: bytes, n=4):
    """Each 4 bytes become an int."""
    return [int.from_bytes(bytes_content[i : i + n], "little") for i in range(0, len(bytes_content), n)]
def deserialize_numpy(blob)
Expand source code
def deserialize_numpy(blob):
    import numpy as np

    rest_of_header_len = blob[:10].split(b"\xc2\xa7")[0]
    first_len = len(rest_of_header_len)
    header_len = first_len + int(rest_of_header_len)
    dims, dtype, hw = blob[first_len + 2 : header_len].split(b"\xc2\xa7")
    dims = int(dims.decode())
    dtype = dtype.decode().rstrip()
    shape = bytes2integers(hw.ljust(4 * dims))

    dump = memoryview(blob)[header_len:]
    # dump = lz4.decompress(dump)
    m = np.frombuffer(dump, dtype=dtype)
    if dims > 1:
        m = np.reshape(m, newshape=shape)
    return m
def frompickle(blob)
>>> du = frompickle(b'05pckl_\x80\x05\x95#\x00\x00\x00\x00\x00\x00\x00}\x94\x8c\x01a\x94]\x94(K\x03\x8c\x08builtins\x94\x8c\x05print\x94\x93\x94es.')
>>> du["a"][1]() is None
<BLANKLINE>
True
Expand source code
def frompickle(blob):
    """
    >>> du = frompickle(b'05pckl_\\x80\\x05\\x95#\\x00\\x00\\x00\\x00\\x00\\x00\\x00}\\x94\\x8c\\x01a\\x94]\\x94(K\\x03\\x8c\\x08builtins\\x94\\x8c\\x05print\\x94\\x93\\x94es.')
    >>> du["a"][1]() is None
    <BLANKLINE>
    True
    """
    prefix = blob[:7]
    blob = blob[7:]
    if prefix == b"05pckl_":
        return pickle.loads(blob)
    elif prefix == b"05dill_":  # pragma: no cover
        import dill

        return dill.loads(blob)
def integers2bytes(lst, n=4) ‑> bytes

Each int becomes N bytes. max=4294967294 for 4 bytes

Expand source code
def integers2bytes(lst, n=4) -> bytes:
    """Each int becomes N bytes. max=4294967294 for 4 bytes"""
    return b"".join(d.to_bytes(n, byteorder="little") for d in lst)
def pack(obj, ensure_determinism, unsafe_fallback, compressed=True)

Serialize 'obj' to bytes.

Attempt to serialize using one of the following options, in this order: orjson bson bigints as str numpy ndarray as raw bytes pandas numeric Series/DataFrame as ndarray raw bytes pandas ill-behaved Series/DataFrame as parquet pickle when 'unsafe_fallback=True' dill when 'ensure_determinism=False'.

>>> import numpy as np
>>> d = [[np.array([[1, 2/3], [4, 5]]), {"x": b"dsa"}], [b"asd", 5]]
>>> blob = pack(d, ensure_determinism=True, unsafe_fallback=False)
>>> unpack(blob)
[[array([[1.        , 0.66666667],
       [4.        , 5.        ]]), {'x': b'dsa'}], [b'asd', 5]]
>>> blob = pack(d, ensure_determinism=True, unsafe_fallback=False, compressed=False)
>>> unpack(blob)
[[array([[1.        , 0.66666667],
       [4.        , 5.        ]]), {'x': b'dsa'}], [b'asd', 5]]
>>> import pandas as pd
>>> df = pd.DataFrame(np.array([[1, 2/3], [4, 5]]))
>>> unpack(pack(df, ensure_determinism=True, unsafe_fallback=False))
     0         1
0  1.0  0.666667
1  4.0  5.000000
>>> unpack(pack({"0": 3, "b": print}, ensure_determinism=True, unsafe_fallback=True, compressed=False))
{'0': 3, 'b': <built-in function print>}
>>> unpack(pack({"0": 3, "b": b"b"}, ensure_determinism=True, unsafe_fallback=False, compressed=False))
{'0': 3, 'b': b'b'}
Expand source code
def pack(obj, ensure_determinism, unsafe_fallback, compressed=True):
    r"""
    Serialize 'obj' to bytes, optionally LZ4-compressed (marker b"00lz4__").

    Encoding is delegated to `traversal_enc`, which attempts, in this order:
        orjson
        bson
        bigints as str
        numpy ndarray as raw bytes
        pandas numeric Series/DataFrame as ndarray raw bytes
        pandas ill-behaved Series/DataFrame as parquet
        pickle when 'unsafe_fallback=True'
        dill when 'ensure_determinism=False'.

    >>> import numpy as np
    >>> d = [[np.array([[1, 2/3], [4, 5]]), {"x": b"dsa"}], [b"asd", 5]]
    >>> blob = pack(d, ensure_determinism=True, unsafe_fallback=False)
    >>> unpack(blob)
    [[array([[1.        , 0.66666667],
           [4.        , 5.        ]]), {'x': b'dsa'}], [b'asd', 5]]
    >>> blob = pack(d, ensure_determinism=True, unsafe_fallback=False, compressed=False)
    >>> unpack(blob)
    [[array([[1.        , 0.66666667],
           [4.        , 5.        ]]), {'x': b'dsa'}], [b'asd', 5]]
    >>> import pandas as pd
    >>> df = pd.DataFrame(np.array([[1, 2/3], [4, 5]]))
    >>> unpack(pack(df, ensure_determinism=True, unsafe_fallback=False))
         0         1
    0  1.0  0.666667
    1  4.0  5.000000

    >>> unpack(pack({"0": 3, "b": print}, ensure_determinism=True, unsafe_fallback=True, compressed=False))
    {'0': 3, 'b': <built-in function print>}
    >>> unpack(pack({"0": 3, "b": b"b"}, ensure_determinism=True, unsafe_fallback=False, compressed=False))
    {'0': 3, 'b': b'b'}
    """
    dump = traversal_enc(obj, ensure_determinism, unsafe_fallback)
    if not compressed:
        return dump
    # lz4 imported lazily so uncompressed usage works without the dependency.
    import lz4.frame as lz4

    return b"00lz4__" + lz4.compress(dump)
def serialize_numpy(obj, ensure_determinism, unsafe_fallback, prefix=b'00nmpy_')
>>> from pandas import Series as S, DataFrame as DF, Series as S
>>> df = DF({"a": ["5","6","7"], "b": ["1","2","3"]}, index=["x","y","z"]).to_numpy()
>>> serialize_numpy(df, ensure_determinism=True, unsafe_fallback=True)
b'05pckl_\x80\x05\x95\xa3\x00\x00\x00\x00\x00\x00\x00\x8c\x15numpy.core.multiarray\x94\x8c\x0c_reconstruct\x94\x93\x94\x8c\x05numpy\x94\x8c\x07ndarray\x94\x93\x94K\x00\x85\x94C\x01b\x94\x87\x94R\x94(K\x01K\x03K\x02\x86\x94h\x03\x8c\x05dtype\x94\x93\x94\x8c\x02O8\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK?t\x94b\x88]\x94(\x8c\x015\x94\x8c\x011\x94\x8c\x016\x94\x8c\x012\x94\x8c\x017\x94\x8c\x013\x94et\x94b.'
Expand source code
def serialize_numpy(obj, ensure_determinism, unsafe_fallback, prefix=b"00nmpy_"):
    """
    Serialize a numpy ndarray as raw bytes preceded by a small textual header.

    Header layout (after `prefix`): the header length in ASCII digits, then
    "§<ndims>§<dtype>§", then the shape as 4-byte little-endian integers,
    followed by the array's raw buffer.  Object-dtype arrays (and non-ndarray
    inputs) cannot be encoded this way; they fall back to pickle when
    `unsafe_fallback` is enabled, otherwise an exception is raised.

    NOTE: `traversal_enc` dispatches on the exception text prefix
    "Please enable 'unsafe_fallback'" — keep that prefix stable.

    >>> from pandas import Series as S, DataFrame as DF, Series as S
    >>> df = DF({"a": ["5","6","7"], "b": ["1","2","3"]}, index=["x","y","z"]).to_numpy()
    >>> serialize_numpy(df, ensure_determinism=True, unsafe_fallback=True)
    b'05pckl_\\x80\\x05\\x95\\xa3\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c\\x15numpy.core.multiarray\\x94\\x8c\\x0c_reconstruct\\x94\\x93\\x94\\x8c\\x05numpy\\x94\\x8c\\x07ndarray\\x94\\x93\\x94K\\x00\\x85\\x94C\\x01b\\x94\\x87\\x94R\\x94(K\\x01K\\x03K\\x02\\x86\\x94h\\x03\\x8c\\x05dtype\\x94\\x93\\x94\\x8c\\x02O8\\x94\\x89\\x88\\x87\\x94R\\x94(K\\x03\\x8c\\x01|\\x94NNNJ\\xff\\xff\\xff\\xffJ\\xff\\xff\\xff\\xffK?t\\x94b\\x88]\\x94(\\x8c\\x015\\x94\\x8c\\x011\\x94\\x8c\\x016\\x94\\x8c\\x012\\x94\\x8c\\x017\\x94\\x8c\\x013\\x94et\\x94b.'
    """
    import numpy as np

    if isinstance(obj, np.ndarray):
        if obj.dtype == np.dtype(object):
            # Object arrays have no stable raw-buffer representation.
            if unsafe_fallback:
                return topickle(obj, ensure_determinism)
            # Fixed: the two concatenated f-strings lacked a separating space
            # ("numpy types.Cannot handle"); the prefix checked by callers is unchanged.
            raise Exception(f"Please enable 'unsafe_fallback' or handle numpy types. Cannot handle this ndarray dtype: '{obj.dtype}'")
        dims = str(len(obj.shape))
        dtype = str(obj.dtype)
        rest_of_header = f"§{dims}§{dtype}§".encode() + integers2bytes(obj.shape)
        rest_of_header_len = str(len(rest_of_header)).encode()
        header = rest_of_header_len + rest_of_header
        # return header + lz4.compress(ascontiguousarray(obj).data)
        return prefix + header + obj.data.tobytes()
    if unsafe_fallback:  # pragma: no cover
        return topickle(obj, ensure_determinism)
    raise Exception(f"Please enable 'unsafe_fallback'. Cannot handle this type '{type(obj)}'.")  # pragma: no cover
def topickle(obj, ensure_determinism)
>>> f = print
>>> du = topickle({"a": [3, f]}, ensure_determinism=False)
>>> res = frompickle(du)
>>> res["a"][1]() is None
<BLANKLINE>
True
>>> frompickle(topickle({"a": [3, None]}, ensure_determinism=True))
{'a': [3, None]}
Expand source code
def topickle(obj, ensure_determinism):
    """
    Pickle `obj` with protocol 5 and prepend a format marker.

    The marker is b"05pckl_" for pickle, or b"05dill_" when pickle fails and
    dill is used instead.  When pickle fails and `ensure_determinism` is set,
    a `NondeterminismException` is raised instead of falling back to dill.

    >>> f = print
    >>> du = topickle({"a": [3, f]}, ensure_determinism=False)
    >>> res = frompickle(du)
    >>> res["a"][1]() is None
    <BLANKLINE>
    True
    >>> frompickle(topickle({"a": [3, None]}, ensure_determinism=True))
    {'a': [3, None]}
    """
    prefix = b"05pckl_"
    try:
        dump = pickle.dumps(obj, protocol=5)
    except Exception as e:  # pragma: no cover
        if ensure_determinism:
            print(e)
            raise NondeterminismException("Cannot serialize deterministically.")
        import dill

        prefix = b"05dill_"
        try:
            dump = dill.dumps(obj, protocol=5)
        except KeyError as e:
            if str(e) != "'__getstate__'":
                raise e
            raise Exception("Unpickable value:", type(obj))
    return prefix + dump
def traversal_dec(dump)
Expand source code
def traversal_dec(dump):
    """
    Decode a dump produced by `traversal_enc` back into the original object.

    Bytes dumps carry a 7-byte marker (e.g. b"00json_"): two version digits
    followed by a 5-character format tag that selects the decoder.  Bytes with
    an unknown tag were a plain `bytes` payload and are returned unchanged.
    Tuples/lists/dicts are decoded element-wise, recursively.
    """
    if isinstance(dump, bytes):
        header = dump[2:7]  # 5-char format tag; dump[:2] are the version digits
        blob = dump[7:]
        if header == b"json_":
            return orjson.loads(blob)
        if header == b"bson_":
            return bson.decode(blob)["_"]
        if header == b"bint_":
            # Integers too large for bson's 8-byte ints were stored as decimal text.
            return int(blob.decode())
        if header == b"nmpy_":
            return deserialize_numpy(blob)
        if header == b"prqs_":
            import pandas as pd
            from io import BytesIO

            # Series stored as a single-column parquet; "_none_" marks an unnamed Series.
            obj = pd.read_parquet(BytesIO(blob)).squeeze()
            if obj.name == "_none_":
                obj.rename(None, inplace=True)
            return obj
        if header == b"bsos_":
            from pandas import Series

            # Numeric Series: bson doc with index "i", raw ndarray "v", optional name "n".
            dec = bson.decode(blob)
            obj = deserialize_numpy(dec["v"])
            kwargs = {"name": dec["n"]} if "n" in dec else {}
            return Series(obj, dec["i"], **kwargs)
        if header == b"prqd_":
            import pandas as pd
            from io import BytesIO

            return pd.read_parquet(BytesIO(blob))
        if header == b"npdf_":
            from pandas import DataFrame

            return DataFrame(deserialize_numpy(blob))
        if header == b"list_":
            return traversal_dec(bson.decode(blob)["_"])
        if header == b"tupl_":
            return traversal_dec(tuple(bson.decode(blob)["_"]))
        if header == b"dict_":
            return traversal_dec(bson.decode(blob))
        if header == b"dicB_":
            # Dict whose keys were not all str: keys are hexlified encoded dumps.
            decoded = bson.decode(blob).items()
            return {traversal_dec(unhexlify(k.encode("utf-8"))): traversal_dec(v) for k, v in decoded}
        if header in [b"pckl_", b"dill_"]:
            return frompickle(dump)
        return dump
    # if isinstance(dump, (int, str, bool)):
    #     return dump
    if isinstance(dump, tuple):
        return tuple(traversal_dec(d) for d in dump)
    if isinstance(dump, list):
        return [traversal_dec(d) for d in dump]
    if isinstance(dump, dict):
        return {k: traversal_dec(v) for k, v in dump.items()}
    raise Exception(f"Cannot unpack {type(dump)}.")  # pragma: no cover
def traversal_enc(obj, ensure_determinism, unsafe_fallback)

TODO: Fix nested tuples being converted to lists by json? 'tuple' should make orjson/bson raise an exception like it would happen for hdict, it would be easy to handle like with other non built-in types.

>>> unpack(pack(["a", ["3", 4], "b", 4], ensure_determinism=False, unsafe_fallback=False))
['a', ['3', 4], 'b', 4]
>>> unpack(pack([{0: [{"3":4}], "b": b"b"}], ensure_determinism=False, unsafe_fallback=False))
[{0: [{'3': 4}], 'b': b'b'}]
>>> du = pack(True, ensure_determinism=False, unsafe_fallback=True, compressed=False)
>>> du
b'00json_true'
>>> unpack(du)
True
>>> unpack(pack([True], ensure_determinism=False, unsafe_fallback=True, compressed=False))
[True]
>>> unpack(pack([{"a": b"some bytes", "b":print}], ensure_determinism=False, unsafe_fallback=True))[0]["a"]
b'some bytes'
>>> unpack(pack(b"some bytes", ensure_determinism=False, unsafe_fallback=True))
b'some bytes'
>>> unpack(pack(99999999999999999999999999999999999999999, ensure_determinism=False, unsafe_fallback=True))
99999999999999999999999999999999999999999
>>> from pandas import Series as S, DataFrame as DF
>>> s = S({"a": 5, "b": 6}, name="column")
>>> a = pack(s, ensure_determinism=True, unsafe_fallback=False)
>>> a  # doctest: +SKIP
b'00lz4__\x04"M\x18h@^\x00\x00\x00\x00\x00\x00\x00@\\\x00\x00\x00\xf1\x0e00bsos_W\x00\x00\x00\x04i\x00\x17\x00\x00\x00\x020\x00\x02\x00\x00\x00a\x00\x021\t\x00\xf0\nb\x00\x00\x05v\x00"\x00\x00\x00\x0016\xc2\xa71\xc2\xa7int64\xc2\xa7&\x00\x10\x05\x17\x00A\x00\x00\x00\x06\x06\x00\xf0\x02\x00\x00\x02n\x00\x07\x00\x00\x00column\x00\x00\x00\x00\x00\x00'
>>> unpack(a)
a    5
b    6
Name: column, dtype: int64
>>> s = S({"a": "5", "b": "6"})
>>> b = pack(s, ensure_determinism=True, unsafe_fallback=False)
>>> b
b'00lz4__\x04"M\x18h@\x1d\x08\x00\x00\x00\x00\x00\x00\xec\xe2\x04\x00\x00\xf0\x1100prqs_PAR1\x15\x04\x15\x14\x15\x18L\x15\x04\x15\x00\x12\x00\x00\n$\x01\x00\x00\x005\x05\x00\xf696\x15\x00\x15\x12\x15\x16,\x15\x04\x15\x10\x15\x06\x15\x06\x1c6\x00(\x016\x18\x015\x00\x00\x00\t \x02\x00\x00\x00\x04\x01\x01\x03\x02&\x88\x01\x1c\x15\x0c\x195\x10\x00\x06\x19\x18\x06_none_\x15\x02\x16\x04\x16x\x16\x80\x01&<&\x088\x00\x10\x19L\x00\x90\x00\x15\x02\x00\x15\x00\x15\x10\x15B\x00\x0f}\x00\x01\x10a}\x00\x1fb}\x00\x01Kb\x18\x01a}\x00&\x82\x03}\x00\xf7\x02\x11__index_level_0_\x88\x00Q\xb6\x02&\x82\x02\x8a\x00\x01E\x00\x0f\x8a\x00\x01\xf4\x04\x19<5\x00\x18\x06schema\x15\x04\x00\x15\x0c%\x02\xd0\x00b%\x00L\x1c\x00\x00\x13\x00\x0ef\x00\x03\x1e\x00o\x16\x04\x19\x1c\x19,\x0f\x01*\x0f\xcf\x007\xf2\r\x16\xf0\x01\x16\x04&\x08\x16\x80\x02\x14\x00\x00\x19,\x18\x06pandas\x18\xfc\x03{"%\x01\xcdcolumns": ["9\x01R"], ""\x00\x02T\x01\x11e)\x00\xfa\x07{"name": null, "field_\x14\x00\x02i\x00@_typ)\x00\xf5\x02"unicode", "numpy\x19\x00`object\x18\x00\xf6\x12metadata": {"encoding": "UTF-8"}}\x8d\x00\n\x86\x00\x12"n\x02\x00D\x00\t\x8a\x00\x07\x18\x00\x0f\x8e\x00*\x00\xe6\x00?}, \xf6\x00\n\x0f<\x01\x00\x0fw\x002\x01\xf4\x00areator\x18\x01plibrary\x17\x01ppyarrow\xf7\x00pversion\x16\x00\x8611.0.0"}~\x00\x08\x1d\x00\xf2\x00.5.3"}\x00\x18\x0cARROW:\xe6\x02@\x18\xe0\x07/\x01\x00\x82+ACAAAQA\x01\x00\xf1\x00KAA4ABgAFAAgACg\x15\x00)BB \x00\x10w\x15\x00\x15E \x00 DQ@\x00\x10E\x15\x00\x02F\x00\x01 \x00\x10I\x08\x00\x11B\x08\x00\x01E\x00\x10I \x00\x02%\x00\xf4FYAAABwYW5kYXMAAPwBAAB7ImluZGV4X2NvbHVtbnMiOiBbIl9faW5kZXhfbGV2ZWxfMF9fIl0sICJjb2x1bW5$\x00\xf0\x01lcyI6IFt7Im5hbWUD\x00\xf3\x15udWxsLCAiZmllbGRfbmFtZSI6IG51bGwsICJ\x90\x00aNfdHlw\x1c\x00\xf0\x0eCJ1bmljb2RlIiwgIm51bXB5X3R5cGX\x00\xa2Aib2JqZWN0 \x00\xf0\x0c1ldGFkYXRhIjogeyJlbmNvZGluZ\x94\x00\xd9CJVVEYtOCJ9fV\xbc\x00\x10z0\x00EW3si\x98\x00\xbfCJfbm9uZV8i\xb8\x00\x02\x0b \x00\x88cGFuZGFz\x9c\x00\xb0dW5pY29kZSI\xe0\x00\xd0udW1weV90eXBlp\x00\xa1Im9iamVjdC 
\x00\xa5tZXRhZGF0Y\x18\x01@x9LC\x98\x01\x0fH\x01\x10TCJfX2\xc0\x01\xc1xldmVsXzBfXy\\\x00\x0f\\\x01>\x80bnVsbH1d\x1c\x01\xf0\nY3JlYXRvciI6IHsibGlicmFye\xc8\x00\xb1CJweWFycm93\xa4\x01\xa1nZlcnNpb24\xc0\x01\xa1MTEuMC4wIn\x90\x01\x06\xa8\x00\x80mVyc2lvbT\x00\xb0CIxLjUuMyJ9\xc4\x02\x02\xcb\x02 BM\n\x00\x10B\x05\x00\xb1Mz///8AAAEF\xe0\x02\x11CK\x03\x01\x0b\x00\x01\x02\x00\x10B\x0b\x00\x1fB \x01\x03`wAAAMj@\x00@QABQ\x89\x03`GAAcAD9\x00\x17BE\x00!QUU\x00\x10H\x18\x00\x02e\x03\x01\x02\x00\x10B[\x03\x94F9ub25lXw3\x00\x01+\x00\x01\x02\x00\xf1\x00\x00\x18 parquet-cpp-9\x04\x13 \x19\x04\x12 3\x04P\x19,\x1c\x00\x00\xdf\x06\x80\x03\x07\x00\x00PAR1\x00\x00\x00\x00'
>>> unpack(b)
a    5
b    6
dtype: object
>>> s = S({"a": "5", "b": 6}, name="column")
>>> unpack(pack(s, ensure_determinism=True, unsafe_fallback=True))
a    5
b    6
Name: column, dtype: object
>>> df = DF({"a": ["5","6","7"], "b": [1,2,3]}, index=["x","y","z"])
>>> unpack(pack(df, ensure_determinism=True, unsafe_fallback=False))
   a  b
x  5  1
y  6  2
z  7  3
>>> df = DF({"a": ["5",6,"7"], "b": ["1","2","3"]}, index=["x","y","z"])
>>> unpack(pack(df, ensure_determinism=True, unsafe_fallback=True))
   a  b
x  5  1
y  6  2
z  7  3
Expand source code
def traversal_enc(obj, ensure_determinism, unsafe_fallback):
    """
    Recursively encode `obj` into a marker-prefixed bytes dump (inverse of `traversal_dec`).

    Encoders are tried in a fixed order: raw bytes pass through untouched;
    tuples are encoded element-wise; then orjson, bson, decimal text for
    oversized ints, element-wise list/dict encoding, numpy raw bytes, and
    pandas Series/DataFrame (raw ndarray when numeric, parquet otherwise);
    finally pickle/dill when `unsafe_fallback` allows it.

    TODO: Fix nested tuples being converted to lists by json?
        'tuple' should make orjson/bson raise an exception like it would happen for hdict,
        it would be easy to handle like with other non built-in types.
    >>> unpack(pack(["a", ["3", 4], "b", 4], ensure_determinism=False, unsafe_fallback=False))
    ['a', ['3', 4], 'b', 4]
    >>> unpack(pack([{0: [{"3":4}], "b": b"b"}], ensure_determinism=False, unsafe_fallback=False))
    [{0: [{'3': 4}], 'b': b'b'}]
    >>> du = pack(True, ensure_determinism=False, unsafe_fallback=True, compressed=False)
    >>> du
    b'00json_true'
    >>> unpack(du)
    True
    >>> unpack(pack([True], ensure_determinism=False, unsafe_fallback=True, compressed=False))
    [True]
    >>> unpack(pack([{"a": b"some bytes", "b":print}], ensure_determinism=False, unsafe_fallback=True))[0]["a"]
    b'some bytes'
    >>> unpack(pack(b"some bytes", ensure_determinism=False, unsafe_fallback=True))
    b'some bytes'
    >>> unpack(pack(99999999999999999999999999999999999999999, ensure_determinism=False, unsafe_fallback=True))
    99999999999999999999999999999999999999999
    >>> from pandas import Series as S, DataFrame as DF
    >>> s = S({"a": 5, "b": 6}, name="column")
    >>> a = pack(s, ensure_determinism=True, unsafe_fallback=False)
    >>> unpack(a)
    a    5
    b    6
    Name: column, dtype: int64
    >>> s = S({"a": "5", "b": "6"})
    >>> b = pack(s, ensure_determinism=True, unsafe_fallback=False)
    >>> unpack(b)
    a    5
    b    6
    dtype: object
    >>> s = S({"a": "5", "b": 6}, name="column")
    >>> unpack(pack(s, ensure_determinism=True, unsafe_fallback=True))
    a    5
    b    6
    Name: column, dtype: object
    >>> df = DF({"a": ["5","6","7"], "b": [1,2,3]}, index=["x","y","z"])
    >>> unpack(pack(df, ensure_determinism=True, unsafe_fallback=False))
       a  b
    x  5  1
    y  6  2
    z  7  3
    >>> df = DF({"a": ["5",6,"7"], "b": ["1","2","3"]}, index=["x","y","z"])
    >>> unpack(pack(df, ensure_determinism=True, unsafe_fallback=True))
       a  b
    x  5  1
    y  6  2
    z  7  3
    """
    error = None
    if isinstance(obj, bytes):
        # Raw bytes are their own dump; traversal_dec returns unmarked bytes as-is.
        return obj
    if isinstance(obj, tuple):
        # Handled before json/bson, which would silently turn tuples into lists.
        lst_of_binaries = tuple(traversal_enc(o, ensure_determinism, unsafe_fallback) for o in obj)
        return b"00tupl_" + bson.encode({"_": lst_of_binaries})

    try:
        return b"00json_" + orjson.dumps(obj)
    except TypeError as e:
        error = str(e)
    try:
        return b"00bson_" + bson.encode({"_": obj})
    except InvalidDocument as e:
        error = str(e)
    except OverflowError as o:
        # bson ints are capped at 8 bytes; store oversized ints as decimal text.
        if "8-byte ints" in str(o) and isinstance(obj, int):
            return b"00bint_" + str(obj).encode()

    if isinstance(obj, list):
        lst_of_binaries = [traversal_enc(o, ensure_determinism, unsafe_fallback) for o in obj]
        return b"00list_" + bson.encode({"_": lst_of_binaries})
    elif isinstance(obj, dict):
        dic_of_binaries = {}
        # bson requires str keys; if any key is not str, ALL keys are stored as
        # hexlified encoded dumps (marker b"00dicB_" instead of b"00dict_").
        hexfy = any(not isinstance(k, str) for k in obj.keys())
        prefix = b"00dicB_" if hexfy else b"00dict_"
        for k, o in obj.items():
            if hexfy:
                bk = traversal_enc(k, ensure_determinism, unsafe_fallback)
                k = hexlify(bk).decode("utf-8")
            dic_of_binaries[k] = traversal_enc(o, ensure_determinism, unsafe_fallback)
        return prefix + bson.encode(dic_of_binaries)

    # Class matched by name to avoid importing numpy/pandas for unrelated objects.
    klass = str(obj.__class__)
    if klass in ["<class 'numpy.ndarray'>"]:
        return serialize_numpy(obj, ensure_determinism, unsafe_fallback)
    elif klass == "<class 'pandas.core.series.Series'>":
        try:
            idx = obj.index.values.tolist()
            vals = serialize_numpy(obj.to_numpy(), ensure_determinism, False, b"")
            dic = {"i": idx, "v": vals}
            if obj.name is not None:
                dic["n"] = obj.name
            return b"00bsos_" + bson.encode(dic)
        except Exception as e:
            # serialize_numpy signals non-numeric dtypes via this message prefix;
            # such Series fall back to parquet.
            if str(e).startswith("Please enable 'unsafe_fallback'"):
                # NOTE(review): DataFrame import appears unused here — verify before removing.
                from pandas import DataFrame

                try:
                    # "_none_" marks an unnamed Series; traversal_dec strips it back to None.
                    return b"00prqs_" + obj.to_frame(obj.name or "_none_").to_parquet()  # .convert_dtypes().to_parquet()
                except Exception as e:
                    error = str(e)
    elif klass == "<class 'pandas.core.frame.DataFrame'>":
        try:
            return serialize_numpy(obj.to_numpy(), ensure_determinism, unsafe_fallback=False, prefix=b"00npdf_")
        except Exception as e:
            if str(e).startswith("Please enable 'unsafe_fallback'"):
                try:
                    return b"00prqd_" + obj.to_parquet()
                except Exception as e:
                    error = str(e)
    if unsafe_fallback:
        return topickle(obj, ensure_determinism)
    raise Exception(f"Cannot safely pack {type(obj)}: {error}")  # pragma: no cover
    # TODO: handle hdict?
def unpack(blob)
>>> from pandas import DataFrame as DF
>>> df = DF({"a": ["5", "6", "7"], "b": [1, 2, 3]}, index=["x", "y", "z"])
>>> complex_data = {"a": b"Some binary content", ("mixed-types tuple as a key", 4): 123, "df": df}
>>> complex_data
{'a': b'Some binary content', ('mixed-types tuple as a key', 4): 123, 'df':    a  b
x  5  1
y  6  2
z  7  3}
>>> dump = pack(complex_data, ensure_determinism=False, unsafe_fallback=False)
>>> dump
b'00lz4__\x04"M\x18h@h\x0b\x00\x00\x00\x00\x00\x00\x94\x95\x06\x00\x00\xf1*00dicB_a\x0b\x00\x00\x0530306a736f6e5f226122\x00\x13\x00\x00\x00\x00Some binary content.\x00\xd27475706c5f480\x01\x00b45f004\x0c\x00\x8000530002\x05\x00\x01\x02\x00\rb\x00\xf5\x07d697865642d74797065732X\x00`652061\x12\x00\xf4\x0461206b6579220531000r\x00\x0bV\x00\x113}\x00 \x00\n\xb8\x00\xa100json_123\xaf\x00\t\xdd\x00p46622\x00b(\x00\xf0\x1100prqd_PAR1\x15\x04\x15\x1e\x15"L\x15\x06\x15\x00\x12\x00\x00\x0f8\x01\x00\x00\x005\x05\x00\x106\x05\x00\xf667\x15\x00\x15\x14\x15\x18,\x15\x06\x15\x10\x15\x06\x15\x06\x1c6\x00(\x017\x18\x015\x00\x00\x00\n$\x02\x00\x00\x00\x06\x01\x02\x03$\x00&\x94\x01\x1c\x15\x0c\x195\x10\x00\x06\x19\x18\x01a\x15\x02\x16\x06\x16\x84\x01\x16\x8c\x01&F&\x085\x00\xe0\x19,\x15\x04\x15\x00\x15\x02\x00\x15\x00\x15\x10\x15?\x00d\x15\x04\x150\x15.\x7f\x00p\x18\x04\x01\x00\t\x01<\x19\x00\x00\x02\x00\x10\x03\x05\x00 \x00\x00.\x00\t\x85\x00$\x18\x08\x1a\x00 \x18\x08\xa6\x00\x00\x02\x00?\x16\x00(\x16\x00\x00\x0c\xa7\x00T\xe2\x03\x1c\x15\x04\xa7\x00\x11b\xa7\x00\xbf\xda\x01\x16\xdc\x01&\xd0\x02&\x86\x02Y\x00\x19\x0f\xcb\x00\x02\rJ\x01\x10x\x9f\x00\x10y\x05\x00\x1fzJ\x01\x01Lz\x18\x01x\xa3\x00&\xa8\x06J\x01\xf1\x03\x11__index_level_0__\xb3\x00\x02Z\x01Q\xda\x05&\x9c\x05\x91\x01\x01G\x00\x0f\x91\x00\x01\xf0\x0b\x19L5\x00\x18\x06schema\x15\x06\x00\x15\x0c%\x02\x18\x01a%\x00L\x1cV\x01\x10\x04\x0e\x00\x12b\x16\x00\x0ej\x00\x03&\x00o\x16\x06\x19\x1c\x19<\xe0\x01&\x0fr\x01J\x0f,\x018\xb0\x16\xe2\x03\x16\x06&\x08\x16\xf4\x03\x14\xdc\x01\xd2\x18\x06pandas\x18\xd5\x04{"\x83\x01\xcdcolumns": ["\x97\x01R"], ""\x00\x02\xb2\x01\x11e)\x00\xfa\x07{"name": null, "field_\x14\x00\x02i\x00@_typ)\x00\xf5\x02"unicode", "numpy\x19\x00`object\x18\x00\xf6\x12metadata": {"encoding": "UTF-8"}}\x8d\x00\n\x86\x00 "a?\x00\t\x85\x00\x02\x13\x00\x0f\x84\x00*\x00\xdc\x005}, 
\xec\x00."bf\x00\x01\x13\x00\x0bf\x00Pint64+\x00\n\xe8\x00\x05\x17\x00\x07\xe7\x00\x0cc\x00\x00\x10\x00\x0cO\x01\x0f\x95\x01\x00\x0et\x00\x0f^\x01\x1b\x00g\x00\x02M\x01areatorq\x01plibraryp\x01ppyarrow\xc4\x00pversion\x16\x00\x8611.0.0"}~\x00\x08\x1d\x00\xf2\x00.5.3"}\x00\x18\x0cARROW:\x9c\x03@\x18\x98\t/\x01\x00\x822gDAAAQA\x01\x00\xf1\x00KAA4ABgAFAAgACg\x15\x00)BB \x00\x10w\x15\x00\x15E \x002IwC\x10\x00\x04F\x00\x01 \x00\x10I\x08\x00\x11B\x08\x00\x01E\x00\x10I \x00\x10E\x05\x00\xf4GAYAAABwYW5kYXMAAFUCAAB7ImluZGV4X2NvbHVtbnMiOiBbIl9faW5kZXhfbGV2ZWxfMF9fIl0sICJjb2x1bW5$\x00\xf0\x01lcyI6IFt7Im5hbWUD\x00\xf0\x11udWxsLCAiZmllbGRfbmFtZSI6IG51bGwH\x00\x03\x90\x00aNfdHlw\x1c\x00\xf0\x0eCJ1bmljb2RlIiwgIm51bXB5X3R5cGX\x00\xa2Aib2JqZWN0 \x00\xf0\x0c1ldGFkYXRhIjogeyJlbmNvZGluZ\x94\x00\xe0CJVVEYtOCJ9fV0t\x00\x04\xbc\x00\x10z0\x00EW3si\x98\x002CJhT\x00\x84ZpZWxkX2\xcc\x00PAiYSI<\x00\x0f\xb0\x00>\xa9bnVsbH0sIH\x88\x00\x1fi\x88\x00\x06\x1fi\x88\x00\x06qpbnQ2NC \x00\xd0udW1weV90eXBl\xe8\x00\x00\xe8\x01?dDY4\x01\x02\x0f\x84\x00\x02\x06\xa4\x01\xc2maWVsZF9uYW1L\x00\x0f\x1c\x02\x05\x00\xb0\x01\x97nBhbmRhc1|\x00PnVuaW\x94\x01 Ui\x10\x02xbnVtcHl\xf4\x01\x82vYmplY3Q \x00\xa5WV0YWRhdGEH\x02\x04\xbc\x01\x80cmVhdG9y\xd4\x00\xc0eyJsaWJyYXJ5\x10\x00\xb1InB5YXJyb3cH\x00\xf9\x0bdmVyc2lvbiI6ICIxMS4wLjAifS\xa8\x00\x912ZXJzaW9uD\x00\xa0jEuNS4zIn06\x03\x00\xa4\x03!Ah\n\x00\x01`\x03\x01K\x03PmP///\x0f\x00!QU\xc0\x03\x10J \x00\x05\xcb\x030AAE\x16\x00\x1fF$\x01\x03\x00 \x00\x01@\x00`8z///8\xc3\x03\x11CU\x00\x10BQ\x00\x11A\x0b\x00\x02\x02\x00\x00\x0b\x00 Bi\x0c\x00@CAAM\xd0\x03"Bw\xd0\x03\x01\x02\x00\x11U\x06\x00\xc2QABQACAAGAAc\xad\x00\x12B:\x00\x01\x02\x00\x03\xa0\x00\x12G\r\x00\x01\x06\x00\x01\x02\x00\x00\xa0\x00\x10G`\x00\x00p\x001QAB\x15\x00\xf1\x02==\x00\x18 parquet-cpp-\xf1\x04\x13 \xd1\x04\x12 \xeb\x04R\x19<\x1c\x00\x00\x03\x00\xa0\x00t\x08\x00\x00PAR1\x00\x00\x00\x00\x00'
>>> unpack(dump)
{'a': b'Some binary content', ('mixed-types tuple as a key', 4): 123, 'df':    a  b
x  5  1
y  6  2
z  7  3}
Expand source code
def unpack(blob):
    """
    Deserialize bytes produced by `pack` back into the original object.

    Transparently LZ4-decompresses when the blob carries the b"00lz4__"
    marker, then delegates to `traversal_dec`.

    >>> from pandas import DataFrame as DF
    >>> df = DF({"a": ["5", "6", "7"], "b": [1, 2, 3]}, index=["x", "y", "z"])
    >>> complex_data = {"a": b"Some binary content", ("mixed-types tuple as a key", 4): 123, "df": df}
    >>> unpack(pack(complex_data, ensure_determinism=False, unsafe_fallback=False))
    {'a': b'Some binary content', ('mixed-types tuple as a key', 4): 123, 'df':    a  b
    x  5  1
    y  6  2
    z  7  3}
    """
    if blob.startswith(b"00lz4__"):
        import lz4.frame as lz4

        blob = lz4.decompress(blob[7:])
    return traversal_dec(blob)

Classes

class NondeterminismException (*args, **kwargs)

Common base class for all non-exit exceptions.

Expand source code
class NondeterminismException(Exception):
    """Raised when pickling fails for an object while `ensure_determinism=True` (see `topickle`)."""

    pass

Ancestors

  • builtins.Exception
  • builtins.BaseException