
tfrecords: fast and simple reader and writer

Project description

tfrecords

A simplified, standalone port of TFRecord reading and writing, plus table-style storage backends: LevelDB, LMDB, Arrow IPC, and Parquet.

update history

    2023-07-01: Added Arrow and Parquet support.
    2022-10-30: Added LMDB and LevelDB readers/writers and record-batch writing.
    2022-10-17: Added a shared-memory read mode for records, for faster reading.
    2022-02-01: Simplified and ported the TFRecord dataset.

1. Record read and write demo; the with_share_memory flag speeds up reading

# -*- coding: utf-8 -*-
# @Time    : 2022/9/8 15:49

import tfrecords

options = tfrecords.TFRecordOptions(compression_type=tfrecords.TFRecordCompressionType.NONE)


def test_write(filename, N=3, context='aaa'):
    with tfrecords.TFRecordWriter(filename, options=options) as file_writer:
        batch_data = []
        for i in range(N):
            d = context + '____' + str(i)
            batch_data.append(d)
            if (i + 1) % 100 == 0:
                file_writer.write_batch(batch_data)
                batch_data.clear()
        if len(batch_data):
            file_writer.write_batch(batch_data)
            batch_data.clear()


def test_record_iterator(example_paths):
    print('test_record_iterator')
    for example_path in example_paths:
        iterator = tfrecords.tf_record_iterator(example_path, options=options, skip_bytes=0, with_share_memory=True)
        offset_list = iterator.read_offsets(0)  # (offset, length) pairs for every record
        count = iterator.read_count(0)  # total number of records
        print(count)
        num = 0
        for record in iterator:
            num += 1
            print(record)


def test_random_reader(example_paths):
    print('test_random_reader')
    for example_path in example_paths:
        file_reader = tfrecords.tf_record_random_reader(example_path, options=options, with_share_memory=True)
        last_pos = 0
        while True:
            try:
                x, pos = file_reader.read(last_pos)
                print(x, pos)
                last_pos = pos

            except Exception:  # raised once the end of the file is reached
                break


def test_random_reader2(example_paths):
    print('test_random_reader2')
    for example_path in example_paths:
        file_reader = tfrecords.tf_record_random_reader(example_path, options=options, with_share_memory=True)
        skip_bytes = 0
        offset_list = file_reader.read_offsets(skip_bytes)
        for offset, length in offset_list:
            x, _ = file_reader.read(offset)
            print(x)


test_write('d:/example.tfrecords0', 3, 'file0')

example_paths = tfrecords.glob('d:/example.tfrecords*')
print(example_paths)
test_record_iterator(example_paths)
print()
test_random_reader(example_paths)
print()
test_random_reader2(example_paths)
print()
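
The demo above writes uncompressed records. Compression is chosen through the same TFRecordOptions; below is a minimal sketch of a compressed round trip, assuming the transplanted enum also exposes ZLIB the way TensorFlow's TFRecordCompressionType does.

import tfrecords

# Assumption: TFRecordCompressionType.ZLIB exists, mirroring TensorFlow's enum.
zlib_options = tfrecords.TFRecordOptions(compression_type=tfrecords.TFRecordCompressionType.ZLIB)

with tfrecords.TFRecordWriter('d:/example_zlib.tfrecords0', options=zlib_options) as file_writer:
    file_writer.write_batch(['record_0', 'record_1'])

# The reader must be given the same options the writer used.
for record in tfrecords.tf_record_iterator('d:/example_zlib.tfrecords0', options=zlib_options,
                                           skip_bytes=0, with_share_memory=True):
    print(record)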

2. LevelDB read and write demo

# -*- coding: utf-8 -*-
# @Time    : 2022/9/8 15:49

from tfrecords import LEVELDB

db_path = 'd:/example_leveldb'


def test_write(db_path):
    options = LEVELDB.LeveldbOptions(create_if_missing=True, error_if_exists=False)
    file_writer = LEVELDB.Leveldb(db_path, options)

    keys, values = [], []
    for i in range(30):
        keys.append(b"input_" + str(i).encode())
        keys.append(b"label_" + str(i).encode())
        values.append(b"xiaoming" + str(i).encode())
        values.append(b"zzs" + str(i).encode())
        if (i + 1) % 1000 == 0:
            file_writer.put_batch(keys, values)
            keys.clear()
            values.clear()
    if len(keys):
        file_writer.put_batch(keys, values)
        keys.clear()
        values.clear()

    file_writer.close()


def test_read(db_path):
    options = LEVELDB.LeveldbOptions(create_if_missing=False, error_if_exists=False)
    reader = LEVELDB.Leveldb(db_path, options)

    def show():
        it = reader.get_iterater(reverse=False)
        i = 0
        for item in it:
            print(i, item)
            i += 1

    def test_find(key):
        value = reader.get(key)
        print('find', type(value), value)

    show()

    test_find(b'input_0')
    test_find(b'input_5')
    test_find(b'input_10')

    reader.close()


test_write(db_path)
test_read(db_path)
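
Built only from the calls used above, and assuming the iterator yields (key, value) pairs as the show() output suggests, here is a small sketch that loads the whole database into a Python dict:

from tfrecords import LEVELDB

def load_all(db_path):
    options = LEVELDB.LeveldbOptions(create_if_missing=False, error_if_exists=False)
    reader = LEVELDB.Leveldb(db_path, options)
    # Assumption: each iterated item is a (key, value) pair.
    data = {key: value for key, value in reader.get_iterater(reverse=False)}
    reader.close()
    return data

data = load_all('d:/example_leveldb')
print(len(data), data.get(b'input_0'))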

3. LMDB read and write demo

# -*- coding: utf-8 -*-
# @Time    : 2022/9/8 15:49

from tfrecords import LMDB

db_path = 'd:/example_lmdb'


def test_write(db_path):
    options = LMDB.LmdbOptions(env_open_flag=0,
                               env_open_mode=0o664,  # octal file mode
                               txn_flag=0,
                               dbi_flag=0,
                               put_flag=0)
    file_writer = LMDB.Lmdb(db_path, options, map_size=1024 * 1024 * 10)
    keys, values = [], []
    for i in range(30):
        keys.append(b"input_" + str(i).encode())
        keys.append(b"label_" + str(i).encode())
        values.append(b"xiaoming_" + str(i).encode())
        values.append(b"zzs_" + str(i).encode())
        if (i + 1) % 1000 == 0:
            file_writer.put_batch(keys, values)
            keys.clear()
            values.clear()
    if len(keys):
        file_writer.put_batch(keys, values)
    file_writer.close()


def test_read(db_path):
    options = LMDB.LmdbOptions(env_open_flag=LMDB.LmdbFlag.MDB_RDONLY,
                               env_open_mode=0o664,  # octal file mode
                               txn_flag=0,  # or LMDB.LmdbFlag.MDB_RDONLY
                               dbi_flag=0,
                               put_flag=0)
    reader = LMDB.Lmdb(db_path, options, map_size=0)

    def show():
        it = reader.get_iterater(reverse=False)
        i = 0
        for item in it:
            print(i, item)
            i += 1

    def test_find(key):
        value = reader.get(key)
        print('find', type(value), value)

    show()
    test_find(b'input_0')
    test_find(b'input_5')
    test_find(b'input_10')
    reader.close()


test_write(db_path)
test_read(db_path)
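
The read demo opens the environment with MDB_RDONLY and map_size=0 (presumably leaving the existing map size untouched). Reusing exactly those calls, this sketch counts the stored records:

from tfrecords import LMDB

def count_records(db_path):
    # Read-only open, as in test_read above.
    options = LMDB.LmdbOptions(env_open_flag=LMDB.LmdbFlag.MDB_RDONLY,
                               env_open_mode=0o664,  # octal file mode
                               txn_flag=0,
                               dbi_flag=0,
                               put_flag=0)
    reader = LMDB.Lmdb(db_path, options, map_size=0)
    n = sum(1 for _ in reader.get_iterater(reverse=False))
    reader.close()
    return n

print(count_records('d:/example_lmdb'))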

4. Arrow demo

Stream

from tfrecords.python.io.arrow import IPC_Writer, IPC_StreamReader, arrow

path_file = "d:/tmp/data.arrow"

def test_write():
    schema = arrow.schema([
        arrow.field('id', arrow.int32()),
        arrow.field('text', arrow.utf8())
    ])

    a = arrow.Int32Builder()
    a.AppendValues([0, 1, 4])
    a = a.Finish().Value()

    b = arrow.StringBuilder()
    b.AppendValues(["aaaa", "你是谁", "张三"])
    b = b.Finish().Value()

    table = arrow.Table.Make(schema=schema, arrays=[a, b])
    fs = IPC_Writer(path_file, schema, with_stream=True)
    fs.write_table(table)
    fs.close()

def test_read():
    fs = IPC_StreamReader(path_file)
    table = fs.read_all()
    fs.close()
    print(table)

    col = table.GetColumnByName('text')
    text_list = col.chunk(0)
    for i in range(text_list.length()):
        x = text_list.Value(i)
        print(type(x), x)


test_write()
test_read()

File

from tfrecords.python.io.arrow import IPC_Writer, IPC_MemoryMappedFileReader, arrow

path_file = "d:/tmp/data.arrow"

def test_write():
    schema = arrow.schema([
        arrow.field('id', arrow.int32()),
        arrow.field('text', arrow.utf8())
    ])

    a = arrow.Int32Builder()
    a.AppendValues([0, 1, 4])
    a = a.Finish().Value()

    b = arrow.StringBuilder()
    b.AppendValues(["aaaa", "你是谁", "张三"])
    b = b.Finish().Value()

    table = arrow.Table.Make(schema=schema, arrays=[a, b])
    fs = IPC_Writer(path_file, schema, with_stream=False)
    fs.write_table(table)
    fs.close()


def test_read():
    fs = IPC_MemoryMappedFileReader(path_file)
    for i in range(fs.num_record_batches()):
        batch = fs.read_batch(i)
        print(batch)
    fs.close()


test_write()
test_read()
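
Unlike the stream format, the IPC file format is indexed, so read_batch can jump to any batch without scanning the whole file. A small sketch, using only the reader calls from the demo above, that fetches just the last batch:

from tfrecords.python.io.arrow import IPC_MemoryMappedFileReader

fs = IPC_MemoryMappedFileReader("d:/tmp/data.arrow")
n = fs.num_record_batches()
if n > 0:
    # Random access: read the final record batch directly by index.
    print(fs.read_batch(n - 1))
fs.close()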

5. Parquet demo

from tfrecords.python.io.arrow import ParquetWriter, ParquetReader, arrow
path_file = "d:/tmp/data.parquet"

def test_write():
    schema = arrow.schema([
        arrow.field('id', arrow.int32()),
        arrow.field('text', arrow.utf8())
    ])

    a = arrow.Int32Builder()
    a.AppendValues([0, 1, 4, 5])
    a = a.Finish().Value()

    b = arrow.StringBuilder()
    b.AppendValues(["aaaa", "你是谁", "张三", "李赛"])
    b = b.Finish().Value()

    table = arrow.Table.Make(schema=schema, arrays=[a, b])

    fs = ParquetWriter(path_file, schema)
    fs.write_table(table)
    fs.close()

def test_read():
    fs = ParquetReader(path_file, options=dict(buffer_size=2))
    table = fs.read_table()
    fs.close()
    table = table.Flatten().Value()
    print(table)

    col = table.GetColumnByName('text')
    text_list = col.chunk(0)
    for i in range(text_list.length()):
        x = text_list.Value(i)
        print(type(x), x)


test_write()
test_read()
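
A column can span several chunks when a table is assembled from multiple batches. Below is a hedged helper that flattens one column into a Python list; it assumes the wrapper also exposes ChunkedArray.num_chunks() as Arrow C++ does, which is not shown in the demo above.

from tfrecords.python.io.arrow import ParquetReader

def column_to_list(path, name):
    fs = ParquetReader(path, options=dict(buffer_size=2))
    table = fs.read_table()
    fs.close()
    table = table.Flatten().Value()
    col = table.GetColumnByName(name)
    values = []
    # Assumption: num_chunks() mirrors arrow::ChunkedArray::num_chunks().
    for k in range(col.num_chunks()):
        chunk = col.chunk(k)
        values.extend(chunk.Value(i) for i in range(chunk.length()))
    return values

print(column_to_list("d:/tmp/data.parquet", 'text'))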

Download files

tfrecords 0.2.19 is published as built wheels only; no source distribution is available for this release.

Built Distributions

    Wheels for CPython 3.6-3.12 (cp36m/cp37m through cp312):
        Windows x86-64 (win_amd64), ~8.2 MB each
        manylinux2014 x86_64, ~17.2 MB each
        manylinux2014 aarch64, ~13.5 MB each
