Python client for the Oka repository
Project description
oka - Client for OKA repository
Overview
oka
is a client for the Oka repository.
It also provides utilities to process data.
Installation
...as a standalone lib
# Set up a virtualenv.
python3 -m venv venv
source venv/bin/activate
# Install from PyPI...
pip install --upgrade pip
pip install -U oka
pip install -U oka[full] # use the flag 'full' for extra functionality (recommended)
# ...or, install from updated source code.
pip install git+https://github.com/rabizao/oka
...from source
sudo apt install python3.8-venv python3.8-dev python3.8-distutils # For Debian-like systems.
git clone https://github.com/rabizao/oka
cd oka
python3.8 -m venv venv
source venv/bin/activate
pip install -e .
Usage
Hello world
from oka import Oka, generate_token, toy_df
# Create a pandas dataframe.
df = toy_df()
print(df.head())
"""
attr1 attr2 class
0 5.1 6.4 0
1 1.1 2.5 1
2 6.1 3.6 0
3 1.1 3.5 1
4 3.1 2.5 0
"""
# Login.
token = generate_token("http://localhost:5000")
client = Oka(token, "http://localhost:5000")
# Store.
id = client.send(df)
# Store again.
id = client.send(df)
"""
Content already stored for id iJ_e4463c51904e9efb800533d25082af2a7bf77
"""
# Fetch.
df = client.get(id)
print(df.head())
"""
attr1 attr2 class
0 5.1 6.4 0
1 1.1 2.5 1
2 6.1 3.6 0
3 1.1 3.5 1
4 3.1 2.5 0
"""
DataFrame by hand
import pandas as pd
from oka import Oka, generate_token
# Create a pandas dataframe.
df = pd.DataFrame(
[[1, 2, "+"],
[3, 4, "-"]],
index=["row 1", "row 2"],
columns=["col 1", "col 2", "class"],
)
print(df.head())
"""
col 1 col 2 class
row 1 1 2 +
row 2 3 4 -
"""
# Login.
token = generate_token("http://localhost:5000")
client = Oka(token, "http://localhost:5000")
# Store.
id = client.send(df)
# Store again.
id = client.send(df)
"""
Content already stored for id f7_6b9deafec2562edde56bfdc573b336b55cb16
"""
# Fetch.
df = client.get(id)
print(df.head())
"""
col 1 col 2 class
row 1 1 2 +
row 2 3 4 -
"""
Machine Learning workflow
from pprint import pprint
from idict import let, idict
from idict.function.classification import fit, predict
from idict.function.evaluation import split
from sklearn.ensemble import RandomForestClassifier as RF
d = (
idict.fromtoy()
>> split
>> let(fit, algorithm=RF, config={"n_estimators": 55}, Xin="Xtr", yin="ytr")
>> let(predict, Xin="Xts")
)
print(d.z)
"""
[1 0 1 0 1 1 1]
"""
pprint(d.history)
"""
{'fit--------------------------------idict': {'code': 'def f(algorithm=None, '
"config={}, Xin='X', "
"yin='y', "
"output='model', "
'version=0, **kwargs):\n'
'obj = '
'algorithm(**config)\n'
'obj.fit(kwargs[Xin], '
'kwargs[yin])\n'
'return {output: obj, '
"'_history': ...}",
'description': 'Induce a model.',
'name': 'fit',
'parameters': {'Xin': 'Xtr',
'algorithm': <class 'sklearn.ensemble._forest.RandomForestClassifier'>,
'config': {'n_estimators': 55},
'output': 'model',
'version': 0,
'yin': 'ytr'}},
'predict----------------------------idict': {'code': "def f(input='model', "
"Xin='X', yout='z', "
'version=0, **kwargs):\n'
'return {yout: '
'kwargs[input].predict(kwargs[Xin]), '
"'_history': ...}",
'description': 'Predict values '
'according to a '
'model.',
'name': 'predict',
'parameters': {'Xin': 'Xts',
'input': 'model',
'version': 0,
'yout': 'z'}},
'split----------------------sklearn-1.0.1': {'code': "def f(input=['X', 'y'], "
"config={'test_size': "
"0.33, 'shuffle': True, "
"'stratify': 'y', "
"'random_state': 0}, "
'**kwargs):\n'
"if input != ['X', "
"'y']:\n"
' raise '
'Exception(f"Not '
'implemented for '
'input/output different '
'from default values: '
'{input}")\n'
'from '
'sklearn.model_selection '
'import '
'train_test_split\n'
'args = {}\n'
'for i, _ in '
'enumerate(input):\n'
' args[input[i]] = '
'kwargs[input[i]]\n'
'else:\n'
" if 'stratify' in "
'config:\n'
' if '
"isinstance(config['stratify'], "
'str):\n'
' if '
"config['stratify'] not "
'in input:\n'
' raise '
'Exception(f"Missing '
'field '
"{config['stratify']} "
'for stratification.")\n'
' '
"config['stratify'] = "
"args[config['stratify']]\n"
' Xtr, Xts, ytr, yts '
'= '
'train_test_split(*(args.values)(), '
'**config)\n'
" return {'Xtr':Xtr, "
"'ytr':ytr, 'Xts':Xts, "
"'yts':yts, "
"'_history':...}",
'description': 'Split data in '
'two sets.',
'name': 'split',
'parameters': {'config': {'random_state': 0,
'shuffle': True,
'stratify': array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]),
'test_size': 0.33},
'input': ['X',
'y']}}}
"""
More info
Aside from the papers on identification and on similarity (not ready yet), the PyPI package and the GitHub repository,
a lower-level perspective is provided in the API documentation.
Grants
This work was supported by FAPESP under the supervision of Prof. André C. P. L. F. de Carvalho at CEPID-CeMEAI (Grants 2013/07375-0 – 2019/01735-0).
.>>>>>>>>> outros <<<<<<<<<<<.
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distributions
No source distribution files available for this release. See tutorial on generating distribution archives.
Built Distribution
oka-0.211126.5-py3-none-any.whl
(19.8 kB
view hashes)