Skip to main content

One-stop time series analysis tool, supporting time series data preprocessing, feature engineering, model training, model evaluation, and model prediction.

Project description

PipelineTS

一站式时间序列分析工具,支持时序数据预处理、特征工程、模型训练、模型评估、模型预测等。

安装

conda install -c conda-forge prophet

python -m pip install PipelineTS

快速开始

from PipelineTS.dataset import LoadWebSales

init_data = LoadWebSales()[['date', 'type_a']]

valid_data = init_data.iloc[-30:, :]
data = init_data.iloc[:-30, :]
device = 'cpu'

from PipelineTS.pipeline import ModelPipeline

# list all models
ModelPipeline.list_models()

from sklearn.metrics import mean_absolute_error

pipeline = ModelPipeline(
    time_col='date',
    target_col='type_a',
    lags=30,
    random_state=42,
    metric=mean_absolute_error,
    metric_less_is_better=True,
    device=device
)

# training all models
pipeline.fit(data, valid_df=valid_data)

# use the best model to predict the next 30 data points
res = pipeline.predict(30)

数据准备

from PipelineTS.dataset import LoadMessagesSentDataSets
import pandas as pd
# convert time col, the date column is assumed to be date_col
time_col = 'date_col'
target_col = 'ta'
lags = 30  # 往前的窗口大小,数据将会被切割成lags天的多条序列进行训练
n = 30 # 需要预测多少步,在这个例子里为需要预测多少天

# you can also load data with pandas
# init_data = pd.read_csv('/path/to/your/data.csv')
init_data = LoadMessagesSentDataSets()[[time_col, target_col]]

init_data[time_col] = pd.to_datetime(init_data[time_col], format='%Y-%m-%d')

# 划分训练集和测试集
valid_data = init_data.iloc[-n:, :]
data = init_data.iloc[:-n, :]
print("data shape: ", data.shape, ", valid data shape: ", valid_data.shape)
data.tail(5)

# 数据可视化
from PipelineTS.plot import plot_data_period
plot_data_period(
    data.iloc[-300:, :], 
    valid_data, 
    time_col=time_col, 
    target_col=target_col, 
    labels=['Train data', 'Valid_data']
)

image1

单个模型的训练和预测

from PipelineTS.nn_model import TiDEModel
tide = TiDEModel(
    time_col=time_col, target_col=target_col, lags=lags, random_state=42, 
    quantile=0.9, enable_progress_bar=False, enable_model_summary=False
)
tide.fit(data)
tide.predict(n)

PipelineTS 模块

# 如果需要配置模型
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from PipelineTS.pipeline import ModelPipeline, PipelineConfigs

# list all models
print(ModelPipeline.list_models())

# 第一个为模型的名称,需要在ModelPipeline.list_models()列表中,第二个为dict类型
# dict可以有三个key: 'init_configs', 'fit_configs', 'predict_configs',也可以只包含其中任意一个或几个,缺失的会自动补全为默认参数
# 其中init_configs为模型初始化参数,fit_configs为模型训练时参数,predict_configs为模型预测时参数
pipeline_configs = PipelineConfigs([
    ('lightgbm', {'init_configs': {'verbose': -1, 'linear_tree': True}}),
    ('multi_output_model', {'init_configs': {'verbose': -1}}),
    ('multi_step_model', {'init_configs': {'verbose': -1}}),
    ('multi_output_model', {
        'init_configs': {'estimator': XGBRegressor, 'random_state': 42, 'kwargs': {'verbosity': 0}}
    }
     ),
    ('multi_output_model', {
        'init_configs': {'estimator': CatBoostRegressor, 'random_state': 42, 'verbose': False}
    }
     ),
])
|   | model_name | model_name_with_index | model_configs |
|---|------------|-----------------------|---------------|
| 0 | lightgbm | lightgbm_1 | {'init_configs': {'verbose': -1, 'linear_tree': True}, 'fit_configs': {}, 'predict_configs': {}} |
| 1 | multi_output_model | multi_output_model_1 | {'init_configs': {'verbose': -1}, 'fit_configs': {}, 'predict_configs': {}} |
| 2 | multi_output_model | multi_output_model_2 | {'init_configs': {'estimator': <class 'xgboost.sklearn.XGBRegressor'>, 'random_state': 42, 'kwargs': {'verbosity': 0}}, 'fit_configs': {}, 'predict_configs': {}} |
| 3 | multi_output_model | multi_output_model_3 | {'init_configs': {'estimator': <class 'catboost.core.CatBoostRegressor'>, 'random_state': 42, 'verbose': False}, 'fit_configs': {}, 'predict_configs': {}} |
| 4 | multi_step_model | multi_step_model_1 | {'init_configs': {'verbose': -1}, 'fit_configs': {}, 'predict_configs': {}} |

from sklearn.metrics import mean_absolute_error

from PipelineTS.pipeline import ModelPipeline

pipeline = ModelPipeline(
    time_col=time_col,
    target_col=target_col,
    lags=lags,
    random_state=42,
    metric=mean_absolute_error,
    metric_less_is_better=True,
    configs=pipeline_configs,
    include_init_config_model=False,
    use_standard_scale=False,
    with_quantile_prediction=True,  # turn on the quantile prediction switch, if you like
    device=device,
    # models=['wide_gbrt']  # 支持指定模型
)

pipeline.fit(data, valid_data)

获取PipelineTS中的模型参数

# Get all configurations of the specified model (defaults to the best model)
pipeline.get_models().all_configs

绘制预测结果

# use the best model to predict the next n (here 30) data points
prediction = pipeline.predict(n, model_name=None)  # 可以使用model_name指定pipeline中已训练好的模型

plot_data_period(init_data.iloc[-100:, :], prediction, 
                 time_col=time_col, target_col=target_col)

image1

Project details


Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

PipelineTS-0.3.1.tar.gz (25.8 kB view hashes)

Uploaded Source

Built Distribution

PipelineTS-0.3.1-py3-none-any.whl (42.5 kB view hashes)

Uploaded Python 3

Supported by

AWS AWS Cloud computing and Security Sponsor Datadog Datadog Monitoring Fastly Fastly CDN Google Google Download Analytics Microsoft Microsoft PSF Sponsor Pingdom Pingdom Monitoring Sentry Sentry Error logging StatusPage StatusPage Status page