1 回归:锂电池温度预测top2 代码部分(一) Tabnet

慈云数据 2024-04-23 技术支持 46 0

2024 iFLYTEK A.I.开发者大赛-讯飞开放平台

TabNet 模型是我在这个比赛中的一个意外收获,它在比赛中是可用的,但需要 GPU 资源,否则运行实在太慢。后面我会针对这个模型写出具体的使用方法和策略。

        比赛结束后有与其他两位选手聊天,他们都是对数据做了很多分析,有的甚至直接使用Lasso就work了,效果还挺不错的。特征工程无敌呀。        

        整个代码部分,了解一下有关特征工程的部分就行了,模型部分可以慢慢消化,当作一个新的知识点来学习吧。

直接上代码

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import KFold
from pytorch_tabnet.metrics import Metric
from pytorch_tabnet.tab_model import TabNetRegressor
import torch
from torch.optim import Adam, SGD
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingWarmRestarts
from sklearn.metrics import mean_absolute_error
import traceback
import warnings
# Silence every warning category for the rest of the script.
warnings.filterwarnings("ignore")
# Use a font that contains CJK glyphs so Chinese labels render in plots.
plt.rcParams['font.sans-serif'] = ['PingFang HK']
# Draw the minus sign as an ASCII hyphen so it is not garbled under the CJK
# font. The rcParams key is all lowercase; "axes.Unicode_minus" is rejected
# by matplotlib as an unknown parameter.
plt.rcParams["axes.unicode_minus"] = False
# Fully qualified option name: the bare 'precision' shorthand is resolved by
# pattern matching and is not reliably accepted across pandas versions.
pd.set_option('display.precision', 10)
# Print every row of a frame instead of truncating the output.
pd.set_option('display.max_rows', None)
# Time-parsing helper
def parse_date(train_df=None):
    """Add calendar features derived from the '时间' column, in place.

    The '时间' column is parsed with ``pd.to_datetime`` and stored as
    'datetime'. The following columns are then added, in this order:
    'timestamp', 'year', 'month', 'day', 'hour', 'minute', 'dayofweek'.

    'timestamp' is the int64 view of 'datetime' divided by 10,000,000
    (presumably nanoseconds scaled to 10 ms units -- confirm the datetime
    resolution of the installed pandas).

    Args:
        train_df: DataFrame containing a '时间' column. Despite the ``None``
            default, a frame must be supplied.

    Returns:
        The same DataFrame object that was passed in, with the new columns.
    """
    moments = pd.to_datetime(train_df['时间'])
    train_df['datetime'] = moments
    train_df['timestamp'] = moments.astype('int64') / 10000000
    # Each name is both the ``.dt`` attribute read and the column written.
    for part in ('year', 'month', 'day', 'hour', 'minute', 'dayofweek'):
        train_df[part] = getattr(moments.dt, part)
    return train_df
def same_position_tempture_resid(train_df, index=[]):
    """Add residual and set-point-gap features for lower-temperature zones.

    For every zone number ``i`` in ``index`` three columns are written to
    ``train_df`` in place:

    * '下部温度{i}_resid': '下部温度{i}' minus '下部温度设定{i}'.
    * '下部温度{i}_dist_4': '下部温度设定4' minus '下部温度设定{i}'.
    * '下部温度{i}_dist_4_moth_100': 1 where the gap above is >= 99, else 0.

    Args:
        train_df: DataFrame holding the columns named above.
        index: Iterable of zone numbers; the default adds nothing.

    Returns:
        The same DataFrame object that was passed in.
    """
    reference_setpoint = '下部温度设定4'
    for zone in index:
        measured = f'下部温度{zone}'
        setpoint = f'下部温度设定{zone}'
        gap_col = f'{measured}_dist_4'
        train_df[f'{measured}_resid'] = train_df[measured] - train_df[setpoint]
        train_df[gap_col] = train_df[reference_setpoint] - train_df[setpoint]
        # Multiplying the boolean mask by 1 turns it into a 0/1 indicator.
        train_df[f'{gap_col}_moth_100'] = (train_df[gap_col] >= 99) * 1
    return train_df
# Load the three CSV files from ../data (train, test and the file named submit.csv).
df_train = pd.read_csv("../data/train.csv")
df_test = pd.read_csv("../data/test.csv")
submit = pd.read_csv("../data/submit.csv")

# Training frame: add the date features, order chronologically, renumber rows.
df_train = parse_date(df_train).sort_values("datetime").reset_index(drop=True)
df_train['train'] = 1  # marks rows that come from the training file
# Hand-patch a single value at row label 1057 of the sorted frame.
# NOTE(review): presumably a bad reading in the raw data -- confirm.
df_train.loc[1057, '下部温度9'] = 829

# Test frame: same preparation, flagged with train == 0.
df_test = parse_date(df_test).sort_values("datetime").reset_index(drop=True)
df_test['train'] = 0
# Column groups, selected by substring match on the training-frame column names.
flow_cols = [name for name in df_train.columns if "流量" in name]
up_temp_sets = [name for name in df_train.columns if "上部温度设定" in name]
down_temp_sets = [name for name in df_train.columns if "下部温度设定" in name]
# Upper/lower temperature columns that are not set-point columns.
up_tempture = [name for name in df_train.columns
               if "上部温度" in name and "上部温度设定" not in name]
down_tempture = [name for name in df_train.columns
                 if "下部温度" in name and "下部温度设定" not in name]
# train_df.columns.tolist()
import re
# Temperature columns passed to get_same_temp for the test frame.
# The order is significant: it is the order in which columns are written.
small_cols = (
    ['下部温度5']
    + [f'上部温度{zone}' for zone in range(8, 18)]
    + [f'下部温度{zone}' for zone in (3, 4, *range(6, 18))]
    + [f'上部温度{zone}' for zone in range(1, 8)]
    + ['下部温度1', '下部温度2']
)
def get_same_temp(test_df, cols):
    """Overwrite temperature columns with their matching set-point columns.

    For each name in ``cols`` the first run of digits is taken as the zone
    number. A name containing '上部温度' is assigned the values of
    '上部温度设定{zone}'; a name containing '下部温度' is assigned the values
    of '下部温度设定{zone}'. Names matching neither are left untouched.
    The frame is modified in place; columns that do not exist are created.

    Args:
        test_df: DataFrame containing the referenced set-point columns.
        cols: Iterable of column names, each containing at least one digit.

    Returns:
        The same DataFrame object that was passed in.

    Raises:
        IndexError: If a name in ``cols`` contains no digits.
        KeyError: If the matching set-point column is missing.
    """
    for col in cols:
        # Raw string: "\d" in a plain literal is an invalid escape sequence
        # and triggers a SyntaxWarning on recent Python versions.
        zone = re.findall(r"\d+", col)[0]
        if "上部温度" in col:
            print(zone, col)
            test_df[col] = test_df[f'上部温度设定{zone}']
        elif "下部温度" in col:
            test_df[col] = test_df[f'下部温度设定{zone}']
    return test_df
# Fill the test frame's temperature columns from the corresponding set points.
df_test = get_same_temp(df_test, small_cols)

# Stack train and test rows, order them by calendar fields, renumber rows.
df = (
    pd.concat([df_train, df_test])
    .sort_values(['year', 'month', 'day', 'hour', "minute"])
    .reset_index(drop=True)
)

# Label column names, in the order used by the script.
down_label = [f'下部温度{zone}' for zone in (1, 2, 3)]
up_label = [f'上部温度{zone}' for zone in (7, 1, 2, 3, 4, 5, 6)]
# Calendar columns produced by parse_date.
cat_cols = ['year', 'month', 'day', 'hour', 'minute', 'dayofweek']
# Snapshot of the test-frame columns after get_same_temp has run.
keep_cols = df_test.columns.tolist()
def resid_model(y, y_pred, outlier_threshold=10000):
    """Print large-residual row labels and show three diagnostic plots.

    The figure has three panels: ``y`` against ``y_pred`` (titled with their
    correlation), ``y`` against the residual (titled with the residual
    standard deviation), and a 50-bin histogram of the relative error
    ``|resid| / (y + 0.01)``. The histogram title reports how many samples
    have an absolute residual above ``outlier_threshold``.

    Args:
        y: pandas Series of true values.
        y_pred: Predictions. They are wrapped in a Series on ``y``'s index,
            so a Series with a different index is re-aligned to ``y``.
        outlier_threshold: Absolute-residual cut-off used for the printed
            index and the reported count. Defaults to 10000.

    Returns:
        None. The outlier index is printed and the figure is shown.
    """
    y_pred = pd.Series(y_pred, index=y.index)
    resid = y - y_pred
    std_resid = resid.std()
    # Relative error; the 0.01 offset avoids division by zero when y == 0.
    z = resid.abs() / (y + 0.01)

    outlier_mask = resid.abs() > outlier_threshold
    n_outliers = int(outlier_mask.sum())
    outliers = y[outlier_mask].index
    print(outliers)

    plt.figure(figsize=(15, 5))

    plt.subplot(1, 3, 1)
    plt.plot(y, y_pred, '.')
    plt.xlabel('y')
    plt.ylabel('y_pred')
    plt.title('corr = {:.3f}'.format(np.corrcoef(y, y_pred)[0][1]))

    plt.subplot(1, 3, 2)
    plt.plot(y, resid, '.')
    plt.xlabel('y')
    plt.ylabel('y - y_pred')
    plt.title('std resid = {:.3f}'.format(std_resid))

    ax_hist = plt.subplot(1, 3, 3)
    z.plot.hist(bins=50, ax=ax_hist)
    plt.xlabel('z')
    # The title states the criterion that is actually counted; the previous
    # text claimed "z>3" although the count is based on the absolute residual.
    plt.title('{:.0f} samples with |resid| > {:g}'.format(n_outliers, outlier_threshold))
    plt.show()
def get_down_tempture_sets_resid(df, diffed_col="下部温度设定4",
                                 diff_col='下部温度设定1'):
    """Add difference/ratio features between set-point columns, in place.

    The side ('上部' or '下部') is chosen by whether ``diff_col`` contains
    '上部'. Two pairs of columns are processed: the fixed pair
    ('{side}温度设定4', '{side}温度设定1') and the caller-supplied pair
    (``diffed_col``, ``diff_col``). For each pair a '{a}_diff_{b}' column
    (a - b) and a '{a}_div_{b}' column (a / b) are written. The write order
    differs between the two sides and is kept as is. Finally 'flag' is set to
    1 where the fixed-pair difference exceeds 300, else 0.

    Args:
        df: DataFrame holding the referenced set-point columns.
        diffed_col: Column used as minuend / numerator of the custom pair.
        diff_col: Column used as subtrahend / denominator of the custom pair.

    Returns:
        The same DataFrame object that was passed in.
    """
    upper = "上部" in diff_col
    side = "上部" if upper else "下部"
    fixed_pair = (f'{side}温度设定4', f'{side}温度设定1')
    custom_pair = (diffed_col, diff_col)

    if upper:
        print(f"----- {diff_col}_diff_{diffed_col}")
        plan = [("diff", fixed_pair), ("diff", custom_pair),
                ("div", fixed_pair), ("div", custom_pair)]
    else:
        plan = [("diff", fixed_pair), ("div", fixed_pair),
                ("diff", custom_pair), ("div", custom_pair)]

    for op, (left, right) in plan:
        if op == "diff":
            values = df[left] - df[right]
        else:
            values = df[left] / df[right]
        df[f'{left}_{op}_{right}'] = values

    fixed_diff_col = f'{fixed_pair[0]}_diff_{fixed_pair[1]}'
    # Multiplying the boolean mask by 1 turns it into a 0/1 indicator.
    df['flag'] = (df[fixed_diff_col] > 300) * 1
    return df
def get_same_type_tempure(row, df_train, label, woindows):
    try:
        heads = woindows
        train_flag = int(row['train'])
        hour = row['hour']
        minute = row['minute']
        timesamp = row['timestamp']
        flag = row['flag']
        nums = re.findall("\d+", label)
        num = int(nums[0])
        chars = re.findall("(\w+)(\d+)", label)[0][0]
        label_map_set_col = f"{chars}设定{num}"
        set_temps = row[label_map_set_col]
        # (df_train[label_map_set_col]==set_temps)&
        df_temp_ = df_train[
            (df_train[label_map_set_col] == set_temps) & (df_train['flag'] == flag) & (df_train['hour'] == hour) & (
                        df_train['timestamp']  
微信扫一扫加客服

微信扫一扫加客服

点击启动AI问答
Draggable Icon