Sep-06-2022, 09:37 PM
# Create a pipeline that standardizes (prepares) the data then evaluates a model
import pandas as pd
import numpy as np  # not used in this section; kept in case other code relies on it
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# load the data; the column labels are supplied explicitly through names=
filename = 'pima-indians-diabetes.data.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = pd.read_csv(filename, names=names)
array = dataframe.values
# the first eight columns are the model inputs, the ninth ('class') is the target
X = array[:, 0:8]
y = array[:, 8]

# create pipeline: scaling is a pipeline step, so it is re-fitted together
# with the classifier every time the pipeline is fitted during evaluation
estimators = [
    ('standardize', StandardScaler()),
    ('lda', LinearDiscriminantAnalysis()),
]
model = Pipeline(estimators)

# evaluate the model with shuffled 10-fold cross-validation; the fixed seed
# makes the fold assignment reproducible between runs
seed = 7
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(model, X, y, cv=kfold)
# report the mean score across the folds
print(results.mean())