#coding:gbk """ 利用决策树算法进行分类 作者:梁鹤逸 日期:2019.12.14 """
import pandas as pd import numpy as np import matplotlib.pyplot as plt import matplotlib.cm as cm import seaborn as sb #%matplotlib inline
# Load the wine data set. The path is a raw string so every backslash stays
# literal; in a normal string literal "\f" is a form feed, which silently
# corrupts the "\frenchwine.csv" part of the path.
df = pd.read_csv(r'G:\zy\python‘s程序\frenchwine.csv')
df.columns = ['species', 'alcohol', 'malic_acid', 'ash', 'alcalinity ash', 'magnesium']

# Summary statistics of the columns.
print(df.describe())


def scatter_plot_by_category(feat, x, y):
    """Scatter-plot column ``y`` against column ``x``, one colour per group.

    Rows of the module-level ``df`` are grouped by the ``feat`` column and
    each group is drawn with its own colour taken from the rainbow colormap.
    The points are drawn through ``plt.scatter``; the caller is responsible
    for creating the figure and for calling ``plt.show()``.

    Args:
        feat: Name of the column in ``df`` to group by.
        x: Name of the column plotted on the horizontal axis.
        y: Name of the column plotted on the vertical axis.
    """
    alpha = 0.5
    groups = df.groupby(feat)
    # One evenly spaced colormap sample per group.
    colors = cm.rainbow(np.linspace(0, 1, len(groups)))
    # Iterating a groupby yields (key, sub-frame) pairs; only the frame is used.
    for (_, group), color in zip(groups, colors):
        plt.scatter(group[x], group[y], color=color, alpha=alpha)
# Scatter plot of alcohol against ash, coloured by species, drawn in the
# first cell of a 1x3 subplot grid.
plt.figure(figsize=(20, 5))
plt.subplot(1, 3, 1)
scatter_plot_by_category('species', 'alcohol', 'ash')
# Label the axes that were just drawn on and give them a title.
plt.gca().set(xlabel='alcohol', ylabel='ash', title='species')
plt.show()
# Violin plot of every feature column, grouped by species, on a 2x3 grid.
plt.figure(figsize=(20, 10))
feature_columns = [column for column in df.columns if column != 'species']
# Number the panels over the feature columns only, starting at 1, so the
# skipped 'species' column does not leave the first grid cell empty.
for position, column in enumerate(feature_columns, start=1):
    plt.subplot(2, 3, position)
    sb.violinplot(x='species', y=column, data=df)
plt.show()
# train_test_split divides the data into a training set and a test set.
from sklearn.model_selection import train_test_split

# Feature matrix (five measurement columns) and label vector (species).
all_inputs = df[['alcohol', 'malic_acid', 'ash', 'alcalinity ash', 'magnesium']].values
all_species = df['species'].values

# 85% of the rows go to the training set; random_state fixes the shuffle so
# the split is the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    all_inputs,
    all_species,
    train_size=0.85,
    random_state=1,
)
# DecisionTreeClassifier is used to build the decision tree.
from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree with default settings on the training split.
# NOTE(review): no random_state is passed here, so the fitted tree (and the
# score below) may differ between runs - confirm whether that is intended.
decision_tree_classifier = DecisionTreeClassifier()
model = decision_tree_classifier.fit(X_train, Y_train)

# Score of the fitted tree on the held-out test split.
print(decision_tree_classifier.score(X_test, Y_test))

# Use the first three test samples as model input and print the predictions.
print(X_test[0:3])
print(model.predict(X_test[0:3]))

# Printed directly instead of from a one-pass `for all_species in range(1)`
# loop, so the `all_species` label array is not overwritten with 0.
# Presumably these are the species names matching the predicted labels.
print('仙粉,黛西拉,赤霞珠')