Giter Site home page Giter Site logo

muskanmahajan37 / predictive-maintenance-for-air-quality Goto Github PK

View Code? Open in Web Editor NEW

This project forked from ashishpatel26/predictive-maintenance-for-air-quality

0.0 0.0 0.0 16.01 MB

Air Quality Index: predict whether the weather is good or bad

Jupyter Notebook 99.99% Python 0.01%

predictive-maintenance-for-air-quality's Introduction

Predictive Maintenance of Air Quality Data

# Load libraries
import pandas
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
# Load dataset: sensor readings and quality labels are stored in separate CSV files.
sensor_file = "./data/sensor_data.csv"
quality_file = "./data/quality_control_data.csv"
# Read the sensor file first, then the quality file, each into its own DataFrame.
sensor_data, quality_data = map(pandas.read_csv, (sensor_file, quality_file))
# Preview the first ten sensor rows.
sensor_data.head(10)
weight humidity temperature prod_id
0 1030.871118 29.687881 71.995808 1
1 1044.961148 28.862453 68.468664 2
2 972.710479 37.951588 65.121344 3
3 1010.182509 25.076383 67.821336 4
4 970.039236 27.137886 72.931800 5
5 990.154359 32.422428 71.406207 6
6 965.660243 42.603619 65.876158 7
7 969.221212 31.655071 74.430054 8
8 976.495532 26.499721 69.866121 9
9 974.993517 38.644055 69.891709 10
quality_data.head(10)
prod_id quality
0 1 good
1 2 good
2 3 good
3 4 good
4 5 good
5 6 good
6 7 poor
7 8 good
8 9 good
9 10 good
# Join every sensor reading to its quality label through the shared prod_id column.
rawdataset = pandas.merge(sensor_data, quality_data, on="prod_id")
# Preview the first five merged rows (five is head()'s default).
rawdataset.head()
weight humidity temperature prod_id quality
0 1030.871118 29.687881 71.995808 1 good
1 1044.961148 28.862453 68.468664 2 good
2 972.710479 37.951588 65.121344 3 good
3 1010.182509 25.076383 67.821336 4 good
4 970.039236 27.137886 72.931800 5 good
# Drop the prod_id join key; what remains is the three measurements plus the quality label.
dataset = rawdataset.drop('prod_id', axis=1)
dataset.head(10)
weight humidity temperature quality
0 1030.871118 29.687881 71.995808 good
1 1044.961148 28.862453 68.468664 good
2 972.710479 37.951588 65.121344 good
3 1010.182509 25.076383 67.821336 good
4 970.039236 27.137886 72.931800 good
5 990.154359 32.422428 71.406207 good
6 965.660243 42.603619 65.876158 poor
7 969.221212 31.655071 74.430054 good
8 976.495532 26.499721 69.866121 good
9 974.993517 38.644055 69.891709 good
# shape: (number of rows, number of columns) of the merged dataset
print(dataset.shape)
(3000, 4)
# descriptions: summary statistics (count, mean, std, min, quartiles, max)
# for each numeric column
print(dataset.describe())
            weight     humidity  temperature
count  3000.000000  3000.000000  3000.000000
mean    999.940363    34.863581    69.962969
std      28.765904     5.755869     2.857898
min     950.017007    25.008023    65.000514
25%     975.552942    29.783650    67.522238
50%     998.875197    34.825848    69.890808
75%    1025.649219    39.887405    72.414522
max    1049.954013    44.986735    74.999312
# quality distribution: number of rows for each quality label
print(dataset.groupby('quality').size())
quality
good    2907
poor      93
dtype: int64
# box and whisker plots to show data distribution:
# one subplot per numeric column on a 2x2 grid, with axes not shared between subplots
dataset.plot(kind='box', subplots=True, layout=(2,2), sharex=False, sharey=False)
plt.show()

png

# check the histograms: one histogram per numeric column
dataset.hist()
plt.show()

png

# scatter plot matrix: pairwise scatter plots of the numeric columns,
# used to look for visible relationships between the measurements
scatter_matrix(dataset)
plt.show()

png

# Split-out validation dataset
# Select the feature columns by name so X is a float array. Slicing
# dataset.values instead yields an object-dtype array, because the
# quality column holds strings and forces a common object dtype.
feature_columns = ['weight', 'humidity', 'temperature']
X = dataset[feature_columns].values
Y = dataset['quality'].values
# Kept so that any later cell still reading `array` keeps working.
array = dataset.values
validation_size = 0.20
seed = 8
# Hold out 20% of the rows for the final validation; the seed fixes the shuffle.
X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(
    X, Y, test_size=validation_size, random_state=seed)
# Test options and evaluation metric
seed = 7
scoring = 'accuracy'
# Spot Check Algorithms: (short label, unfitted estimator) pairs, in evaluation order
models = [
    ('LR', LogisticRegression(solver='lbfgs')),
    ('LDA', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier()),
    ('NB', GaussianNB()),
    ('SVM', SVC(gamma='auto')),
]
# evaluate each model in turn with 10-fold cross-validation on the training split
results = []
names = []
# Build the splitter once, since it does not depend on the model.
# shuffle=True is required for random_state to have any effect; recent
# scikit-learn releases raise a ValueError when random_state is set while
# shuffle is left at its default of False. With an integer seed the folds
# are identical for every model.
kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
for name, model in models:
    cv_results = model_selection.cross_val_score(model, X_train, Y_train, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    # Report the mean fold score with its standard deviation in parentheses.
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)
LR: 0.976667 (0.008375)
LDA: 0.973750 (0.007229)
KNN: 0.992083 (0.005417)
CART: 0.998750 (0.002668)
NB: 0.994167 (0.003333)
SVM: 0.985417 (0.005966)
# Compare Algorithms: one box per model, drawn from its per-fold CV scores
fig, ax = plt.subplots()
fig.suptitle('Comparison of ML Models')
ax.boxplot(results)
# Label each box with the model's short name.
ax.set_xticklabels(names)
plt.show()

png

# Make predictions on validation dataset
# Train a fresh decision tree on the training split (fit() returns the estimator itself).
CART = DecisionTreeClassifier().fit(X_train, Y_train)
predictions = CART.predict(X_validation)
# Print accuracy, then the confusion matrix, then the per-class report.
for metric in (accuracy_score, confusion_matrix, classification_report):
    print(metric(Y_validation, predictions))
0.9983333333333333
[[581   0]
 [  1  18]]
              precision    recall  f1-score   support

        good       1.00      1.00      1.00       581
        poor       1.00      0.95      0.97        19

    accuracy                           1.00       600
   macro avg       1.00      0.97      0.99       600
weighted avg       1.00      1.00      1.00       600

Now test some values of your own:

# Classify one hand-picked sample (weight, humidity, temperature) with the fitted tree.
# NOTE(review): according to the describe() output recorded earlier in this document,
# the data spans weight ~950-1050 and humidity ~25-45, so weight 1200 and humidity 60
# fall outside the observed range; treat the resulting label with caution.
testWeight = 1200
testHumidity = 60
testTemperature = 65
# predict() expects a 2-D input, hence the nested list holding a single row.
testPrediction = CART.predict([[testWeight,testHumidity,testTemperature]])
print(testPrediction)
['good']

predictive-maintenance-for-air-quality's People

Contributors

sw-ot-ashishpatel avatar

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Something interesting about the web. A new door to the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Something interesting about games that makes everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google โค๏ธ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.