Pythonライブラリsklearnによる機械学習入門:有名手法をまとめて紹介編

機械学習のライブラリにsklearnというものがあるそうですが、
具体的にどんなことができるのでしょうか?

このような疑問にお答えします。

sklearnによる機械学習の利用
出力結果

sklearnによる機械学習の利用

sklearn(正式名称はscikit-learnで、インストール時のパッケージ名もscikit-learnです)は、様々な機械学習手法を簡単に使用することができるPythonライブラリです。
今回は、sklearnを利用した機械学習入門として下記の手法を利用したいと思います。

Logistic Regression
Support Vector Machines
k-Nearest Neighbors
Naive Bayes classifier
Perceptron
Linear SVC
Decision Tree
Random Forest

以下のライブラリを使用します。

pandas
numpy
sklearn

ライブラリがない場合は、以下のコマンド等でインストールします。

pip install pandas
pip install numpy
pip install scikit-learn

pip install pandas

pip install numpy

pip install scikit-learn

環境としては、Anaconda Prompt経由でJupyter Notebookを使用します。データには、機械学習の分野で入門用のデータセットとして頻繁に使用されるirisデータを使用します。

Anaconda環境の導入方法は、以下の記事を参照ください。

以下に、各機械学習手法を簡単に利用するソースコードを載せます。

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn import datasets
import pandas as pd

def fn_start_learning(*, test_size=0.2, random_state=None):
    """Train several scikit-learn classifiers on iris and print a report for each.

    The iris data is split once into a training and a test set. Every
    classifier is fitted on the same training set and evaluated on the same
    test set, and a ``classification_report`` is printed per classifier.

    Args:
        test_size: Forwarded to ``train_test_split``. Defaults to 0.2, the
            value that was previously hard-coded.
        random_state: Forwarded to ``train_test_split`` only. The default
            ``None`` keeps the original unseeded split; pass an int to get
            the same split on every run. The estimators themselves are not
            seeded by this argument.

    Returns:
        None. All results are written to stdout.
    """
    iris = datasets.load_iris()
    df = pd.DataFrame(iris.data, columns=iris.feature_names)

    y_label = iris.target.flatten()

    x_train, x_test, y_train, y_test = train_test_split(
        df, y_label, test_size=test_size, random_state=random_state
    )

    # (printed heading, unfitted estimator) pairs, in the order they are
    # reported. Adding a classifier to the comparison is a one-line change.
    classifiers = (
        ('[Logistic Regression]', LogisticRegression()),
        ('[Support Vector Machines]', SVC()),
        ('[k-Nearest Neighbors]', KNeighborsClassifier(n_neighbors=3)),
        ('[Naive Bayes classifier]', GaussianNB()),
        ('[Perceptron]', Perceptron()),
        ('[Linear SVC]', LinearSVC()),
        ('[Decision Tree]', DecisionTreeClassifier()),
        ('[Random Forest]', RandomForestClassifier(n_estimators=100)),
    )

    # Identical fit -> predict -> report sequence for every estimator.
    for heading, model in classifiers:
        model.fit(x_train, y_train)
        y_pred = model.predict(x_test)
        print(heading)
        print(classification_report(y_test, y_pred))

# Run the comparison only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    fn_start_learning()

from sklearn.linear_model import LogisticRegression

from sklearn.svm import SVC, LinearSVC

from sklearn.ensemble import RandomForestClassifier

from sklearn.neighbors import KNeighborsClassifier

from sklearn.naive_bayes import GaussianNB

from sklearn.linear_model import Perceptron

from sklearn.linear_model import SGDClassifier

from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import train_test_split

from sklearn.metrics import classification_report

from sklearn import datasets

import pandas as pd

def fn_start_learning():

iris = datasets.load_iris()

df = pd.DataFrame(iris.data, columns=iris.feature_names)

y_label = iris.target.flatten()

x_train, x_test, y_train, y_test = train_test_split(df, y_label, test_size=0.2)

# Logistic Regression

logreg = LogisticRegression()

logreg.fit(x_train, y_train)

y_pred = logreg.predict(x_test)

print('[Logistic Regression]')