Neural Network(Deep Learning)

2018. 3. 17. 02:15

#!/usr/bin/env python3

신경망이라 알려진 algorithm은 최근 deep learning이란 이름으로 불림

간단하게 Classifier와 Regression에 쓸 수 있는 multilayer perceptrons, MLP

multilayer perceptrons은 feed-forward 신경망, 또는 그냥 신경망이라고도 함

여러 개의 가중치 합을 연달아 계산하는 것은 수학적으로 보면 하나의 가중치 합을 계산하는 것과 같음(그래서 선형 모델보다 강력해지려면 비선형 함수가 필요함)

각 은닉 유닛의 가중치 합을 계산한 후 그 결과에 비선형 함수인 rectified linear unit, ReLU나

hyperbolic tangent, tanh를 적용하고, 그 결과에 다시 가중치(𝘸)를 곱해 합산하여 ŷ를 만듦

ReLU: 0 이하의 값을 잘라버림(0으로 만듦)

tanh: x가 -∞로 갈수록 -1에, x가 +∞로 갈수록 +1에 수렴

비선형 함수를 이용해 신경망이 선형 모델에서보다 훨씬 더 복잡한 함수를 학습 가능

1. ReLU 함수와 tanh 함수 살펴보기

# library import

import numpy as np

import matplotlib.pyplot as plt

import matplotlib

# Configure matplotlib: use the AppleGothic font for Korean text output and
# set the axes minus-sign option so negative tick labels display properly.
# NOTE(review): AppleGothic may not exist on every platform - confirm locally.
matplotlib.rc('font', family='AppleGothic')
plt.rcParams['axes.unicode_minus'] = False

# 100 evenly spaced sample points from -3 to 3.
line = np.linspace(-3, 3, num=100)

# Evaluate both activation functions up front, then draw them.
tanh_values = np.tanh(line)
relu_values = np.maximum(line, 0)  # ReLU: everything below 0 becomes 0

plt.plot(line, tanh_values, linestyle='--', label='tanh')
plt.plot(line, relu_values, label='relu')

plt.xlabel('x')
plt.ylabel('tanh(x), relu(x)')
plt.legend(loc=2)  # loc=2 places the legend in the upper-left corner
plt.show()

tanh(x), relu(x) 함수

2. Two moons Machine Learning(MLP, Multilayer perceptrons)

# library load

from sklearn.neural_network import MLPClassifier

from sklearn.datasets import make_moons

from sklearn.model_selection import train_test_split

import mglearn

# Build the two-moons toy dataset: 100 samples, noise 0.25, fixed seed.
x, y = make_moons(n_samples=100, noise=0.25, random_state=3)

# Split into train and test sets, stratified on the class labels.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, stratify=y, random_state=42
)

# Create the multilayer perceptron and train it on the training split.
mlp = MLPClassifier(
    # One hidden layer of 100 units; e.g. [10, 10] would mean two hidden
    # layers of 10 units each.
    hidden_layer_sizes=[100],
    activation='relu',
    solver='lbfgs',
    random_state=42,
)
mlp.fit(x_train, y_train)

# Draw the filled decision regions first, then the training points on top.
mglearn.plots.plot_2d_separator(mlp, x_train, fill=True, alpha=0.3)
mglearn.discrete_scatter(x_train[:, 0], x_train[:, 1], y_train)

plt.xlabel('feature 0')
plt.ylabel('feature 1')
plt.show()

은닉 유닛이 100개인 neural network로 학습한 two_moons 데이터셋의 결정 경계

neural_network는 매우 비 선형적이지만 비교적 매끄러운 decision boundary

3. Regularization 과 Visualization

Ridge Regression과 Linear Classifier에서처럼 L2 penalty(계수를 0에 가깝게 만듦)를 사용하며, alpha 매개변수로 모델의 복잡도를 제어

기본값은 0.0001

# Visualize how the decision boundary changes with the hidden-layer width
# (one subplot row per width) and the L2 penalty strength alpha (one
# subplot column per alpha) on a 2x4 grid.
fig, axes = plt.subplots(2, 4)

n_hidden_nodes = [10, 100]          # units per hidden layer, one per row
alpha_set = [0.0001, 0.01, 0.1, 1]  # regularization strengths, one per column

# zip(axes, ...) walks the grid row by row; the inner zip walks the four
# subplots of that row together with the alpha values.
for axe, n_node in zip(axes, n_hidden_nodes):
    for ax, alpha in zip(axe, alpha_set):
        mlp = MLPClassifier(
            solver='lbfgs',
            random_state=42,
            hidden_layer_sizes=[n_node, n_node],  # two hidden layers of equal width
            alpha=alpha,  # larger alpha = stronger regularization
        )
        mlp.fit(x_train, y_train)

        # Filled decision regions; here alpha=0.3 is the fill transparency,
        # not the regularization strength.
        mglearn.plots.plot_2d_separator(
            mlp, x_train, fill=True, alpha=0.3, ax=ax
        )
        mglearn.discrete_scatter(
            x_train[:, 0], x_train[:, 1], y_train, ax=ax
        )
        ax.set_title('n_hidden=[{}, {}]\nalpha={}'.format(
            n_node, n_node, alpha
        ))

# Show the whole grid once, after every subplot has been drawn.
plt.show()

은닉 유닛과 alpha에 따라 변하는 decision boundary

4. Breast Cancer Dataset Machine Learning(MLPClassifier)

# load library

from sklearn.datasets import load_breast_cancer

from sklearn.neural_network import MLPClassifier

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt

import matplotlib

# Configure matplotlib: use the AppleGothic font for Korean text output and
# set the axes minus-sign option so negative tick labels display properly.
# NOTE(review): AppleGothic may not exist on every platform - confirm locally.
matplotlib.rc('font', family='AppleGothic')
plt.rcParams['axes.unicode_minus'] = False

# Load the breast cancer dataset.
cancer = load_breast_cancer()

# Hold out 30% of the samples as the test set (fixed seed).
x_train, x_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, random_state=0, test_size=0.3
)

# Box plot of every feature to compare their value ranges.
# manage_ticks=False keeps boxplot from adjusting the x tick locations and
# labels. The keyword was called manage_xticks in older matplotlib; it was
# renamed in matplotlib 3.1 and current releases reject the old name.
plt.boxplot(x_train, manage_ticks=False)
plt.yscale('symlog')  # symmetric log scale on the y axis
plt.xlabel('feature list')
plt.ylabel('feature')
plt.show()

breast cancer 데이터셋의 특성 값 범위(y축은 logscale)

# Pre-processing: min-max scale each feature using statistics taken from the
# training set only, so the test set is mapped with the same transformation.
# axis=0 reduces over the rows and yields one value per column (feature).
train_min = x_train.min(axis=0)
train_range = (x_train - train_min).max(axis=0)  # max after removing the min

x_train_scaled = (x_train - train_min) / train_range
x_test_scaled = (x_test - train_min) / train_range  # scaled with the train range

# After scaling, every training feature should span 0 to 1.
print('x_train_scaled min \n{}'.format(x_train_scaled.min(axis=0)))
print('x_train_scaled.max \n{}'.format(x_train_scaled.max(axis=0)))

# Create the model and train it on the scaled data.
mlp = MLPClassifier(
    solver='lbfgs',
    random_state=0,
    hidden_layer_sizes=[100],  # one hidden layer of 100 units
    alpha=0.001,               # L2 regularization strength
)
mlp.fit(x_train_scaled, y_train)

# Score each split once and reuse the value when printing.
train_accuracy = mlp.score(x_train_scaled, y_train)
test_accuracy = mlp.score(x_test_scaled, y_test)  # generalization accuracy

# The original post reports 1.000 (train) and 0.965 (test) for these prints.
print('train set scaled accuracy \n{:.3f}'.format(train_accuracy))
print('test set scaled accuracy \n{:.3f}'.format(test_accuracy))

5. Breast Cancer 데이터셋으로 학습된 가중치 확인

행 : 30개의 입력특성

열 : 100개의 은닉 유닛

밝은 색은 큰 양수 값

# Heat map of the weights between the input features and the hidden units.
plt.figure(figsize=(20, 5))

first_layer_weights = mlp.coefs_[0]  # weights between input and hidden layer
plt.imshow(first_layer_weights, interpolation='none', cmap='viridis')

# Label each of the 30 rows with its input feature name.
plt.yticks(range(30), cancer.feature_names)
plt.xlabel('은닉 유닛')
plt.ylabel('입력 특성')
plt.colorbar()
plt.show()

breast cancer 데이터셋으로 학습시킨 신경망의 첫번째 층의 가중치 히트맵

mlp.coefs_[0]은 입력과 은닉층 사이의 가중치가 저장되어있는 (30, 100) NumPy배열이고

mlp.coefs_[1]은 은닉층과 출력사이의 가중치가 저장되어있는 (100, 1) 크기의 NumPy배열

'python 머신러닝 -- 지도학습 > Classifier' 카테고리의 다른 글

Kernelized Support Vector Machines (0)	2018.03.15
Gradient Boosting Model (0)	2018.03.15
Random Forest (0)	2018.03.15
Decision Tree (0)	2018.03.14
Multi Linear Classification (0)	2018.03.14

게으른 우루루

Neural Network(Deep Learning)

'python 머신러닝 -- 지도학습 > Classifier' 카테고리의 다른 글

+ Recent posts

티스토리툴바