'Neural network' 태그의 글 목록

Neural network

Neural Network(Deep Learning) 2018.03.17

Neural Network(Deep Learning)

2018. 3. 17. 02:15

#!/usr/bin/env python3

Neural Network(Deep Learning)

신경망이라 알려진 algorithm은 최근 deep learning이란 이름으로 불림

간단하게 Classifier와 Regression에 쓸 수 있는 multilayer perceptrons, MLP

multilayer perceptrons은 feed-forward 신경망, 또는 그냥 신경망이라고도 함

비선형 함수 없이 여러 개의 가중치 합을 연달아 계산하는 것은 수학적으로 보면 하나의 가중치 합을 계산하는 것과 같음 (그래서 선형 모델보다 강력해지려면 비선형 함수가 필요함)

각 은닉 유닛의 가중치 합을 계산한 후 그 결과에 비선형 함수인 rectified linear unit, ReLU나

hyperbolic tangent, tanh를 적용하고, 그 결과들의 가중치(𝘸) 합을 계산하여 ŷ를 만듦

ReLU: 0 이하의 값을 잘라버림(0으로 만듦)

tanh: x가 -∞에 가까워질수록 -1에, x가 +∞에 가까워질수록 +1에 수렴

비선형 함수를 이용해 신경망이 선형 모델에서보다 훨씬 더 복잡한 함수를 학습 가능

1. ReLU 함수와 tanh 함수 살펴보기

# library import

import numpy as np

import matplotlib.pyplot as plt

import matplotlib

# Matplotlib setup.
matplotlib.rc('font', family='AppleGothic')  # font chosen so Korean text can be displayed
plt.rcParams['axes.unicode_minus'] = False  # minus-sign setting for axis labels

# Sample 100 evenly spaced x values on [-3, 3] (start, stop, count).
line = np.linspace(-3, 3, num=100)

# Draw both activation functions over the same x values:
# tanh as a dashed curve, ReLU (max(x, 0)) with the default line style.
activation_curves = (
    (np.tanh(line), {'linestyle': '--', 'label': 'tanh'}),
    (np.maximum(line, 0), {'label': 'relu'}),
)
for curve_values, curve_style in activation_curves:
    plt.plot(line, curve_values, **curve_style)

plt.xlabel('x')
plt.ylabel('tanh(x), relu(x)')
plt.legend(loc='upper left')  # string form of legend location code 2
plt.show()

tanh(x), relu(x) 함수

2. Two moons Machine Learning(MLP, Multilayer perceptrons)

# library load

from sklearn.neural_network import MLPClassifier

from sklearn.datasets import make_moons

from sklearn.model_selection import train_test_split

import mglearn

# Build the two-moons toy dataset: 100 samples, noise 0.25, fixed seed.
x, y = make_moons(n_samples=100, noise=0.25, random_state=3)

# Split into train and test portions, stratified on the class labels.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=42,
    stratify=y,
)

# Create the classifier and fit it on the training portion.
mlp = MLPClassifier(
    hidden_layer_sizes=[100],  # hidden layer sizes; e.g. [10, 10] means two hidden layers of 10 units
    activation='relu',
    solver='lbfgs',
    random_state=42,
)
mlp.fit(x_train, y_train)

# Visualization: filled decision regions with the training points drawn on top.
mglearn.plots.plot_2d_separator(mlp, x_train, fill=True, alpha=0.3)  # alpha here is transparency
first_feature = x_train[:, 0]
second_feature = x_train[:, 1]
mglearn.discrete_scatter(first_feature, second_feature, y_train)  # x, y, group
plt.xlabel('feature 0')
plt.ylabel('feature 1')
plt.show()

은닉 유닛이 100개인 신경망으로 학습한 two_moons 데이터셋의 결정 경계

신경망(neural network)의 decision boundary는 매우 비선형적이지만 비교적 매끄러움

3. Regularization 과 Visualization

Ridge Regression과 Linear Classifier에서처럼 L2 penalty(계수를 0에 가깝게 만듦)를 사용하며, alpha 매개변수로 모델의 복잡도를 제어

기본값은 0.0001

# Visualization: a 2x4 grid of decision boundaries.
# Each row of subplots uses one hidden-layer width; each column uses one alpha.
fig, axes = plt.subplots(2, 4)
n_hidden_nodes = [10, 100]
alpha_set = [0.0001, 0.01, 0.1, 1]

for axes_row, n_node in zip(axes, n_hidden_nodes):
    for ax, alpha in zip(axes_row, alpha_set):
        # Two hidden layers of n_node units each; a larger alpha means
        # stronger regularization.
        mlp = MLPClassifier(
            hidden_layer_sizes=[n_node, n_node],
            alpha=alpha,
            solver='lbfgs',
            random_state=42,
        )
        mlp.fit(x_train, y_train)

        # The alpha=0.3 passed to the plot helper is the fill transparency,
        # not the regularization strength.
        mglearn.plots.plot_2d_separator(
            mlp, x_train, fill=True, alpha=0.3, ax=ax
        )
        mglearn.discrete_scatter(
            x_train[:, 0], x_train[:, 1], y_train, ax=ax
        )

        subplot_title = 'n_hidden=[{}, {}]\nalpha={}'.format(n_node, n_node, alpha)
        ax.set_title(subplot_title)

# Show the whole grid once every subplot has been drawn.
plt.show()

은닉 유닛과 alpha에 따라 변하는 decision boundary

4. Breast Cancer Dataset Machine Learning(MLPClassifier)

# load library

from sklearn.datasets import load_breast_cancer

from sklearn.neural_network import MLPClassifier

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt

import matplotlib

# Matplotlib setup.
matplotlib.rc('font', family='AppleGothic')  # font chosen so Korean text can be displayed
plt.rcParams['axes.unicode_minus'] = False  # minus-sign setting for axis labels

# Load the breast cancer dataset.
cancer = load_breast_cancer()

# Hold out 30% of the samples as the test set, with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    test_size=0.3,
    random_state=0,
)

# Feature visualization: box plot of the training data on a symlog y axis,
# so features with very different value ranges fit on one plot.
# `manage_ticks` is the current name of the former `manage_xticks` keyword
# (renamed in Matplotlib 3.1; the old spelling was later removed and now
# raises TypeError). False stops boxplot from adjusting the axis ticks.
plt.boxplot(x_train, manage_ticks=False)
plt.yscale('symlog')
plt.xlabel('feature list')
plt.ylabel('feature')
plt.show()

breast cancer 데이터셋의 특성 값 범위(y축은 logscale)

# Pre-processing: min-max scale every feature using training-set statistics.
# axis=0 reduces over the rows, giving one value per column.
train_min = x_train.min(axis=0)  # per-column minimum of the training data
train_range = (x_train - train_min).max(axis=0)  # per-column maximum after subtracting the minimum

x_train_scaled = (x_train - train_min) / train_range
# The test data is scaled with the *training* minimum and range, so both
# sets go through the same transformation.
x_test_scaled = (x_test - train_min) / train_range

# Sanity check: after scaling, the training minimum is 0 and the maximum is 1
# in every column. (The original also evaluated these expressions as bare
# statements and discarded the results; only the prints are kept.)
print('x_train_scaled min \n{}'.format(x_train_scaled.min(axis=0)))
print('x_train_scaled.max \n{}'.format(x_train_scaled.max(axis=0)))

# Create the classifier and fit it on the scaled training data.
mlp = MLPClassifier(
    solver='lbfgs',
    random_state=0,
    hidden_layer_sizes=[100],  # hidden layer sizes
    alpha=0.001,  # regularization strength
)
mlp.fit(x_train_scaled, y_train)

# Score each split once and reuse the value. The original called score()
# twice per split and threw the first result away.
train_accuracy = mlp.score(x_train_scaled, y_train)
test_accuracy = mlp.score(x_test_scaled, y_test)  # accuracy on the held-out data

# The values in the trailing comments are the ones noted by the original author.
print('train set scaled accuracy \n{:.3f}'.format(train_accuracy))  # 1.000
print('test set scaled accuracy \n{:.3f}'.format(test_accuracy))  # 0.965

5. Breast Cancer 데이터셋으로 학습된 가중치 확인

행 : 30개의 입력특성

열 : 100개의 은닉 유닛

밝은 색은 큰 양수 값

# Heat map of the weights between the input features and the hidden layer.
plt.figure(figsize=(20, 5))  # figure size
plt.imshow(
    mlp.coefs_[0],         # weights between the inputs and the hidden layer
    interpolation='none',  # interpolation setting
    cmap='viridis',        # color map
)

# One y tick per feature name. The count is derived from the labels
# themselves (instead of a hard-coded 30) so positions and labels always match.
feature_names = cancer.feature_names
plt.yticks(range(len(feature_names)), feature_names)

plt.xlabel('은닉 유닛')
plt.ylabel('입력 특성')
plt.colorbar()
plt.show()

breast cancer 데이터셋으로 학습시킨 신경망의 첫번째 층의 가중치 히트맵

mlp.coefs_[0]은 입력과 은닉층 사이의 가중치가 저장되어 있는 (30, 100) 크기의 NumPy 배열이고

mlp.coefs_[1]은 은닉층과 출력 사이의 가중치가 저장되어 있는 (100, 1) 크기의 NumPy 배열

'python 머신러닝 -- 지도학습 > Classifier' 카테고리의 다른 글

Kernelized Support Vector Machines (0)	2018.03.15
Gradient Boosting Model (0)	2018.03.15
Random Forest (0)	2018.03.15
Decision Tree (0)	2018.03.14
Multi Linear Classification (0)	2018.03.14

PREV 1 NEXT

게으른 우루루

Neural network

Neural Network(Deep Learning)

'python 머신러닝 -- 지도학습 > Classifier' 카테고리의 다른 글

+ Recent posts

티스토리툴바