In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")
import os
In [2]:
# Set the working directory so the relative data/model/log paths used by the
# rest of the notebook resolve.
# expanduser("~") is used instead of os.getenv("HOME") because the HOME
# variable may be unset (e.g. on Windows); getenv would then return None and
# os.path.join would raise TypeError.
HOME = os.path.expanduser("~")
WORKDIR = os.path.join(HOME, "python", "deep_learning", "tensorflow")
os.chdir(WORKDIR)
In [3]:
# Load the dataset; the path is relative to the current working directory.
CANCER_CSV = "./datas/cancer.csv"
cancer = pd.read_csv(CANCER_CSV)

# Disabled experiment, kept for reference: reorder the unique `result` values
# as [second, first] and overwrite each of them in the frame with its index.
# cancer_target_names = np.unique(cancer.result.values)
# cancer_target_names = cancer_target_names[[1, 0]]
# for i, n in enumerate(cancer_target_names):
#     cancer.replace(to_replace=cancer_target_names[i], value=i, inplace=True)
In [4]:
# data split
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split repeatable between runs.
train_set, test_set = train_test_split(
    cancer,
    test_size=0.2,
    random_state=0,
)

# Raw `result` column of each split as a NumPy array.
train_labels, test_labels = (
    split["result"].values for split in (train_set, test_set)
)
In [5]:
# preprocessing
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline, FeatureUnion
class DataframeSelector(BaseEstimator, TransformerMixin):
    """Pipeline step that picks DataFrame columns by position.

    Parameters
    ----------
    attr_list : positional column indexer
        Handed unchanged to ``DataFrame.iloc`` as the column selector.
    """

    def __init__(self, attr_list):
        self.attr_list = attr_list

    def fit(self, X, y=None):
        """Nothing is learned from the data; return the selector itself."""
        return self

    def transform(self, X):
        """Return the selected columns of ``X`` as a NumPy array."""
        selected = X.iloc[:, self.attr_list]
        return selected.values
In [6]:
# Positional column indices: every column except the last goes in n_list,
# and the last column alone goes in c_list.
last_col = train_set.shape[1] - 1
n_list = range(last_col)
c_list = [last_col]
In [7]:
# Numeric branch: select the n_list columns, fill missing values with the
# column median, then standardize.
num_pipeline = Pipeline(steps=[
    ("selector", DataframeSelector(n_list)),
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler()),
])

# Categorical branch: select the c_list column and one-hot encode it into a
# dense array.
# NOTE(review): newer scikit-learn releases rename `sparse` to
# `sparse_output` -- confirm against the installed version before changing.
cat_pipeline = Pipeline(steps=[
    ("selector", DataframeSelector(c_list)),
    ("encoder", OneHotEncoder(sparse=False)),
])

# Run both branches and concatenate their outputs column-wise
# (numeric columns first, one-hot columns last).
full_pipeline = FeatureUnion(transformer_list=[
    ("nums", num_pipeline),
    ("cats", cat_pipeline),
])
In [8]:
# Fit the preprocessing (imputer medians, scaler statistics, one-hot
# categories) on the training split only, and transform it.
scaled_train = full_pipeline.fit_transform(train_set)

# Apply the already-fitted pipeline to the test split. Re-fitting here with
# fit_transform would compute the statistics from the test data itself
# (leaking test information into evaluation) and could yield a different
# one-hot column layout than the one the model was trained on.
scaled_test = full_pipeline.transform(test_set)
In [9]:
# The first 30 columns of the pipeline output are the model inputs; the
# remaining columns are the one-hot encoded targets.
feature_cols = slice(None, 30)
target_cols = slice(30, None)

x_train = scaled_train[:, feature_cols]
y_train = scaled_train[:, target_cols]
x_test = scaled_test[:, feature_cols]
y_test = scaled_test[:, target_cols]
In [10]:
# Cast the training arrays to float32, the dtype of the X / Y placeholders.
x_train, y_train = (arr.astype("float32") for arr in (x_train, y_train))
In [11]:
# Non-trainable step counter; it is passed to the optimizer and reused as the
# step index for summaries and checkpoints.
global_step = tf.Variable(initial_value=0, trainable=False, name="global_step")
In [12]:
# Graph inputs supplied through feed_dict: X receives the feature rows and
# Y the one-hot targets.
X = tf.placeholder(dtype=tf.float32, name="X")
Y = tf.placeholder(dtype=tf.float32, name="Y")
In [13]:
# Layer widths: 30 inputs -> 10 -> 100 -> 500 -> 2 output logits.
# Weights start from a standard normal distribution.
W1 = tf.Variable(tf.random_normal([30, 10], mean=0, stddev=1))
W2 = tf.Variable(tf.random_normal([10, 100], mean=0, stddev=1))
W3 = tf.Variable(tf.random_normal([100, 500], mean=0, stddev=1))
W4 = tf.Variable(tf.random_normal([500, 2], mean=0, stddev=1))

# Biases must be tf.Variable objects: a bare tf.zeros(...) tensor is a constant
# that the optimizer cannot update and the Saver does not store, so the
# network would effectively train without bias terms.
# They are created after the weights so the weights keep their auto-generated
# variable names.
# NOTE: checkpoints written before this change contain no bias variables and
# cannot be restored into this graph; remove stale files under ./model/cancer.
b1 = tf.Variable(tf.zeros([10]))
b2 = tf.Variable(tf.zeros([100]))
b3 = tf.Variable(tf.zeros([500]))
b4 = tf.Variable(tf.zeros([2]))
In [14]:
# Three sigmoid hidden layers followed by a linear output layer; each layer
# lives in its own name scope.
with tf.name_scope("layer1"):
    L1 = tf.nn.sigmoid(tf.add(tf.matmul(X, W1), b1))

with tf.name_scope("layer2"):
    L2 = tf.nn.sigmoid(tf.add(tf.matmul(L1, W2), b2))

with tf.name_scope("layer3"):
    L3 = tf.nn.sigmoid(tf.add(tf.matmul(L2, W3), b3))

with tf.name_scope("layer4"):
    # Raw logits; the softmax is applied inside the loss op.
    model = tf.add(tf.matmul(L3, W4), b4)
In [15]:
with tf.name_scope("optimizer"):
    # Mean softmax cross-entropy between the one-hot targets and the logits.
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=model)
    cost = tf.reduce_mean(per_example_loss)

    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    # minimize() also advances global_step by one on every update.
    train_op = optimizer.minimize(cost, global_step=global_step)

    # Record the loss so it can be collected by tf.summary.merge_all().
    tf.summary.scalar("cost", cost)

# Summaries for W1..W4 / b1..b4 are intentionally not registered: those are
# non-scalar tensors, so tf.summary.scalar does not apply to them
# (tf.summary.histogram would be the matching summary type).
In [16]:
# (tf.reset_default_graph() is left disabled here.)
# Open the session used by all later cells, and a saver that covers every
# global variable created so far.
sess = tf.Session()
saver = tf.train.Saver(var_list=tf.global_variables())
In [17]:
# Resume from the latest checkpoint in ./model/cancer when one exists;
# otherwise start from freshly initialized variables.
ckpt = tf.train.get_checkpoint_state("./model/cancer")
can_restore = ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path)

if not can_restore:
    init = tf.global_variables_initializer()
    sess.run(init)
else:
    saver.restore(sess, ckpt.model_checkpoint_path)
In [18]:
# Bundle every registered summary into one op, and open an event-file writer
# that also stores the session graph.
merged = tf.summary.merge_all()
writer = tf.summary.FileWriter(logdir="./logs/cancer", graph=sess.graph)
In [19]:
# Full-batch training: every step feeds the whole training set.
NUM_STEPS = 5000   # optimizer updates performed by this cell
LOG_EVERY = 200    # print the cost once per this many steps
train_feed = {X: x_train, Y: y_train}

for step in range(NUM_STEPS):
    sess.run(train_op, feed_dict=train_feed)

    # Fetch the post-update cost, the merged summary and the step counter in
    # a single session call. Running them separately repeats the forward
    # pass for each fetch and reads global_step twice.
    cost_value, summary, current_step = sess.run(
        [cost, merged, global_step], feed_dict=train_feed
    )

    if (step + 1) % LOG_EVERY == 0:
        print("step: {}, cost: {}".format(current_step, cost_value))

    # NOTE(review): the summary is written on every step; the exported
    # notebook lost its indentation, so confirm this was not meant to sit
    # under the logging condition.
    writer.add_summary(summary, global_step=current_step)

# Persist the trained variables once, tagged with the final step count.
saver.save(sess, "./model/cancer/dnn.ckpt", global_step=global_step)
Out[19]:
In [20]:
# Evaluation ops: predicted class vs. target class (argmax over the class
# axis), and the fraction of rows where they match.
prediction = tf.argmax(model, axis=1)
target = tf.argmax(Y, axis=1)
is_correct = tf.equal(prediction, target)
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

train_feed = {X: x_train, Y: y_train}
test_feed = {X: x_test, Y: y_test}

# Training split report.
print("=================================")
train_pred = sess.run(prediction, feed_dict=train_feed)
print("train_prediction: \n{}".format(train_pred))
train_tgt = sess.run(target, feed_dict=train_feed)
print("train_target: \n{}".format(train_tgt))
train_acc = sess.run(accuracy*100, feed_dict=train_feed)
print("train_accuracy: \n{:.3f}".format(train_acc))

# Test split report.
print("\n=================================")
test_pred = sess.run(prediction, feed_dict=test_feed)
print("test_prediction: \n{}".format(test_pred))
test_tgt = sess.run(target, feed_dict=test_feed)
print("test_target: \n{}".format(test_tgt))
test_acc = sess.run(accuracy*100, feed_dict=test_feed)
print("test_accuracy: \n{:.3f}".format(test_acc))
In [21]:
# tensorboard --logdir=./logs/cancer
In [22]:
# Stretch the notebook container to the full browser width.
# display/HTML come from the public IPython.display module; importing
# `display` from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style> .container{width:100% !important;}</style>"))
'Deep_Learning' 카테고리의 다른 글
00.write_csv (0) | 2018.12.09 |
---|---|
09.mnist_01_minibatch (0) | 2018.12.09 |
07.tensorboard02_example (0) | 2018.12.09 |
06.tensorboard01_example (0) | 2018.12.09 |
05.deep_neural_net_Costfun2 (0) | 2018.12.09 |