
04-2. Reading CSV files and using queues in TensorFlow

by bsion 2018. 8. 17.
04-2. Loading Data from File

Source: Machine Learning for Everyone (http://hunkim.github.io/ml/)


Data manipulation using NumPy

List Slicing

NumPy Indexing (sketched below)
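
A minimal sketch of the two patterns before they are applied to the real data; the values below are illustrative only, not taken from the dataset.

import numpy as np

# Python list slicing: start:stop:step, negative indices count from the end
nums = [0, 1, 2, 3, 4, 5]
print(nums[2:5])    # [2, 3, 4]
print(nums[:-1])    # [0, 1, 2, 3, 4]
print(nums[::2])    # [0, 2, 4]

# NumPy 2-D indexing: rows and columns are sliced independently
a = np.array([[1., 2., 3.],
              [4., 5., 6.]])
print(a[:, 0:-1])   # every row, all columns but the last -> [[1. 2.] [4. 5.]]
print(a[:, [-1]])   # every row, last column kept 2-D     -> [[3.] [6.]]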


In [18]:
import numpy as np

# Load the whole CSV file into a single float32 array
xy = np.loadtxt('static/data-01-test-score.csv', delimiter=',', dtype=np.float32)

# Features: every row, all columns except the last
x_data = xy[:, 0:-1]
# Labels: every row, last column only (kept 2-D so the shape is (n, 1))
y_data = xy[:, [-1]]

print(x_data)
print(y_data)
[[  73.   80.   75.]
 [  93.   88.   93.]
 [  89.   91.   90.]
 [  96.   98.  100.]
 [  73.   66.   70.]
 [  53.   46.   55.]
 [  69.   74.   77.]
 [  47.   56.   60.]
 [  87.   79.   90.]
 [  79.   70.   88.]
 [  69.   70.   73.]
 [  70.   65.   74.]
 [  93.   95.   91.]
 [  79.   80.   73.]
 [  70.   73.   78.]
 [  93.   89.   96.]
 [  78.   75.   68.]
 [  81.   90.   93.]
 [  88.   92.   86.]
 [  78.   83.   77.]
 [  82.   86.   90.]
 [  86.   82.   89.]
 [  78.   83.   85.]
 [  76.   83.   71.]
 [  96.   93.   95.]]
[[ 152.]
 [ 185.]
 [ 180.]
 [ 196.]
 [ 142.]
 [ 101.]
 [ 149.]
 [ 115.]
 [ 175.]
 [ 164.]
 [ 141.]
 [ 141.]
 [ 184.]
 [ 152.]
 [ 148.]
 [ 192.]
 [ 147.]
 [ 183.]
 [ 177.]
 [ 159.]
 [ 177.]
 [ 175.]
 [ 175.]
 [ 149.]
 [ 192.]]
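
The `[-1]` list index in the label slice matters: a plain `-1` would drop the column dimension, while `[-1]` keeps the labels as an (n, 1) column vector, which is the shape the `shape=[None, 1]` placeholder in the next section expects. A quick check (shapes shown as comments, assuming the arrays loaded above):

print(x_data.shape)     # (25, 3)
print(xy[:, -1].shape)  # (25,)   plain -1 returns a 1-D vector
print(y_data.shape)     # (25, 1) [-1] keeps a 2-D column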

Queue Runners

A mechanism used when there is too much data to load into memory at once: file names are placed in a queue, a reader pulls records from it, and the decoded records are served to the training loop in batches.


In [2]:
import tensorflow as tf

# Put the list of input file names into a queue (no shuffling)
filename_queue = tf.train.string_input_producer(['static/data-01-test-score.csv'],
                                                shuffle=False,
                                                name='filename_queue')

# Define a reader that returns one line of the file at a time
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)

# Decode each CSV record into the default data types below (four float columns)
record_defaults = [[0.], [0.], [0.], [0.]]
xy = tf.decode_csv(value, record_defaults=record_defaults)

# Collect the decoded records into batches of 10 (first three columns as features, last as label)
train_x_batch, train_y_batch = tf.train.batch([xy[0:-1], xy[-1:]], batch_size=10)

# Placeholders for a batch of features (3 columns) and labels (1 column)
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# Weight matrix and bias for multi-variable linear regression
W = tf.Variable(tf.random_normal([3, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis: XW + b
hypothesis = tf.matmul(X, W) + b

# Mean squared error cost
cost = tf.reduce_mean(tf.square(hypothesis - Y))

# Minimize the cost with gradient descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

# Launch session
sess = tf.Session()

sess.run(tf.global_variables_initializer())

coord = tf.train.Coordinator()  # standard boilerplate: coordinator for the queue-runner threads
threads = tf.train.start_queue_runners(sess=sess, coord=coord)  # start the threads that fill the queue

for step in range(2001):
    x_batch, y_batch = sess.run([train_x_batch, train_y_batch])
    cost_val, hy_val, _ = sess.run([cost, hypothesis, train],
                                  feed_dict={X: x_batch, Y: y_batch})
    if step % 500 == 0:
        print(step, "\nCost: ", cost_val, "\nPrediction:\n", hy_val)

coord.request_stop()  # standard boilerplate: ask the queue-runner threads to stop
coord.join(threads)   # wait for the threads to finish
0 
Cost:  41956.848 
Prediction:
 [[-42.57767 ]
 [-51.09384 ]
 [-50.015465]
 [-56.493603]
 [-37.728786]
 [-31.821585]
 [-46.009533]
 [-39.106228]
 [-50.670334]
 [-52.00693 ]]
500 
Cost:  23.56743 
Prediction:
 [[154.44485]
 [186.56921]
 [183.6626 ]
 [198.63165]
 [143.22037]
 [102.66114]
 [145.65979]
 [103.93766]
 [172.41483]
 [156.09872]]
1000 
Cost:  17.25401 
Prediction:
 [[153.98001 ]
 [186.59052 ]
 [183.34297 ]
 [198.79144 ]
 [142.96176 ]
 [103.322296]
 [146.37238 ]
 [105.27768 ]
 [173.19005 ]
 [157.99164 ]]
1500 
Cost:  13.130315 
Prediction:
 [[153.63617]
 [186.57343]
 [183.08797]
 [198.92769]
 [142.70569]
 [103.81883]
 [146.97919]
 [106.42151]
 [173.77197]
 [159.48143]]
2000 
Cost:  10.407609 
Prediction:
 [[153.38551 ]
 [186.52982 ]
 [182.88449 ]
 [199.04417 ]
 [142.4575  ]
 [104.18788 ]
 [147.49715 ]
 [107.400085]
 [174.20422 ]
 [160.6524  ]]
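
With the session and the trained variables still open, the same `feed_dict` mechanism can be used to ask the model for predictions on new inputs; a minimal sketch (the score vectors below are illustrative, not taken from the dataset):

# Ask the trained model for predictions on new (score1, score2, score3) inputs
print("Your score will be ",
      sess.run(hypothesis, feed_dict={X: [[100, 70, 101]]}))
print("Other scores will be ",
      sess.run(hypothesis, feed_dict={X: [[60, 70, 110], [90, 100, 80]]}))

sess.close()  # release the session once finished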

