# Bag-of-words style encoding of the training reviews with the Keras Tokenizer.
# NOTE(review): with mode="freq" this presumably yields one row per review holding
# per-word relative frequencies over the 1000-word vocabulary - confirm against the
# Keras Tokenizer documentation.
tokenizer = Tokenizer(nb_words=1000)
X_train = tokenizer.sequences_to_matrix(X_train, mode="freq")
# IMDB sentiment classification script: Embedding -> LSTM -> small dense head,
# trained with binary cross-entropy and evaluated on the held-out test split.
import keras
import numpy as np
from keras.datasets import imdb
from keras.layers import Dense, Dropout, Embedding, LSTM
from keras.models import Sequential
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer

# Both limits are 1000 in this script; they are named separately because they
# feed different arguments (vocabulary cap vs. padded sequence length).
VOCAB_SIZE = 1000
MAX_REVIEW_LEN = 1000

# Load the reviews with the vocabulary capped via nb_words, so the word indices
# fit the Embedding layer's input size declared below.
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=VOCAB_SIZE)

# Bring every review to the same length so it matches the Embedding input_length.
X_train = pad_sequences(X_train, maxlen=MAX_REVIEW_LEN)
X_test = pad_sequences(X_test, maxlen=MAX_REVIEW_LEN)

# Layer stack, listed in the order it is added to the model.
layers = [
    Embedding(VOCAB_SIZE, 64, input_length=MAX_REVIEW_LEN),
    LSTM(output_dim=32, activation='sigmoid', inner_activation='hard_sigmoid'),
    Dense(16, activation="relu"),
    Dropout(0.5),
    Dense(8, activation="relu"),
    Dropout(0.5),
    # Single sigmoid unit, paired with the binary cross-entropy loss below.
    Dense(1, activation="sigmoid"),
]

model = Sequential()
for layer in layers:
    model.add(layer)

model.compile(
    loss="binary_crossentropy",
    optimizer="adagrad",
    metrics=["accuracy"],
)

model.fit(X_train, y_train, batch_size=500, nb_epoch=100)
model.evaluate(X_test, y_test, batch_size=1000)

# Show the first three predictions next to their true labels.
pred = model.predict(X_test, batch_size=20000)
for i in range(3):
    print (pred[i], y_test[i])
X_train = pad_sequences(X_train, 1000)
위의 코드는 X_train의 입력 배열 중 길이가 1000보다 짧은 배열의 앞쪽에 0을 채워 넣어 길이를 1000으로 맞춘다.
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=1000)
위 코드처럼 단어 인덱스의 범위를 Embedding 층의 입력 크기(1000)에 맞게 1000 미만으로 제한해야 한다. 제한하지 않으면 아래의 IndexError처럼 범위를 벗어난 인덱스가 입력으로 들어간다.
array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   1,  20,  28, 716,  48, 495,  79,  27, 493,   8,   2,
         7,  50,   5,   2,   2,  10,   5, 852, 157,  11,   5,   2,   2,
        10,   5, 500,   2,   6,  33, 256,  41,   2,   7,  17,  23,  48,
         2,   2,  26, 269, 929,  18,   2,   7,   2,   2,   8, 105,   5,
         2, 182, 314,  38,  98, 103,   7,  36,   2, 246, 360,   7,  19,
       396,  17,  26, 269, 929,  18,   2, 493,   6, 116,   7, 105,   5,
       575, 182,  27,   5,   2,   2, 130,  62,  17,  24,  89,  17,  13,
       381,   2,   8,   2,   7,   5,   2,  38, 325,   7,  17,  23,  93,
         9, 156, 252,  19, 235,  20,  28,   5, 104,  76,   7,  17, 169,
        35,   2,  17,  23,   2,   7,  36,   2, 934,  56,   2,   6,  17,
       891, 214,  11,   5,   2,   6,  92,   6,  33, 256,  82,   7], dtype=int32)
IndexError: index 4414 is out of bounds for size 1000
Using Theano backend. Epoch 1/10 22500/22500 [==============================] - 115s - loss: 0.6932 - acc: 0.5014 Epoch 2/10 22500/22500 [==============================] - 115s - loss: 0.6932 - acc: 0.5010 Epoch 3/10 22500/22500 [==============================] - 114s - loss: 0.6932 - acc: 0.5014 Epoch 4/10 22500/22500 [==============================] - 115s - loss: 0.6931 - acc: 0.5014 Epoch 5/10 22500/22500 [==============================] - 115s - loss: 0.6931 - acc: 0.5014 Epoch 6/10 22500/22500 [==============================] - 115s - loss: 0.6932 - acc: 0.5014 Epoch 7/10 22500/22500 [==============================] - 114s - loss: 0.6931 - acc: 0.5014 Epoch 8/10 22500/22500 [==============================] - 114s - loss: 0.6932 - acc: 0.5016 Epoch 9/10 22500/22500 [==============================] - 115s - loss: 0.6932 - acc: 0.5014 Epoch 10/10 22500/22500 [==============================] - 115s - loss: 0.6932 - acc: 0.5014
Epoch 1/10 22500/22500 [==============================] - 14282s - loss: 0.6927 - acc: 0.5164 Epoch 2/10 22500/22500 [==============================] - 10235s - loss: 0.6864 - acc: 0.5618 Epoch 3/10 22500/22500 [==============================] - 3236s - loss: 0.6541 - acc: 0.6508 Epoch 4/10 22500/22500 [==============================] - 3230s - loss: 0.5829 - acc: 0.7528 Epoch 5/10 22500/22500 [==============================] - 3222s - loss: 0.5490 - acc: 0.7745 Epoch 6/10 22500/22500 [==============================] - 3229s - loss: 0.5250 - acc: 0.7946 Epoch 7/10 22500/22500 [==============================] - 3230s - loss: 0.5052 - acc: 0.8030 Epoch 8/10 22300/22500 [============================>.] - ETA: 28s - loss: 0.4963 - acc: 0.8046