In [0]:
from google.colab import drive
drive.mount('/content/drive')
Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive
In [0]:
import os
os.chdir('/content/drive/Shared drives/Тяжелые проекты/ИАД/intro-to-dl-seminars/hw_4_imgs')
In [0]:
%tensorflow_version 2.x
TensorFlow 2.x selected.
In [0]:
import pandas as pd

import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
from tensorflow.keras.layers import (Conv2D, MaxPooling2D, Flatten, Dense,
                                     Activation, Dropout, BatchNormalization,
                                     LeakyReLU)
print(tf.__version__)
print(keras.__version__)
2.1.0-rc1
2.2.4-tf

Check that a GPU is available

In [0]:
device_name = tf.test.gpu_device_name()
print(f'Found GPU at: {device_name}')
Found GPU at: /device:GPU:0
In [0]:
!ls -l
total 3548497
-rw------- 1 root root     22226 Dec 17 07:20 kernel_03.ipynb
-rw------- 1 root root     18967 Dec 17 21:17 kernel_kaggle_best.ipynb
drwx------ 2 root root      4096 Dec 11 22:15 submit_01
drwx------ 2 root root      4096 Dec 12 07:54 submit_02
-rw------- 1 root root 867986698 Nov 28 10:01 test.npy
-rw------- 1 root root 476482044 Nov 28 10:02 train-1.npy
-rw------- 1 root root 693040854 Nov 28 10:03 train-2.npy
-rw------- 1 root root 773784254 Nov 28 10:04 train-3.npy
-rw------- 1 root root 822311876 Nov 28 10:05 train-4.npy
drwx------ 2 root root      4096 Dec 11 22:15 trash
In [0]:
INPUT_DIR = '.'
# Load and concatenate the four training shards; the test set is a single file.
data_train = np.concatenate([
    np.load(f"{INPUT_DIR}/train-{i}.npy", allow_pickle=True)
    for i in range(1, 5)
])
test = np.load(f"{INPUT_DIR}/test.npy", allow_pickle=True)
In [0]:
# Collect per-image heights and widths to choose a fixed input size.
h = np.array([img.shape[0] for img in data_train[:, 0]])
w = np.array([img.shape[1] for img in data_train[:, 0]])
np.percentile(h, 99.9), np.percentile(w, 99.9)
Out[0]:
(143.0, 128.0)

99.9% of the images are smaller than 143x128 px.
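As a quick check (a sketch, not part of the original run), the h and w arrays above show what fraction of images the 150x130 canvas chosen below would actually crop:

# By the percentiles above, this should be at most about 0.2%.
print(np.mean((h > 150) | (w > 130)))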

In [0]:
val_size = 0   # no validation split is used (see below)
u = np.unique(data_train[:, 1])
NUM_CLASSES = len(u)

char_to_id = dict(zip(u, range(NUM_CLASSES)))
batch_size = 32
RANDOM_SEED = 42
HEIGHT = 150   # a margin above the 99.9th-percentile size (143x128)
WIDTH = 130
CHANNELS = 1
INIT_LR = 5e-3

During preprocessing we normalize each image and crop (or pad) it to the fixed dimensions.

For augmentation we enlarge the original canvas and then take a random crop back to the target size.

In practice, extra augmentation (such as small rotations) did not improve quality.

The model also does not overfit, so we do without a validation set; a hypothetical split is sketched below.
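Should a validation split ever be wanted, the simplest route is to split the data_train array up front. A minimal sketch (hypothetical, with an assumed 10% split):

rng = np.random.RandomState(RANDOM_SEED)
idx = rng.permutation(len(data_train))
n_val = int(0.1 * len(data_train))  # assumed split size
data_val, data_fit = data_train[idx[:n_val]], data_train[idx[n_val:]]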

In [0]:
def train_gen():
    # Yield (image, class id) pairs; add a trailing channel axis.
    for img, label in data_train:
        img = img[..., None]
        yield img, char_to_id[label]

def preprocess(x, y):
    # Crop or zero-pad to the fixed canvas, then scale pixels to [-1, 1].
    x = tf.image.resize_with_crop_or_pad(x, HEIGHT, WIDTH)
    x = x / 127.5 - 1
    return x, y

def augmentation(x, y):
    # Enlarge the canvas, then take a random crop of the target size.
    x = tf.image.resize_with_crop_or_pad(x, HEIGHT + 20, WIDTH + 20)
    x = tf.image.random_crop(x, [HEIGHT, WIDTH, CHANNELS])
    return x, y

def test_gen():
    # Same as train_gen, but the test set has no labels.
    for img in test:
        img = img[..., None]
        yield img

def preprocess_test(x):
    x = tf.image.resize_with_crop_or_pad(x, HEIGHT, WIDTH)
    x = x / 127.5 - 1
    return x
In [0]:
AUTOTUNE = tf.data.experimental.AUTOTUNE

ds_train = tf.data.Dataset.from_generator(
    train_gen,
    output_types=(tf.float32, tf.int32),
    output_shapes=((None, None, 1), ())
).map(preprocess, num_parallel_calls=AUTOTUNE) \
 .map(augmentation, num_parallel_calls=AUTOTUNE) \
 .shuffle(10 * batch_size, seed=RANDOM_SEED) \
 .batch(batch_size).repeat().prefetch(AUTOTUNE)
# NB: the original chained .shuffle(RANDOM_SEED), which passed 42 as the
# (tiny) shuffle buffer size; a modest buffer with an explicit seed is used here.

ds_test = tf.data.Dataset.from_generator(
    test_gen,
    output_types=tf.float32,
    output_shapes=(None, None, 1)
).map(preprocess_test, num_parallel_calls=AUTOTUNE).batch(batch_size)
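To validate the pipeline (a sketch, not in the original run), one can pull a single batch and check shapes and value range:

xb, yb = next(iter(ds_train))
print(xb.shape, yb.shape)  # expected: (32, 150, 130, 1) (32,)
print(float(tf.reduce_min(xb)), float(tf.reduce_max(xb)))  # within [-1, 1]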

We take the model from homework #3 and improve it:

  1. Add another block of layers
  2. Increase the size of the penultimate Dense layer
  3. Reduce dropout so the signal actually reaches the last layers
  4. Add batch normalization
In [0]:
model = tf.keras.models.Sequential()
initializer = tf.keras.initializers.lecun_uniform(seed=RANDOM_SEED)
input_shape = (HEIGHT, WIDTH, CHANNELS)
filters = 64
lrelu = 0.1    # LeakyReLU negative slope
dropout = 0.2  # kept low so the signal reaches the last layers

model.add(Conv2D(filters=filters, padding='same', kernel_size=(3,3), input_shape=input_shape, kernel_initializer=initializer))
model.add(LeakyReLU(lrelu))    
model.add(Conv2D(filters=filters, padding='same', kernel_size=(3,3), kernel_initializer=initializer))  
model.add(BatchNormalization())
model.add(LeakyReLU(lrelu))    
model.add(MaxPooling2D(pool_size=(2,2), padding='same'))
model.add(Dropout(dropout)) 

model.add(Conv2D(filters=2*filters, padding='same', kernel_size=(3,3), kernel_initializer=initializer))
model.add(LeakyReLU(lrelu))    
model.add(Conv2D(filters=2*filters, padding='same', kernel_size=(3,3), kernel_initializer=initializer))  
model.add(BatchNormalization())
model.add(LeakyReLU(lrelu))    
model.add(MaxPooling2D(pool_size=(2,2), padding='same'))
model.add(Dropout(dropout)) 

model.add(Conv2D(filters=4*filters, padding='same', kernel_size=(3,3), kernel_initializer=initializer))  
model.add(LeakyReLU(lrelu))
model.add(Conv2D(filters=4*filters, padding='same', kernel_size=(3,3), kernel_initializer=initializer))  
model.add(BatchNormalization())
model.add(LeakyReLU(lrelu))
model.add(MaxPooling2D(pool_size=(2,2), padding='same')) 
model.add(Dropout(dropout)) 

model.add(Conv2D(filters=8*filters, padding='same', kernel_size=(3,3), kernel_initializer=initializer))  
model.add(LeakyReLU(lrelu))
model.add(Conv2D(filters=8*filters, padding='same', kernel_size=(3,3), kernel_initializer=initializer))  
model.add(BatchNormalization())
model.add(LeakyReLU(lrelu))
model.add(MaxPooling2D(pool_size=(2,2), padding='same')) 
model.add(Dropout(dropout)) 

model.add(Flatten())
model.add(Dense(768,kernel_initializer=initializer))                
model.add(BatchNormalization())
model.add(LeakyReLU(lrelu))
model.add(Dropout(0.5))    
model.add(Dense(NUM_CLASSES, kernel_initializer=initializer))             

model.add(Activation("softmax"))

model.compile(
    optimizer=keras.optimizers.Adamax(learning_rate=INIT_LR),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'])
In [0]:
model.summary()
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d_10 (Conv2D)           (None, 150, 130, 64)      640       
_________________________________________________________________
leaky_re_lu_10 (LeakyReLU)   (None, 150, 130, 64)      0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 150, 130, 64)      36928     
_________________________________________________________________
batch_normalization_5 (Batch (None, 150, 130, 64)      256       
_________________________________________________________________
leaky_re_lu_11 (LeakyReLU)   (None, 150, 130, 64)      0         
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 75, 65, 64)        0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 75, 65, 64)        0         
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 75, 65, 128)       73856     
_________________________________________________________________
leaky_re_lu_12 (LeakyReLU)   (None, 75, 65, 128)       0         
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 75, 65, 128)       147584    
_________________________________________________________________
batch_normalization_6 (Batch (None, 75, 65, 128)       512       
_________________________________________________________________
leaky_re_lu_13 (LeakyReLU)   (None, 75, 65, 128)       0         
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 38, 33, 128)       0         
_________________________________________________________________
dropout_6 (Dropout)          (None, 38, 33, 128)       0         
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 38, 33, 256)       295168    
_________________________________________________________________
leaky_re_lu_14 (LeakyReLU)   (None, 38, 33, 256)       0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 38, 33, 256)       590080    
_________________________________________________________________
batch_normalization_7 (Batch (None, 38, 33, 256)       1024      
_________________________________________________________________
leaky_re_lu_15 (LeakyReLU)   (None, 38, 33, 256)       0         
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 19, 17, 256)       0         
_________________________________________________________________
dropout_7 (Dropout)          (None, 19, 17, 256)       0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 19, 17, 512)       1180160   
_________________________________________________________________
leaky_re_lu_16 (LeakyReLU)   (None, 19, 17, 512)       0         
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 19, 17, 512)       2359808   
_________________________________________________________________
batch_normalization_8 (Batch (None, 19, 17, 512)       2048      
_________________________________________________________________
leaky_re_lu_17 (LeakyReLU)   (None, 19, 17, 512)       0         
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 10, 9, 512)        0         
_________________________________________________________________
dropout_8 (Dropout)          (None, 10, 9, 512)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 46080)             0         
_________________________________________________________________
dense_2 (Dense)              (None, 768)               35390208  
_________________________________________________________________
batch_normalization_9 (Batch (None, 768)               3072      
_________________________________________________________________
leaky_re_lu_18 (LeakyReLU)   (None, 768)               0         
_________________________________________________________________
dropout_9 (Dropout)          (None, 768)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 1000)              769000    
_________________________________________________________________
activation_1 (Activation)    (None, 1000)              0         
=================================================================
Total params: 40,850,344
Trainable params: 40,846,888
Non-trainable params: 3,456
_________________________________________________________________
In [0]:
TAKE = 3  # sequential number of this submission attempt
model_filename = '{0:02d}_kernel_opt_{{0:02d}}.hdf5'.format(TAKE)
predict_filename = '{0:02d}_kernel_opt_{{0:02d}}.csv'.format(TAKE)
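The doubled braces survive the first .format() call, leaving a second placeholder for the epoch number:

print(model_filename)            # -> '03_kernel_opt_{0:02d}.hdf5'
print(model_filename.format(7))  # -> '03_kernel_opt_07.hdf5'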
In [0]:
class ModelSaveCallback(keras.callbacks.Callback):
    """Save a model checkpoint after every epoch."""

    def __init__(self, file_name):
        super(ModelSaveCallback, self).__init__()
        self.file_name = file_name

    def on_epoch_end(self, epoch, logs=None):
        filename = self.file_name.format(epoch)
        print()
        keras.models.save_model(self.model, filename)


class ModelPredictCallback(keras.callbacks.Callback):
    """Write a submission CSV for the test set after every epoch."""

    def __init__(self, file_name):
        super(ModelPredictCallback, self).__init__()
        self.file_name = file_name

    def on_epoch_end(self, epoch, logs=None):
        filename = self.file_name.format(epoch)
        result = self.model.predict_classes(ds_test, batch_size=None, verbose=1)
        # Map predicted class ids back to the original character labels.
        predictions = [u[i] for i in result]
        df = pd.DataFrame({'Id': range(1, len(predictions) + 1),
                           'Category': predictions},
                          columns=['Id', 'Category'])
        df.to_csv(filename, index=False)
In [0]:
def lr_scheduler(epoch):
    # Exponential decay: scale the initial learning rate by 0.9 each epoch.
    return INIT_LR * 0.9 ** epoch
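For reference (a quick sketch, not part of the original run), the schedule decays the learning rate like this:

for epoch in (0, 5, 10, 19):
    print(epoch, f"{lr_scheduler(epoch):.2e}")
# -> 0 5.00e-03, 5 2.95e-03, 10 1.74e-03, 19 6.75e-04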
In [0]:
model.fit(ds_train,
        epochs=20,
        steps_per_epoch=int(data_train.shape[0]/batch_size),
        verbose=2,
        callbacks=[
            keras.callbacks.LearningRateScheduler(lr_scheduler), 
            ModelSaveCallback(model_filename),
            ModelPredictCallback(predict_filename),
        ],
)
Train for 10405 steps
Epoch 1/20
10405/10405 - 1157s - loss: 1.9893 - accuracy: 0.5991
Epoch 2/20
10405/10405 - 1160s - loss: 0.2436 - accuracy: 0.9312
Epoch 3/20
10405/10405 - 1160s - loss: 0.1435 - accuracy: 0.9591
Epoch 4/20
10405/10405 - 1171s - loss: 0.1043 - accuracy: 0.9703
Epoch 5/20
10405/10405 - 1156s - loss: 0.0807 - accuracy: 0.9769
Epoch 6/20
10405/10405 - 1162s - loss: 0.0668 - accuracy: 0.9810
Epoch 7/20
10405/10405 - 1163s - loss: 0.0558 - accuracy: 0.9843
Epoch 8/20
10405/10405 - 1168s - loss: 0.0475 - accuracy: 0.9865
Epoch 9/20
10405/10405 - 1166s - loss: 0.0415 - accuracy: 0.9878
Epoch 10/20
10405/10405 - 1157s - loss: 0.0363 - accuracy: 0.9897
Epoch 11/20
10405/10405 - 1146s - loss: 0.0323 - accuracy: 0.9907
Epoch 12/20
10405/10405 - 1156s - loss: 0.0292 - accuracy: 0.9917
Epoch 13/20
10405/10405 - 1158s - loss: 0.0266 - accuracy: 0.9923
Epoch 14/20
10405/10405 - 1158s - loss: 0.0252 - accuracy: 0.9928
Epoch 15/20
10405/10405 - 1156s - loss: 0.0230 - accuracy: 0.9935
Epoch 16/20
10405/10405 - 1160s - loss: 0.0215 - accuracy: 0.9938
Epoch 17/20
10405/10405 - 1163s - loss: 0.0198 - accuracy: 0.9942
Epoch 18/20
10405/10405 - 1165s - loss: 0.0184 - accuracy: 0.9949
Epoch 19/20
10405/10405 - 1163s - loss: 0.0176 - accuracy: 0.9950
Epoch 20/20
10405/10405 - 1160s - loss: 0.0168 - accuracy: 0.9952
Out[0]:
<tensorflow.python.keras.callbacks.History at 0x7fe4a5b9a978>

Model quality improved after every epoch, and the same trend showed on the public leaderboard.

This model could be improved further in the following ways:

  • Train for more epochs (quality is still rising)
  • Experiment with the model itself (hyperparameters, inter-layer connections)
  • Try additional augmentation that does not hurt quality (a sketch follows this list)
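As an example of the last point, mild photometric jitter could be appended after the existing augmentation step. A minimal sketch (hypothetical, not used in this run), assuming the [-1, 1] tensors produced by preprocess():

def extra_augmentation(x, y):
    # Hypothetical: small brightness/contrast jitter, mild enough to keep
    # the glyphs readable.
    x = tf.image.random_brightness(x, max_delta=0.1)
    x = tf.image.random_contrast(x, lower=0.9, upper=1.1)
    x = tf.clip_by_value(x, -1.0, 1.0)  # stay within the normalized range
    return x, y

# e.g. chained into the training pipeline before batching:
# .map(extra_augmentation, num_parallel_calls=AUTOTUNE)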