Gallery post | 2020.12.06
'''Neural style transfer with Keras. Modified as an experiment.

# References
- [A Neural Algorithm of Artistic Style](http://arxiv.org/abs/1508.06576)
'''
from __future__ import print_function

import glob
import os.path
import random
import time

import numpy as np
from scipy.optimize import fmin_l_bfgs_b

from keras.applications import vgg19
from keras import backend as K
from keras.preprocessing.image import load_img, save_img, img_to_array, array_to_img


def preprocess_image(img):
    # PIL image -> batched array in VGG19's expected (BGR, mean-centered) format.
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = vgg19.preprocess_input(img)
    return img


def deprocess_image(x):
    # Invert vgg19.preprocess_input so the tensor can be viewed/saved as RGB.
    if K.image_data_format() == 'channels_first':
        x = x.reshape((3, side_length, side_length))
        x = x.transpose((1, 2, 0))
    else:
        x = x.reshape((side_length, side_length, 3))
    # Remove zero-center by mean pixel
    x[:, :, 0] += 103.939
    x[:, :, 1] += 116.779
    x[:, :, 2] += 123.68
    # 'BGR' -> 'RGB'
    x = x[:, :, ::-1]
    x = np.clip(x, 0, 255).astype('uint8')
    return x


def gram_matrix(x):
    assert K.ndim(x) == 3
    if K.image_data_format() == 'channels_first':
        features = K.batch_flatten(x)
    else:
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    gram = K.dot(features, K.transpose(features))
    return gram


# The "style loss" is designed to maintain the style of the reference image
# in the generated image. It is based on the gram matrices (which capture
# style) of feature maps from the style reference image and from the
# generated image.
def style_loss(style, combination):
    assert K.ndim(style) == 3
    assert K.ndim(combination) == 3
    S = gram_matrix(style)
    C = gram_matrix(combination)
    channels = 3
    size = side_length * side_length
    return K.sum(K.square(S - C)) / (4.0 * (channels ** 2) * (size ** 2))


# An auxiliary loss function designed to maintain the "content" of the
# base image in the generated image.
def content_loss(base, combination):
    return K.sum(K.square(combination - base))


# Edge detector - the sum of edges is how busy the result will look.
def total_variation_loss(x):
    assert K.ndim(x) == 4
    if K.image_data_format() == 'channels_first':
        a = K.square(
            x[:, :, :side_length - 1, :side_length - 1] - x[:, :, 1:, :side_length - 1])
        b = K.square(
            x[:, :, :side_length - 1, :side_length - 1] - x[:, :, :side_length - 1, 1:])
    else:
        a = K.square(
            x[:, :side_length - 1, :side_length - 1, :] - x[:, 1:, :side_length - 1, :])
        b = K.square(
            x[:, :side_length - 1, :side_length - 1, :] - x[:, :side_length - 1, 1:, :])
    return K.sum(K.pow(a + b, 1.25))


# Penalize drift in overall intensity between the combination and the base.
def fidelity_loss(x, y):
    assert K.ndim(x) == 3
    assert K.ndim(y) == 3
    if K.image_data_format() == 'channels_first':
        x_g = K.sum(x[:3, :, :])
        y_g = K.sum(y[:3, :, :])
        return K.square(x_g - y_g)
    else:
        x_g = K.sum(x[:, :, :3])
        y_g = K.sum(y[:, :, :3])
        return K.square(x_g - y_g)
    # Experiment with luminance
    # if K.image_data_format() == 'channels_first':
    #     x_g = np.dot(x[0, :3, :, :], [0.2989, 0.5870, 0.1140])
    #     y_g = np.dot(y[0, :3, :, :], [0.2989, 0.5870, 0.1140])
    #     return K.square(x_g - y_g)
    # else:
    #     x_g = np.dot(x[0, :, :, :3], [0.2989, 0.5870, 0.1140])
    #     y_g = np.dot(y[0, :, :, :3], [0.2989, 0.5870, 0.1140])
    #     return K.square(x_g - y_g)


# Returns style layers - this is the default (all); I experimented with
# dropping random ones.
def get_random_style_layers():
    return ['block1_conv1', 'block2_conv1',
            'block3_conv1', 'block4_conv1',
            'block5_conv1']


def get_random_crop(image, width, height):
    '''Returns a random subimage with the given width and height.'''
    if (image is None or width is None or width > image.width
            or height is None or height > image.height):
        raise ValueError('Crop size exceeds image size')
    x_vals = image.width - width
    y_vals = image.height - height
    if (x_vals < 0 or y_vals < 0):
        raise ValueError('Crop size exceeds image size')
    # Crop to width and height, if specified.
    x = 0
    if (x_vals > 0):
        x = int(random.randrange(x_vals))
    y = 0
    if (y_vals > 0):
        y = int(random.randrange(y_vals))
    print('Rand: ', str(x), ', ', str(y), ' from ', str(x_vals), ' by ', str(y_vals))
    box = (x, y, x + width, y + height)
    crop = image.crop(box)
    return crop


def eval_loss_and_grads(x):
    if K.image_data_format() == 'channels_first':
        x = x.reshape((1, 3, side_length, side_length))
    else:
        x = x.reshape((1, side_length, side_length, 3))
    outs = f_outputs([x])
    loss_value = outs[0]
    if len(outs[1:]) == 1:
        grad_values = outs[1].flatten().astype('float64')
    else:
        grad_values = np.array(outs[1:]).flatten().astype('float64')
    return loss_value, grad_values


def random_style_tile():
    # Pick a random style image, then a random side_length square from it.
    image = style_images[random.randrange(len(style_images))]
    return get_random_crop(image, side_length, side_length)


# Caches loss/gradients from a single forward pass so the L-BFGS optimizer
# can ask for them separately without recomputing.
class Evaluator(object):

    def __init__(self):
        self.loss_value = None
        self.grads_values = None

    def loss(self, x):
        assert self.loss_value is None
        loss_value, grad_values = eval_loss_and_grads(x)
        self.loss_value = loss_value
        self.grad_values = grad_values
        return self.loss_value

    def grads(self, x):
        assert self.loss_value is not None
        grad_values = np.copy(self.grad_values)
        self.loss_value = None
        self.grad_values = None
        return grad_values


side_length = 224  # VGG19 is 224x224x3
step = 168         # Tile stride; smaller than side_length so tiles overlap

# Iteration hyperparameters (modify these)
iterations_per_image = 32   # Number of image traversals
samples_per_tile = 5        # Number of style tiles to try per iteration
iterations_per_sample = 16  # Number of style transfer iterations per sample

# Use all png files from the style subdirectory; random 224 squares will be
# taken from these files to perform style transfer - so image size should be
# approximately the dimensions of the content.
style_files = glob.glob('style/*.png')

file_id = random.randrange(696969)

# Load content.png; the size of this image will significantly impact run time.
content_image = load_img('content.png')
content_width, content_height = content_image.size

# Load style images.
style_images = []
for style_name in style_files:
    style_image = load_img(style_name)
    style_images.append(style_image)

# If this setup was run previously, resume from the existing output images.
if os.path.isfile('last_a.png') and os.path.isfile('last_b.png'):
    output_image_a = load_img('last_a.png')
    output_image_b = load_img('last_b.png')
else:
    output_image_a = load_img('content.png')
    output_image_b = load_img('content.png')

# Compute the tile counts from the step size. There will be overlap, and it
# should be a good thing.
x_tiles = int(content_width / step)
y_tiles = int(content_height / step)

# Add interleaved tiles: even-parity tiles go to image "a", odd-parity to "b".
tiles = []
x_start = 0
for i in range(x_tiles):
    y_start = 0
    for j in range(y_tiles):
        if (i + j) % 2 == 0:
            tiles += [(x_start, y_start, True)]
        else:
            tiles += [(x_start, y_start, False)]
        y_start = y_start + step
    x_start = x_start + step

feature_layers = get_random_style_layers()
print('Style layers: ' + str(feature_layers))

# Randomize hyperparameters because I don't know what good values are.
# Modify these/make them fixed.
total_variation_weight = random.uniform(0.001, 0.1)
style_weight = random.uniform(0.001, 0.1)
content_weight = random.uniform(0.0001, 0.1)
fidelity_weight = random.uniform(0.001, 0.1)

# Number of times to cover the entire image (optimize each tile).
for image_iteration in range(iterations_per_image):
    print('Iteration: ', image_iteration)

    # Lean harder on content/fidelity as the passes accumulate.
    fidelity_weight *= 2
    content_weight *= 2

    fname_a = 'output_' + str(file_id) + '_%d_a.png' % image_iteration
    fname_b = 'output_' + str(file_id) + '_%d_b.png' % image_iteration

    # Iterate over each image tile.
    for tile in tiles:
        x_start = tile[0]
        y_start = tile[1]
        is_a = tile[2]
        print('  Processing tile: (', x_start, ', ', y_start, ')')

        style_file = 'style_' + str(x_start) + '_' + str(y_start) + '.png'
        box = (x_start, y_start, x_start + side_length, y_start + side_length)
        tile_content = content_image.crop(box)
        base_image = K.variable(preprocess_image(tile_content))
        best_loss = -1

        # For each tile, sample random portions of the style image(s) and
        # choose the best of the lot.
        for sample_index in range(samples_per_tile):
            # Set the baseline error with the current best style sample.
            if (sample_index == 0 and os.path.isfile(style_file)):
                print('    Using existing style: ', style_file)
                tile_style = load_img(style_file)
                using_best = True
            else:
                print('    Using random tile')
                tile_style = random_style_tile()
                using_best = False

            evaluator = Evaluator()

            if is_a:
                tile_output = output_image_a.crop(box)
            else:
                tile_output = output_image_b.crop(box)

            x = preprocess_image(tile_output)
            style_reference_image = K.variable(preprocess_image(tile_style))

            if K.image_data_format() == 'channels_first':
                combination_image = K.placeholder((1, 3, side_length, side_length))
            else:
                combination_image = K.placeholder((1, side_length, side_length, 3))

            # Combine the 3 images into a single Keras tensor.
            input_tensor = K.concatenate([base_image,
                                          style_reference_image,
                                          combination_image], axis=0)

            # Reinitialize VGG19. There's probably a way to do this once and
            # improve performance.
            model = vgg19.VGG19(input_tensor=input_tensor,
                                weights='imagenet', include_top=False)
            outputs_dict = dict([(layer.name, layer.output) for layer in model.layers])

            # Combine the loss functions into a single scalar.
            loss = K.variable(0.0)
            layer_features = outputs_dict['block5_conv2']
            base_image_features = layer_features[0, :, :, :]
            combination_features = layer_features[2, :, :, :]
            loss = loss + content_weight * content_loss(base_image_features,
                                                        combination_features)
            for layer_name in feature_layers:
                layer_features = outputs_dict[layer_name]
                style_reference_features = layer_features[1, :, :, :]
                combination_features = layer_features[2, :, :, :]
                loss = loss + (style_weight / len(feature_layers)) * \
                    style_loss(style_reference_features, combination_features)
            loss = loss + total_variation_weight * total_variation_loss(combination_image)
            loss = loss + fidelity_weight * fidelity_loss(combination_features,
                                                          base_image_features)

            # Get the gradients of the generated image wrt the loss.
            grads = K.gradients(loss, combination_image)

            outputs = [loss]
            if isinstance(grads, (list, tuple)):
                outputs += grads
            else:
                outputs.append(grads)

            f_outputs = K.function([combination_image], outputs)

            # Use the optimization iterations hyperparameter; for the current
            # best style sample, just do half as many.
            iterations = iterations_per_sample
            if using_best:
                iterations = int(iterations / 2)

            # With this content/style combo, run the style transfer algorithm.
            for iteration in range(iterations):
                x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(),
                                                 fprime=evaluator.grads, maxfun=20)
                if (min_val > 1000000):
                    min_str = str(int(min_val / 1000000)) + 'M'
                elif (min_val > 1000):
                    min_str = str(int(min_val / 1000)) + 'k'
                else:
                    min_str = str(int(min_val))
                print('    Loss[', iteration, ']: ', min_str)

            # If this is the first style sample, or it beat the previous best,
            # keep this output.
            if (best_loss == -1 or min_val < best_loss):
                print('    Updating from prev: ', min_val / 1000000, 'M < ',
                      best_loss / 1000000, 'M')
                img = array_to_img(deprocess_image(x.copy()))
                best_loss = min_val
                if is_a:
                    output_image_a.paste(img, (x_start, y_start))
                else:
                    output_image_b.paste(img, (x_start, y_start))
                save_img(style_file, tile_style)
                print('    Updated style file: ', style_file)
            else:
                print('    Skipping update: ', min_val / 1000000, 'M > ',
                      best_loss / 1000000, 'M')

            # Reset the backend.
            K.clear_session()

        # Save per-tile progress for long runs.
        if (samples_per_tile * iterations_per_sample > 64):
            save_img(fname_a, output_image_a)
            save_img(fname_b, output_image_b)
            save_img('last_a.png', output_image_a)
            save_img('last_b.png', output_image_b)

    # Save once per full traversal of the image.
    save_img(fname_a, output_image_a)
    save_img(fname_b, output_image_b)
    save_img('last_a.png', output_image_a)
    save_img('last_b.png', output_image_b)
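The run leaves two interleaved outputs, last_a.png and last_b.png, one per checkerboard phase of the tile grid. A minimal sketch of one way to merge them, assuming a plain 50/50 Pillow blend (the blend is my choice here, not something the script above does):

# Hypothetical post-processing step: average the two interleaved outputs.
# Assumes last_a.png and last_b.png exist and have identical dimensions.
from PIL import Image

a = Image.open('last_a.png').convert('RGB')
b = Image.open('last_b.png').convert('RGB')
blended = Image.blend(a, b, alpha=0.5)  # 50/50 mix of the two tile phases
blended.save('blended.png')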
Until we meet again, this is John Henry Eden, signing off.