diff --git a/example/cnn_mnist_1d.f90 b/example/cnn_mnist_1d.f90 index 059d09c5..f5aff119 100644 --- a/example/cnn_mnist_1d.f90 +++ b/example/cnn_mnist_1d.f90 @@ -12,7 +12,7 @@ program cnn_mnist_1d real, allocatable :: validation_images(:,:), validation_labels(:) real, allocatable :: testing_images(:,:), testing_labels(:) integer :: n - integer, parameter :: num_epochs = 250 + integer, parameter :: num_epochs = 20 call load_mnist(training_images, training_labels, & validation_images, validation_labels, & diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index 7b48f919..6fe95863 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -160,7 +160,7 @@ end function conv2d interface locally_connected - module function locally_connected2d(filters, kernel_size, activation) result(res) + module function locally_connected2d(filters, kernel_size, activation, stride) result(res) !! 1-d locally connected network constructor !! !! This layer is for building 1-d locally connected network. @@ -183,6 +183,8 @@ module function locally_connected2d(filters, kernel_size, activation) result(res !! Width of the convolution window, commonly 3 or 5 class(activation_function), intent(in), optional :: activation !! Activation function (default sigmoid) + integer, intent(in), optional :: stride + !! Size of the stride (default 1) type(layer) :: res !! 
Resulting layer instance end function locally_connected2d diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index fc630fd2..98fb6748 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -105,12 +105,14 @@ module function conv2d(filters, kernel_width, kernel_height, activation, stride) end function conv2d - module function locally_connected2d(filters, kernel_size, activation) result(res) + module function locally_connected2d(filters, kernel_size, activation, stride) result(res) integer, intent(in) :: filters integer, intent(in) :: kernel_size class(activation_function), intent(in), optional :: activation + integer, intent(in), optional :: stride type(layer) :: res + integer :: stride_tmp class(activation_function), allocatable :: activation_tmp res % name = 'locally_connected2d' @@ -123,9 +125,18 @@ module function locally_connected2d(filters, kernel_size, activation) result(res res % activation = activation_tmp % get_name() + if (present(stride)) then + stride_tmp = stride + else + stride_tmp = 1 + endif + + if (stride_tmp < 1) & + error stop 'stride must be >= 1 in a locally_connected2d layer' + allocate( & res % p, & - source=locally_connected2d_layer(filters, kernel_size, activation_tmp) & + source=locally_connected2d_layer(filters, kernel_size, activation_tmp, stride_tmp) & ) end function locally_connected2d diff --git a/src/nf/nf_locally_connected2d_layer.f90 b/src/nf/nf_locally_connected2d_layer.f90 index b5e98ddd..03bbde89 100644 --- a/src/nf/nf_locally_connected2d_layer.f90 +++ b/src/nf/nf_locally_connected2d_layer.f90 @@ -15,6 +15,7 @@ module nf_locally_connected2d_layer integer :: channels integer :: kernel_size integer :: filters + integer :: stride real, allocatable :: biases(:,:) ! size(filters) real, allocatable :: kernel(:,:,:,:) ! 
filters x channels x window x window @@ -40,12 +41,13 @@ module nf_locally_connected2d_layer end type locally_connected2d_layer interface locally_connected2d_layer - module function locally_connected2d_layer_cons(filters, kernel_size, activation) & + module function locally_connected2d_layer_cons(filters, kernel_size, activation, stride) & result(res) !! `locally_connected2d_layer` constructor function integer, intent(in) :: filters integer, intent(in) :: kernel_size class(activation_function), intent(in) :: activation + integer, intent(in) :: stride type(locally_connected2d_layer) :: res end function locally_connected2d_layer_cons end interface locally_connected2d_layer @@ -91,7 +93,9 @@ end function get_num_params module subroutine get_params_ptr(self, w_ptr, b_ptr) class(locally_connected2d_layer), intent(in), target :: self real, pointer, intent(out) :: w_ptr(:) + !! Pointer to the kernel weights (flattened) real, pointer, intent(out) :: b_ptr(:) + !! Pointer to the biases end subroutine get_params_ptr module function get_gradients(self) result(gradients) @@ -106,7 +110,9 @@ end function get_gradients module subroutine get_gradients_ptr(self, dw_ptr, db_ptr) class(locally_connected2d_layer), intent(in), target :: self real, pointer, intent(out) :: dw_ptr(:) + !! Pointer to the kernel weight gradients (flattened) real, pointer, intent(out) :: db_ptr(:) + !! 
Pointer to the bias gradients end subroutine get_gradients_ptr end interface diff --git a/src/nf/nf_locally_connected2d_layer_submodule.f90 b/src/nf/nf_locally_connected2d_layer_submodule.f90 index 809762ce..1c31eb85 100644 --- a/src/nf/nf_locally_connected2d_layer_submodule.f90 +++ b/src/nf/nf_locally_connected2d_layer_submodule.f90 @@ -7,15 +7,17 @@ contains - module function locally_connected2d_layer_cons(filters, kernel_size, activation) result(res) + module function locally_connected2d_layer_cons(filters, kernel_size, activation, stride) result(res) integer, intent(in) :: filters integer, intent(in) :: kernel_size class(activation_function), intent(in) :: activation + integer, intent(in) :: stride type(locally_connected2d_layer) :: res res % kernel_size = kernel_size res % filters = filters res % activation_name = activation % get_name() + res % stride = stride allocate(res % activation, source = activation) end function locally_connected2d_layer_cons @@ -24,8 +26,11 @@ module subroutine init(self, input_shape) integer, intent(in) :: input_shape(:) self % channels = input_shape(1) - self % width = input_shape(2) - self % kernel_size + 1 + self % width = (input_shape(2) - self % kernel_size) / self % stride +1 + if (mod(input_shape(2) - self % kernel_size , self % stride) /= 0) self % width = self % width + 1 + + ! 
Output of shape: filters x width allocate(self % output(self % filters, self % width)) self % output = 0 @@ -52,14 +57,17 @@ end subroutine init pure module subroutine forward(self, input) class(locally_connected2d_layer), intent(in out) :: self real, intent(in) :: input(:,:) + integer :: input_width integer :: j, n integer :: iws, iwe + + input_width = size(input, dim=2) do j = 1, self % width - iws = j - iwe = j + self % kernel_size - 1 + iws = self % stride * (j-1) + 1 + iwe = min(iws + self % kernel_size - 1, input_width) do n = 1, self % filters - self % z(n, j) = sum(self % kernel(n, j, :, :) * input(:, iws:iwe)) + self % biases(n, j) + self % z(n, j) = sum(self % kernel(n, j, :, 1:iwe-iws+1) * input(:, iws:iwe)) + self % biases(n, j) end do end do self % output = self % activation % eval(self % z) @@ -69,12 +77,15 @@ pure module subroutine backward(self, input, gradient) class(locally_connected2d_layer), intent(in out) :: self real, intent(in) :: input(:,:) real, intent(in) :: gradient(:,:) + integer :: input_width integer :: j, n, k integer :: iws, iwe real :: gdz(self % filters, self % width) real :: db_local(self % filters, self % width) real :: dw_local(self % filters, self % width, self % channels, self % kernel_size) + input_width = size(input, dim=2) + do j = 1, self % width gdz(:, j) = gradient(:, j) * self % activation % eval_prime(self % z(:, j)) end do @@ -90,11 +101,11 @@ pure module subroutine backward(self, input, gradient) do n = 1, self % filters do j = 1, self % width - iws = j - iwe = j + self % kernel_size - 1 + iws = self % stride * (j-1) + 1 + iwe = min(iws + self % kernel_size - 1, input_width) do k = 1, self % channels - dw_local(n, j, k, :) = dw_local(n, j, k, :) + input(k, iws:iwe) * gdz(n, j) - self % gradient(k, iws:iwe) = self % gradient(k, iws:iwe) + self % kernel(n, j, k, :) * gdz(n, j) + dw_local(n, j, k, 1:iwe-iws+1) = dw_local(n, j, k, 1:iwe-iws+1) + input(k, iws:iwe) * gdz(n, j) + self % gradient(k, iws:iwe) = self % 
gradient(k, iws:iwe) + self % kernel(n, j, k, 1:iwe-iws+1) * gdz(n, j) end do end do end do @@ -131,5 +142,4 @@ module subroutine get_gradients_ptr(self, dw_ptr, db_ptr) db_ptr(1:size(self % db)) => self % db end subroutine get_gradients_ptr - end submodule nf_locally_connected2d_layer_submodule diff --git a/test/test_locally_connected2d_layer.f90 b/test/test_locally_connected2d_layer.f90 index 0157b916..c34c1f01 100644 --- a/test/test_locally_connected2d_layer.f90 +++ b/test/test_locally_connected2d_layer.f90 @@ -58,6 +58,7 @@ program test_locally_connected2d_layer select type(this_layer => input_layer % p); type is(input2d_layer) call this_layer % set(sample_input) end select + deallocate(sample_input) call locally_connected_1d_layer % forward(input_layer) call locally_connected_1d_layer % get_output(output) @@ -67,11 +68,33 @@ program test_locally_connected2d_layer write(stderr, '(a)') 'locally_connected2d layer with zero input and sigmoid function must forward to all 0.5.. failed' end if + ! Minimal locally_connected_1d layer: 1 channel, 1x17 input, stride = 3 (last window is partial) + allocate(sample_input(1, 17)) + sample_input = 0 + + input_layer = input(1, 17) + locally_connected_1d_layer = locally_connected(filters, kernel_size, stride = 3) + call locally_connected_1d_layer % init(input_layer) + + select type(this_layer => input_layer % p); type is(input2d_layer) + call this_layer % set(sample_input) + end select + deallocate(sample_input) + + call locally_connected_1d_layer % forward(input_layer) + call locally_connected_1d_layer % get_output(output) + + if (.not. all(abs(output - 0.5) < tolerance)) then + ok = .false. + write(stderr, '(a)') 'locally_connected2d layer with zero input and sigmoid function must forward to all 0.5.. failed' + end if + + ! Final check if (ok) then print '(a)', 'test_locally_connected2d_layer: All tests passed.' else write(stderr, '(a)') 'test_locally_connected2d_layer: One or more tests failed.' stop 1 end if - + end program test_locally_connected2d_layer