The effects of various levels of sampling and quantization on an image were observed. Mean squared error was computed to measure quality of the processed image with respect to original image.
Images were subjected to point operations in order to enhance them. Also, the histograms of images were computed, analyzed and equalized in order to improve the contrast of the image.
Various spatial filters were applied to the images and the results were analyzed. The images were sharpened in order to enhance the edges.
A modified version of Gaussian filter called Bilateral filter was implemented and the results were compared with Gaussian Filters. Morphological filters like erosion and dilation were applied to the images and tested.
Canny edge detection was implemented and the results were compared with Sobel edge detection. Images were analyzed and filtered based on the noise present in the image.
Deconvolution was performed using inverse, pseudo-inverse and wiener filters. The filters were tested on images with gaussian blur, motion blur and additive gaussian noise.
Run length encoding and Huffman encoding were implemented on a given dataset and the resulting compression effects were observed and analyzed.
Block Based Discrete Cosine Transforms were used to analyze images for use in compression. Blurred Images were partially restored using pseudo inverse filters.
Adaptive Thresholding was implemented and the results were compared with Global Thresholding. Corner features in an image were extracted using Harris Corner Detection method.
Block Matching algorithm was implemented to estimate the motion between consecutive frames and compensate the motion in-order to compress the images. Various images were tested with different block sizes and search window sizes.
Sampling is the process by which the values of the image at all the points on the rectangular grid of the image plane are obtained. They can be done at various frequencies/resolutions. However, when the sampling rate (resolution) is below the Nyquist rate, information can be lost. Below you can see the picture of Mickey Mouse at various resolutions ( 640 X 480, 320 X 240, 32 X 24 ). It can be observed that at the resolution of 32 X 24 the figure of Mickey Mouse is not clear, and hence the information has been lost due to aliasing.



#include < stdio.h >
#include < opencv2/opencv.hpp >
using namespace cv;
int main(int argc, char** argv )
{
    // Demonstrate sampling: show the source image at its original resolution,
    // at 320x240 and at 32x24, and archive each under ../results/sampling/.
    Mat image = imread("../images/mickey.jpg", 1);
    // BUG FIX: validate the load BEFORE using the image — the original called
    // resize() first and would crash on a missing file.
    if (!image.data)
    {
        printf("No image data \n");
        return -1;
    }
    // Pre-sized destinations; resize() fills each at its own size.
    cv::Mat resized_image1 = cv::Mat(240, 320, CV_8UC3, cv::Scalar(0, 0, 0));
    cv::Mat resized_image2 = cv::Mat(24, 32, CV_8UC3, cv::Scalar(0, 0, 0));
    int rows = 600;
    int cols = 800;
    resize(image, resized_image1, resized_image1.size(), 0, 0);
    resize(image, resized_image2, resized_image2.size(), 0, 0);
    namedWindow("Original Image", WINDOW_NORMAL);
    // resizeWindow takes (width, height): pass cols then rows — the original
    // passed (rows, cols), producing a 600x800 window instead of 800x600.
    resizeWindow("Original Image", cols, rows);
    imwrite("../results/sampling/original_image.jpg", image);
    imshow("Original Image", image);
    namedWindow("320X240 Image", WINDOW_NORMAL);
    resizeWindow("320X240 Image", cols, rows);
    imwrite("../results/sampling/320_x_240_image.jpg", resized_image1);
    imshow("320X240 Image", resized_image1);
    namedWindow("32X24 Image", WINDOW_NORMAL);
    resizeWindow("32X24 Image", cols, rows);
    imwrite("../results/sampling/32_x_24_image.jpg", resized_image2);
    imshow("32X24 Image", resized_image2);
    waitKey(0);
    return 0;
}
Quantization is the process by which the sampled values of the image are approximated based on the desired number of distinct values. As the desired number of distinct values decreases, the quality of the image also decreases. The quality of the images can be determined using the Average Mean Squared Error, which is the average of squared difference of pixel luminance between the original image and the image with reduced number of distinct values. Below you can see the picture of avengers at various quantizations per channel ( 8-bit, 4-bit, 2-bit, 1-bit ). It can be observed that the mean squared error increases as the image is more approximated, resulting in loss of quality.
#include < stdio.h >
#include < opencv2/opencv.hpp >
#include < math.h >
using namespace cv;
Mat quantize(const Mat& input_image,int number_of_bits)
{
    // Reduce each channel to `number_of_bits` bits of precision by keeping
    // only the top bits of every 8-bit value.
    Mat quantized_image = input_image.clone();
    const uchar mask = (uchar)(0xFF << (8 - number_of_bits));
    for (int row = 0; row < quantized_image.rows; ++row)
        for (int col = 0; col < quantized_image.cols; ++col)
        {
            Vec3b& pixel = quantized_image.at< Vec3b >(row, col);
            pixel[0] &= mask;
            pixel[1] &= mask;
            pixel[2] &= mask;
        }
    return quantized_image;
}
float mean_squared_error(const Mat& image_1, const Mat& image_2)
{
    // Average squared per-channel difference between two equally-sized
    // 3-channel images.
    int m = image_1.rows;
    int n = image_1.cols;
    // double accumulator: a float loses precision after millions of additions.
    double mse = 0.0;
    for(int j = 0; j < m ; j++)
        for(int i = 0; i < n; i++)
        {
            Vec3b rgb_value_1 = image_1.at< Vec3b >(j, i);
            Vec3b rgb_value_2 = image_2.at< Vec3b >(j, i);
            for (int c = 0; c < 3; c++)
            {
                // BUG FIX: the original wrote `abs(d)^2`, but `^` is bitwise
                // XOR in C++, not exponentiation — square by multiplying.
                int d = rgb_value_1[c] - rgb_value_2[c];
                mse += (double)d * d;
            }
        }
    return (float)(mse / ((double)m * n * 3));
}
int main(int argc, char** argv )
{
Mat image, image_4_bit, image_2_bit, image_1_bit;
image = imread( "../images/avengers.jpg", 1 );
if ( !image.data )
{
printf("No image data \n");
return -1;
}
image_4_bit = quantize(image,4);
image_2_bit = quantize(image,2);
image_1_bit = quantize(image,1);
float mse_4_bit, mse_2_bit, mse_1_bit;
mse_4_bit = mean_squared_error(image,image_4_bit);
mse_2_bit = mean_squared_error(image,image_2_bit);
mse_1_bit = mean_squared_error(image,image_1_bit);
std::ostringstream str_4_bit, str_2_bit,str_1_bit;
str_4_bit << "MSE of 4 Bit Image = " << mse_4_bit;
str_2_bit << "MSE of 2 Bit Image = " << mse_2_bit;
str_1_bit << "MSE of 1 Bit Image = " << mse_1_bit;
namedWindow("Original Image", WINDOW_AUTOSIZE);
imwrite( "../results/quantization/original_image.jpg", image );
imshow("Original Image", image);
namedWindow("4 bit Image", WINDOW_AUTOSIZE);
putText(image_4_bit, str_4_bit.str(), cvPoint(30,30),
FONT_HERSHEY_COMPLEX_SMALL, 0.8, cvScalar(200,200,250), 1, CV_AA);
imwrite( "../results/quantization/4_bit_image.jpg", image_4_bit);
imshow("4 bit Image", image_4_bit);
namedWindow("2 bit Image", WINDOW_AUTOSIZE);
putText(image_2_bit, str_2_bit.str(), cvPoint(30,30),
FONT_HERSHEY_COMPLEX_SMALL, 0.8, cvScalar(200,200,250), 1, CV_AA);
imwrite( "../results/quantization/2_bit_image.jpg", image_2_bit);
imshow("2 bit Image", image_2_bit);
namedWindow("1 bit Image", WINDOW_AUTOSIZE);
putText(image_1_bit, str_1_bit.str(), cvPoint(30,30),
FONT_HERSHEY_COMPLEX_SMALL, 0.8, cvScalar(200,200,250), 1, CV_AA);
imwrite( "../results/quantization/1_bit_image.jpg", image_1_bit);
imshow("1 bit Image", image_1_bit);
waitKey(0);
return 0;
}
Point Operations are memoryless operations that functionally maps the intensity levels of an image to another level. This can be used to enhance the appearance of the image and feature extraction. Some of the point operations done on the image of Einstein are shown below.
RGB images can be converted to grayscale by taking a proportion of the value in each color channel.
Inverse operation gives the negative of the image.
Log operations, on the image intensities, non-linearly transform the image.
Gamma correction can be done to compensate for the non-linearities in brightness of various displays.
#include < stdio.h >
#include < opencv2/opencv.hpp >
#include < math.h >
#define R_WEIGHT 0.2989
#define G_WEIGHT 0.5870
#define B_WEIGHT 0.1140
#define TRANSFORMATION_CONSTANT 25
#define GAMMA 0.95
using namespace cv;
Mat convert_to_grayscale(const Mat& input_image)
{
    // Convert a 3-channel image to grayscale with the Rec.601 luma weights.
    // BUG FIX: OpenCV's imread loads pixels in BGR order, so channel 0 is
    // blue and channel 2 is red. The original applied R_WEIGHT to channel 0,
    // swapping the red and blue contributions and skewing the luminance.
    Mat rgb_image = input_image.clone();
    Mat grayscale_image = cv::Mat(rgb_image.rows, rgb_image.cols, CV_8UC1, cv::Scalar(0, 0, 0));
    for(int j = 0; j < grayscale_image.rows; j++)
        for(int i = 0; i < grayscale_image.cols; i++)
        {
            Vec3b bgr_value = rgb_image.at< Vec3b >(j, i);
            grayscale_image.at< uchar >(j,i) =
                B_WEIGHT*bgr_value[0] + G_WEIGHT*bgr_value[1] + R_WEIGHT*bgr_value[2];
        }
    return grayscale_image;
}
Mat log_transformation(const Mat& input_image)
{
    // Map each gray level v to c*log(1 + v), compressing the dynamic range
    // (dark regions are expanded, bright regions compressed).
    Mat log_image = input_image.clone();
    for (int row = 0; row < log_image.rows; ++row)
        for (int col = 0; col < log_image.cols; ++col)
        {
            uchar& px = log_image.at< uchar >(row, col);
            px = TRANSFORMATION_CONSTANT * log(px + 1);
        }
    return log_image;
}
Mat inverse_transformation(const Mat& input_image)
{
    // Photographic negative: every gray level v becomes 255 - v.
    Mat inverse_image = input_image.clone();
    for (int row = 0; row < inverse_image.rows; ++row)
        for (int col = 0; col < inverse_image.cols; ++col)
        {
            uchar& px = inverse_image.at< uchar >(row, col);
            px = 255 - px;
        }
    return inverse_image;
}
Mat gamma_correction(const Mat& input_image)
{
    // Standard gamma correction: v -> 255 * (v / 255)^GAMMA.
    // BUG FIX: the original applied pow() directly to the raw 0-255 value
    // (pow(v, GAMMA)), which is not gamma correction — it darkens the image
    // non-uniformly instead of remapping normalized intensities.
    Mat gamma_corrected_image = input_image.clone();
    for(int j = 0; j < gamma_corrected_image.rows ; j++)
        for(int i = 0; i < gamma_corrected_image.cols; i++)
        {
            double normalized = gamma_corrected_image.at< uchar >(j,i) / 255.0;
            gamma_corrected_image.at< uchar >(j,i) =
                saturate_cast< uchar >(255.0 * pow(normalized, GAMMA));
        }
    return gamma_corrected_image;
}
int main(int argc, char** argv )
{
Mat image, grayscale_image, log_image, inverse_image, gamma_corrected_image;
image = imread( "../images/einstein.jpg", 1 );
if ( !image.data )
{
printf("No image data \n");
return -1;
}
grayscale_image = convert_to_grayscale(image);
inverse_image = inverse_transformation(grayscale_image);
log_image = log_transformation(grayscale_image);
gamma_corrected_image = gamma_correction(grayscale_image);
namedWindow("RGB Image", WINDOW_AUTOSIZE);
imwrite( "../results/point_transformations/rgb_image.jpg", image );
imshow("RGB Image", image);
namedWindow("Grayscale Image", WINDOW_AUTOSIZE);
imwrite( "../results/point_transformations/grayscale_image.jpg", grayscale_image );
imshow("Grayscale Image", grayscale_image);
namedWindow("Inverse Image", WINDOW_AUTOSIZE);
imwrite( "../results/point_transformations/inverse_image.jpg", inverse_image );
imshow("Inverse Image", inverse_image);
namedWindow("Log Image", WINDOW_AUTOSIZE);
imwrite( "../results/point_transformations/log_image.jpg", log_image );
imshow("Log Image", log_image);
namedWindow("Gamma Corrected Image", WINDOW_AUTOSIZE);
imwrite( "../results/point_transformations/gamma_corrected_image.jpg", gamma_corrected_image );
imshow("Gamma Corrected Image", gamma_corrected_image);
waitKey(0);
return 0;
}
Histogram gives the number of occurrences of various intensity levels in an image. It can be used to determine the quality of the image. For an image to be visually appealing, the histogram has to be spread apart rather than being concentrated in a narrow range. An image with a balanced histogram would be of high contrast, hence more attractive. Images with unbalanced histograms can be equalized in order to increase the contrast. However, a side effect of equalization is that a few features might be lost. In this case, the house has disappeared.


#include < opencv2/opencv.hpp >
#include < stdio.h >
#define R_WEIGHT 0.2989
#define G_WEIGHT 0.5870
#define B_WEIGHT 0.1140
using namespace cv;
Mat convert_to_grayscale(const Mat& input_image)
{
    // Convert a 3-channel image to grayscale with the Rec.601 luma weights.
    // BUG FIX: OpenCV's imread loads pixels in BGR order, so channel 0 is
    // blue and channel 2 is red. The original applied R_WEIGHT to channel 0,
    // swapping the red and blue contributions and skewing the luminance.
    Mat rgb_image = input_image.clone();
    Mat grayscale_image = cv::Mat(rgb_image.rows, rgb_image.cols, CV_8UC1, cv::Scalar(0, 0, 0));
    for(int j = 0; j < grayscale_image.rows; j++)
        for(int i = 0; i < grayscale_image.cols; i++)
        {
            Vec3b bgr_value = rgb_image.at< Vec3b >(j, i);
            grayscale_image.at< uchar >(j,i) =
                B_WEIGHT*bgr_value[0] + G_WEIGHT*bgr_value[1] + R_WEIGHT*bgr_value[2];
        }
    return grayscale_image;
}
void compute_histogram(const Mat& input_image, int histogram[])
{
    // Build a 256-bin intensity histogram of a single-channel 8-bit image.
    for (int bin = 0; bin < 256; ++bin)
        histogram[bin] = 0;
    // Count how often each gray level occurs.
    for (int row = 0; row < input_image.rows; ++row)
        for (int col = 0; col < input_image.cols; ++col)
            ++histogram[input_image.at< uchar >(row, col)];
}
void display_histogram(int histogram[], const char* name)
{
    // Render a 256-bin histogram as a 512x400 image, show it in a window and
    // save it under ../results/histogram_equalization/<name>.jpg.
    int bars[256];
    for (int i = 0; i < 256; ++i)
        bars[i] = histogram[i];
    const int hist_w = 512;
    const int hist_h = 400;
    int bin_w = cvRound((double)hist_w / 256);
    Mat histogram_image(hist_h, hist_w, CV_8UC1, Scalar(255, 255, 255));
    // Tallest bin — used to normalize bar heights to the canvas height.
    int peak = bars[0];
    for (int i = 1; i < 256; ++i)
        if (bars[i] > peak)
            peak = bars[i];
    for (int i = 0; i < 256; ++i)
        bars[i] = ((double)bars[i] / peak) * histogram_image.rows;
    // Draw one vertical black line per bin, rising from the bottom edge.
    for (int i = 0; i < 256; ++i)
        line(histogram_image, Point(bin_w * i, hist_h),
             Point(bin_w * i, hist_h - bars[i]),
             Scalar(0, 0, 0), 1, 8, 0);
    namedWindow(name, CV_WINDOW_AUTOSIZE);
    imshow(name, histogram_image);
    std::ostringstream save_path;
    save_path << "../results/histogram_equalization/" << name << ".jpg";
    imwrite(save_path.str(), histogram_image);
}
void compute_cumulative_histogram(int histogram[], int cumulative_histogram[])
{
    // Prefix sum: cumulative[j] = histogram[0] + ... + histogram[j].
    int running_total = 0;
    for (int bin = 0; bin < 256; ++bin)
    {
        running_total += histogram[bin];
        cumulative_histogram[bin] = running_total;
    }
}
// Scale every cumulative count by `scaling_factor`, rounding to nearest.
// BUG FIX: the original declared an `int` return type but never returned a
// value — undefined behaviour in C++. Nothing reads a result (the caller
// ignores it), so `void` is the correct signature.
void scale_histogram(int cumulative_histogram[],int scaled_histogram[], float scaling_factor)
{
    for(int j = 0; j < 256 ; j++)
    {
        scaled_histogram[j] = cvRound(cumulative_histogram[j]*scaling_factor);
    }
}
Mat equalize_image(const Mat& input_image, int histogram[])
{
    // Histogram equalization: remap each gray level through the scaled
    // cumulative distribution so intensities spread over the full 0-255 range.
    int cumulative_histogram[256], scaled_histogram[256];
    int image_size = input_image.rows*input_image.cols;
    // Scaling factor = max intensity / total number of pixels.
    float scaling_factor = 255.0/image_size;
    // NOTE: the original also computed a per-intensity probability array here,
    // but the integer division `histogram[i]/image_size` truncated every entry
    // to 0 and the array was never read — dead code, removed.
    compute_cumulative_histogram(histogram, cumulative_histogram);
    scale_histogram(cumulative_histogram, scaled_histogram, scaling_factor);
    Mat equalized_image = input_image.clone();
    // Replace each pixel with the scaled cumulative count of its intensity.
    for(int j = 0; j < equalized_image.rows; j++)
        for(int i = 0; i < equalized_image.cols; i++)
        {
            equalized_image.at< uchar >(j,i) =
                saturate_cast< uchar >(scaled_histogram[equalized_image.at< uchar >(j,i)]);
        }
    return equalized_image;
}
int main(int argc, char** argv )
{
Mat rgb_image, grayscale_image, equalized_image;
rgb_image = imread( "../images/trees.jpg", 1 );
if ( !rgb_image.data )
{
printf("No image data \n");
return -1;
}
int histogram[256], equalized_histogram[256];
grayscale_image = convert_to_grayscale(rgb_image);
compute_histogram(grayscale_image, histogram);
equalized_image = equalize_image(grayscale_image, histogram);
compute_histogram(equalized_image, equalized_histogram);
namedWindow("Original Image", WINDOW_AUTOSIZE);
imwrite( "../results/histogram_equalization/original_image.jpg", rgb_image );
imshow("Original Image", grayscale_image);
display_histogram(histogram, "original_histogram");
namedWindow("Equalized Image", WINDOW_AUTOSIZE);
imwrite( "../results/histogram_equalization/equalized_image.jpg", equalized_image );
imshow("Equalized Image", equalized_image);
display_histogram(equalized_histogram, "equalized_histogram");
waitKey(0);
return 0;
}
These filters are basically used for enhancing the images by removing the noise. This process can also be called as smoothing. Spatial filters take into account only the distance between neighbouring pixels and not the value of the pixels. These filters can either be linear (like Mean Filters, Gaussian Filters) or non-linear (like Median Filters). Linear filters/kernels are usually applied to an image using convolution. The results of application of spatial filters, with kernel size of 7 X 7, to noisy images are shown below.

An averaging kernel is convolved with the image. Although the noise has reduced, the resulting image is not good enough.
The gaussian kernel gives more importance to closer pixels than distant ones. When such a filter is convolved with images with salt and pepper noise, the results are not good.
This is a non linear filter, which takes into the account only the median of the neighbouring pixels. As seen below, this filter has almost perfectly restored the noisy image.
#include < iostream >
#include < opencv2/opencv.hpp >
#include < math.h >
#include < stdlib.h >
#define PI 3.14159
#define R_WEIGHT 0.2989
#define G_WEIGHT 0.5870
#define B_WEIGHT 0.1140
using namespace cv;
void swap(int & a1,int & a2)
{
    // Exchange two ints via a temporary.
    // BUG FIX: the original arithmetic trick (a1 = a1 + a2; ...) invokes
    // signed-overflow UB for large values and zeroes both when the two
    // references alias the same object; a plain temporary has neither problem.
    int tmp = a1;
    a1 = a2;
    a2 = tmp;
}
void merge(int A[ ] , int start, int mid, int end) {
    // Merge the two sorted runs A[start..mid] and A[mid+1..end] in place,
    // staging the result in a temporary buffer of the combined length.
    int left = start;       // cursor into the first run
    int right = mid + 1;    // cursor into the second run
    int scratch[end - start + 1];
    int count = 0;
    while (count < end - start + 1) {
        if (left > mid)                    // first run exhausted
            scratch[count++] = A[right++];
        else if (right > end)              // second run exhausted
            scratch[count++] = A[left++];
        else if (A[left] < A[right])       // take the smaller head element
            scratch[count++] = A[left++];
        else
            scratch[count++] = A[right++];
    }
    // Copy the merged run back into A[start..end].
    for (int idx = 0; idx < count; ++idx)
        A[start + idx] = scratch[idx];
}
void merge_sort (int A[ ] , int start , int end ) {
    // Recursive merge sort over A[start..end] (inclusive bounds).
    if (start >= end)
        return;  // zero or one element: already sorted
    int mid = (start + end) / 2;
    merge_sort(A, start, mid);      // sort the left half
    merge_sort(A, mid + 1, end);    // sort the right half
    merge(A, start, mid, end);      // merge the two sorted halves
}
Mat convert_to_grayscale(const Mat& input_image)
{
    // Convert a 3-channel image to grayscale with the Rec.601 luma weights.
    // BUG FIX: OpenCV's imread loads pixels in BGR order, so channel 0 is
    // blue and channel 2 is red. The original applied R_WEIGHT to channel 0,
    // swapping the red and blue contributions and skewing the luminance.
    Mat rgb_image = input_image.clone();
    Mat grayscale_image = cv::Mat(rgb_image.rows, rgb_image.cols, CV_8UC1, cv::Scalar(0, 0, 0));
    for(int j = 0; j < grayscale_image.rows; j++)
        for(int i = 0; i < grayscale_image.cols; i++)
        {
            Vec3b bgr_value = rgb_image.at< Vec3b >(j, i);
            grayscale_image.at< uchar >(j,i) =
                B_WEIGHT*bgr_value[0] + G_WEIGHT*bgr_value[1] + R_WEIGHT*bgr_value[2];
        }
    return grayscale_image;
}
Mat image_padding(const Mat& input_image, int offset)
{
    // Copy the image into the center of a canvas that is `offset` pixels
    // larger on every side; the border stays zero (black).
    Mat padded_image = Mat(input_image.rows+2*offset, input_image.cols+2*offset, CV_8UC1, 0.0);
    for (int row = 0; row < input_image.rows; ++row)
        for (int col = 0; col < input_image.cols; ++col)
            padded_image.at< uchar >(row + offset, col + offset) =
                input_image.at< uchar >(row, col);
    return padded_image;
}
Mat image_depadding(const Mat& input_image, int offset)
{
    // Inverse of image_padding: crop away the `offset`-pixel border.
    Mat depadded_image = Mat(input_image.rows-2*offset, input_image.cols-2*offset, CV_8UC1, 0.0);
    for (int row = 0; row < depadded_image.rows; ++row)
        for (int col = 0; col < depadded_image.cols; ++col)
            depadded_image.at< uchar >(row, col) =
                input_image.at< uchar >(row + offset, col + offset);
    return depadded_image;
}
Mat convolve(const Mat& input_image, const Mat& kernel)
{
    // 2-D convolution of a grayscale image with a square kernel.
    // The image is zero-padded so the output keeps the input's size.
    int kernel_size = kernel.rows;
    int offset;
    if(kernel_size % 2 != 0)
    {
        offset = (kernel_size+1)/2 - 1;
    }
    else
    {
        offset = (kernel_size)/2 - 1;
    }
    Mat padded_image = image_padding(input_image, offset);
    // Convolution flips the kernel in both axes before sliding it.
    Mat flipped_kernel = Mat(kernel.rows, kernel.cols, CV_32F, 0.0);
    for(int m = 0; m < kernel_size ; m++)
        for(int n = 0; n < kernel_size; n++)
        {
            flipped_kernel.at< float >(m,n) = kernel.at< float >(kernel_size-m-1,kernel_size-n-1);
        }
    Mat convolved_image = Mat(padded_image.rows, padded_image.cols, CV_8UC1, 0.0);
    for(int j = offset; j < padded_image.rows - offset ; j++)
        for(int i = offset; i < padded_image.cols - offset; i++)
        {
            float value = 0.0;
            for(int m = 0; m < kernel_size ; m++)
                for(int n = 0; n < kernel_size; n++)
                {
                    value += (padded_image.at< uchar >(j+m-offset,i+n-offset))*flipped_kernel.at< float >(m,n);
                }
            // BUG FIX: the original `(int)value` cast wrapped modulo 256 for
            // kernel responses outside [0, 255]; saturate_cast clamps (and
            // rounds) instead, which is what image filtering expects.
            convolved_image.at< uchar >(j,i) = saturate_cast< uchar >(value);
        }
    Mat depadded_image = image_depadding(convolved_image, offset);
    return depadded_image;
}
Mat gaussian_filter(const Mat& input_image, int kernel_size, float sigma)
{
    // Build a normalized 2-D Gaussian kernel and convolve the image with it.
    Mat kernel = Mat(kernel_size, kernel_size, CV_32F, 0.0);
    // Index of the kernel center (same split as the convolution offset).
    int k = (kernel_size % 2 != 0) ? (kernel_size - 1) / 2 : kernel_size / 2;
    // Evaluate the Gaussian at every offset from the center, tracking the sum.
    float total = 0.0;
    for (int row = 0; row < kernel_size; ++row)
        for (int col = 0; col < kernel_size; ++col)
        {
            float g = (float)((1.0 / (2.0 * PI * pow(sigma, 2))) *
                exp(-1.0 * (pow(col + 1 - (k + 1), 2) + pow(row + 1 - (k + 1), 2)) /
                    (2.0 * pow(sigma, 2))));
            kernel.at< float >(row, col) = g;
            total += g;
        }
    // Normalize so the weights sum to 1 (preserves overall brightness).
    for (int row = 0; row < kernel_size; ++row)
        for (int col = 0; col < kernel_size; ++col)
            kernel.at< float >(row, col) = kernel.at< float >(row, col) / total;
    return convolve(input_image, kernel);
}
Mat mean_filter(const Mat& input_image, int kernel_size)
{
    // Box filter: every kernel weight is 1 / (kernel_size^2).
    Mat kernel = Mat(kernel_size, kernel_size, CV_32F, 0.0);
    const float weight = 1.0 / pow(kernel_size, 2);
    for (int row = 0; row < kernel_size; ++row)
        for (int col = 0; col < kernel_size; ++col)
            kernel.at< float >(row, col) = weight;
    return convolve(input_image, kernel);
}
Mat median_filter(const Mat& input_image, int kernel_size)
{
    // Non-linear median filter: each output pixel is the median of its
    // kernel_size x kernel_size neighborhood.
    int offset;
    if(kernel_size % 2 != 0)
    {
        offset = (kernel_size+1)/2 - 1;
    }
    else
    {
        offset = (kernel_size)/2 - 1;
    }
    Mat padded_image = image_padding(input_image, offset);
    Mat filtered_image = Mat(padded_image.rows, padded_image.cols, CV_8UC1, 0.0);
    int size = kernel_size*kernel_size;
    int neighborhood[size];
    // Index of the median element once the neighborhood is sorted.
    int mid;
    if(kernel_size % 2 != 0)
    {
        mid = (size+1)/2-1;
    }
    else
    {
        mid = (size)/2-1;
    }
    // (Removed an unused `float value` local from the original.)
    for(int j = offset; j < padded_image.rows - offset ; j++)
        for(int i = offset; i < padded_image.cols - offset; i++)
        {
            // Gather the neighborhood around (j, i)...
            int k = 0;
            for(int m = 0; m < kernel_size ; m++)
                for(int n = 0; n < kernel_size; n++)
                {
                    neighborhood[k] = padded_image.at< uchar >(j+m-offset,i+n-offset);
                    k++;
                }
            // ...sort it and keep the middle value.
            merge_sort(neighborhood, 0, size-1);
            filtered_image.at< uchar >(j,i) = neighborhood[mid];
        }
    Mat depadded_image = image_depadding(filtered_image, offset);
    return depadded_image;
}
int main(int argc, char** argv )
{
Mat image, grayscale_image, gaussian_filtered_image, mean_filtered_image, median_filtered_image;
image = imread( "../images/balloons.png", 1 );
if ( !image.data )
{
printf("No image data \n");
return -1;
}
grayscale_image = convert_to_grayscale(image);
gaussian_filtered_image = gaussian_filter(grayscale_image,7,1.4);
mean_filtered_image = mean_filter(grayscale_image,7);
median_filtered_image = median_filter(grayscale_image,7);
namedWindow("Original Image", WINDOW_AUTOSIZE);
imwrite( "../results/filter/grayscale_image.jpg", grayscale_image );
imshow("Original Image", grayscale_image);
namedWindow("Gaussian Filtered Image", WINDOW_AUTOSIZE);
imwrite( "../results/filter/gaussian_filtered_image.jpg", gaussian_filtered_image );
imshow("Gaussian Filtered Image", gaussian_filtered_image );
namedWindow("Mean Filtered Image", WINDOW_AUTOSIZE);
imwrite( "../results/filter/mean_filtered_image.jpg", mean_filtered_image );
imshow("Mean Filtered Image", mean_filtered_image);
namedWindow("Median Filtered Image2", WINDOW_AUTOSIZE);
imwrite( "../results/filter/median_filtered_image.jpg", median_filtered_image );
imshow("Median Filtered Image", median_filtered_image);
waitKey(0);
return 0;
}
Sharpening is the process by which the edges of an image are enhanced. It is basically a 3 step process. The first step involves low pass filtering an image using either an averaging or gaussian filter. Then the resultant image is subtracted from the input image, which is equivalent to high pass filtering an image. A proportion of this high pass filtered image , as determined by alpha, is added to the input image. This results in a sharpened image.
#include < iostream >
#include < opencv2/opencv.hpp >
#include < math.h >
#include < stdlib.h >
#define PI 3.14159
#define R_WEIGHT 0.2989
#define G_WEIGHT 0.5870
#define B_WEIGHT 0.1140
using namespace cv;
Mat convert_to_grayscale(const Mat& input_image)
{
    // Convert a 3-channel image to grayscale with the Rec.601 luma weights.
    // BUG FIX: OpenCV's imread loads pixels in BGR order, so channel 0 is
    // blue and channel 2 is red. The original applied R_WEIGHT to channel 0,
    // swapping the red and blue contributions and skewing the luminance.
    Mat rgb_image = input_image.clone();
    Mat grayscale_image = cv::Mat(rgb_image.rows, rgb_image.cols, CV_8UC1, cv::Scalar(0, 0, 0));
    for(int j = 0; j < grayscale_image.rows; j++)
        for(int i = 0; i < grayscale_image.cols; i++)
        {
            Vec3b bgr_value = rgb_image.at< Vec3b >(j, i);
            grayscale_image.at< uchar >(j,i) =
                B_WEIGHT*bgr_value[0] + G_WEIGHT*bgr_value[1] + R_WEIGHT*bgr_value[2];
        }
    return grayscale_image;
}
Mat sharpen(const Mat& input_image, int kernel_size, float alpha)
{
    // Unsharp masking: sharpened = input + alpha * (input - lowpass(input)).
    Mat low_pass = input_image.clone();
    // Step 1: low-pass filter the input with a box blur.
    blur(input_image, low_pass, Size(kernel_size, kernel_size));
    Mat high_pass = input_image.clone();
    Mat sharpened = input_image.clone();
    for (int row = 0; row < input_image.rows; ++row)
        for (int col = 0; col < input_image.cols; ++col)
        {
            uchar original_px = input_image.at< uchar >(row, col);
            // Step 2: high-pass = input - low-pass, saturated to [0, 255].
            uchar hp = saturate_cast< uchar >(original_px - low_pass.at< uchar >(row, col));
            high_pass.at< uchar >(row, col) = hp;
            // Step 3: add back a fraction of the high-pass detail, saturated.
            sharpened.at< uchar >(row, col) = saturate_cast< uchar >(original_px + alpha * hp);
        }
    // Display and archive the intermediate images as well.
    namedWindow("HPF Image", WINDOW_AUTOSIZE);
    imwrite("../results/sharpening/high_pass_filtered_image.jpg", high_pass);
    imshow("HPF Image", high_pass);
    namedWindow("LPF Image", WINDOW_AUTOSIZE);
    imwrite("../results/sharpening/low_pass_filtered_image.jpg", low_pass);
    imshow("LPF Image", low_pass);
    return sharpened;
}
int main(int argc, char** argv )
{
Mat image, grayscale_image, sharpened_image, sharpened_image2, sharpened_image3;
image = imread( "../images/moon.png", 1 );
if ( !image.data )
{
printf("No image data \n");
return -1;
}
grayscale_image = convert_to_grayscale(image);
sharpened_image = sharpen(grayscale_image,9,0.5);
namedWindow("Original Image", WINDOW_AUTOSIZE);
imwrite( "../results/sharpening/grayscale_image.jpg", grayscale_image );
imshow("Original Image", grayscale_image);
namedWindow("Sharpened Image", WINDOW_AUTOSIZE);
imwrite( "../results/sharpening/sharpened_image.jpg", sharpened_image );
imshow("Sharpened Image", sharpened_image);
waitKey(0);
return 0;
}
This is a modification of the Gaussian filter. This filter utilizes both the spatial difference and the pixel difference. Hence this filter is able to remove noise while preserving the edges. This filter is non-linear in nature due to the additional range weight component that is multiplied with the Gaussian/spatial weight. Hence the computation cannot be done in the frequency domain. From the results below it can be seen that the Bilateral Filter outperforms the Gaussian Filter. The bilateral filter smooths the entire image except at the edges.




#include < iostream >
#include < opencv2/opencv.hpp >
using namespace cv;
Mat image_padding(const Mat& input_image, int offset)
{
    // Embed the image in a canvas `offset` pixels larger on every side;
    // the surrounding border stays zero (black).
    Mat padded_image = Mat(input_image.rows+2*offset, input_image.cols+2*offset, CV_8UC1, 0.0);
    for (int row = 0; row < input_image.rows; ++row)
        for (int col = 0; col < input_image.cols; ++col)
            padded_image.at< uchar >(row + offset, col + offset) =
                input_image.at< uchar >(row, col);
    return padded_image;
}
Mat image_depadding(const Mat& input_image, int offset)
{
    // Inverse of image_padding: crop away the `offset`-pixel border.
    Mat depadded_image = Mat(input_image.rows-2*offset, input_image.cols-2*offset, CV_8UC1, 0.0);
    for (int row = 0; row < depadded_image.rows; ++row)
        for (int col = 0; col < depadded_image.cols; ++col)
            depadded_image.at< uchar >(row, col) =
                input_image.at< uchar >(row + offset, col + offset);
    return depadded_image;
}
Mat bilateral_convolve(const Mat& input_image, const Mat& gaussian_kernel, float sigma_r)
{
    // Bilateral filtering: each neighbor's weight is the product of the
    // precomputed spatial Gaussian and a range Gaussian on the intensity
    // difference; the response is normalized by the total weight.
    int kernel_size = gaussian_kernel.rows;
    int offset;
    if(kernel_size % 2 != 0)
    {
        offset = (kernel_size+1)/2 - 1;
    }
    else
    {
        offset = (kernel_size)/2 - 1;
    }
    Mat padded_image = image_padding(input_image, offset);
    Mat convolved_image = Mat(padded_image.rows, padded_image.cols, CV_8UC1, 0.0);
    float value, range_weight, weight, cumulative_weight;
    int neighboring_pixel_intensity;
    for(int j = offset; j < padded_image.rows - offset ; j++)
        for(int i = offset; i < padded_image.cols - offset; i++)
        {
            int pixel_intensity = padded_image.at< uchar >(j,i);
            cumulative_weight = 0.0;
            value = 0.0;
            for(int m = 0; m < kernel_size ; m++)
                for(int n = 0; n < kernel_size; n++)
                {
                    neighboring_pixel_intensity = padded_image.at< uchar >(j+m-offset,i+n-offset);
                    // Range weight penalizes large intensity differences,
                    // which is what preserves edges.
                    range_weight = (float) exp(-1.0* ( pow(pixel_intensity - neighboring_pixel_intensity,2) ) / ( 2.0 * pow(sigma_r,2) ) );
                    weight = gaussian_kernel.at< float >(m,n) * range_weight;
                    value += neighboring_pixel_intensity*weight;
                    cumulative_weight += weight;
                }
            // NOTE(review): this statement was garbled in the original source
            // (fused into a commented-out debug std::cout); reconstructed from
            // the surviving fragment "(j,i) = (int) (value/cumulative_weight)":
            // normalize the accumulated response by the total weight.
            convolved_image.at< uchar >(j,i) = (int) (value/cumulative_weight);
        }
    Mat depadded_image = image_depadding(convolved_image, offset);
    return depadded_image;
}
Mat bilateral_filter(const Mat& input_image, int kernel_size, float sigma_g, float sigma_r)
{
    // Precompute the (unnormalized) spatial Gaussian kernel; the range term
    // is evaluated per pixel inside bilateral_convolve, which also does the
    // normalization, so no constant factor is needed here.
    Mat kernel = Mat(kernel_size, kernel_size, CV_32F, 0.0);
    // Index of the kernel center.
    int k = (kernel_size % 2 != 0) ? (kernel_size - 1) / 2 : kernel_size / 2;
    for (int row = 0; row < kernel_size; ++row)
        for (int col = 0; col < kernel_size; ++col)
            kernel.at< float >(row, col) = (float)exp(
                -1.0 * (pow(col + 1 - (k + 1), 2) + pow(row + 1 - (k + 1), 2)) /
                (2.0 * pow(sigma_g, 2)));
    return bilateral_convolve(input_image, kernel, sigma_r);
}
int main(int argc, char** argv )
{
Mat image1,image2, grayscale_image1,grayscale_image2, bilaterally_filtered_image1,bilaterally_filtered_image2, gaussian_filtered_image1,gaussian_filtered_image2;
image1 = imread( "../images/bike.jpg", 1 );
image2 = imread( "../images/tajmahal.jpg", 1 );
if ( !image1.data || !image2.data )
{
printf("No image data \n");
return -1;
}
cvtColor( image1, grayscale_image1, CV_BGR2GRAY );
GaussianBlur(grayscale_image1, gaussian_filtered_image1, Size(7,7),0,0);
bilaterally_filtered_image1 = bilateral_filter(grayscale_image1,7,15,10);
cvtColor( image2, grayscale_image2, CV_BGR2GRAY );
GaussianBlur(grayscale_image2, gaussian_filtered_image2, Size(7,7),0,0);
bilaterally_filtered_image2 = bilateral_filter(grayscale_image2,7,15,10);
namedWindow("Original Image 1", WINDOW_AUTOSIZE);
imwrite( "../results/bilateral_filter/grayscale_image1.jpg", grayscale_image1 );
imshow("Original Image 1", grayscale_image1);
namedWindow("Bilaterally Filtered Image 1", WINDOW_AUTOSIZE);
imwrite( "../results/bilateral_filter/bilaterally_filtered_image1.jpg", bilaterally_filtered_image1 );
imshow("Bilaterally Filtered Image 1", bilaterally_filtered_image1 );
namedWindow("Gaussian Filtered Image 1", WINDOW_AUTOSIZE);
imwrite( "../results/bilateral_filter/gaussian_filtered_image1.jpg", gaussian_filtered_image1);
imshow("Gaussian Filtered Image 1", gaussian_filtered_image1);
namedWindow("Original Image 2", WINDOW_AUTOSIZE);
imwrite( "../results/bilateral_filter/grayscale_image2.jpg", grayscale_image2 );
imshow("Original Image 2", grayscale_image2);
namedWindow("Bilaterally Filtered Image 2", WINDOW_AUTOSIZE);
imwrite( "../results/bilateral_filter/bilaterally_filtered_image2.jpg", bilaterally_filtered_image2 );
imshow("Bilaterally Filtered Image 2", bilaterally_filtered_image2 );
namedWindow("Gaussian Filtered Image 2", WINDOW_AUTOSIZE);
imwrite( "../results/bilateral_filter/gaussian_filtered_image2.jpg", gaussian_filtered_image2 );
imshow("Gaussian Filtered Image 2", gaussian_filtered_image2 );
waitKey(0);
return 0;
}
These are non-linear filters, that filter images based on shapes. Dilation computes the maximum value of all the pixels in the neighborhood. Erosion computes the minimum value of all the pixels in the neighborhood. Depending on whether the edges are black or white, both these operations may be used to make the edges thinner or thicker. In the images below, the black edges are made thicker by erosion and are made thinner by dilation. These kind of operations are useful in segmentation.
#include < iostream >
#include < opencv2/opencv.hpp >
using namespace cv;
void merge(int A[ ] , int start, int mid, int end) {
    // Merge the two already-sorted runs A[start..mid] and A[mid+1..end]
    // into one sorted run, in place.
    // Fix: the temporary buffer was a variable-length array
    // (`int Arr[end-start+1]`), which is not standard C++; use a
    // heap-allocated std::vector instead.
    int p = start, q = mid + 1;   // read cursors for the two runs
    std::vector<int> buffer(end - start + 1);
    int k = 0;                    // write cursor into the buffer
    for (int i = start; i <= end; i++) {
        if (p > mid)                 // first run exhausted: drain the second
            buffer[k++] = A[q++];
        else if (q > end)            // second run exhausted: drain the first
            buffer[k++] = A[p++];
        else if (A[p] < A[q])        // take the smaller head element
            buffer[k++] = A[p++];
        else                         // ties come from the second run, as before
            buffer[k++] = A[q++];
    }
    // Copy the merged result back into the original array.
    for (int i = 0; i < k; i++) {
        A[start++] = buffer[i];
    }
}
void merge_sort (int A[ ] , int start , int end ) {
    // Recursively sort A[start..end] (inclusive bounds) with merge sort.
    if( start < end ) {
        // Overflow-safe midpoint: (start+end)/2 can overflow int for
        // large indices.
        int mid = start + (end - start) / 2;
        merge_sort(A, start, mid);     // sort the left half
        merge_sort(A, mid + 1, end);   // sort the right half
        merge(A, start, mid, end);     // merge the two sorted halves
    }
}
Mat image_padding(const Mat& input_image, int offset)
{
    // Return a copy of the 8-bit image surrounded by a zero border of
    // `offset` pixels on every side.
    Mat padded_image(input_image.rows + 2 * offset, input_image.cols + 2 * offset, CV_8UC1, 0.0);
    for (int row = 0; row < input_image.rows; ++row)
    {
        for (int col = 0; col < input_image.cols; ++col)
            padded_image.at< uchar >(row + offset, col + offset) = input_image.at< uchar >(row, col);
    }
    return padded_image;
}
Mat image_depadding(const Mat& input_image, int offset)
{
    // Strip a border of `offset` pixels from every side, returning the
    // interior region as a new 8-bit single-channel image.
    const int out_rows = input_image.rows - 2 * offset;
    const int out_cols = input_image.cols - 2 * offset;
    Mat depadded_image(out_rows, out_cols, CV_8UC1, 0.0);
    for (int row = 0; row < out_rows; ++row)
    {
        for (int col = 0; col < out_cols; ++col)
            depadded_image.at< uchar >(row, col) = input_image.at< uchar >(row + offset, col + offset);
    }
    return depadded_image;
}
Mat morphological_filter(const Mat& input_image, int kernel_size,std::string type )
{
    // Apply a morphological operation with a square kernel_size x kernel_size
    // structuring element:
    //   type == "erosion"  : each pixel becomes the minimum of its neighborhood
    //   type == "dilation" : each pixel becomes the maximum of its neighborhood
    // Any other type leaves the output black (all zeros), matching the
    // original behavior.
    // Fix: the original copied every neighborhood into a non-standard
    // variable-length array and merge-sorted it per pixel, only to read the
    // first/last element; tracking min/max directly is equivalent and avoids
    // both the VLA and the per-pixel sort.
    int offset;
    if(kernel_size % 2 != 0)
    {
        offset = (kernel_size+1)/2 - 1;
    }
    else
    {
        offset = (kernel_size)/2 - 1;
    }
    Mat padded_image = image_padding(input_image, offset);
    Mat filtered_image = Mat(padded_image.rows, padded_image.cols, CV_8UC1, 0.0);
    for(int j = offset; j < padded_image.rows - offset ; j++)
        for(int i = offset; i < padded_image.cols - offset; i++)
        {
            uchar min_val = 255;
            uchar max_val = 0;
            for(int m = 0; m < kernel_size ; m++)
                for(int n = 0; n < kernel_size; n++)
                {
                    uchar sample = padded_image.at< uchar >(j+m-offset,i+n-offset);
                    if (sample < min_val) min_val = sample;
                    if (sample > max_val) max_val = sample;
                }
            if(type == "erosion")
                filtered_image.at< uchar >(j,i) = min_val;
            else if(type == "dilation")
                filtered_image.at< uchar >(j,i) = max_val;
        }
    Mat depadded_image = image_depadding(filtered_image, offset);
    return depadded_image;
}
int main(int argc, char** argv )
{
Mat image1, grayscale_image1, eroded_image1, dilated_image1,image2, grayscale_image2, eroded_image2, dilated_image2;
image1 = imread( "../images/letter_a.jpg", 1 );
image2 = imread( "../images/fingerprint.png", 1 );
if ( !image1.data || !image2.data )
{
printf("No image data \n");
return -1;
}
cvtColor( image1, grayscale_image1, CV_BGR2GRAY );
eroded_image1 = morphological_filter(grayscale_image1,3,"erosion");
dilated_image1 = morphological_filter(grayscale_image1,3,"dilation");
cvtColor( image2, grayscale_image2, CV_BGR2GRAY );
eroded_image2 = morphological_filter(grayscale_image2,3,"erosion");
dilated_image2 = morphological_filter(grayscale_image2,3,"dilation");
namedWindow("Original Image 1", WINDOW_AUTOSIZE);
imwrite( "../results/morphological_filter/grayscale_image1.jpg", grayscale_image1 );
imshow("Original Image 1", grayscale_image1);
namedWindow("Eroded Image 1", WINDOW_AUTOSIZE);
imwrite( "../results/morphological_filter/eroded_image1.jpg", eroded_image1 );
imshow("Eroded Image 1", eroded_image1 );
namedWindow("Dilated Image 1", WINDOW_AUTOSIZE);
imwrite( "../results/morphological_filter/dilated_image1.jpg", dilated_image1 );
imshow("Dilated Image 1", dilated_image1);
namedWindow("Original Image 2", WINDOW_AUTOSIZE);
imwrite( "../results/morphological_filter/grayscale_image2.jpg", grayscale_image2 );
imshow("Original Image 2", grayscale_image2);
namedWindow("Eroded Image 2", WINDOW_AUTOSIZE);
imwrite( "../results/morphological_filter/eroded_image2.jpg", eroded_image2 );
imshow("Eroded Image 2", eroded_image2 );
namedWindow("Dilated Image 2", WINDOW_AUTOSIZE);
imwrite( "../results/morphological_filter/dilated_image2.jpg", dilated_image2 );
imshow("Dilated Image 2", dilated_image2 );
waitKey(0);
return 0;
}
Provided below are some images from which the type of noise in it is analyzed and a suitable filtering technique is chosen and applied to remove the noise.

The above image is very bright. In order to make the features visible, the contrast has to be enhanced. This can be done by equalizing its histogram so that the distribution of intensities of the image is spread across the spectrum rather than being concentrated at a particular intensity

The above image is very dark and, similar to the previous example, the contrast has to be enhanced by histogram equalization.
The equalized image is a bit noisy and has small/continuous perturbations. A Gaussian filter can be applied to the above image in order to smoothen the image and reduce the noise.

The noise in the above image can be classified as salt and pepper noise. This kind of noise is characterized by a random sprinkle of extreme intensities across the image. In order to remove such noise, a non-linear filter like the Median filter is used.
Source : http://cnx.org/resources/4181ddf62e3a2047d36357f16180ce247b094532/plane_noise1.png
The above image has salt and pepper noise as well. A median filter can remove it easily.
Source : en.wikipedia.org/wiki/File:Phase_correlation.png

This is a multi-step edge detection algorithm that typically uses and optimizes other edge detection operators such as Prewitt, Roberts and Sobel. First the input image is smoothened with a Gaussian filter. Then an edge detection operator is applied to the image and the intensity gradient and direction are computed. Based on the gradient and its direction, non-maximum suppression is applied to make the edges thinner. Strong and weak edges from the resulting image are identified by double thresholding it. The weak edges are then tracked to see if they are connected to other strong edges, in which case the weak edge will now be considered a strong edge, or will be eliminated otherwise. When comparing this method of edge detection to traditional edge detection operators like Sobel, it can be seen from the examples below that the Canny edge detector gives out thinner edges and fewer false edges than the Sobel edge detector.
#include < iostream >
#include < opencv2/opencv.hpp >
#include < math.h >
#include < string >
#define PI 3.14159
#define THRESHOLD 35
#define INTERPOLATION true
#define STRONG_EDGE 2
#define WEAK_EDGE 1
#define NOT_EDGE 0
#define THRESHOLD_RATIO_H 0.24
#define THRESHOLD_RATIO_L 0.25
using namespace cv;
Mat image_padding(const Mat& input_image, int offset)
{
    // Copy an 8-bit image into the center of a CV_32F image that has a
    // zero border of `offset` pixels on every side.
    Mat padded_image(input_image.rows + 2 * offset, input_image.cols + 2 * offset, CV_32F, 0.0);
    for (int row = 0; row < input_image.rows; ++row)
    {
        for (int col = 0; col < input_image.cols; ++col)
            padded_image.at< float >(row + offset, col + offset) = input_image.at< uchar >(row, col);
    }
    return padded_image;
}
Mat image_depadding(const Mat& input_image, int offset)
{
    // Strip a border of `offset` pixels from every side of a CV_32F image,
    // returning the interior region.
    const int out_rows = input_image.rows - 2 * offset;
    const int out_cols = input_image.cols - 2 * offset;
    Mat depadded_image(out_rows, out_cols, CV_32F, 0.0);
    for (int row = 0; row < out_rows; ++row)
    {
        for (int col = 0; col < out_cols; ++col)
            depadded_image.at< float >(row, col) = input_image.at< float >(row + offset, col + offset);
    }
    return depadded_image;
}
// True 2-D convolution of an 8-bit image with a square CV_32F kernel: the
// kernel is flipped about both axes (convolution, not correlation), the
// input is zero-padded so the output keeps the input size, and the result
// is returned as CV_32F.
// NOTE(review): for an even kernel_size the offset (size/2 - 1) lets the
// inner window index one row/column beyond the padded border; every caller
// in this file passes an odd size (3) — confirm before using even sizes.
Mat convolve(const Mat& input_image, const Mat& kernel)
{
int kernel_size = kernel.rows;
int offset;
// Half-width of the kernel, used for padding and window placement.
if(kernel_size % 2 != 0)
{
offset = (kernel_size+1)/2 - 1;
}
else
{
offset = (kernel_size)/2 - 1;
}
Mat padded_image = image_padding(input_image, offset);
// Flip the kernel in both directions so the sliding window computes a
// true convolution rather than a correlation.
Mat flipped_kernel = Mat(kernel.rows, kernel.cols, CV_32F, 0.0);
for(int m = 0; m < kernel_size ; m++)
for(int n = 0; n < kernel_size; n++)
{
flipped_kernel.at< float >(m,n) = kernel.at< float >(kernel_size-m-1,kernel_size-n-1);
}
Mat convolved_image = Mat(padded_image.rows, padded_image.cols, CV_32F, 0.0);
float value = 0.0;
// Slide the window over every interior pixel, accumulating the weighted
// sum of the neighborhood.
for(int j = offset; j < padded_image.rows - offset ; j++)
for(int i = offset; i < padded_image.cols - offset; i++)
{
for(int m = 0; m < kernel_size ; m++)
for(int n = 0; n < kernel_size; n++)
{
value += (padded_image.at< float >(j+m-offset,i+n-offset))*flipped_kernel.at< float >(m,n);
}
convolved_image.at< float >(j,i) = value;
value = 0;
}
// Remove the padding so the output matches the input dimensions.
Mat depadded_image = image_depadding(convolved_image, offset);
return depadded_image;
}
Mat get_image_gradient(const Mat& input_image, const Mat& horizontal_image, const Mat& vertical_image)
{
    // Combine the horizontal and vertical derivative images into a single
    // gradient-magnitude image: G = sqrt(Gx^2 + Gy^2), stored as CV_8UC1.
    // Fix: the magnitude is clamped to 255 before the uchar store; the
    // original assigned the raw float, and converting a float above 255
    // to uchar is undefined behavior in C++.
    Mat filtered_image = Mat(input_image.rows, input_image.cols, CV_8UC1, 0.0);
    for(int j = 0; j < input_image.rows ; j++)
        for(int i = 0; i < input_image.cols; i++)
        {
            float gx = horizontal_image.at< float >(j,i);
            float gy = vertical_image.at< float >(j,i);
            float mag = sqrt(gx*gx + gy*gy);   // pow(x,2) replaced by products
            filtered_image.at< uchar >(j,i) = (uchar)(mag > 255.0f ? 255.0f : mag);
        }
    return filtered_image;
}
Mat get_image_gradient_direction(const Mat& input_image, const Mat& horizontal_image, const Mat& vertical_image)
{
    // Per-pixel gradient direction in degrees, computed as atan2(Gy, Gx)
    // and converted from radians; range is (-180, 180].
    Mat filtered_image = Mat(input_image.rows, input_image.cols, CV_32F, 0.0);
    for (int row = 0; row < input_image.rows; ++row)
    {
        for (int col = 0; col < input_image.cols; ++col)
        {
            float gy = vertical_image.at< float >(row, col);
            float gx = horizontal_image.at< float >(row, col);
            filtered_image.at< float >(row, col) = (float)(atan2(gy, gx) * 180 / PI);
        }
    }
    return filtered_image;
}
void threshold_image(const Mat& input_image, Mat& thresholded_image)
{
    // Binarize: pixels at or above the global THRESHOLD become 255, all
    // others become 0.
    thresholded_image = input_image.clone();
    for (int row = 0; row < input_image.rows; ++row)
    {
        for (int col = 0; col < input_image.cols; ++col)
        {
            const bool is_edge = input_image.at< uchar >(row, col) >= THRESHOLD;
            thresholded_image.at< uchar >(row, col) = is_edge ? 255 : 0;
        }
    }
}
// Non-maximum suppression: thin the gradient-magnitude image by zeroing any
// pixel that is not a local maximum along its own gradient direction.
// With INTERPOLATION enabled, the two comparison values along the gradient
// are linearly interpolated between adjacent pixels using the ratio of the
// directional derivative to the magnitude; otherwise the direction is
// quantized to the nearest of four axes and the two direct neighbors are
// compared. Border pixels (outermost ring) are left untouched.
Mat non_maximum_suppression(const Mat& image_gradient,const Mat& gradient_direction, const Mat& horizontal_image, const Mat& vertical_image)
{
Mat nms_image = image_gradient.clone();
for(int j = 1; j < image_gradient.rows-1; j++)
for(int i =1; i < image_gradient.cols-1; i++)
{
float angle = gradient_direction.at< float >(j,i);
float top_elements[2];
float bottom_elements[2];
float ratio;
float current_gradient = image_gradient.at< uchar >(j,i) ;
float bottom_interpolation,top_interpolation;
if(INTERPOLATION)
{
// Sector [0,45] (and its point-symmetric mirror): gradient is mostly
// horizontal; interpolate between east/south-east and west/north-west.
if( (angle >= 0 && angle <= 45) || (angle < -135 && angle >= -180))
{
bottom_elements[0] = image_gradient.at< uchar >(j,i+1);
bottom_elements[1] = image_gradient.at< uchar >(j+1,i+1);
top_elements[0] = image_gradient.at< uchar >(j,i-1);
top_elements[1] = image_gradient.at< uchar >(j-1,i-1);
ratio = abs(vertical_image.at< float >(j,i)/current_gradient);
bottom_interpolation = (bottom_elements[1] - bottom_elements[0])*ratio +bottom_elements[0];
top_interpolation = (top_elements[1] - top_elements[0])*ratio +top_elements[0];
if(current_gradient < bottom_interpolation ||
current_gradient < top_interpolation )
{nms_image.at< uchar >(j,i) = 0;}
}
// Sector (45,90]: gradient mostly vertical, leaning right; interpolate
// between south/south-east and north/north-west.
else if( (angle > 45 && angle <= 90) || (angle < -90 && angle >= -135))
{
bottom_elements[0] = image_gradient.at< uchar >(j+1,i);
bottom_elements[1] = image_gradient.at< uchar >(j+1,i+1);
top_elements[0] = image_gradient.at< uchar >(j-1,i);
top_elements[1] = image_gradient.at< uchar >(j-1,i-1);
ratio = abs(horizontal_image.at< float >(j,i)/current_gradient);
bottom_interpolation = (bottom_elements[1] - bottom_elements[0])*ratio +bottom_elements[0];
top_interpolation = (top_elements[1] - top_elements[0])*ratio +top_elements[0];
if(current_gradient < bottom_interpolation ||
current_gradient < top_interpolation )
{nms_image.at< uchar >(j,i) = 0; }
}
// Sector (90,135]: gradient mostly vertical, leaning left; interpolate
// between south/south-west and north/north-east.
else if( (angle > 90 && angle <= 135) || (angle < -45 && angle >= -90))
{
bottom_elements[0] = image_gradient.at< uchar >(j+1,i);
bottom_elements[1] = image_gradient.at< uchar >(j+1,i-1);
top_elements[0] = image_gradient.at< uchar >(j-1,i);
top_elements[1] = image_gradient.at< uchar >(j-1,i+1);
ratio = abs(horizontal_image.at< float >(j,i)/current_gradient);
bottom_interpolation = (bottom_elements[1] - bottom_elements[0])*ratio +bottom_elements[0];
top_interpolation = (top_elements[1] - top_elements[0])*ratio +top_elements[0];
if(current_gradient < bottom_interpolation ||
current_gradient < top_interpolation )
{nms_image.at< uchar >(j,i) = 0; }
}
// Sector (135,180] (and mirror): gradient near-horizontal, opposite
// orientation; interpolate between west/south-west and east/north-east.
else if( (angle > 135 && angle <= 180) || (angle < 0 && angle >= -45))
{
bottom_elements[0] = image_gradient.at< uchar >(j,i-1);
bottom_elements[1] = image_gradient.at< uchar >(j+1,i-1);
top_elements[0] = image_gradient.at< uchar >(j,i+1);
top_elements[1] = image_gradient.at< uchar >(j-1,i+1);
ratio = abs(horizontal_image.at< float >(j,i)/current_gradient);
bottom_interpolation = (bottom_elements[1] - bottom_elements[0])*ratio +bottom_elements[0];
top_interpolation = (top_elements[1] - top_elements[0])*ratio +top_elements[0];
if(current_gradient < bottom_interpolation ||
current_gradient < top_interpolation )
{nms_image.at< uchar >(j,i) = 0; }
}
}
// Non-interpolated variant: quantize the direction to horizontal,
// diagonal, vertical, or anti-diagonal, and compare against the two
// neighbors on that axis.
else {
if( (angle >= -22.5 && angle <= 22.5) || (angle < -157.5 && angle >= -180))
if(image_gradient.at< uchar >(j,i) < image_gradient.at< uchar >(j,i+1) ||
image_gradient.at< uchar >(j,i) < image_gradient.at< uchar >(j,i-1) )
{nms_image.at< uchar >(j,i) = 0; }
else if( (angle >= 22.5 && angle <= 67.5) || (angle < -112.5 && angle >= -157.5))
if(image_gradient.at< uchar >(j,i) < image_gradient.at< uchar >(j+1,i+1) ||
image_gradient.at< uchar >(j,i) < image_gradient.at< uchar >(j-1,i-1) )
{nms_image.at< uchar >(j,i) = 0; }
else if( (angle >= 67.5 && angle <= 112.5) || (angle < -67.5 && angle >= -112.5))
if(image_gradient.at< uchar >(j,i) < image_gradient.at< uchar >(j+1,i) ||
image_gradient.at< uchar >(j,i) < image_gradient.at< uchar >(j-1,i) )
{nms_image.at< uchar >(j,i) = 0; }
else if( (angle >= 112.5 && angle <= 157.5) || (angle < -22.5 && angle >= -67.5))
if(image_gradient.at< uchar >(j,i) < image_gradient.at< uchar >(j+1,i-1) ||
image_gradient.at< uchar >(j,i) < image_gradient.at< uchar >(j-1,i+1) )
{nms_image.at< uchar >(j,i) = 0; }
}
}
return nms_image;
}
bool check_for_connected_strong_edges(int arr[],int size)
{
    // True when at least one entry of arr[0..size) carries the
    // STRONG_EDGE label.
    int index = 0;
    while (index < size)
    {
        if (arr[index] == STRONG_EDGE)
            return true;
        ++index;
    }
    return false;
}
Mat hysterisis(const Mat& input_image, int max_threshold, int min_threshold)
{
    // Double thresholding with hysteresis:
    //   * pixels >= max_threshold are strong edges;
    //   * pixels strictly between min_threshold and max_threshold are weak;
    //   * everything else is not an edge.
    // A weak edge survives (is written as 255) only when its 3x3
    // neighborhood contains a strong edge.
    // Fix: removed `float strong_edges_row[rows*cols]`, an unused
    // variable-length array the original allocated on the stack — VLAs are
    // non-standard C++, and an image-sized one risks stack overflow.
    Mat double_thresholded_image = input_image.clone() ;
    Mat edge_strength_image = input_image.clone();
    int neighborhood[9];
    int k = 0;
    // Pass 1: classify every interior pixel by edge strength.
    for(int j = 1; j < input_image.rows-1; j++)
        for(int i = 1; i < input_image.cols-1; i++)
        {
            if (input_image.at< uchar >(j,i) >= max_threshold)
                edge_strength_image.at< uchar >(j,i) = STRONG_EDGE;
            else if (input_image.at< uchar >(j,i) > min_threshold && input_image.at< uchar >(j,i) < max_threshold)
                edge_strength_image.at< uchar >(j,i) = WEAK_EDGE;
            else
                edge_strength_image.at< uchar >(j,i) = NOT_EDGE;
        }
    // Pass 2: keep a weak edge only when connected to a strong edge.
    for(int j = 1; j < input_image.rows-1; j++)
        for(int i = 1; i < input_image.cols-1; i++)
        {
            if (edge_strength_image.at< uchar >(j,i) == WEAK_EDGE)
            {
                k = 0;
                for(int m = -1; m < 2 ; m++)
                    for(int n = -1; n < 2; n++)
                    {
                        neighborhood[k] = edge_strength_image.at< uchar >(j+m,i+n);
                        k++;
                    }
                if( check_for_connected_strong_edges( neighborhood, 9) )
                {
                    double_thresholded_image.at< uchar >(j,i) = 255;
                    // Promote so later-scanned neighbors can chain off it.
                    edge_strength_image.at< uchar >(j,i) = STRONG_EDGE;
                }
                else
                {
                    double_thresholded_image.at< uchar >(j,i) = 0;
                    edge_strength_image.at< uchar >(j,i) = WEAK_EDGE;
                }
            }
            else if (edge_strength_image.at< uchar >(j,i) == STRONG_EDGE)
                double_thresholded_image.at< uchar >(j,i) = 255;
            else
                double_thresholded_image.at< uchar >(j,i) = 0;
        }
    return double_thresholded_image;
}
void sobel_filter(const Mat& input_image, Mat& image_gradient, Mat& image_gradient_direction, Mat& horizontal_image, Mat& vertical_image)
{
    // Run the Sobel operator: convolve with the 3x3 horizontal and vertical
    // derivative kernels, then derive gradient magnitude and direction.
    // Fix: removed the unused locals `nms_image` and
    // `double_thresholded_image` from the original.
    Mat horizontal_kernel = Mat(3, 3, CV_32F, 0.0);
    Mat vertical_kernel = Mat(3, 3, CV_32F, 0.0);
    // Horizontal (x-derivative) kernel: [ 1 0 -1 ; 2 0 -2 ; 1 0 -1 ]
    const float hk[3][3] = { {1, 0, -1}, {2, 0, -2}, {1, 0, -1} };
    // Vertical (y-derivative) kernel:   [ 1 2 1 ; 0 0 0 ; -1 -2 -1 ]
    const float vk[3][3] = { {1, 2, 1}, {0, 0, 0}, {-1, -2, -1} };
    for (int m = 0; m < 3; ++m)
        for (int n = 0; n < 3; ++n)
        {
            horizontal_kernel.at< float >(m,n) = hk[m][n];
            vertical_kernel.at< float >(m,n) = vk[m][n];
        }
    horizontal_image = convolve(input_image, horizontal_kernel);
    vertical_image = convolve(input_image, vertical_kernel);
    image_gradient = get_image_gradient(input_image, horizontal_image, vertical_image);
    image_gradient_direction = get_image_gradient_direction(input_image, horizontal_image, vertical_image);
}
void canny_edge_detection(const Mat& input_image, int num)
{
    // Full Canny pipeline: Gaussian smoothing, Sobel gradients, non-maximum
    // suppression, then double thresholding with hysteresis. Intermediate
    // and final images are written to disk (suffixed with `num`) and shown.
    Mat gaussian_filtered_image, image_gradient, nms_image, double_thresholded_image;
    Mat gradient_direction, horizontal_image, vertical_image, sobel_filtered_image;
    gaussian_filtered_image = input_image.clone();
    GaussianBlur(gaussian_filtered_image, gaussian_filtered_image, Size(7,7), 1.4, 1.4, BORDER_DEFAULT);
    sobel_filter(gaussian_filtered_image, image_gradient, gradient_direction, horizontal_image, vertical_image);
    threshold_image(image_gradient, sobel_filtered_image);
    nms_image = non_maximum_suppression(image_gradient, gradient_direction, horizontal_image, vertical_image);
    // Derive the hysteresis thresholds from the strongest NMS response.
    double min, max;
    cv::minMaxLoc(nms_image, &min, &max);
    int max_threshold = max*THRESHOLD_RATIO_H;
    int min_threshold = max_threshold*THRESHOLD_RATIO_L;
    double_thresholded_image = hysterisis( nms_image, max_threshold, min_threshold);
    // Build a distinct path per saved image.
    // Fixes: (1) the original wrote both the raw gradient and the
    // thresholded Sobel result to the same path, so the second imwrite
    // clobbered the first; (2) the NMS path was built with
    // `"literal" + num`, which is pointer arithmetic on a char array (an
    // out-of-bounds/garbage path), not string concatenation; (3) the first
    // namedWindow title did not match the title passed to imshow.
    std::ostringstream gradient_path, sobel_path, nms_path, canny_path;
    gradient_path << "../results/canny_edge_detection/sobel_image_gradient" << num << ".jpg";
    sobel_path << "../results/canny_edge_detection/sobel_edge_detector" << num << ".jpg";
    nms_path << "../results/edge_detection/nms_image" << num << ".jpg";
    canny_path << "../results/canny_edge_detection/canny_edge_detector" << num << ".jpg";
    namedWindow("Sobel Image Gradient", WINDOW_AUTOSIZE);
    imwrite( gradient_path.str(), image_gradient );
    imshow("Sobel Image Gradient", image_gradient);
    namedWindow("Sobel Filter ", WINDOW_AUTOSIZE);
    imwrite( sobel_path.str(), sobel_filtered_image );
    imshow("Sobel Filter ", sobel_filtered_image);
    namedWindow("Non Maximum Suppression", WINDOW_AUTOSIZE);
    imwrite( nms_path.str(), nms_image );
    imshow("Non Maximum Suppression", nms_image);
    namedWindow("Hysterisis", WINDOW_AUTOSIZE);
    imwrite( canny_path.str(), double_thresholded_image );
    imshow("Hysterisis", double_thresholded_image);
}
int main(int argc, char** argv )
{
    // Run the Canny pipeline on five pre-filtered test images produced by
    // the earlier filtering experiments.
    Mat grayscale_image1,grayscale_image2,grayscale_image3,grayscale_image4,grayscale_image5;
    Mat image1 = imread( "../results/filter/equalized_image_1.jpg", 1 );
    Mat image2 = imread( "../results/filter/gaussian_filtered_image_2.jpg", 1 );
    Mat image3 = imread( "../results/filter/median_filtered_image_3.jpg", 1 );
    //http://cnx.org/resources/4181ddf62e3a2047d36357f16180ce247b094532/plane_noise1.png
    Mat image4 = imread( "../results/filter/median_filtered_image_4.jpg", 1 );
    //en.wikipedia.org/wiki/File:Phase_correlation.png
    Mat image5 = imread( "../results/filter/gaussian_filtered_image_5.jpg", 1 );
    // Fix: validate every load before touching pixel data — the original
    // resized image2 before checking it, which crashes on a missing file.
    if ( !image1.data || !image2.data || !image3.data || !image4.data || !image5.data )
    {
        printf("No image data \n");
        return -1;
    }
    // image2 is brought to a common working size before processing.
    cv::Mat resized_image2 = cv::Mat(640, 1000, CV_8UC1, cv::Scalar(0, 0, 0)) ;
    resize(image2,resized_image2,resized_image2.size(),0,0);
    imwrite( "../results/canny_edge_detection/equalized_image2.jpg", resized_image2 );
    cvtColor( image1, grayscale_image1, CV_BGR2GRAY );
    cvtColor( resized_image2, grayscale_image2, CV_BGR2GRAY );
    cvtColor( image3, grayscale_image3, CV_BGR2GRAY );
    cvtColor( image4, grayscale_image4, CV_BGR2GRAY );
    cvtColor( image5, grayscale_image5, CV_BGR2GRAY );
    canny_edge_detection(grayscale_image1,1);
    canny_edge_detection(grayscale_image2,2);
    canny_edge_detection(grayscale_image3,3);
    canny_edge_detection(grayscale_image4,4);
    canny_edge_detection(grayscale_image5,5);
    waitKey(0);
    return 0;
}
Inverse filtering is an image restoration technique that is used to deconvolve the point spread function of an image (i.e. to deblur an image). The blurred image is first converted to the frequency domain using the Fourier Transform, and then the frequency-domain point spread function is inverted and multiplied element-wise with the blurred image in the frequency domain. However, in the presence of spectral nulls, the values of the deconvolved image can go to infinity. In order to avoid this, pseudo-inverse filters are used. In pseudo-inverse filters, the inverted point-spread function is applied only if the value is above a certain threshold. These filters however do not work well in the presence of noise. Results of my implementation of these filters are shown below.











Unlike Inverse Filters, Wiener Filters not only remove the blurring effect but also try to smoothen the noise present in the image. In addition to the point spread functions, it makes use of the signal to noise ratio to remove noise from the image. When you compare the images below with the pseudo-inverse filter versions, it can be seen that performance would be better. However it hasn't completely restored the image. It might need some more tweaking of my implementation of these filters.
#include < iostream >
#include < opencv2/opencv.hpp >
#include < math.h >
using namespace cv;
#define STD_DEV 50
#define KERNEL_SIZE 17
#define SIGMA 3
#define PI 3.14159
#define THRESHOLD 0.2
Mat image_padding(const Mat& input_image)
{
    // Pad the image to DFT-friendly dimensions by adding zero rows/columns
    // on the bottom and right edges.
    const int rows = getOptimalDFTSize(input_image.rows);
    const int cols = getOptimalDFTSize(input_image.cols);
    Mat padded_image;
    copyMakeBorder(input_image, padded_image, 0, rows - input_image.rows, 0, cols - input_image.cols, BORDER_CONSTANT, Scalar::all(0));
    return padded_image;
}
Mat add_noise(const Mat& input_image, int std_dev)
{
    // Return input_image with zero-mean Gaussian noise of the given
    // standard deviation added.
    // Fix: the original ignored its std_dev parameter and always used the
    // STD_DEV macro; all existing callers pass STD_DEV, so behavior is
    // unchanged for them.
    Mat noisy_image(input_image.rows,input_image.cols,CV_8U);
    Mat noise(input_image.rows,input_image.cols,CV_32F);
    randn(noise,Scalar::all(0),Scalar::all(std_dev));
    // NOTE(review): converting to CV_8U saturates negative samples to 0,
    // so the added noise is one-sided rather than symmetric — confirm this
    // is intentional.
    noise.convertTo(noisy_image,CV_8U);
    noisy_image += input_image.clone();
    return noisy_image;
}
Mat pad_kernel(cv::Size size,const Mat& kernel, int kernel_size)
{
    // Embed the kernel_size x kernel_size kernel in the top-left corner of
    // a zero-filled CV_32F image of the requested size (for frequency-domain
    // use alongside the padded image).
    Mat padded_kernel = Mat::zeros(size, CV_32F);
    for (int row = 0; row < kernel_size; ++row)
    {
        for (int col = 0; col < kernel_size; ++col)
            padded_kernel.at< float >(row, col) = kernel.at< float >(row, col);
    }
    return padded_kernel;
}
Mat get_gaussian_blur_kernel(int kernel_size, float sigma)
{
    // Build a normalized kernel_size x kernel_size Gaussian kernel with the
    // given sigma, centered at index k.
    Mat kernel = Mat(kernel_size, kernel_size, CV_32F, 0.0);
    int k;
    if(kernel_size % 2 != 0)
    {
        k = (kernel_size-1)/2;
    }
    else
    {
        k = (kernel_size)/2;
    }
    // pow(sigma,2) replaced by a plain product; 1/(2*pi*sigma^2) is the
    // usual 2-D Gaussian normalization constant.
    const double two_sigma_sq = 2.0 * sigma * sigma;
    double sum = 0.0;
    for(int j = 0; j < kernel_size ; j++)
        for(int i = 0; i < kernel_size; i++)
        {
            // (i+1)-(k+1) simplifies to i-k (same for j): distance from center.
            const double dx = i - k;
            const double dy = j - k;
            kernel.at< float >(j,i) = (float)((1.0 / (PI * two_sigma_sq)) * exp(-(dx*dx + dy*dy) / two_sigma_sq));
            sum += kernel.at< float >(j,i);
        }
    // Normalize so the kernel sums to 1 (preserves overall brightness).
    for(int j = 0; j < kernel_size ; j++)
        for(int i = 0; i < kernel_size; i++)
        {
            kernel.at< float >(j,i) = kernel.at< float >(j,i)/sum ;
        }
    return kernel;
}
Mat get_motion_blur_kernel(int kernel_size)
{
    // Horizontal motion-blur kernel: the middle row holds 1/kernel_size in
    // every column, everything else is zero.
    Mat kernel = Mat::zeros(kernel_size, kernel_size, CV_32F);
    const int middle_row = kernel_size / 2;
    const float weight = 1.0 / kernel_size;
    for (int col = 0; col < kernel_size; ++col)
        kernel.at< float >(middle_row, col) = weight;
    return kernel;
}
Mat fourier_transform(const Mat& padded_image)
{
    // Forward DFT of the (already padded) image; returns a two-channel
    // CV_32F matrix holding the real and imaginary planes.
    Mat real_part;
    padded_image.convertTo(real_part, CV_32F);
    Mat planes[] = { Mat_< float >(real_part), Mat::zeros(real_part.size(), CV_32F) };
    Mat complex_image;
    merge(planes, 2, complex_image);
    dft(complex_image, complex_image);
    return complex_image;
}
Mat power_spectrum(const Mat& input_image)
{
    // Power spectrum |F(u,v)|^2: squared magnitude of the image's DFT.
    Mat planes[2];
    Mat magnitude_image, spectrum_image;
    split(fourier_transform(input_image), planes);
    magnitude(planes[0], planes[1], magnitude_image);
    multiply(magnitude_image, magnitude_image, spectrum_image);
    return spectrum_image;
}
// Wiener deconvolution of `noisy_image` blurred by `kernel`, using
// `signal_spectrum` (power spectrum of a clean reference image) to estimate
// the signal power in the SNR term.
// NOTE(review): power_spectrum() already returns |H|^2, so
// `kernel_spectrum_squared` is |H|^4 and the final division uses |H|^2
// rather than the complex H (kernel phase is ignored) — confirm against the
// intended Wiener formulation; the accompanying write-up notes the results
// "might need some more tweaking".
// NOTE(review): the noise spectrum is estimated from a freshly generated
// randn() field with STD_DEV, not from the actual noise in the input —
// verify this approximation is intended.
Mat wiener_filter(const Mat& noisy_image, const Mat& signal_spectrum, Mat kernel, int kernel_size)
{
Mat noise(noisy_image.rows,noisy_image.cols,CV_8U);
randn(noise,Scalar::all(0),Scalar::all(STD_DEV));
// Mean of the input, used at the end to restore overall brightness.
Scalar mean_input_image = mean(noisy_image);
Mat noise_spectrum = power_spectrum(noise);
Mat images[2], kernel_images[2];
Mat complex_image = fourier_transform(noisy_image);
split(complex_image,images);
Mat padded_kernel = pad_kernel(noisy_image.size(),kernel,kernel_size);
Mat kernel_spectrum = power_spectrum(padded_kernel);
Mat kernel_spectrum_squared;
Mat kernel_complex_image = fourier_transform(padded_kernel);
split(kernel_complex_image,kernel_images);
multiply(kernel_spectrum,kernel_spectrum,kernel_spectrum_squared);
// 1/SNR per frequency: noise power over (reference) signal power.
Mat inv_snr = noise_spectrum/signal_spectrum;
Mat weight = Mat::zeros(noisy_image.size(), CV_32F) ;
for(int j = 0; j < noisy_image.rows ; j++)
for(int i = 0; i < noisy_image.cols; i++)
{
// Apply the Wiener weight only where the kernel spectrum is above
// THRESHOLD (pseudo-inverse-style guard against spectral nulls).
if( kernel_spectrum.at< float >(j,i) > THRESHOLD)
weight.at< float >(j,i) = ((kernel_spectrum_squared.at< float >(j,i))/(kernel_spectrum_squared.at< float >(j,i) + inv_snr.at< float >(j,i)))/kernel_spectrum.at< float >(j,i) ;
else
weight.at< float >(j,i) = 0.0;
}
// Apply the weight in the frequency domain and invert the transform.
multiply(images[0],weight,images[0]);
multiply(images[1],weight,images[1]);
merge(images,2,complex_image);
idft(complex_image,complex_image);
split(complex_image,images);
// Rescale so the restored image keeps the input's mean intensity
// (idft output is unnormalized).
Scalar mean_restored_image = mean(images[0]);
double scale_factor = mean_input_image.val[0]/mean_restored_image.val[0];
multiply(images[0],scale_factor,images[0]);
Mat normalized_image ;
images[0].convertTo(normalized_image,CV_8UC1);
return normalized_image;
}
Mat inverse_filter(const Mat& noisy_image, Mat kernel, int kernel_size, bool pseudo_inverse = false)
{
    // Deconvolve `noisy_image` by dividing its spectrum by the kernel's
    // power spectrum.
    //   pseudo_inverse == false : plain inverse filter (can blow up at
    //                             spectral nulls);
    //   pseudo_inverse == true  : apply the inverse only where the kernel
    //                             spectrum exceeds THRESHOLD; 0 elsewhere.
    // Fixes: removed dead code from the original — a noise matrix filled
    // with randn() but never read (its only effect was advancing the global
    // RNG state), and the kernel DFT / |H|^2-squared computation whose
    // results were never used in this function.
    // NOTE(review): the weight divides by the kernel's *power* spectrum
    // (|H|^2 from power_spectrum) rather than the complex spectrum H, so
    // kernel phase is ignored — confirm against the intended formulation.
    Scalar mean_input_image = mean(noisy_image);
    Mat images[2];
    Mat complex_image = fourier_transform(noisy_image);
    split(complex_image,images);
    Mat padded_kernel = pad_kernel(noisy_image.size(),kernel,kernel_size);
    Mat kernel_spectrum = power_spectrum(padded_kernel);
    Mat weight = Mat::zeros(noisy_image.size(), CV_32F) ;
    for(int j = 0; j < noisy_image.rows ; j++)
        for(int i = 0; i < noisy_image.cols; i++)
        {
            if (pseudo_inverse)
            {
                // Guard against spectral nulls: invert only above THRESHOLD.
                if( kernel_spectrum.at< float >(j,i) > THRESHOLD)
                    weight.at< float >(j,i) = (1/kernel_spectrum.at< float >(j,i)) ;
                else
                    weight.at< float >(j,i) = 0.0;
            }
            else
            {
                weight.at< float >(j,i) = (1/kernel_spectrum.at< float >(j,i)) ;
            }
        }
    // Apply the inverse weight in the frequency domain and transform back.
    multiply(images[0],weight,images[0]);
    multiply(images[1],weight,images[1]);
    merge(images,2,complex_image);
    idft(complex_image,complex_image);
    split(complex_image,images);
    // Rescale so the restored image keeps the input's mean intensity
    // (idft output is unnormalized).
    Scalar mean_restored_image = mean(images[0]);
    double scale_factor = mean_input_image.val[0]/mean_restored_image.val[0];
    multiply(images[0],scale_factor,images[0]);
    Mat normalized_image ;
    images[0].convertTo(normalized_image,CV_8UC1);
    return normalized_image;
}
int main(int argc, char** argv )
{
    // Blur a test image with Gaussian and motion-blur kernels, optionally
    // add noise, then restore with Wiener, inverse, and pseudo-inverse
    // filters, saving and displaying every result.
    Mat input_image,sample_image,resized_sample_image,padded_image,noisy_gaussian_blurred_image,noisy_motion_blurred_image,gaussian_blurred_image,motion_blurred_image;
    input_image = imread( "../images/horse.jpg", CV_LOAD_IMAGE_GRAYSCALE ) ;
    sample_image = imread( "../images/lena.jpg", CV_LOAD_IMAGE_GRAYSCALE ) ;
    // Fix: also validate sample_image — the original only checked
    // input_image but later resizes sample_image, which crashes on a
    // failed load.
    if (!input_image.data || !sample_image.data)
    {
        printf("No image data \n");
        return -1;
    }
    //GaussianBlur(input_image,input_image,Size(3,3),1.5,1.5);
    padded_image = image_padding(input_image);
    Mat motion_blur_kernel = get_motion_blur_kernel(KERNEL_SIZE);
    Mat gaussian_blur_kernel = get_gaussian_blur_kernel(KERNEL_SIZE, SIGMA);
    filter2D(padded_image,motion_blurred_image,-1,motion_blur_kernel, Point(-1,-1),0,BORDER_DEFAULT);
    filter2D(padded_image,gaussian_blurred_image,-1,gaussian_blur_kernel, Point(-1,-1),0,BORDER_DEFAULT);
    //GaussianBlur(padded_image,gaussian_blurred_image,Size(KERNEL_SIZE,KERNEL_SIZE),SIGMA,SIGMA);
    noisy_gaussian_blurred_image =add_noise(gaussian_blurred_image,STD_DEV);
    noisy_motion_blurred_image =add_noise(motion_blurred_image,STD_DEV);
    // A clean reference image supplies the signal power spectrum for the
    // Wiener filter's SNR estimate.
    resize(sample_image, resized_sample_image,padded_image.size());
    Mat sample_image_power_spectrum = power_spectrum(resized_sample_image);
    // Fix: the original paired each blurred image with the WRONG kernel
    // (the motion-blurred input was deconvolved with the Gaussian kernel
    // and vice versa), so the restoration could not match the actual blur.
    Mat restored_image1 = wiener_filter(noisy_gaussian_blurred_image,sample_image_power_spectrum,gaussian_blur_kernel,KERNEL_SIZE);
    Mat restored_image2 = wiener_filter(noisy_motion_blurred_image,sample_image_power_spectrum,motion_blur_kernel,KERNEL_SIZE);
    Mat restored_image3 = inverse_filter(gaussian_blurred_image,gaussian_blur_kernel,KERNEL_SIZE);
    Mat restored_image4 = inverse_filter(motion_blurred_image,motion_blur_kernel,KERNEL_SIZE);
    Mat restored_image5 = inverse_filter(gaussian_blurred_image,gaussian_blur_kernel,KERNEL_SIZE, true);
    Mat restored_image6 = inverse_filter(motion_blurred_image,motion_blur_kernel,KERNEL_SIZE, true);
    Mat restored_image7 = inverse_filter(noisy_gaussian_blurred_image,gaussian_blur_kernel,KERNEL_SIZE, true);
    Mat restored_image8 = inverse_filter(noisy_motion_blurred_image,motion_blur_kernel,KERNEL_SIZE, true);
    namedWindow("Original Image", WINDOW_AUTOSIZE);
    imwrite( "../results/wiener_filter/original_image.jpg", padded_image);
    imshow("Original Image", padded_image);
    namedWindow("Noisy Gaussian Blurred Image", WINDOW_AUTOSIZE);
    imwrite( "../results/wiener_filter/noisy_gaussian_blurred_image.jpg", noisy_gaussian_blurred_image);
    imshow("Noisy Gaussian Blurred Image", noisy_gaussian_blurred_image);
    namedWindow("Noisy Motion Blurred Image", WINDOW_AUTOSIZE);
    imwrite( "../results/wiener_filter/noisy_motion_blurred_image.jpg", noisy_motion_blurred_image);
    imshow("Noisy Motion Blurred Image", noisy_motion_blurred_image);
    namedWindow("Restored Gaussian Blurred Image", WINDOW_AUTOSIZE);
    imwrite( "../results/wiener_filter/restored_gaussian_blurred_image.jpg", restored_image1);
    imshow("Restored Gaussian Blurred Image", restored_image1);
    namedWindow("Restored Motion Blurred Image", WINDOW_AUTOSIZE);
    imwrite( "../results/wiener_filter/restored_motion_blurred_image.jpg", restored_image2);
    imshow("Restored Motion Blurred Image", restored_image2);
    namedWindow("Gaussian Blurred Image", WINDOW_AUTOSIZE);
    imwrite( "../results/inverse_filter/gaussian_blurred_image.jpg", gaussian_blurred_image);
    imshow("Gaussian Blurred Image", gaussian_blurred_image);
    namedWindow("Motion Blurred Image", WINDOW_AUTOSIZE);
    imwrite( "../results/inverse_filter/motion_blurred_image.jpg", motion_blurred_image);
    imshow("Motion Blurred Image", motion_blurred_image);
    namedWindow("Inverse Motion Blurred Image", WINDOW_AUTOSIZE);
    imwrite( "../results/inverse_filter/inverse_motion_blurred_image.jpg", restored_image4);
    imshow("Inverse Motion Blurred Image", restored_image4);
    namedWindow("Inverse Gaussian Blurred Image", WINDOW_AUTOSIZE);
    imwrite( "../results/inverse_filter/inverse_gaussian_blurred_image.jpg", restored_image3);
    imshow("Inverse Gaussian Blurred Image", restored_image3);
    namedWindow("Pseudo Inverse Motion Blurred Image", WINDOW_AUTOSIZE);
    imwrite( "../results/inverse_filter/pseudo_inverse_motion_blurred_image.jpg", restored_image6);
    imshow("Pseudo Inverse Motion Blurred Image", restored_image6);
    namedWindow("Pseudo Inverse Gaussian Blurred Image", WINDOW_AUTOSIZE);
    imwrite( "../results/inverse_filter/pseudo_inverse_gaussian_blurred_image.jpg", restored_image5);
    imshow("Pseudo Inverse Gaussian Blurred Image", restored_image5);
    namedWindow("Pseudo Inverse Noisy Motion Blurred Image", WINDOW_AUTOSIZE);
    imwrite( "../results/inverse_filter/pseudo_inverse_noisy_motion_blurred_image.jpg", restored_image8);
    imshow("Pseudo Inverse Noisy Motion Blurred Image", restored_image8);
    namedWindow("Pseudo Inverse Noisy Gaussian Blurred Image", WINDOW_AUTOSIZE);
    imwrite( "../results/inverse_filter/pseudo_inverse_noisy_gaussian_blurred_image.jpg", restored_image7);
    imshow("Pseudo Inverse Noisy Gaussian Blurred Image", restored_image7);
    waitKey(0);
    return 0;
}
Run length encoding utilizes the property of images in which a number of consecutive pixels are usually identical. Hence it proves to be a good tool for compressing images. The pixel intensity, along with the number of consecutive occurrences of the same pixel intensity, is stored as a pair. The encoding pattern may be from left to right or zig-zag. However, if the image has a large variance without many consecutive identical intensities, then this encoding technique, rather than compressing the image, would expand its size.

#include <iostream>  // fixed: spaces inside the header name broke the #include
using namespace std;

// Run-length encodes a hard-coded sample sequence, printing each run as
// "count times - value", then reports the achieved compression ratio
// (each run costs 2 stored integers: the count and the value).
int main(int argc, char** argv )
{
const int input_size = 25; // const so the array size is a constant expression (no VLA)
// Sample with long runs: the favourable case for RLE.
const int a[input_size] = {1,1,1,2,2,3,4,4,4,4,4,4,4,5,5,5,5,3,2,2,2,2,2,2,3};
// High-variance sample (unfavourable case) — swap in to see RLE expand the data.
// (Padded to 25 elements; the original comment listed only 24.)
//const int a[input_size] = {1,1,2,4,2,5,7,1,2,3,4,10,11,15,7,3,3,3,3,6,7,8,1,1,9};
int prev_val = 0;
int count = 0;        // length of the run currently being collected
int output_size = 0;  // encoded size: 2 integers per emitted run
cout << endl << "Input" << endl << endl;
for(int i = 0 ; i < input_size; i++)
cout << a[i] << ",";
cout << endl << endl << "Output" << endl << endl; // fixed: missing ';'
for(int i = 0; i < input_size; i++)
{
if(prev_val != a[i])
{
// Flush the finished run (count == 0 only before the first element).
if(count != 0)
{
cout << count << " times - " << prev_val << endl;
output_size += 2;
}
count = 1;
}
else
{
++count;
}
prev_val = a[i];
}
// The loop never flushes the final run; emit it here.
cout << count << " times - " << prev_val << endl;
output_size += 2;
cout << endl << "Input Size = " << input_size << " , Output Size = " << output_size << endl;
cout << "Compression Ratio = " << (float)input_size/output_size << " : 1" << endl << endl;
return 0;
}
Huffman encoding is a very efficient compression technique that allows variable bit representation based on frequency of intensities across an image. It utilizes the histogram of an image in-order to encode. The most frequent pixel intensity is represented by the smallest number of bits while the least frequent pixel intensity is represented by the largest number of bits. However the huffman code book needs to be stored as an overhead so that the decoder can understand.
#include <iostream>  // fixed: spaces inside the header names broke the #includes
#include <string>
#include <vector>
#include <queue>     // replaces the non-portable bits/stdc++.h
// Largest symbol value that can be encoded; symbols must lie in 0..MAX_ELEMENT.
#define MAX_ELEMENT 10
using namespace std;
// Huffman code (bit string) for each symbol, filled in by encode(); indexed by symbol value.
string encoded_data[MAX_ELEMENT+1] = {""};
//Data type used for Priority Queue
class Node
{
int data;
unsigned int frequency;
public:
Node *left, *right;
Node(int _data, unsigned int _frequency)
{
data = _data;
frequency = _frequency;
}
int getData() const { return data; }
unsigned int getFrequency() const { return frequency; }
};
//Comparator used by Priority Queue for sorting
class FrequencyComparator
{
public:
int operator() (const Node* p1, const Node* p2)
{
return p1->getFrequency() > p2->getFrequency();
}
};
//Encode bits as a string
void encode(Node* root, string str)
{
if(root == NULL)
return;
if(root->getData() > -1)
encoded_data[root->getData()] = str;
encode(root->left, str+"0");
encode(root->right,str+"1");
}
//Implement Huffman Encoding
void huffman_encode(int frequency_table[], int size)
{
Node *left_node, *right_node, *top_node;
// Creates a Min heap of nodes (order by Frequency)
priority_queue, FrequencyComparator > MinHeap;
for (int i = 0; i < size; i++)
{
if(frequency_table[i] > 0)
MinHeap.push(new Node(i,frequency_table[i]) );
}
while(MinHeap.size() > 1)
{
left_node = MinHeap.top();
MinHeap.pop();
right_node = MinHeap.top();
MinHeap.pop();
// -1 denotes a special node containing frequencies of left and right nodes
top_node = new Node(-1, left_node->getFrequency() + right_node->getFrequency());
top_node->left = left_node;
top_node->right = right_node;
MinHeap.push(top_node);
}
encode(MinHeap.top(),"");
for(int i = 0; i < size;i++)
{
if(frequency_table[i] > 0)
cout << " Data = " << i << " , Huffman Code = " << encoded_data[i] << " , Frequency = " << frequency_table[i] << endl;
}
cout << endl;
}
// Demo driver: histogram a hard-coded sample sequence (values must lie in
// 0..MAX_ELEMENT) and print the Huffman code assigned to each symbol.
int main ()
{
const int input_size = 25; // const so the array size is a constant expression (no VLA)
const int a[input_size] = {1,1,1,1,1,3,4,4,4,4,4,4,4,5,5,5,5,3,7,1,1,1,2,2,9};
int frequency_table[MAX_ELEMENT+1] = {0};
cout << endl << "Input" << endl << endl;
for(int i = 0 ; i < input_size; i++)
cout << a[i] << ",";
// Build the symbol histogram consumed by huffman_encode().
for(int i = 0; i < input_size; i++)
{
frequency_table[a[i]] += 1;
}
cout << endl << endl << "Output - Bit Representation" << endl;
huffman_encode(frequency_table, MAX_ELEMENT+1);
return 0;
}
From the given images flower.tif and flower1.tif , the blurring function was extracted in the frequency domain by dividing the two complex images of the blurred and the original image respectively. Using this blurring function in the frequency domain, taking its pseudo-inverse and multiplying it with the blurred image in the frequency domain will restore it, and then the inverse DFT can be performed to view it in the spatial domain. However there might be a few artifacts due to loss of certain information. Based on the threshold level, the details in the restored image can vary. Alternatively, the blurred image was sharpened using the unsharp mask technique. It looks slightly better than the inverse filter. With regards to the image caterpillar.jpg, I tried to guess the impulse function and tried various kernels like the Gaussian kernel and the averaging kernel. However it didn't restore the image. In this case Blind Deconvolution has to be done by using a cost function by which various kernels and original images are guessed and the best ones are extracted. Due to lack of time, I was not able to implement this algorithm. A classical sharpening technique along with a median filter enhanced the image a bit.








The Discrete Cosine Transform converts an image into a frequency domain composed of cosine waves. Hence the frequency-domain image only has a real part. It is very useful for compression due to its energy compaction property. On the given images, DCT was used for compression. Then the initial values of each block were retained and the rest of them were masked. The masked image is then quantized using JPEG's standard quantization table. This quantized image is then encoded using Run Length Coding, which is a lossless encoding technique. It was observed that with increase in block size, the compression ratio and the quality of the image increased as indicated by the PSNR. Most of the information is available towards the starting point of each block. As the size of the block increases, the variance reduces and the valuable information is more concentrated towards the starting point.
% Block-based DCT compression demo: transform in n-by-n blocks, mask the
% high-frequency coefficients, quantize with the JPEG table, run-length
% encode the result, then reconstruct and report compression ratio and PSNR.
clear;
close all;
I = imread('moon.tif');
%I = rgb2gray(I);
I = im2double(I);
OriginalImage = I;
[k,l] = size(I);
I = imresize(I, [512 512]);  % fixed 512x512 grid so n-by-n blocks tile evenly
n = 8
T = dctmtx(n);
dct = @(block_struct) T * block_struct.data * T';
B = blockproc(I,[n n],dct);
B2 = B;
c = 10;
[M,N]=size(B);
% Keep only the low-frequency quadrant of each n-by-n block.
mask = zeros(n,n);  % preallocated (was previously grown element-by-element)
for x = 1:n
for y = 1:n
if x < (n+1)/2 && y < (n+1)/2
mask(x,y) = 1;
else
mask(x,y) = 0;
end
end
end
selection_matrix = repmat(mask,512/n,512/n);
B2 = B2.*selection_matrix;
B2 = imresize(B2,[512 512]);
% JPEG Standard Quantization Array
Quantz=[16 11 10 16 24 40 51 61
12 12 14 19 26 58 60 55
14 13 16 24 40 57 69 56
14 17 22 29 51 87 80 62
18 22 37 56 68 109 103 77
24 35 55 64 81 104 113 92
49 64 78 87 103 121 120 101
72 92 95 98 112 100 103 99 ];
prev_val = -1;
count = 0;
output_size = 0;
% NOTE(review): dividing by 90000 collapses every quantizer entry to
% ceil(...) = 1, i.e. effectively no quantization -- confirm the intended
% scale factor before relying on these compression numbers.
ResizedQuantizer = repmat(uint8(ceil(double(Quantz)./90000)), 512/8 , 512/8);
QuantizedImage = round(B2./double(ResizedQuantizer));
% Run-length encode the quantized coefficients (2 stored values per run).
for x = 1:512
for y = 1:512
if prev_val ~= QuantizedImage(x,y)
if count ~= 0
output_size = output_size + 2;
end
count = 1;
else
count = count +1;
end
prev_val = QuantizedImage(x,y);
end
end
output_size = output_size +2
compression_ratio = (512*512)/output_size
% Invert the pipeline: dequantize, inverse block DCT, resize back.
DeQuantizedImage = QuantizedImage.*double(ResizedQuantizer);
invdct = @(block_struct) T' * block_struct.data * T;
I2 = blockproc(DeQuantizedImage,[n n],invdct);
ReconstructedImage = imresize(I2,[k l]);
error = OriginalImage - ReconstructedImage;
MSE = sum(sum(error .* error))/(k*l);
PSNR = 20*log10(max(max(max(OriginalImage)))) - 10*log10(MSE);
textstring = ['N : ' num2str(n) ' X ' num2str(n) ' , Compression Ratio : ' num2str(compression_ratio) ': 1 , PSNR :' num2str(PSNR) ];
position = [1 50];
figure;
imshow(ReconstructedImage);
title(textstring);
figure;
imshow(OriginalImage);
Adaptive Thresholding is a data dependent thresholding technique, while global thresholding is a data independent thresholding technique. The fundamental principle of Adaptive Thresholding is that the region of interest can be segmented out by using the blurred version of an image minus a constant as the threshold. Different types of filters like the Mean filter, Gaussian filter and Median filter can be used to blur the image. The results of the various filters with the same constant and window size are seen below. It can be found that the globally thresholded image suffers due to the illumination gradient, as a result of which some numbers disappear, while the adaptively thresholded images give a much better result. This would be really useful for text segmentation.



#include <iostream>  // fixed: the header name was missing from this #include
#include "opencv2/opencv.hpp"
// Blur filter selectors for threshold_image()'s adaptive mode.
#define GAUSSIAN_FILTER 0
#define MEAN_FILTER 1
#define MEDIAN_FILTER 2
using namespace cv;
using namespace std;
void threshold_image(const Mat& input_image, Mat& thresholded_image, float threshold, bool inverse=false, bool adaptive=false, int filter_type = GAUSSIAN_FILTER, int size_k = 11)
{
thresholded_image = input_image.clone();
int image_type = input_image.type();
thresholded_image.convertTo(thresholded_image,CV_8UC1);
Mat blurred_image;
if(filter_type == MEAN_FILTER)
blur(thresholded_image, blurred_image, Size(size_k,size_k), Point(-1,-1));
else if(filter_type == GAUSSIAN_FILTER)
GaussianBlur( thresholded_image,blurred_image, Size(size_k,size_k ), 4, 4);
else if(filter_type == MEDIAN_FILTER)
medianBlur( thresholded_image,blurred_image, size_k);
if(blurred_image.type() != CV_8UC1)
blurred_image.convertTo(blurred_image,image_type);
float c = threshold;
for(int j = 1; j < input_image.rows-1; j++)
for(int i =1; i < input_image.cols-1; i++)
{
if(image_type == CV_8UC1)
{
if(adaptive)
{
threshold =(int) saturate_cast< uchar >(blurred_image.at< uchar >(j,i) - c);
}
if (input_image.at< uchar >(j,i) >= threshold)
if(inverse)
thresholded_image.at< uchar >(j,i) = 0;
else
thresholded_image.at< uchar >(j,i) = 255;
else
if(inverse)
thresholded_image.at< uchar >(j,i) = 255;
else
thresholded_image.at< uchar >(j,i) = 0;
}
else if(image_type == CV_32F)
{
if(adaptive)
{
threshold = (blurred_image.at< float >(j,i) - c);
}
if (input_image.at< float >(j,i) >= threshold)
if(inverse)
thresholded_image.at< uchar >(j,i) = 0;
else
thresholded_image.at< uchar >(j,i) = 255;
else
if(inverse)
thresholded_image.at< uchar >(j,i) = 255;
else
thresholded_image.at< uchar >(j,i) = 0;
}
}
}
int main(int argc, char** argv )
{
Mat grayscale_image1;
Mat image1 = imread( "../images/article.png", 1 );
if ( !image1.data)
{
printf("No image data \n");
return -1;
}
cvtColor( image1, grayscale_image1, CV_BGR2GRAY );
GaussianBlur( grayscale_image1,grayscale_image1, Size( 7, 7), 3, 3);
Mat mean_thresholded_image, gaussian_thresholded_image, median_thresholded_image, global_thresholded_image;
int kernel_size = 11;
int c = 1;
threshold_image(grayscale_image1, mean_thresholded_image, c, false, true, MEAN_FILTER, kernel_size);
threshold_image(grayscale_image1, gaussian_thresholded_image, c, false, true, GAUSSIAN_FILTER, kernel_size);
threshold_image(grayscale_image1, median_thresholded_image, c, false, true, MEDIAN_FILTER, kernel_size);
threshold_image(grayscale_image1, global_thresholded_image, 125, false, false);
namedWindow("Input Image 1", WINDOW_AUTOSIZE);
imshow("Input Image 1", grayscale_image1);
namedWindow("Mean Thresholded Image", WINDOW_AUTOSIZE);
imwrite( "../results/adaptive_threshold/mean_thresholded_image.jpg", mean_thresholded_image);
imshow("Mean Thresholded Image", mean_thresholded_image);
namedWindow("Gaussian Thresholded Image", WINDOW_AUTOSIZE);
imwrite( "../results/adaptive_threshold/gaussian_thresholded_image.jpg", gaussian_thresholded_image);
imshow("Gaussian Thresholded Image", gaussian_thresholded_image);
namedWindow("Median Thresholded Image", WINDOW_AUTOSIZE);
imwrite( "../results/adaptive_threshold/median_thresholded_image.jpg", median_thresholded_image);
imshow("Median Thresholded Image", median_thresholded_image);
namedWindow("Global Thresholded Image", WINDOW_AUTOSIZE);
imwrite( "../results/adaptive_threshold/global_thresholded_image.jpg", global_thresholded_image);
imshow("Global Thresholded Image", global_thresholded_image);
waitKey(0);
return 0;
}
Harris Corner Detection method estimates the location of corners based on the gradients at each pixel. It fundamentally relies on the property that corners have large gradients in both x and y direction. The algorithm proceeds by computing the x and y gradients of the image using operators like Sobel. Based on these gradients the Harris Matrix is computed as follows




#include <iostream>  // angle brackets: standard header, not a project file
#include "opencv2/opencv.hpp"
// NOTE(review): THRESHOLD appears unused in this translation unit -- the
// corner mask is produced by adaptive_threshold() instead; confirm before removal.
#define THRESHOLD 35000000000
// Minimum pixel distance allowed between two accepted corners.
#define DISTANCE_THRESHOLD 8
using namespace cv;
using namespace std;
// Pixel coordinate of a detected corner.
struct IndexT
{
int col;
int row;
// Euclidean distance from this corner to pixel (row=j, col=i).
float distance(int j, int i) const
{
const float dr = (float)(row - j);
const float dc = (float)(col - i);
return std::sqrt(dr*dr + dc*dc); // plain multiplication beats pow(x,2)
}
};
// Zero-pad an 8-bit image by `offset` pixels on every side, returning a
// CV_32F image whose interior holds the original pixel values.
Mat image_padding(const Mat& input_image, int offset)
{
Mat padded_image = Mat(input_image.rows + 2*offset, input_image.cols + 2*offset, CV_32F, 0.0);
for(int r = 0; r < input_image.rows; r++)
for(int c = 0; c < input_image.cols; c++)
padded_image.at< float >(r + offset, c + offset) = input_image.at< uchar >(r, c);
return padded_image;
}
// Strip `offset` pixels of padding from every side of a CV_32F image,
// returning the interior region as a new image.
Mat image_depadding(const Mat& input_image, int offset)
{
const int out_rows = input_image.rows - 2*offset;
const int out_cols = input_image.cols - 2*offset;
Mat depadded_image = Mat(out_rows, out_cols, CV_32F, 0.0);
for(int r = 0; r < out_rows; r++)
for(int c = 0; c < out_cols; c++)
depadded_image.at< float >(r, c) = input_image.at< float >(r + offset, c + offset);
return depadded_image;
}
// 2-D convolution of an 8-bit image with a square CV_32F kernel.
// The input is zero-padded so the output keeps the input's size, and the
// kernel is flipped about both axes (true convolution, not correlation).
Mat convolve(const Mat& input_image, const Mat& kernel)
{
const int ksize = kernel.rows;
const int offset = (ksize % 2 != 0) ? (ksize + 1)/2 - 1 : ksize/2 - 1;
Mat padded = image_padding(input_image, offset);
// Flip the kernel about both axes.
Mat flipped = Mat(kernel.rows, kernel.cols, CV_32F, 0.0);
for(int u = 0; u < ksize; u++)
for(int v = 0; v < ksize; v++)
flipped.at< float >(u, v) = kernel.at< float >(ksize - u - 1, ksize - v - 1);
// Slide the flipped kernel over every interior pixel of the padded image.
Mat result = Mat(padded.rows, padded.cols, CV_32F, 0.0);
for(int r = offset; r < padded.rows - offset; r++)
for(int c = offset; c < padded.cols - offset; c++)
{
float acc = 0.0;
for(int u = 0; u < ksize; u++)
for(int v = 0; v < ksize; v++)
acc += padded.at< float >(r + u - offset, c + v - offset) * flipped.at< float >(u, v);
result.at< float >(r, c) = acc;
}
return image_depadding(result, offset);
}
void adaptive_threshold(const Mat& input_image, Mat& thresholded_image, float c, bool inverse=false)
{
thresholded_image = input_image.clone();
int image_type = input_image.type();
if(image_type != CV_8UC1)
thresholded_image.convertTo(thresholded_image,CV_8UC1);
Mat blurred_image;
GaussianBlur( thresholded_image,blurred_image, Size( 5, 5 ), 1.4, 1.4);
if(blurred_image.type() != CV_8UC1)
blurred_image.convertTo(blurred_image,image_type);
float threshold;
for(int j = 1; j < input_image.rows-1; j++)
for(int i =1; i < input_image.cols-1; i++)
{
if(image_type == CV_8UC1)
{
threshold =(int) saturate_cast< uchar >(blurred_image.at< uchar >(j,i) - c);
if (input_image.at< uchar >(j,i) >= threshold)
if(inverse)
thresholded_image.at< uchar >(j,i) = 0;
else
thresholded_image.at< uchar >(j,i) = 255;
else
if(inverse)
thresholded_image.at< uchar >(j,i) = 255;
else
thresholded_image.at< uchar >(j,i) = 0;
}
else if(image_type == CV_32F)
{
threshold = (blurred_image.at< float >(j,i) - c);
if (input_image.at< float >(j,i) >= threshold)
if(inverse)
thresholded_image.at< uchar >(j,i) = 0;
else
thresholded_image.at< uchar >(j,i) = 255;
else
if(inverse)
thresholded_image.at< uchar >(j,i) = 255;
else
thresholded_image.at< uchar >(j,i) = 0;
}
}
}
// Detect corners via the Harris response R = Det(H) - k*Trace(H)^2 and
// return a copy of the input with a circle drawn around each corner.
// Pipeline: Sobel-style gradients -> squared/cross gradient images ->
// Gaussian windowing -> response map -> adaptive threshold -> thinning by
// a minimum-distance constraint between accepted corners.
Mat harris_corner_detection(const Mat& input_image)
{
// 3x3 Sobel kernels (scaled by 1/9) for the x and y gradients.
Mat horizontal_kernel = Mat(3, 3, CV_32F, 0.0);
Mat vertical_kernel = Mat(3, 3, CV_32F, 0.0);
horizontal_kernel.at< float >(0,0) = 1.0/9.0 ;
horizontal_kernel.at< float >(0,1) = 0 ;
horizontal_kernel.at< float >(0,2) = -1.0/9.0 ;
horizontal_kernel.at< float >(1,0) = 2/9.0 ;
horizontal_kernel.at< float >(1,1) = 0 ;
horizontal_kernel.at< float >(1,2) = -2/9.0 ;
horizontal_kernel.at< float >(2,0) = 1/9.0 ;
horizontal_kernel.at< float >(2,1) = 0 ;
horizontal_kernel.at< float >(2,2) = -1/9.0 ;
vertical_kernel.at< float >(0,0) = 1/9.0 ;
vertical_kernel.at< float >(0,1) = 2/9.0 ;
vertical_kernel.at< float >(0,2) = 1/9.0 ;
vertical_kernel.at< float >(1,0) = 0 ;
vertical_kernel.at< float >(1,1) = 0 ;
vertical_kernel.at< float >(1,2) = 0 ;
vertical_kernel.at< float >(2,0) = -1/9.0 ;
vertical_kernel.at< float >(2,1) = -2/9.0 ;
vertical_kernel.at< float >(2,2) = -1/9.0 ;
Mat horizontal_image = convolve(input_image, horizontal_kernel);
Mat vertical_image = convolve(input_image, vertical_kernel);
// Products of gradients that form the entries of the Harris matrix H.
Mat horizontal_image_squared = Mat(horizontal_image.rows, horizontal_image.cols, CV_32F, 0.0);
Mat vertical_image_squared = Mat(horizontal_image.rows, horizontal_image.cols, CV_32F, 0.0);
Mat horizontal_vertical_image = Mat(horizontal_image.rows, horizontal_image.cols, CV_32F, 0.0);
for(int j = 0; j < horizontal_image.rows ; j++)
for(int i = 0; i < horizontal_image.cols ; i++)
{
horizontal_image_squared.at< float >(j,i) = pow( horizontal_image.at< float >(j,i), 2);
vertical_image_squared.at< float >(j,i) = pow( vertical_image.at< float >(j,i), 2);
horizontal_vertical_image .at< float >(j,i) = horizontal_image.at< float >(j,i)*vertical_image.at< float >(j,i) ;
}
// Gaussian windowing implements the weighted sum over each patch.
GaussianBlur( horizontal_image_squared, horizontal_image_squared, Size(9,9), 1.4, 1.4, BORDER_DEFAULT );
GaussianBlur( vertical_image_squared, vertical_image_squared, Size(9,9), 1.4, 1.4, BORDER_DEFAULT );
GaussianBlur( horizontal_vertical_image, horizontal_vertical_image, Size(9,9), 1.4, 1.4, BORDER_DEFAULT );
Mat harris_response= Mat(horizontal_image.rows, horizontal_image.cols, CV_32F, 0.0);
Mat corners= Mat::zeros(horizontal_image.rows, horizontal_image.cols, CV_8UC1 );
const float k = 0.04; // empirical Harris sensitivity constant
for(int j = 0; j < horizontal_image.rows ; j++)
for(int i = 0; i < horizontal_image.cols ; i++)
{
//R = Det(H) - k(Trace(H))^2;
harris_response.at< float >(j,i) = horizontal_image_squared.at< float >(j,i)*vertical_image_squared.at< float >(j,i) -pow(horizontal_vertical_image.at< float >(j,i),2)
- k*(pow(horizontal_image_squared.at< float >(j,i)+vertical_image_squared.at< float >(j,i),2));
}
Mat corners2 = Mat::zeros(horizontal_image.rows, horizontal_image.cols, CV_8UC1 );
normalize(harris_response, harris_response, 1,0,NORM_MINMAX);
adaptive_threshold(harris_response, corners, -0.3);
if(corners.type() != CV_8UC1)
corners.convertTo(corners,CV_8UC1);
// Accept a corner only if no previously accepted corner lies closer than
// DISTANCE_THRESHOLD pixels. The vector template arguments below had been
// stripped ("std::vector corner_indices"), which did not compile.
std::vector< IndexT > corner_indices;
for(int j = 0; j < horizontal_image.rows ; j++)
for(int i = 0; i < horizontal_image.cols ; i++)
{
if( corners.at< uchar >(j,i) <= 200)
continue;
bool corner_neighbor = false;
for (std::vector< IndexT >::iterator it = corner_indices.begin() ; it != corner_indices.end(); ++it)
{
if(it->distance(j,i) < DISTANCE_THRESHOLD)
{
corner_neighbor = true;
break;
}
}
if(!corner_neighbor)
{
IndexT current_index;
current_index.row = j;
current_index.col = i;
corner_indices.push_back(current_index);
}
}
for (std::vector< IndexT >::iterator it = corner_indices.begin() ; it != corner_indices.end(); ++it)
{
corners2.at< uchar >(it->row,it->col) = 255;
}
// Draw the surviving corners, skipping a small border margin.
Mat image_with_corners = input_image.clone();
const int border_offset = 3;
for( int j = border_offset; j < corners2.rows-border_offset ; j++ )
for( int i = border_offset; i < corners2.cols-border_offset; i++ )
{
if( (int) corners2.at< uchar >(j,i) == 255 )
circle( image_with_corners, Point( i, j ), 10, Scalar(127), 2, 8, 0 );
}
return image_with_corners;
}
int main(int argc, char** argv )
{
Mat grayscale_image1, grayscale_image2;
Mat image1 = imread( "../images/house.jpg", 1 );
Mat image2 = imread( "../images/checkerboard.jpg", 1 );
if ( !image1.data || !image2.data )
{
printf("No image data \n");
return -1;
}
cvtColor( image1, grayscale_image1, CV_BGR2GRAY );
cvtColor( image2, grayscale_image2, CV_BGR2GRAY );
Mat corners1 = harris_corner_detection(grayscale_image1);
Mat corners2 = harris_corner_detection(grayscale_image2);
namedWindow("Input Image 1", WINDOW_AUTOSIZE);
imshow("Input Image 1", image1);
namedWindow("Corner 1", WINDOW_AUTOSIZE);
imwrite( "../results/harris_corner_detection/corners_1.jpg", corners1);
imshow("Corner 1", corners1);
namedWindow("Input Image 2", WINDOW_AUTOSIZE);
imshow("Input Image 2", image2);
namedWindow("Corner 2", WINDOW_AUTOSIZE);
imwrite( "../results/harris_corner_detection/corners_2.jpg", corners2);
imshow("Corner 2", corners2);
waitKey(0);
return 0;
}
Block Matching is the most basic motion estimation technique, in which the current or reference image is divided in to a number of blocks based on the specified block size and these blocks are then matched with the corresponding block in the previous image within the specified search area and the closeness of the block is determined either by sum of absolute differences or sum of squared differences. The result of this technique will give the motion field which contains the motion vectors that map blocks in the previous image to current/reference image. This motion field can be used to predict the current frame from the previous frame to give the motion compensated image. The error between this predicted image and the reference image is called the prediction error. This can be used for compression.

It can be seen from the images above and the table below that with increased search window size and block size, the prediction error decreases. It can also be observed that the regular variant of the Block Matching algorithm doesn't deal well with rotations, large translations or changes in illumination.

It can be seen from the images above and the table below that with increased search window size and block size , the prediction error decreases. But this might not be the case in all the frames as indicated
#include < iostream >
#include < opencv2/opencv.hpp >
#include < math.h >
#define THRESHOLD 35000000000
#define PI 3.1416
#define SAD 0
#define SSD 1
using namespace cv;
using namespace std;
// 2-D displacement of a block between consecutive frames, zero by default.
struct MotionVector
{
float vel_x;
float vel_y;
MotionVector()
{
vel_x = 0.0f;
vel_y = 0.0f;
}
};
// Dense per-pixel motion field: a rows x cols grid of MotionVectors stored
// row-major in a flat vector, plus a copy of the previous frame used as the
// background for the quiver visualization.
class MotionField
{
std::vector< MotionVector > motion_field;  // index = row*cols + col
cv::Mat motion_field_image;                // quiver background
int matching_criteria;                     // NOTE(review): never set or read here
int rows;
int cols;
public:
int height;
int width;
MotionField(int num_rows = 0, int num_cols = 0, Mat previous_frame = Mat::zeros(1,1,CV_8UC3) )
{
rows = num_rows;
cols = num_cols;
height = num_rows;
width = num_cols;
motion_field.resize(rows*cols);
motion_field_image = previous_frame.clone();
}
// Store motion_vector at (row, col); out-of-range indices are rejected.
void set(int row, int col, const MotionVector& motion_vector)
{
if(row >= rows || row < 0)
{
cout << "Row index out of bounds of the height of motion field" << endl;
return ;
}
if(col >= cols || col < 0 )
{
cout << "Col index out of bounds of the width of motion field" << endl;
return;
}
int index = row*cols + col;
motion_field[index] = motion_vector;
}
// Fetch the vector at (row, col); out-of-range indices are clamped to the
// last row/column (with a warning) so a reference can always be returned.
MotionVector& get(int row, int col)
{
if(row >= rows || row < 0)
{
cout << "Row index out of bounds of the height of motion field" << endl;
row = rows-1;
}
if(col >= cols || col < 0 )
{
cout << "Col index out of bounds of the width of motion field" << endl;
col = cols-1;
}
return motion_field[row*cols+col];
}
// Dump every resolution-th vector to stdout as (dx,dy) pairs.
void print(int resolution=10)
{
cout << "Printing Motion Field" << endl;
for(int j = 0; j < rows ; j+=resolution)
{
for(int i = 0; i < cols; i+=resolution)
{
MotionVector motion_vector = get(j,i);
float dx = motion_vector.vel_x;
float dy = motion_vector.vel_y;
cout << "(" << dx << "," << dy << ") ";
}
cout << endl;
}
}
// Draw every resolution-th vector on `image` as an arrow; zero-length
// vectors are drawn as dots.
void quiver(const Mat& image, int resolution)
{
// First pass: find the longest vector (kept for parity with the original
// scaling pass, though arrows are no longer scaled by it).
float max_length = -1000;
for(int j = 0; j < rows ; j+=resolution)
for(int i = 0; i < cols; i+=resolution)
{
MotionVector motion_vector = get(j,i);
float dx = motion_vector.vel_x;
float dy = motion_vector.vel_y;
float length = sqrt(pow(dx,2)+pow(dy,2));
if(length > max_length)
max_length = length;
}
for(int j = 0; j < rows ; j+=resolution)
for(int i = 0; i < cols; i+=resolution)
{
MotionVector motion_vector = get(j,i);
float dx = motion_vector.vel_x;
float dy = motion_vector.vel_y;
cv::Point p1(i,j);
float length = sqrt(pow(dx,2)+pow(dy,2));
if(length > 0)
{
cv::Point p2((int)(p1.x+dx),(int)(p1.y+dy));
arrowedLine(image,p1,p2,CV_RGB(0,255,0),1,CV_AA,0,0.2);
}
else
{
circle( image, Point( i, j ), 0.2, Scalar(0,255,0), 0.1, 8, 0 );
}
}
}
// Copy of the stored vectors. The template argument had been stripped
// ("std::vector getMotionField()"), which did not compile.
std::vector< MotionVector > getMotionField()
{
return motion_field;
}
// Render the quiver plot onto the stored background and return it.
Mat getImage(int resolution=10)
{
quiver(motion_field_image,resolution);
return motion_field_image;
}
};
// Fill "vacant" pixels (all three channels equal to vacant_pixel_value)
// in-place by weighting the nearest filled pixel found in each of the four
// axis directions. Used to close the holes left by forward motion
// compensation. Fix: the scan loops previously read the pixel BEFORE
// testing the bounds, reading one element past the image edge.
void bilinearInterpolation(Mat image, int vacant_pixel_value=0)
{
Vec3b vacant_pixel;
vacant_pixel[0] = vacant_pixel_value;
vacant_pixel[1] = vacant_pixel_value;
vacant_pixel[2] = vacant_pixel_value;
// Bilinear Interpolation
for(int j = 0; j < image.rows ; j++)
{
for(int i = 0; i < image.cols; i++)
{
if(image.at< Vec3b >(j, i) == vacant_pixel)
{
int count_r = i;
//get right most filled_neighbour (bounds test now precedes the read)
while( count_r < image.cols && image.at< Vec3b >(j, count_r) == vacant_pixel )
{
count_r++;
}
int count_l = i;
//get left most filled_neighbour
while( count_l >= 0 && image.at< Vec3b >(j, count_l) == vacant_pixel )
{
count_l--;
}
int count_b = j;
//get bottom most filled_neighbour
while( count_b < image.rows && image.at< Vec3b >(count_b, i) == vacant_pixel )
{
count_b++;
}
int count_t = j;
//get top most filled_neighbour
while( count_t >= 0 && image.at< Vec3b >(count_t, i) == vacant_pixel )
{
count_t--;
}
// Clamp the scan indices back inside the image.
if(count_r >=image.cols)
count_r = image.cols-1;
if(count_l < 0)
count_l = 0;
if(count_b >=image.rows)
count_b = image.rows-1;
if(count_t < 0)
count_t = 0;
// Distances to each neighbour, turned into inverse-distance weights.
float left_offset = i-count_l;
float right_offset = count_r-i;
float top_offset = j-count_t;
float bottom_offset = count_b-j;
float col_offset = left_offset+right_offset;
float row_offset = top_offset+bottom_offset;
left_offset = (1-left_offset/col_offset)/2.0;
right_offset = (1-right_offset/col_offset)/2.0;
top_offset = (1-top_offset/row_offset)/2.0;
bottom_offset = (1-bottom_offset/row_offset)/2.0;
Vec3b top_pixel = image.at< Vec3b >(count_t, i);
Vec3b bottom_pixel = image.at< Vec3b >(count_b, i);
Vec3b right_pixel = image.at< Vec3b >(j, count_r);
Vec3b left_pixel = image.at< Vec3b >(j, count_l);
Vec3b interpolated_value;
// Neighbours that are themselves vacant (scan hit the border) get
// zero weight, then the weights are renormalized.
if(left_pixel == vacant_pixel)
left_offset = 0;
if(right_pixel == vacant_pixel)
right_offset = 0;
if(top_pixel == vacant_pixel)
top_offset = 0;
if(bottom_pixel == vacant_pixel)
bottom_offset = 0;
col_offset = left_offset+right_offset;
row_offset = top_offset+bottom_offset;
left_offset = left_offset/(2*col_offset);
right_offset = right_offset/(2*col_offset);
top_offset = top_offset/(2*row_offset);
bottom_offset = bottom_offset/(2*row_offset);
interpolated_value[0] = left_offset*left_pixel[0] + right_offset*right_pixel[0] + top_offset*top_pixel[0]
+ bottom_offset*bottom_pixel[0];
interpolated_value[1] = left_offset*left_pixel[1] + right_offset*right_pixel[1] + top_offset*top_pixel[1]
+ bottom_offset*bottom_pixel[1];
interpolated_value[2] = left_offset*left_pixel[2] + right_offset*right_pixel[2] + top_offset*top_pixel[2]
+ bottom_offset*bottom_pixel[2];
image.at< Vec3b >(j, i) = interpolated_value;
}
}
}
}
// Estimate the motion field between two frames by exhaustive block matching:
// the current frame is tiled into block_size x block_size blocks and each
// block is searched for in a search_window_size window of the previous frame,
// scoring candidates by SAD or SSD.
//
// NOTE(review): this region is corrupted. HTML extraction swallowed the text
// between "if(error" (below, where "if(error < lowest_error)" and the rest of
// this function once stood) and the tail of a separate MSE helper that takes
// image_1/image_2 — the two functions are fused mid-statement. Several
// ".at(row,col)" calls are also missing their "< uchar >" template arguments
// for the same reason. Recover the original source before attempting a fix.
MotionField estimateMotionByBlockMatching(const Mat& previousFrame, const Mat& currentFrame,int matching_criteria = SAD, int block_size = 8, int search_window_size = 32)
{
Mat previous_frame_resized, current_frame_resized;
// Round the frame dimensions to a multiple of block_size so blocks tile evenly.
float width_offset = (float)currentFrame.cols/(float)block_size;
int new_width = currentFrame.cols + (cvRound(width_offset) - width_offset)*block_size;
float height_offset = (float)currentFrame.rows/(float)block_size;
int new_height = currentFrame.rows + (cvRound(height_offset) - height_offset)*block_size;
if(new_width == currentFrame.cols && new_height == currentFrame.rows )
{
previous_frame_resized = previousFrame.clone();
current_frame_resized = currentFrame.clone();
}
else
{
// NOTE(review): Size() takes (width, height) — the arguments here look swapped.
resize(previousFrame, previous_frame_resized, Size(new_height,new_width));
resize(currentFrame, current_frame_resized, Size(new_height,new_width));
}
// Matching is done on grayscale versions of both frames.
Mat previous_grayscale_image = Mat::zeros( new_height, new_width, CV_8UC1 );
Mat current_grayscale_image = Mat::zeros( new_height, new_width, CV_8UC1 );
cvtColor( previous_frame_resized , previous_grayscale_image, CV_BGR2GRAY );
cvtColor( current_frame_resized , current_grayscale_image, CV_BGR2GRAY );
MotionField motion_field(new_height, new_width, previous_frame_resized);
if(search_window_size <= block_size || search_window_size%block_size != 0)
{
cout<<"Search window size must be greater than the block size and must be a multiple of block size ";
return motion_field;
}
int number_of_blocks_cols = new_width/block_size;
int number_of_blocks_rows = new_height/block_size;
// NOTE(review): variable-length 2-D array of cv::Mat — a compiler extension,
// not standard C++.
cv::Mat currentFrameBlocks[number_of_blocks_rows][number_of_blocks_cols];
//Parse through each block in current image
for(int j = 0; j < number_of_blocks_rows ; j++)
{
for(int i = 0; i < number_of_blocks_cols ; i++)
{
currentFrameBlocks[j][i] = Mat::zeros( block_size, block_size, CV_8UC1 );
// Pixel bounds of block (j, i) in the current frame, clamped to the image.
int start_row_index = j*block_size;
int start_col_index = i*block_size;
int stop_row_index = start_row_index +block_size;
int stop_col_index = start_col_index +block_size;
if(start_row_index < 0)
start_row_index = 0;
if(start_col_index < 0)
start_col_index = 0;
if(stop_row_index > new_height)
stop_row_index = new_height;
if(stop_col_index > new_width)
stop_col_index = new_width;
int row = 0;
int col = 0;
//Filling up each block with the values in current frame
for(int m=start_row_index; m < stop_row_index; m++)
{
col = 0;
for(int n=start_col_index ; n < stop_col_index; n++)
{
// NOTE(review): ".at(row,col)" is missing its "< uchar >" template
// argument (stripped during extraction); this does not compile as-is.
currentFrameBlocks[j][i].at(row,col) = current_grayscale_image.at(m,n);
col++;
}
row++;
}
// Center of the block, and the search window around it in the previous frame.
int center_x = (start_row_index+stop_row_index)/2;
int center_y = (start_col_index+stop_col_index)/2;
// cout << "current_center x = " << center_x << " , y=" << center_y << endl;
int start_row_search_index = center_x - search_window_size/2 ;
int start_col_search_index = center_y - search_window_size/2 ;
int stop_row_search_index = center_x + search_window_size/2;
int stop_col_search_index = center_y + search_window_size/2;
if(start_row_search_index < 0)
start_row_search_index = 0;
if(start_col_search_index < 0)
start_col_search_index = 0;
if(stop_row_search_index > new_height)
stop_row_search_index = new_height;
if(stop_col_search_index > new_width)
stop_col_search_index = new_width;
float lowest_error = 10000;
MotionVector best_motion_vector;
//Parse through the search window in previous frame and find the best motion vector
for(int m=start_row_search_index; m < stop_row_search_index; m++)
{
for(int n=start_col_search_index ; n < stop_col_search_index; n++)
{
float error = 0.0;
//Compute either Sum of Squared Differences or Sum of Absolute Differences
for(int q=0; q < block_size; q++)
{
for(int r=0 ; r < block_size; r++)
{
// NOTE(review): both ".at(...)" calls below are also missing their
// "< uchar >" template arguments.
int current_frame_pixel = currentFrameBlocks[j][i].at(q,r);
int row_index = (m+q);
int col_index = (r+n);
// if(row_index < new_height && row_index >= 0 && col_index < new_width && col_index >= 0)
// {
int previous_frame_pixel = previous_grayscale_image.at(m+q,r+n);
if(matching_criteria == SAD)
error += abs(current_frame_pixel - previous_frame_pixel);
else
// NOTE(review): sqrt(pow(x,2)) is |x|, not a squared difference —
// this makes SSD behave like SAD.
error += sqrt(pow(current_frame_pixel-previous_frame_pixel,2));
//]}
}
}
int center_x_search_window = m+block_size/2;
int center_y_search_window = n+block_size/2;
// NOTE(review): corruption seam — "if(error" here once began
// "if(error < lowest_error)"; everything up to the tail of a separate
// MSE helper (which compared image_1 and image_2 per channel) was lost.
if(error(j, i);
Vec3b rgb_value_2 = image_2.at< Vec3b >(j, i);
// NOTE(review): '^' is bitwise XOR in C++, not exponentiation — these
// terms are not squared differences as the formula intends.
mse += abs(rgb_value_1[0]-rgb_value_2[0])^2 + abs(rgb_value_1[1]-rgb_value_2[1])^2 + abs(rgb_value_1[2]-rgb_value_2[2])^2 ;
}
return mse/(m1*n1*3.0);
}
// Warp previous_frame forward along the motion field to predict the current
// frame. Pixels that no vector maps onto remain black and are filled in by
// bilinearInterpolation(). Fix: the bounds condition had been garbled to
// "predicted_col= 0" (the "< motion_field.width &&" part was stripped).
Mat compensateMotion(Mat previous_frame, MotionField motion_field)
{
Mat previous_frame_resized;
resize(previous_frame, previous_frame_resized, Size(motion_field.width,motion_field.height));
Mat motion_compensated_image = Mat(motion_field.height, motion_field.width, CV_8UC3, Scalar(0,0,0));
for(int j = 0; j < motion_field.height ; j++)
{
for(int i = 0; i < motion_field.width; i++)
{
MotionVector motion_vector = motion_field.get(j,i);
int predicted_row = j+motion_vector.vel_y;
int predicted_col = i+motion_vector.vel_x;
// Only write destinations that land inside the image.
if(predicted_row < motion_field.height && predicted_col < motion_field.width
&& predicted_row >= 0 && predicted_col >= 0)
motion_compensated_image.at< Vec3b >(predicted_row, predicted_col) = previous_frame_resized.at< Vec3b >(j, i);
}
}
// Close the holes left by the forward mapping.
bilinearInterpolation(motion_compensated_image,0);
return motion_compensated_image;
}
// Computes the per-pixel prediction error between the motion-compensated
// (predicted) frame and the actual target frame.
// predicted_image : motion-compensated BGR frame.
// target_image    : actual BGR frame (resized to the predicted frame's size).
// Returns a CV_8UC3 image of saturated channel-wise differences
// (target - predicted); identical regions appear black.
Mat compute_prediction_error(const Mat& predicted_image, const Mat& target_image)
{
    Mat target_image_resized;
    resize(target_image, target_image_resized, predicted_image.size());
    Mat prediction_error_image = Mat::zeros(predicted_image.rows, predicted_image.cols, CV_8UC3);
    for(int j = 0; j < predicted_image.rows; j++)
    {
        for(int i = 0; i < predicted_image.cols; i++)
        {
            // Vec3b subtraction saturates at 0, so negative differences clip.
            Vec3b target_image_pixel = target_image_resized.at< Vec3b >(j, i);
            Vec3b predicted_image_pixel = predicted_image.at< Vec3b >(j, i);
            prediction_error_image.at< Vec3b >(j, i) = target_image_pixel - predicted_image_pixel;
        }
    }
    return prediction_error_image;
}
int main(int argc, char** argv )
{
Mat grayscale_image1, grayscale_image2;
Mat foreman_images[10],football_images[3];
std::ostringstream foreman_source, football_source;
MotionField foreman_motion_fields[9],football_motion_fields[3];
Mat foreman_motion_estimated_images[9],football_motion_estimated_images[3];
Mat foreman_motion_compensated_images[9],football_motion_compensated_images[3];
Mat foreman_prediction_error_images[9],football_prediction_error_images[3];
for(int i=0;i<10;i++)
{
foreman_source.str("");
foreman_source.clear();
foreman_source << "../images/foreman/fm000" << i+1 << ".tif";
foreman_images[i] = imread(foreman_source.str(),1);
if(!foreman_images[i].data)
{
cout << "No image data =" << foreman_source.str();
break;
}
if(i>=1)
{
std::ostringstream foreman_estimation_result,foreman_compensation_result,foreman_prediction_result, mse_str;
foreman_estimation_result << "../results/motion_estimation/foreman/sad/8X32/estimated_motion_field_" << i << "_8_32.jpg";
foreman_motion_fields[i-1]=estimateMotionByBlockMatching(foreman_images[i-1],foreman_images[i],SAD,8,32);
foreman_motion_estimated_images[i-1] = foreman_motion_fields[i-1].getImage();
imwrite(foreman_estimation_result.str(),foreman_motion_estimated_images[i-1]);
foreman_compensation_result << "../results/motion_compensation/foreman/sad/8X32/motion_compensated_image_" << i <<"_8_32.jpg";
foreman_motion_compensated_images[i-1]=compensateMotion(foreman_images[i-1],foreman_motion_fields[i-1]);
foreman_prediction_error_images[i-1]=compute_prediction_error(foreman_motion_compensated_images[i-1],foreman_images[i]);
float mse = mean_squared_error(foreman_motion_compensated_images[i-1], foreman_images[i]);
mse_str << "mse = " << mse;
putText(foreman_motion_compensated_images[i-1], mse_str.str(), cvPoint(20,20),
FONT_HERSHEY_COMPLEX_SMALL, 0.5, cvScalar(0,255,0), 1, CV_AA);
imwrite(foreman_compensation_result.str(),foreman_motion_compensated_images[i-1]);
foreman_prediction_result << "../results/motion_compensation/foreman/sad/8X32/prediction_error_image_" << i << "_8_32.jpg";
imwrite(foreman_prediction_result.str(),foreman_prediction_error_images[i-1]);
foreman_estimation_result.str("");
foreman_estimation_result.clear();
foreman_compensation_result.str("");
foreman_compensation_result.clear();
foreman_prediction_result.str("");
foreman_prediction_result.clear();
mse_str.str("");
mse_str.clear();
foreman_estimation_result << "../results/motion_estimation/foreman/sad/16X64/estimated_motion_field_" << i << "_16_64.jpg";
foreman_motion_fields[i-1]=estimateMotionByBlockMatching(foreman_images[i-1],foreman_images[i],SAD,16,64);
foreman_motion_estimated_images[i-1] = foreman_motion_fields[i-1].getImage();
imwrite(foreman_estimation_result.str(),foreman_motion_estimated_images[i-1]);
foreman_compensation_result << "../results/motion_compensation/foreman/sad/16X64/motion_compensated_image_" << i << "_16_64.jpg";
foreman_motion_compensated_images[i-1]=compensateMotion(foreman_images[i-1],foreman_motion_fields[i-1]);
foreman_prediction_error_images[i-1]=compute_prediction_error(foreman_motion_compensated_images[i-1],foreman_images[i]);
mse = mean_squared_error(foreman_motion_compensated_images[i-1], foreman_images[i]);
mse_str << "mse = " << mse;
putText(foreman_motion_compensated_images[i-1], mse_str.str(), cvPoint(20,20),
FONT_HERSHEY_COMPLEX_SMALL, 0.5, cvScalar(0,255,0), 1, CV_AA);
imwrite(foreman_compensation_result.str(),foreman_motion_compensated_images[i-1]);
foreman_prediction_result << "../results/motion_compensation/foreman/sad/16X64/prediction_error_image_" << i << "_16_64.jpg";
imwrite(foreman_prediction_result.str(),foreman_prediction_error_images[i-1]);
foreman_estimation_result.str("");
foreman_estimation_result.clear();
foreman_compensation_result.str("");
foreman_compensation_result.clear();
foreman_prediction_result.str("");
foreman_prediction_result.clear();
mse_str.str("");
mse_str.clear();
foreman_estimation_result << "../results/motion_estimation/foreman/ssd/8X32/estimated_motion_field_" << i << "_8_32.jpg";
foreman_motion_fields[i-1]=estimateMotionByBlockMatching(foreman_images[i-1],foreman_images[i],SSD,8,32);
foreman_motion_estimated_images[i-1] = foreman_motion_fields[i-1].getImage();
imwrite(foreman_estimation_result.str(),foreman_motion_estimated_images[i-1]);
foreman_compensation_result << "../results/motion_compensation/foreman/ssd/8X32/motion_compensated_image_" << i << "_8_32.jpg";
foreman_motion_compensated_images[i-1]=compensateMotion(foreman_images[i-1],foreman_motion_fields[i-1]);
foreman_prediction_error_images[i-1]=compute_prediction_error(foreman_motion_compensated_images[i-1],foreman_images[i]);
mse = mean_squared_error(foreman_motion_compensated_images[i-1], foreman_images[i]);
mse_str << "mse = " << mse;
putText(foreman_motion_compensated_images[i-1], mse_str.str(), cvPoint(20,20),
FONT_HERSHEY_COMPLEX_SMALL, 0.5, cvScalar(0,255,0), 1, CV_AA);
imwrite(foreman_compensation_result.str(),foreman_motion_compensated_images[i-1]);
foreman_prediction_result << "../results/motion_compensation/foreman/ssd/8X32/prediction_error_image_" << i << "_8_32.jpg";
imwrite(foreman_prediction_result.str(),foreman_prediction_error_images[i-1]);
foreman_estimation_result.str("");
foreman_estimation_result.clear();
foreman_compensation_result.str("");
foreman_compensation_result.clear();
foreman_prediction_result.str("");
foreman_prediction_result.clear();
mse_str.str("");
mse_str.clear();
foreman_estimation_result << "../results/motion_estimation/foreman/ssd/16X64/estimated_motion_field_" << i << "_16_64.jpg";
foreman_motion_fields[i-1]=estimateMotionByBlockMatching(foreman_images[i-1],foreman_images[i],SSD,16,64);
foreman_motion_estimated_images[i-1] = foreman_motion_fields[i-1].getImage();
imwrite(foreman_estimation_result.str(),foreman_motion_estimated_images[i-1]);
foreman_compensation_result <<"../results/motion_compensation/foreman/ssd/16X64/motion_compensated_image_" << i << "_16_64.jpg";
foreman_motion_compensated_images[i-1]=compensateMotion(foreman_images[i-1],foreman_motion_fields[i-1]);
foreman_prediction_error_images[i-1]=compute_prediction_error(foreman_motion_compensated_images[i-1],foreman_images[i]);
mse = mean_squared_error(foreman_motion_compensated_images[i-1], foreman_images[i]);
mse_str << "mse = " << mse;
putText(foreman_motion_compensated_images[i-1], mse_str.str(), cvPoint(20,20),
FONT_HERSHEY_COMPLEX_SMALL, 0.5, cvScalar(0,255,0), 1, CV_AA);
imwrite(foreman_compensation_result.str(),foreman_motion_compensated_images[i-1]);
foreman_prediction_result << "../results/motion_compensation/foreman/ssd/16X64/prediction_error_image_" << i << "_16_64.jpg";
imwrite(foreman_prediction_result.str(),foreman_prediction_error_images[i-1]);
}
}
for(int i=0;i<3;i++)
{
football_source.str("");
football_source.clear();
football_source << "../images/football/football_qcif_" << i+1 << ".bmp";
football_images[i] = imread(football_source.str(),1);
if(!football_images[i].data)
{
cout << "No image data =" << football_source.str();
break;
}
if(i>=1)
{
std::ostringstream football_estimation_result,football_compensation_result,football_prediction_result, mse_str;
football_estimation_result << "../results/motion_estimation/football/sad/8X32/estimated_motion_field_" << i << "_8_32.jpg";
football_motion_fields[i-1]=estimateMotionByBlockMatching(football_images[i-1],football_images[i],SAD,8,32);
football_motion_estimated_images[i-1] = football_motion_fields[i-1].getImage();
imwrite(football_estimation_result.str(),football_motion_estimated_images[i-1]);
football_compensation_result << "../results/motion_compensation/football/sad/8X32/motion_compensated_image_" << i << "_8_32.jpg";
football_motion_compensated_images[i-1]=compensateMotion(football_images[i-1],football_motion_fields[i-1]);
football_prediction_error_images[i-1]=compute_prediction_error(football_motion_compensated_images[i-1],football_images[i]);
float mse = mean_squared_error(football_motion_compensated_images[i-1], football_images[i]);
mse_str << "mse = " << mse;
putText(football_motion_compensated_images[i-1], mse_str.str(), cvPoint(20,20),
FONT_HERSHEY_COMPLEX_SMALL, 0.5, cvScalar(0,255,0), 1, CV_AA);
imwrite(football_compensation_result.str(),football_motion_compensated_images[i-1]);
football_prediction_result << "../results/motion_compensation/football/sad/8X32/prediction_error_image_" << i << "_8_32.jpg";
imwrite(football_prediction_result.str(),football_prediction_error_images[i-1]);
football_estimation_result.str("");
football_estimation_result.clear();
football_compensation_result.str("");
football_compensation_result.clear();
football_prediction_result.str("");
football_prediction_result.clear();
mse_str.str("");
mse_str.clear();
football_estimation_result << "../results/motion_estimation/football/sad/16X64/estimated_motion_field_" << i << "_16_64.jpg";
football_motion_fields[i-1]=estimateMotionByBlockMatching(football_images[i-1],football_images[i],SAD,16,64);
football_motion_estimated_images[i-1] = football_motion_fields[i-1].getImage();
imwrite(football_estimation_result.str(),football_motion_estimated_images[i-1]);
football_compensation_result << "../results/motion_compensation/football/sad/16X64/motion_compensated_image_" << i << "_16_64.jpg";
football_motion_compensated_images[i-1]=compensateMotion(football_images[i-1],football_motion_fields[i-1]);
football_prediction_error_images[i-1]=compute_prediction_error(football_motion_compensated_images[i-1],football_images[i]);
mse = mean_squared_error(football_motion_compensated_images[i-1], football_images[i]);
mse_str << "mse = " << mse;
putText(football_motion_compensated_images[i-1], mse_str.str(), cvPoint(20,20),
FONT_HERSHEY_COMPLEX_SMALL, 0.5, cvScalar(0,255,0), 1, CV_AA);
imwrite(football_compensation_result.str(),football_motion_compensated_images[i-1]);
football_prediction_result << "../results/motion_compensation/football/sad/16X64/prediction_error_image_" << i << "_16_64.jpg";
imwrite(football_prediction_result.str(),football_prediction_error_images[i-1]);
football_estimation_result.str("");
football_estimation_result.clear();
football_compensation_result.str("");
football_compensation_result.clear();
football_prediction_result.str("");
football_prediction_result.clear();
mse_str.str("");
mse_str.clear();
football_estimation_result << "../results/motion_estimation/football/ssd/8X32/estimated_motion_field_" << i << "_8_32.jpg";
football_motion_fields[i-1]=estimateMotionByBlockMatching(football_images[i-1],football_images[i],SSD,8,32);
football_motion_estimated_images[i-1] = football_motion_fields[i-1].getImage();
imwrite(football_estimation_result.str(),football_motion_estimated_images[i-1]);
football_compensation_result << "../results/motion_compensation/football/ssd/8X32/motion_compensated_image_" << i << "_8_32.jpg";
football_motion_compensated_images[i-1]=compensateMotion(football_images[i-1],football_motion_fields[i-1]);
football_prediction_error_images[i-1]=compute_prediction_error(football_motion_compensated_images[i-1],football_images[i]);
mse = mean_squared_error(football_motion_compensated_images[i-1], football_images[i]);
mse_str << "mse = " << mse;
putText(football_motion_compensated_images[i-1], mse_str.str(), cvPoint(20,20),
FONT_HERSHEY_COMPLEX_SMALL, 0.5, cvScalar(0,255,0), 1, CV_AA);
imwrite(football_compensation_result.str(),football_motion_compensated_images[i-1]);
football_prediction_result << "../results/motion_compensation/football/ssd/8X32/prediction_error_image_" << i << "_8_32.jpg";
imwrite(football_prediction_result.str(),football_prediction_error_images[i-1]);
football_estimation_result.str("");
football_estimation_result.clear();
football_compensation_result.str("");
football_compensation_result.clear();
football_prediction_result.str("");
football_prediction_result.clear();
mse_str.str("");
mse_str.clear();
football_estimation_result << "../results/motion_estimation/football/ssd/16X64/estimated_motion_field_" << i << "_16_64.jpg";
football_motion_fields[i-1]=estimateMotionByBlockMatching(football_images[i-1],football_images[i],SSD,16,64);
football_motion_estimated_images[i-1] = football_motion_fields[i-1].getImage();
imwrite(football_estimation_result.str(),football_motion_estimated_images[i-1]);
football_compensation_result << "../results/motion_compensation/football/ssd/16X64/motion_compensated_image_" << i << "_16_64.jpg";
football_motion_compensated_images[i-1]=compensateMotion(football_images[i-1],football_motion_fields[i-1]);
football_prediction_error_images[i-1]=compute_prediction_error(football_motion_compensated_images[i-1],football_images[i]);
mse = mean_squared_error(football_motion_compensated_images[i-1], football_images[i]);
mse_str << "mse = " << mse;
putText(football_motion_compensated_images[i-1], mse_str.str(), cvPoint(20,20),
FONT_HERSHEY_COMPLEX_SMALL, 0.5, cvScalar(0,255,0), 1, CV_AA);
imwrite(football_compensation_result.str(),football_motion_compensated_images[i-1]);
football_prediction_result << "../results/motion_compensation/football/ssd/16X64/prediction_error_image_" << i << "_16_64.jpg";
imwrite(football_prediction_result.str(),football_prediction_error_images[i-1]);
}
}
waitKey(0);
return 0;
}
The image is initially sharpened in order to boost its edges. Then bilateral filter was used to smoothen the input image while not distorting the edges.
Edges are extracted from the enhanced image using Canny Edge Detection method.
The edges might have some discontinuities and in-order to rectify that erosion and dilation operations are used for opening and closing (filling) the holes / discontinuities in the edges.
The enhanced image from the previous step contains a lot of contours. The contours from this image are extracted using OpenCV's 'findContour' function. The largest contour with 4 vertices will contain the document that has to be scanned and this contour is made the reference.
The 4 corners of the reference contour are extracted using the Harris Corner Detection method.
The four corners are ordered in the clockwise manner. The width and height of the contour are approximated based on the location of these corners and then a reference rectangle is constructed based on this. The perspective transform between the corners of the contour and the corners of the reference rectangle is computed. Then the pixels inside the contour are warped to the reference rectangle using the perspective transform computed previously. This warped image is made the reference image for image registration purposes.
In order to enhance the warped image and make it suitable for printing purposes, the warped image is thresholded adaptively using the Gaussian Minus C method resulting in a Binary image.
To extract content from the filled version of the reference document, features are detected in both the reference document and the filled document. In this project, ORB features have been used in order to do that, as ORB is an efficient alternative to SIFT. The matching features in both the documents are computed using k-NN and further filtered to get more robust features. In order to align the filled document with the reference document, 4 matching features are required to compute the perspective transform linking both the documents. These 4 features are extracted by doing RANSAC, which will give the best combination of 4 features that minimizes the alignment error. Using the resulting perspective transform , the filled document is warped and aligned with the reference document.
Background Subtraction is required to extract the content from the filled document. This is done by using the adaptively thresholded versions of the reference document and the aligned document, and then using combinational logic to get the foreground.
Based on the filtered version of the extracted foreground, pixels in the scanned image are overlaid on top of the reference document.
The flowcharts of the techniques used are illustrated below.
The results of the operations done on the scanned image are shown below.
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

#include <opencv2/features2d.hpp>
#include <opencv2/opencv.hpp>

#include "edge_detection.h"
#include "harris_corner_detection.h"
#include "image_enhancement.h"
#include "image_filters.h"
#include "image_transformation.h"
#define THRESHOLD_RATIO_H 0.24
#define THRESHOLD_RATIO_L 0.
using namespace cv;
using namespace std;
// Result of the RANSAC search in get_best_perspective_transform: the winning
// 3x3 homography and the indices (into the match list) of the four
// correspondences that produced it.
struct RansacT
{
    Mat best_perspective_transform;
    // NOTE(review): the element type was lost during file extraction; the
    // function stores integer match indices (index1..index4) in this field,
    // so vector<int> — confirm against the original source.
    std::vector<int> best_four_matches;
};
// RANSAC search for the perspective transform (homography) mapping the
// scanned document's keypoints (keypoints2) onto the reference document's
// keypoints (keypoints1). Each iteration picks four matches, fits a
// transform, and counts how many matches it re-projects within `threshold`
// (Euclidean distance); the transform with the most inliers wins.
// NOTE(review): this file was mangled during extraction — template arguments
// (presumably vector<KeyPoint>, vector<DMatch>, vector<int>, Mat::at<float>)
// were stripped and several lines were fused (the for/while headers and the
// declarations of count, best_matched_points, point1/point2, and B below).
// Code is preserved byte-for-byte; confirm against the original source.
RansacT get_best_perspective_transform(vector keypoints1, vector keypoints2, std::vector matches, int iterations = 50000, float threshold = 0.175)
{
std::vector num_vector;
// set some values:
// NOTE(review): fused line — the loop body that fills num_vector and the
// declaration of best_index_vector were collapsed together here.
for (int i=0; i best_index_vector;
//DO RANSAC
// NOTE(review): fused line — the while condition (count < iterations) and
// the random selection of index1..index4 plus index_vector's declaration
// were collapsed together here.
while(count index_vector;
index_vector.push_back(index1);
index_vector.push_back(index2);
index_vector.push_back(index3);
index_vector.push_back(index4);
// Build the four point correspondences for the minimal homography fit.
point1[0] = Point2f(keypoints1[matches[index1].queryIdx].pt.x, keypoints1[matches[index1].queryIdx].pt.y);
point2[0] = Point2f(keypoints2[matches[index1].trainIdx].pt.x, keypoints2[matches[index1].trainIdx].pt.y);
point1[1] = Point2f(keypoints1[matches[index2].queryIdx].pt.x, keypoints1[matches[index2].queryIdx].pt.y);
point2[1] = Point2f(keypoints2[matches[index2].trainIdx].pt.x, keypoints2[matches[index2].trainIdx].pt.y);
point1[2] = Point2f(keypoints1[matches[index3].queryIdx].pt.x, keypoints1[matches[index3].queryIdx].pt.y);
point2[2] = Point2f(keypoints2[matches[index3].trainIdx].pt.x, keypoints2[matches[index3].trainIdx].pt.y);
point1[3] = Point2f(keypoints1[matches[index4].queryIdx].pt.x, keypoints1[matches[index4].queryIdx].pt.y);
point2[3] = Point2f(keypoints2[matches[index4].trainIdx].pt.x, keypoints2[matches[index4].trainIdx].pt.y);
// Fit scanned -> reference transform from the four correspondences.
Mat pt = perspective_transform(point2, point1);
int matched_points = 0;
// Count inliers: warp every matched scanned keypoint (homogeneous B) and
// compare against its reference keypoint (A).
// NOTE(review): fused line — the inner for header and B's declaration/first
// assignment (B.at<float>(0,0) = ...) were collapsed together here.
for (int i=0; i(0,0) = keypoints2[matches[i].trainIdx].pt.x;
B.at(1,0) = keypoints2[matches[i].trainIdx].pt.y;
Mat A = Mat::ones( 3, 1, CV_32F );
A.at(0,0) = keypoints1[matches[i].queryIdx].pt.x;
A.at(1,0) = keypoints1[matches[i].queryIdx].pt.y;
Mat Warped_B = pt*B;
float euclidean_distance = sqrt(pow(A.at(0,0)-Warped_B.at(0,0),2)+pow(A.at(1,0)-Warped_B.at(1,0),2));
if(euclidean_distance <= threshold)
matched_points+=1;
}
// Keep the transform with the highest inlier count seen so far.
if(matched_points > best_matched_points)
{
best_matched_points = matched_points;
best_perspective_transform = pt;
best_index_vector = index_vector;
}
count++;
}
RansacT data;
data.best_perspective_transform = best_perspective_transform ;
data.best_four_matches = best_index_vector;
return data;
}
//Order the points as Top_Left, Top_Right, Bottom_Right, Bottom_Left
void order_points(std::vector corner_indices, IndexT ordered_points[])
{
int max_sum = 0;
int min_sum = 100000;
int max_diff = -100000;
int min_diff = 100000;
for (std::vector::iterator it = corner_indices.begin() ; it != corner_indices.end(); ++it)
{
int sum = it->row + it->col;
int diff = it->row - it->col;
if(sum > max_sum)
{
max_sum = sum ;
ordered_points[2] = *it;
}
if(sum < min_sum)
{
min_sum = sum ;
ordered_points[0] = *it;
}
if(diff < min_diff)
{
min_diff = diff ;
ordered_points[1] = *it;
}
if(diff > max_diff)
{
max_diff = diff ;
ordered_points[3] = *it;
}
}
}
// Detects the document in a photographed image, warps it to a fronto-parallel
// rectangle and adaptively thresholds it.
// Pipeline: downscale -> enhance (gaussian + bilateral + sharpen) -> Canny ->
// morphological cleanup -> largest 4-corner contour -> Harris corners ->
// perspective warp -> adaptive threshold.
// input_image        : BGR photograph containing the document.
// image_registration : when true, skip showing/writing the intermediate
//                      visualisations (used by the registration driver).
// Returns the warped (colour) document, or input_image unchanged when fewer
// than four corners are found.
Mat scan_document(const Mat& input_image, bool image_registration = false)
{
    Mat grayscale_image, equalized_image, detected_edges, canny_detected_edges, receipt, warped_image;
    // Downscale so the larger dimension fits roughly within 640x480.
    // NOTE(review): the integer division happens before cvCeil, so the ratios
    // are truncated rather than rounded up — confirm this is intended.
    float row_ratio = cvCeil(input_image.rows/640);
    float col_ratio = cvCeil(input_image.cols/480);
    int scale_factor = row_ratio > col_ratio ? row_ratio : col_ratio;
    if(scale_factor < 1)
        scale_factor = 1;
    Mat rgb_image_resized = Mat::zeros( input_image.rows/scale_factor, input_image.cols/scale_factor, CV_8UC3 );
    if(input_image.cols > 480 || input_image.rows > 640 )
        rgb_image_resized = resize_image(input_image, input_image.cols/scale_factor, input_image.rows/scale_factor);
    else
    {
        rgb_image_resized = input_image.clone();
        scale_factor = 1;
    }
    // Enhancement: smooth, then edge-preserving bilateral filter, then sharpen.
    grayscale_image = convert_to_grayscale(rgb_image_resized);
    grayscale_image = gaussian_filter(grayscale_image, 3, 1.4);
    equalized_image = grayscale_image;
    equalized_image = bilateral_filter(equalized_image, 5, 25, 15);
    if(equalized_image.type() != CV_8UC1)
        equalized_image.convertTo(equalized_image, CV_8UC1);
    equalized_image = sharpen(equalized_image, 5, 1);
    canny_detected_edges = canny_edge_detection(equalized_image, THRESHOLD_RATIO_L, THRESHOLD_RATIO_H);
    detected_edges = canny_detected_edges;
    // Alternating dilation (true) / erosion (false) passes to close gaps and
    // remove noise in the edge map.
    detected_edges = morphological_filter( detected_edges, 5, true, false);
    detected_edges = morphological_filter( detected_edges, 3, false, false);
    detected_edges = morphological_filter( detected_edges, 3, false, false);
    detected_edges = morphological_filter( detected_edges, 7, true, false);
    detected_edges = morphological_filter( detected_edges, 3, true, false);
    detected_edges = morphological_filter( detected_edges, 5, false, false);
    detected_edges = morphological_filter( detected_edges, 3, false, false);
    // Template arguments below were stripped during extraction; restored to
    // the standard findContours signature.
    vector<vector<Point> > contours;
    vector<Vec4i> hierarchy;
    findContours( detected_edges, contours, hierarchy, CV_RETR_TREE, CV_CHAIN_APPROX_SIMPLE, Point(0, 0) );
    int number_of_contours = contours.size();
    if (number_of_contours > 0 )
    {
        float previous_area = 0.0;
        float current_area = 0.0;
        int largest_contour_id = 0;
        Mat image_of_contours = Mat::zeros( rgb_image_resized.rows, rgb_image_resized.cols, CV_8UC1 );
        Mat viz_corner = rgb_image_resized.clone();
        Mat viz_contour = rgb_image_resized.clone();
        // Polygon-approximate every contour in place and keep the largest area.
        for( int i = 0; i < number_of_contours ; i++ )
        {
            cv::approxPolyDP(cv::Mat(contours[i]), contours[i], cv::arcLength(cv::Mat(contours[i]), true)*0.05, true);
            current_area = contourArea(contours[i]);
            if ( current_area > previous_area )
            {
                largest_contour_id = i;
                previous_area = current_area;
            }
        }
        drawContours( image_of_contours , contours, largest_contour_id, Scalar(255), 2, 8, hierarchy, 0, Point());
        drawContours( viz_contour , contours, largest_contour_id, Scalar(0, 255, 0), 2, 8, hierarchy, 0, Point());
        // Find the document corners on the drawn contour image.
        CornerT corner_data = harris_corner_detection(image_of_contours, -0.25, 30, 0.03, 4);
        IndexT ordered_points[4];
        if(corner_data.corner_indices.size() < 4)
        {
            cout << "Cannot process image with less than 4 corners" << endl;
            return input_image;
        }
        else
            cout << "Image with " << corner_data.corner_indices.size() << " corners found" << endl;
        order_points(corner_data.corner_indices, ordered_points);
        for(int i = 0; i < 4; i++)
        {
            circle( viz_corner, Point( ordered_points[i].col, ordered_points[i].row ), 10, Scalar(0,255,0), 2, 8, 0 );
            // Map the corners back to full-resolution coordinates.
            ordered_points[i].row = cvFloor(ordered_points[i].row*scale_factor);
            ordered_points[i].col = cvFloor(ordered_points[i].col*scale_factor);
        }
        // Output rectangle uses the longer of each pair of opposite sides.
        int width1 = cvFloor(sqrt(pow(ordered_points[3].col - ordered_points[2].col,2) + pow(ordered_points[3].row - ordered_points[2].row,2)));
        int width2 = cvFloor(sqrt(pow(ordered_points[1].col - ordered_points[0].col,2) + pow(ordered_points[1].row - ordered_points[0].row,2)));
        int width = (width1 > width2) ? width1 : width2;
        int height1 = cvFloor(sqrt(pow(ordered_points[1].col - ordered_points[2].col,2) + pow(ordered_points[1].row - ordered_points[2].row,2)));
        int height2 = cvFloor(sqrt(pow(ordered_points[3].col - ordered_points[0].col,2) + pow(ordered_points[3].row - ordered_points[0].row,2)));
        int height = (height1 > height2) ? height1 : height2;
        Point2f input_rectangle[4], output_rectangle[4];
        input_rectangle[0] = Point2f(ordered_points[0].col, ordered_points[0].row);
        input_rectangle[1] = Point2f(ordered_points[1].col, ordered_points[1].row);
        input_rectangle[2] = Point2f(ordered_points[2].col, ordered_points[2].row);
        input_rectangle[3] = Point2f(ordered_points[3].col, ordered_points[3].row);
        output_rectangle[0] = Point2f(0,0);
        output_rectangle[1] = Point2f(width-1,0);
        output_rectangle[2] = Point2f(width-1,height-1);
        output_rectangle[3] = Point2f(0,height-1);
        // Warp the full-resolution image into the reference rectangle; pixels
        // the warp leaves vacant keep this sentinel and are interpolated.
        int vacant_pixel_value = 0;
        warped_image = Mat::ones(height, width, CV_8UC3)*vacant_pixel_value;
        rgb_image_resized = input_image.clone();
        Mat warped_grayscale_image;
        Mat pt = perspective_transform(input_rectangle, output_rectangle);
        warp_image(rgb_image_resized, warped_image, pt);
        bilinearInterpolation(warped_image, vacant_pixel_value);
        warped_grayscale_image = convert_to_grayscale(warped_image);
        receipt = warped_grayscale_image;
        // Adaptive (Gaussian minus C) thresholding for a print-ready binary image.
        threshold_image(gaussian_filter(warped_grayscale_image, 3, 1.4), receipt, 5, false, true);
        if(!image_registration)
        {
            namedWindow("Input", WINDOW_AUTOSIZE);
            imwrite( "../results/document_scan/resized_input.jpg", rgb_image_resized );
            imshow("Input",rgb_image_resized);
            namedWindow("Image Enhancement", WINDOW_AUTOSIZE);
            imwrite( "../results/document_scan/image_enhancement.jpg", equalized_image );
            imshow("Image Enhancement",equalized_image);
            namedWindow("Canny Edge Detection", WINDOW_AUTOSIZE);
            imwrite( "../results/document_scan/canny_edge_detection.jpg", canny_detected_edges );
            imshow("Canny Edge Detection",canny_detected_edges);
            namedWindow("Morphological Operations", WINDOW_AUTOSIZE);
            imwrite( "../results/document_scan/morphological_operation.jpg", detected_edges );
            imshow("Morphological Operations",detected_edges);
            namedWindow("Contour Detection", WINDOW_AUTOSIZE);
            imwrite( "../results/document_scan/contour_detection.jpg", viz_contour );
            imshow("Contour Detection",viz_contour);
            namedWindow("Corner Detection", WINDOW_AUTOSIZE);
            imwrite( "../results/document_scan/corner_detection.jpg", viz_corner);
            imshow("Corner Detection",viz_corner);
            namedWindow("Warp with Perspective Transform", WINDOW_AUTOSIZE);
            imwrite( "../results/document_scan/warped_image.jpg", warped_image );
            imshow("Warp with Perspective Transform",warped_image);
            namedWindow("Adaptive Thresholding", WINDOW_AUTOSIZE);
            imwrite( "../results/document_scan/adaptive_thresholding.jpg", receipt );
            imshow("Adaptive Thresholding",receipt);
        }
    }
    return warped_image;
}
// Extracts the content present in image2 (scanned/filled document) but not in
// image1 (reference document) by comparing adaptively-thresholded versions of
// both. The Mat::at template arguments were stripped during extraction and
// are restored as <uchar> (both working images are CV_8UC1).
// Returns a CV_8UC1 mask in which new-content (foreground) pixels are 0 and
// everything else is 255.
Mat background_subtraction(Mat image1, Mat image2)
{
    Mat thresholded_image1, thresholded_image2, subtracted_image1, subtracted_image2;
    Mat improved_image1 = sharpen(gaussian_filter(convert_to_grayscale(image1), 5, 2), 5, 1);
    Mat improved_image2 = sharpen(gaussian_filter(convert_to_grayscale(image2), 5, 1.4), 5, 1);
    threshold_image(improved_image1, thresholded_image1, 5, false, true);
    threshold_image(improved_image2, thresholded_image2, 5, false, true);
    subtracted_image1 = Mat::zeros(image1.rows, image1.cols, CV_8UC1);
    subtracted_image2 = Mat::zeros(image1.rows, image1.cols, CV_8UC1);
    // Pass 1: mark (255) pixels that are black in BOTH images — the content
    // shared with the reference document.
    for(int j = 0; j < image1.rows; j++)
        for(int i = 0; i < image1.cols; i++)
        {
            if(thresholded_image1.at<uchar>(j,i) == thresholded_image2.at<uchar>(j,i) && thresholded_image1.at<uchar>(j,i) == 0)
                subtracted_image1.at<uchar>(j,i) = 255;
            else
                subtracted_image1.at<uchar>(j,i) = 0;
        }
    subtracted_image1 = morphological_filter( subtracted_image1, 5, true, false);
    // Pass 2: pixels black in the scanned image but NOT part of the shared
    // content become foreground (0); everything else is background (255).
    for(int j = 0; j < image1.rows; j++)
        for(int i = 0; i < image1.cols; i++)
        {
            if(subtracted_image1.at<uchar>(j,i) == thresholded_image2.at<uchar>(j,i) && thresholded_image2.at<uchar>(j,i) == 0 )
                subtracted_image2.at<uchar>(j,i) = 0;
            else
                subtracted_image2.at<uchar>(j,i) = 255;
        }
    imshow("Thresholded Reference Image",thresholded_image1);
    imwrite( "../results/image_registration/thresholded_reference_image.jpg", thresholded_image1);
    imshow("Thresholded Scanned Image",thresholded_image2);
    imwrite( "../results/image_registration/thresholded_scanned_image.jpg", thresholded_image2);
    // Final cleanup: erosion followed by median filtering to drop speckle.
    subtracted_image2 = morphological_filter( subtracted_image2, 3, false, false);
    subtracted_image2 = median_filter(subtracted_image2, 5);
    return subtracted_image2;
}
// Overlays the foreground pixels (mask value 0) of image2 onto a copy of
// image1. A 10-pixel border is left untouched. The Mat::at template
// arguments were stripped during extraction; restored as <Vec3b> for colour
// (CV_8UC3) images and <uchar> otherwise, matching the type() branch.
// Returns the composited image; inputs are not modified.
Mat foreground_addition(const Mat& image1, const Mat& image2, const Mat& foreground)
{
    Mat restored_image = image1.clone();
    for(int j = 10; j < image1.rows-10; j++)
        for(int i = 10; i < image1.cols-10; i++)
        {
            if(foreground.at<uchar>(j,i) == 0 )
            {
                if(image1.type() == CV_8UC3)
                    restored_image.at<Vec3b>(j,i) = image2.at<Vec3b>(j,i);
                else
                    restored_image.at<uchar>(j,i) = image2.at<uchar>(j,i);
            }
        }
    return restored_image;
}
// Aligns the scanned (filled) document to the reference document: ORB
// keypoints and descriptors are computed in both images, matched with
// brute-force Hamming k-NN in both directions, filtered by Lowe's ratio test,
// and (per the fused code below) a symmetric test; the loop retries with a
// larger ORB feature budget until enough matches are found or max_iterations
// passes elapse.
// NOTE(review): this region was mangled during extraction — template
// arguments (e.g. Ptr<ORB>, vector<KeyPoint>, vector<DMatch>,
// vector<vector<DMatch> >) were stripped, and the tail of image_matching
// (symmetric test, RANSAC call, warp, return) was fused with the start of
// main() at the "for(int i=0; i=2)" line. Preserved byte-for-byte; confirm
// against the original source.
Mat image_matching(Mat reference_image, Mat scanned_image)
{
int size = 0;
int count = 0;
int max_iterations = 20;
std::vector matches;
vector keypoints1, keypoints2;
// Retry with a larger feature budget until at least 500 matches are kept.
while(size < 500 && count < max_iterations)
{
// 5000 base features, +500 per retry.
Ptr detector = ORB::create(5000 + count*500);
Ptr extractor = ORB::create();
detector->detect(reference_image, keypoints1);
detector->detect(scanned_image, keypoints2);
Mat descriptors1,descriptors2;
extractor->compute(reference_image, keypoints1,descriptors1);
extractor->compute(scanned_image, keypoints2,descriptors2);
// Two-directional k-NN matching (k = 2) for the ratio + symmetric tests.
vector< vector > matches12, matches21;
Ptr matcher = DescriptorMatcher::create("BruteForce-Hamming");
matcher->knnMatch( descriptors1, descriptors2, matches12, 2 );
matcher->knnMatch( descriptors2, descriptors1, matches21, 2 );
std::vector good_matches1, good_matches2;
// Ratio Test proposed by David Lowe
float ratio = 0.8;
for(int i=0; i < matches12.size(); i++){
if(matches12[i][0].distance < ratio * matches12[i][1].distance)
good_matches1.push_back(matches12[i][0]);
}
for(int i=0; i < matches21.size(); i++){
if(matches21[i][0].distance < ratio * matches21[i][1].distance)
good_matches2.push_back(matches21[i][0]);
}
// Symmetric Test
std::vector better_matches;
// NOTE(review): fused splice — the remainder of image_matching and the
// opening of main() (declarations of image1/image2/registration and the
// argc check "if(argc >= 2)") were collapsed into the next line.
for(int i=0; i=2)
{
try
{
// NOTE(review): atoi does not throw; this try/catch has no effect, and
// the exception is caught by value — flagged, left as found.
registration = atoi(argv[1]);
}
catch (Exception e)
{
//do nothing
}
}
// Mode 1: full image-registration pipeline (scan, align, subtract, restore).
if(registration == 1)
{
image2 = imread( "../images/image_registration/scanned_image.jpg", 1 );
image1 = imread( "../images/image_registration/reference_image.jpg", 1);
if ( !image1.data || !image2.data)
{
printf("No image data \n");
return -1;
}
Mat scanned_image = image2;
Mat reference_image = scan_document(image1,true);
Mat warped_scanned_image = image_matching(reference_image,scanned_image);
Mat foreground = background_subtraction(reference_image, warped_scanned_image);
Mat restored_image = foreground_addition(reference_image, warped_scanned_image, foreground);
Mat thresholded_restored_image;
threshold_image(convert_to_grayscale(restored_image),thresholded_restored_image, 6, false, true);
imshow("Reference Image",reference_image);
imwrite( "../results/image_registration/reference_image.jpg", reference_image);
imshow("Warped Scanned Image",warped_scanned_image);
imwrite( "../results/image_registration/warped_scanned_image.jpg", warped_scanned_image);
imshow("Background Subtraction",foreground);
imwrite( "../results/image_registration/background_subtraction.jpg", foreground);
imshow("Thresholded Restored Image",thresholded_restored_image);
imwrite( "../results/image_registration/thresholded_restored_image.jpg", thresholded_restored_image);
imshow("Restored Image",restored_image);
imwrite( "../results/image_registration/restored_image.jpg", restored_image);
}
// Default mode: document-scan pipeline only.
else
{
image1 = imread( "../images/document_scan/scanned_image.jpg", 1 );
if (!image1.data)
{
printf("No image data \n");
return -1;
}
scan_document(image1);
}
waitKey(0);
return 0;
}
I am a second-year master's degree student in Robotics at the University of Maryland, College Park (UMD), specializing in vision-based control of autonomous vehicles. I will be graduating in May 2017. I look forward to working at the intersection of Robotics, Computer Vision and Machine Learning.
Roboticist