Image Convolution Visualizer

Image Convolution Visualizer

In the field of image processing, the application of convolution matrices to images stands as a cornerstone technique. This post focuses on illustrating this process through an interactive visualization, utilizing the Quadrille filter method alongside custom display functions. The visualization aims to clarify how various masks affect individual pixels in a source image, making the intricate process of image convolution more comprehensible. By offering an interactive experience, it sheds light on the significant impact these matrices have on digital images, providing a deeper insight into a key image manipulation method.

The interactive visualization below demonstrates how image convolution is applied to a source image, using a specific mask to generate a target image. The pixel under the mouse, along with its neighboring pixels in the source image (displayed on the left), is highlighted to illustrate its role in computing the corresponding pixel in the target image (shown on the right). Beneath this, on the left side, the mask is displayed with its numerical values and a green shading that reflects the relative values of each cell. Correspondingly, on the right side, the visualization showcases the component-wise multiplication of the mask values with the neighboring pixels (from above left), culminating in the final value of the target image’s pixel (highlighted on the right). It is important to note that the dimensions of the neighboring pixels always match the kernel size of the mask.

(move mouse on source or target images and see how a convolution is applied)

code
// Quadrille-wide drawing defaults used by this sketch.
Quadrille.textColor = 'magenta';
Quadrille.outline = 'lime';

// Sketch state. `array` and `alpha` are not referenced in the visible code.
let array;
// Quadrilles currently shown on the left (source) and right (target).
let source;
let target;
// Source / target quadrilles keyed by resolution power (filled in update()).
let sources = {};
let targets = {};
// Numeric mask used for filtering and its string twin used for labels.
let mask;
let stringMask;
// Table of named masks and the dropdown that picks one.
let masks;
let maskSelector;
// Image loaded in preload().
let image;
// Dropdown whose value is the resolution power.
let resolution;
let alpha;

// The canvas is created as (2 * size) wide by (size + maskLength) tall.
const size = 345;
// Side in pixels over which the mask and hint cells are laid out.
const maskLength = 300;

// Divides a pixel extent by 2^resolution, i.e. by the number of cells per side.
const length = (extent) => extent / (2 ** resolution.value());
// Text shown for a cell: the integer red channel of a p5.Color, the decimal
// form of a number, and '0' for anything else (including empty cells).
const toString = (cell) => {
  if (cell instanceof p5.Color) {
    return int(red(cell)).toString();
  }
  if (typeof cell === 'number') {
    return cell.toString();
  }
  return '0';
};
// Converts a mask or hint cell to a number.
//
// String cells are arithmetic expressions made of numeric operands joined by
// '*' and '/' (e.g. '1/9', '-6/256', '123*-1/256'). They are evaluated left to
// right, which is how JavaScript itself evaluates such an expression, so the
// results match what `eval` produced for these inputs. `eval` is deliberately
// avoided: it would execute any code placed in a cell. Other operators
// ('+', binary '-', parentheses) are not supported.
//
// Non-string cells go through Number(), so an empty (null) cell becomes 0.
// Throws SyntaxError when a string cell is not a valid '*' / '/' expression.
const toNumber = cell => {
  if (typeof cell !== 'string') {
    return Number(cell);
  }
  const parseOperand = text => {
    const operand = Number(text);
    // Number('') is 0, so blank operands must be rejected explicitly.
    if (text.trim() === '' || Number.isNaN(operand)) {
      throw new SyntaxError(`Invalid cell expression: '${cell}'`);
    }
    return operand;
  };
  // The capture group keeps the operators: '6*-1/256' -> ['6','*','-1','/','256'].
  const [head, ...tail] = cell.split(/([*/])/);
  let result = parseOperand(head);
  for (let i = 0; i < tail.length; i += 2) {
    const operand = parseOperand(tail[i + 1]);
    result = tail[i] === '*' ? result * operand : result / operand;
  }
  return result;
};
// Strips `dimension` rows from the top and `dimension` rows from the bottom
// of the quadrille, in place.
const removeRows = (quadrille, dimension) => {
  let remaining = dimension;
  while (remaining > 0) {
    // Drop the first row, then whatever is now the last row.
    quadrille.delete(0);
    quadrille.delete(quadrille.height - 1);
    remaining -= 1;
  }
};
// Trims a border of `dimension` cells from all four sides, in place.
// Each pass strips rows and then transposes: the first pass handles the
// top/bottom border, the second handles what were the columns and leaves the
// quadrille back in its original orientation.
const crop = (quadrille, dimension) => {
  for (let pass = 0; pass < 2; pass += 1) {
    removeRows(quadrille, dimension);
    quadrille.transpose();
  }
};

// Builds a numeric copy of `q` and decorates it with:
//   dimension     - (width - 1) / 2, the ring radius used for convolution
//   min / max     - smallest and largest cell values
//   numberDisplay - numberDisplay() bound to the copy so it can read min/max
function createNumberQuadrille(q) {
  const numeric = q.clone();
  visitQuadrille(q, (row, col) => numeric.fill(row, col, toNumber(q.read(row, col))));
  numeric.dimension = (numeric.width - 1) / 2;
  // Seed the range with the top-left cell, then widen it over every cell.
  const seed = numeric.read(0, 0);
  numeric.max = seed;
  numeric.min = seed;
  for (const { value } of numeric) {
    if (value < numeric.min) {
      numeric.min = value;
    }
    if (value > numeric.max) {
      numeric.max = value;
    }
  }
  numeric.numberDisplay = numberDisplay.bind(numeric);
  return numeric;
}

// Returns a copy of `q` in which every visited cell holds its text label
// (see toString) instead of its original value.
function createStringQuadrille(q) {
  const labels = q.clone();
  const relabel = (row, col) => labels.fill(row, col, toString(q.read(row, col)));
  visitQuadrille(q, relabel);
  return labels;
}

// Cell display function for numeric quadrilles: fills the cell with lime
// whose opacity encodes the cell value relative to the quadrille's range.
//
// Must be called with `this` bound to a quadrille carrying `min` and `max`
// (createNumberQuadrille does the binding). When every cell has the same
// value (min === max) a constant opacity of 256 / this.size is used instead.
//
// All drawing goes through the `graphics` object handed to the display
// function, as stringDisplay does, rather than through the global p5
// functions, so the shading lands on whatever surface is being drawn to.
function numberDisplay({graphics, value, outline, outlineWeight, cellLength}) {
  const numberColor = 'lime';
  const opacity = (this.min === this.max)
    ? 256 / this.size
    : map(value, this.min, this.max, 0, 255);
  graphics.colorMode(RGB, 255);
  graphics.noStroke();
  graphics.fill(graphics.color(graphics.red(numberColor),
                               graphics.green(numberColor),
                               graphics.blue(numberColor),
                               opacity));
  graphics.rect(0, 0, cellLength, cellLength);
}

// Cell display function for text: draws `value` centered in the cell using
// `textColor`. The font size is cellLength * zoom / (number of characters);
// single-character values always use a zoom of 0.89 instead of `textZoom`.
function stringDisplay({graphics, value, cellLength, textColor, textZoom}) {
  const zoom = value.length === 1 ? 0.89 : textZoom;
  const fontSize = cellLength * zoom / value.length;
  graphics.noStroke();
  graphics.fill(textColor);
  graphics.textSize(fontSize);
  graphics.textAlign(CENTER, CENTER);
  graphics.text(value, 0, 0, cellLength, cellLength);
}

// p5 preload hook: requests the mandrill image and keeps it in the global
// `image`, which setup() converts to grayscale and update() samples from.
// NOTE(review): the global `image` shares its name with p5's image()
// drawing function — confirm the sketch never needs that function.
function preload() {
  image = loadImage('/images/mandrill.png');
}

// p5 setup hook: creates the canvas, converts the image to grayscale, builds
// the resolution and mask dropdowns, defines the mask table and finally
// calls update() to build the initial source/target quadrilles.
function setup() {
  createCanvas(size * 2, size + maskLength);
  image.filter(GRAY);

  // Resolution dropdown: each option's value is the power of two that
  // update() uses as the number of cells per image side.
  const showSelectedResolution = () => {
    source = sources[resolution.value()];
    target = targets[resolution.value()];
  };
  resolution = createSelect();
  resolution.position(15, 15);
  resolution.option('low', 3);
  resolution.option('medium', 4);
  resolution.option('high', 5);
  resolution.selected(4);
  resolution.changed(showSelectedResolution);

  // Named masks. Cells are strings so fractions can be shown verbatim; they
  // are converted to numbers by createNumberQuadrille().
  masks = {
    'identity': [
      ['0', '0', '0'],
      ['0', '1', '0'],
      ['0', '0', '0']
    ],
    'ridge': [
      ['-2', '-1', '0'],
      ['-1', '1', '1'],
      ['0', '1', '2']
    ],
    'sharpen': [
      ['0', '-1', '0'],
      ['-1', '5', '-1'],
      ['0', '-1', '0']
    ],
    'box blur': [
      ['1/9', '1/9', '1/9'],
      ['1/9', '1/9', '1/9'],
      ['1/9', '1/9', '1/9']
    ],
    'gaussian blur': [
      ['1/16', '1/8', '1/16'],
      ['1/8', '1/4', '1/8'],
      ['1/16', '1/8', '1/16']
    ],
    'gaussian blur 5x5': [
      ['1/256', '4/256', '6/256', '4/256', '1/256'],
      ['4/256', '16/256', '24/256', '16/256', '4/256'],
      ['6/256', '24/256', '36/256', '24/256', '6/256'],
      ['4/256', '16/256', '24/256', '16/256', '4/256'],
      ['1/256', '4/256', '6/256', '4/256', '1/256']
    ],
    'unsharp 5x5': [
      ['-1/256', '-4/256', '-6/256', '-4/256', '-1/256'],
      ['-4/256', '-16/256', '-24/256', '-16/256', '-4/256'],
      ['-6/256', '-24/256', '476/256', '-24/256', '-6/256'],
      ['-4/256', '-16/256', '-24/256', '-16/256', '-4/256'],
      ['-1/256', '-4/256', '-6/256', '-4/256', '-1/256']
    ]
  };

  // Mask dropdown: one option per mask name; picking one rebuilds everything.
  maskSelector = createSelect();
  maskSelector.position(15, height - 20);
  Object.keys(masks).forEach((name) => {
    maskSelector.option(name);
  });
  maskSelector.selected('ridge');
  maskSelector.changed(update);

  update();
}

// Rebuilds the mask and every source/target pair for the selected mask,
// then points `source` / `target` at the pair for the selected resolution.
function update() {
  stringMask = createQuadrille(masks[maskSelector.value()]);
  mask = createNumberQuadrille(stringMask);
  const border = mask.dimension;
  [3, 4, 5].forEach((pow) => {
    // Sample `border` extra cells per side so the filter has neighbors for
    // the edge cells; the padding is cropped away afterwards.
    const original = createQuadrille(2 ** pow + 2 * border, image, false);
    const filtered = original.clone();
    filtered.filter(mask);
    crop(original, border);
    crop(filtered, border);
    sources[pow] = original;
    targets[pow] = filtered;
  });
  const level = resolution.value();
  source = sources[level];
  target = targets[level];
}

// p5 draw hook: renders the source (left) and target (right) images and the
// mask, and, while the mouse is over either image, the ring of source
// pixels, the multiplication hint and the resulting target pixel.
function draw() {
  background(0);
  const cellLength = length(width / 2);
  const outlineWeight = length(8);
  drawQuadrille(source, {cellLength, outlineWeight, outline: 'cyan', tileDisplay: 0});
  drawQuadrille(target, {x: width / 2, cellLength, outlineWeight, tileDisplay: 0});
  // Number of columns by which the target is shifted right of the source;
  // displayPixel() adds it when positioning the highlighted pixel.
  target.colOffset = width / (2 * cellLength);
  displayMask();
  // The images span source.height rows of cellLength pixels. Bounding the
  // hover test by that height (rather than by half the canvas height, which
  // is shorter than the images) keeps the bottom rows inspectable.
  const overImages = mouseX > 0 && mouseX < width &&
                     mouseY > 0 && mouseY < source.height * cellLength;
  if (overImages) {
    const col = mouseX < width / 2 ? source.mouseCol : target.mouseCol;
    const ring = displayRing(source, source.mouseRow, col);
    displayHint(ring);
    displayPixel(target, target.mouseRow, col);
  }
}

// Draws the current mask at (x, y): first the shaded numeric cells, then the
// mask's text labels on top of them.
function displayMask(x = 0,
                     y = width / 2,
                     cellLength = maskLength / mask.width) {
  const placement = {x, y, cellLength};
  const shading = {...placement, numberDisplay: mask.numberDisplay,
                   outlineWeight: length(8)};
  const labels = {...placement, numberDisplay: 0, tileDisplay: 0};
  drawQuadrille(mask, shading);
  drawQuadrille(stringMask, labels);
}

// Overlays, on top of `quadrille`, the shaded mask and the text values of
// the ring of cells centered at (row, col). Returns the ring as a string
// quadrille so the caller can build the multiplication hint from it.
function displayRing(quadrille = source,
                     row = quadrille.mouseRow,
                     col = quadrille.mouseCol,
                     cellLength = length(width / 2)) {
  const {dimension} = mask;
  const stringRing = createStringQuadrille(quadrille.ring(row, col, dimension));
  // Top-left cell of the overlay: the center shifted by the quadrille's own
  // offsets (if any) and back by the ring radius.
  const top = row + ((quadrille.rowOffset ?? 0) - dimension);
  const left = col + ((quadrille.colOffset ?? 0) - dimension);
  drawQuadrille(mask, {row: top, col: left, numberDisplay: mask.numberDisplay,
                       cellLength, outlineWeight: length(8)});
  drawQuadrille(stringRing, {row: top, col: left, tileDisplay: 0, cellLength,
                             textZoom: 1.8});
  return stringRing;
}

// Draws, at (x, y), one "pixel*weight" product per mask cell: shaded by the
// product's numeric value and labeled with the product's text.
function displayHint(stringRing,
                     x = width / 2,
                     y = width / 2,
                     cellLength = maskLength / mask.width) {
  const stringHint = createQuadrille(mask.width, mask.height);
  const writeProduct = (row, col) =>
    stringHint.fill(row, col,
                    `${stringRing.read(row, col)}*${stringMask.read(row, col)}`);
  visitQuadrille(stringRing, writeProduct);
  // Evaluating the product strings yields the values used for shading.
  const hint = createNumberQuadrille(stringHint);
  const placement = {x, y, cellLength};
  drawQuadrille(hint, {...placement, numberDisplay: hint.numberDisplay,
                       outlineWeight: length(8)});
  drawQuadrille(stringHint, {...placement, numberDisplay: 0, tileDisplay: 0,
                             textZoom: 1.6});
}

// Highlights the cell of `quadrille` at (row, col): redraws that single cell
// with an outline and writes its text value on top using stringDisplay.
function displayPixel(quadrille = target,
                      row = quadrille.mouseRow,
                      col = quadrille.mouseCol,
                      cellLength = length(width / 2)) {
  // A ring of radius 0 is just the cell itself.
  const pixel = quadrille.ring(row, col, 0);
  const stringPixel = createStringQuadrille(pixel);
  const at = {
    row: row + (quadrille.rowOffset ?? 0),
    col: col + (quadrille.colOffset ?? 0),
  };
  drawQuadrille(pixel, {...at, numberDisplay: mask.numberDisplay,
                        cellLength, outlineWeight: length(8)});
  drawQuadrille(stringPixel, {...at, cellLength, tileDisplay: 0,
                              textZoom: 1.8, stringDisplay});
}

Setup

The setup function generates and stores several kernel masks, then invokes the update function which populates dictionaries of source and target quadrilles for the mandrill image at multiple resolutions, using createQuadrille(width, image, coherence) and filter, and initializes the source and target quadrilles according to the chosen resolution.

function update() {
  // string mask straight from the table, keyed by the selector's value
  stringMask = createQuadrille(masks[maskSelector.value()]);
  // numeric twin of the mask (conversion is covered in a later section)
  mask = createNumberQuadrille(stringMask);
  // ring radius of the mask; also the width of the padding added below
  const border = mask.dimension;
  // one source/target pair per resolution power
  [3, 4, 5].forEach((pow) => {
    // sample the image with `border` extra cells on every side
    const original = createQuadrille(2 ** pow + 2 * border, image, false);
    // the target is a filtered copy of the padded source
    const filtered = original.clone();
    filtered.filter(mask);
    // drop the padding from both
    crop(original, border);
    crop(filtered, border);
    // keep the pair under its resolution power
    sources[pow] = original;
    targets[pow] = filtered;
  });
  // show the pair that matches the resolution currently selected
  const level = resolution.value();
  source = sources[level];
  target = targets[level];
}

Draw

The draw function employs the source and target quadrilles to display both the original and the filtered images at a selected resolution. Depending on the mouse position over these quadrilles, the function highlights the respective pixel in the target image and presents several interactive elements. These elements facilitate understanding the convolution process: they include the current mask, the ring of neighboring source pixels (matching the dimension of the mask), the hint showcasing component-wise multiplication between the neighboring pixels and the mask, and the highlighted pixel in the target image.

function draw() {
  background(0);
  // cell side and outline weight shared by both images
  const cellLength = length(width / 2);
  const outlineWeight = length(8);
  // display original image on the left
  drawQuadrille(source, {cellLength, outlineWeight, outline: 'cyan', tileDisplay: 0});
  // display filtered image on the right
  drawQuadrille(target, {x: width / 2, cellLength, outlineWeight, tileDisplay: 0});
  // number of columns by which the target is shifted right of the source
  target.colOffset = width / (2 * cellLength);
  // visualize the current convolution mask
  displayMask();
  // process mouse interactions within the image area; the images span
  // source.height rows of cellLength pixels, so bound the test by that height
  // (half the canvas height is shorter and would exclude the bottom rows)
  const overImages = mouseX > 0 && mouseX < width &&
                     mouseY > 0 && mouseY < source.height * cellLength;
  if (overImages) {
    const col = mouseX < width / 2 ? source.mouseCol : target.mouseCol;
    // display the ring of source pixels adjacent to the current mouse position
    const ring = displayRing(source, source.mouseRow, col);
    // show the component-wise multiplication results leading to the target pixel
    displayHint(ring);
    // highlight the resulting pixel in the target image
    displayPixel(target, target.mouseRow, col);
  }
}

Each interactive element in the visualization, including the mask, the ring, the hint, and the highlighted pixel, offers two distinct modes of visualization based on the cell values. One mode presents a shade of green, varying in intensity to indicate the cell’s value relative to the minimum and maximum within the element. The other mode explicitly displays the cell’s numerical value. To achieve these dual representations, the following helper functions are employed:

function createNumberQuadrille(q) {
  const numeric = q.clone();
  // overwrite every visited cell of the copy with its numeric value
  visitQuadrille(q, (row, col) => numeric.fill(row, col, toNumber(q.read(row, col))));
  // ring radius used by the convolution: (width - 1) / 2
  numeric.dimension = (numeric.width - 1) / 2;
  // seed the value range with the top-left cell...
  const seed = numeric.read(0, 0);
  numeric.max = seed;
  numeric.min = seed;
  // ...then widen it over every cell
  for (const { value } of numeric) {
    if (value < numeric.min) {
      numeric.min = value;
    }
    if (value > numeric.max) {
      numeric.max = value;
    }
  }
  // the display function reads min/max through `this`, hence the bind
  numeric.numberDisplay = numberDisplay.bind(numeric);
  return numeric;
}
function createStringQuadrille(q) {
  const labels = q.clone();
  // replace each visited cell of the copy with its text label
  const relabel = (row, col) => labels.fill(row, col, toString(q.read(row, col)));
  visitQuadrille(q, relabel);
  return labels;
}

Display Mask

The displayMask function visualizes the current convolution mask on the canvas. It draws both the numerical and string representations of the mask, enabling a clear understanding of the mask values in the convolution process.

function displayMask(x = 0,
                     y = width / 2,
                     cellLength = maskLength / mask.width) {
  // both layers share the same position and cell size
  const placement = {x, y, cellLength};
  // bottom layer: cells shaded by their numeric value
  const shading = {...placement, numberDisplay: mask.numberDisplay,
                   outlineWeight: length(8)};
  // top layer: the mask's text labels only
  const labels = {...placement, numberDisplay: 0, tileDisplay: 0};
  drawQuadrille(mask, shading);
  drawQuadrille(stringMask, labels);
}

Display Ring

The displayRing function showcases the ring of pixels surrounding the mouse position in the source image. This ring is crucial for understanding how each pixel in the target image is computed.

function displayRing(quadrille = source,
                     row = quadrille.mouseRow,
                     col = quadrille.mouseCol,
                     cellLength = length(width / 2)) {
  // the ring radius comes from the mask
  const {dimension} = mask;
  // text version of the ring of cells centered at (row, col)
  const stringRing = createStringQuadrille(quadrille.ring(row, col, dimension));
  // top-left cell of the overlay: center plus the quadrille's own offsets
  // (if any), minus the ring radius
  const top = row + ((quadrille.rowOffset ?? 0) - dimension);
  const left = col + ((quadrille.colOffset ?? 0) - dimension);
  // shaded mask over the ring
  drawQuadrille(mask, {row: top, col: left, numberDisplay: mask.numberDisplay,
                       cellLength, outlineWeight: length(8)});
  // ring values as text on top
  drawQuadrille(stringRing, {row: top, col: left, tileDisplay: 0, cellLength,
                             textZoom: 1.8});
  return stringRing;
}

Display Hint

The displayHint function shows the component-wise multiplication between the mask and the ring. This visual cue helps in understanding how the convolution operation is performed to obtain the target pixel.

function displayHint(stringRing,
                     x = width / 2,
                     y = width / 2,
                     cellLength = maskLength / mask.width) {
  // empty quadrille with the mask's shape to hold the products
  const stringHint = createQuadrille(mask.width, mask.height);
  // each cell becomes the text "<ring value>*<mask value>"
  const writeProduct = (row, col) =>
    stringHint.fill(row, col,
                    `${stringRing.read(row, col)}*${stringMask.read(row, col)}`);
  visitQuadrille(stringRing, writeProduct);
  // evaluating the product strings yields the values used for shading
  const hint = createNumberQuadrille(stringHint);
  const placement = {x, y, cellLength};
  // bottom layer: cells shaded by the product value
  drawQuadrille(hint, {...placement, numberDisplay: hint.numberDisplay,
                       outlineWeight: length(8)});
  // top layer: the product text
  drawQuadrille(stringHint, {...placement, numberDisplay: 0, tileDisplay: 0,
                             textZoom: 1.6});
}

Display Pixel

The displayPixel function highlights the resulting pixel in the target image. It shows both the numerical and string values of the pixel, providing insight into the final output of the convolution process.

function displayPixel(quadrille = target,
                      row = quadrille.mouseRow,
                      col = quadrille.mouseCol,
                      cellLength = length(width / 2)) {
  // a ring of radius 0 is just the cell at (row, col)
  const pixel = quadrille.ring(row, col, 0);
  const stringPixel = createStringQuadrille(pixel);
  // where to draw it: the cell position plus the quadrille's offsets, if any
  const at = {
    row: row + (quadrille.rowOffset ?? 0),
    col: col + (quadrille.colOffset ?? 0),
  };
  // the cell itself, outlined
  drawQuadrille(pixel, {...at, numberDisplay: mask.numberDisplay,
                        cellLength, outlineWeight: length(8)});
  // its value as text, rendered by stringDisplay
  drawQuadrille(stringPixel, {...at, cellLength, tileDisplay: 0,
                              textZoom: 1.8, stringDisplay});
}

Further Exploration

The Image Convolution Visualizer opens up various avenues for deeper exploration and experimentation:

  • Experiment with Custom Masks: Try designing your own convolution masks by modifying the values in the masks object. Observe how different kernel configurations affect the source image’s features, such as edges, textures, or colors.

  • Higher-Dimensional Masks: Extend the concept to larger kernel sizes, such as 7x7 or 9x9, and explore their impact on the convolution process. This requires scaling the mask visualization and handling additional computational requirements. Refer to Kernel Size and Effects for details on larger masks.

  • Interactive Kernel Editing: Enhance interactivity by allowing users to adjust mask values dynamically through input fields or sliders. This would provide immediate visual feedback and a hands-on way to learn about convolutions. Check out this Interactive Convolution Explorer for inspiration.

  • Explore Nonlinear Filters: Investigate nonlinear filtering techniques, such as median filtering or bilateral filtering, and compare their effects to linear convolution. These filters are widely used for noise reduction and image smoothing.

  • Real-World Applications: Study how convolution is applied in real-world scenarios, such as image sharpening, noise reduction, edge detection, or feature extraction in computer vision. Extend the demo to apply convolution to other types of images (e.g., real-world photographs or scanned documents).

  • Convolution in Neural Networks: For a broader perspective, research the role of convolution in convolutional neural networks (CNNs). Explore how filters are learned during training and their role in feature detection for tasks like object recognition and image classification.

  • Performance Optimization: Experiment with performance optimizations for larger images or higher resolutions by leveraging WebGL shaders or parallel processing techniques. Learn about GPU-based convolution from this guide.

Each of these explorations can deepen your understanding of image convolution and its applications, while providing an opportunity to expand the visualizer’s functionality.

References

Quadrille API

p5 API

  • createCanvas — Creates a drawing canvas on which all the 2D and 3D visuals are rendered in p5.js.
  • background — Sets the color used for the background of the canvas.
  • createSelect — Creates a dropdown menu element for user selection.
  • textAlign — Sets how text is aligned horizontally and vertically within the canvas.
  • textSize — Sets the size of the text font for subsequent text rendering.
  • text — Draws text at a specified position on the canvas.

Further Reading