Karhunen-Loeve (KL) Transform using C++ and OpenCV

Question

I am implementing a bunch of image compression algorithms as a hobby (repo: GitHub), and I wrote this implementation of the K-L transform:

kl_transform.hpp

#pragma once

#include <opencv2/opencv.hpp>

#include <vector>
#include <tuple>
#include <algorithm>

std::tuple<cv::Mat1d, cv::Mat1d, cv::Mat1d, cv::Mat1d>
kl_transform(cv::Mat& img, uint block_size, uint top_k);

cv::Mat1b kl_reconstruct(uint img_dim, cv::Mat& coeffs, 
                         cv::Mat& mu, cv::Mat& gamma_topk, uint block_size);

// It is recommended to keep the block size pretty small, <= 16, because
// finding the eigenvectors of a 32 x 32 matrix is really expensive since it
// is an O(N^3) operation and will take a huge amount of time.

kl_transform.cpp

#include "kl_transform.hpp"

cv::Mat1d find_block_mean(const cv::Mat& A, uint block_size) {
    /*
    IMPORTANT: Right now we are dealing with only square, grayscale images
    and the block_size MUST be a factor of the image size for the function 
    to work.
    */
    if(A.rows != A.cols) throw std::invalid_argument("Input image should be square and grayscale!");
    if(A.rows % block_size != 0) throw std::invalid_argument("Block size should be a factor of the image size");
    
    int n = A.rows;
    std::vector<double> mean_vec(block_size * block_size); //default value 0
    int d = n / block_size;
    int num_blocks = (n / block_size) * (n / block_size);

    for(int i = 0; i < d; i++){
        for(int j = 0; j < d; j++){
            /*
            The (i, j)-th block of size block_size x block_size starts from 
            row block_size * i and from column block_size * j. Considering these
            as the origin (0th row and col respectively), A[u][v] maps to 
            mean[u * block_size + v]
            */
            
            // SINGLE BLOCK PROCESSING
            for(int u = 0; u < block_size; u++){
                for(int v = 0; v < block_size; v++){
                    int r = block_size * i + u;
                    int c = block_size * j + v;

                    mean_vec[u * block_size + v] += A.at<double>(r, c);
                }
            }
        }
    }

    cv::Mat1d mean_mat = cv::Mat(mean_vec).clone();
    mean_mat /= num_blocks;
    mean_mat = mean_mat.reshape(1, 1);
    return mean_mat; 
}

cv::Mat1d find_img_cov(const cv::Mat& A, uint block_size) {

    cv::Mat1d mu = find_block_mean(A, block_size);

    int n = A.rows;
    int d = n / block_size;
    cv::Mat1d cov = cv::Mat1d::zeros(block_size * block_size, block_size * block_size);
    int num_blocks = (n / block_size) * (n / block_size);

    cv::Mat1d flattened(1, block_size * block_size);
    cv::Mat1d centered(1, block_size * block_size);
    cv::Mat1d outer_prod(block_size * block_size, block_size * block_size);

    for(int i = 0; i < d; i++){
        for(int j = 0; j < d; j++){

            // a different type of block processing
            cv::Rect block(j * block_size, i * block_size,
                            block_size, block_size);
            cv::Mat submatrix = A(block);

            flattened = submatrix.clone().reshape(1, 1); 
            centered = flattened - mu;
            outer_prod = centered.t() * centered;
            
            cov += outer_prod;
        }
    }

    cov /= (num_blocks - 1); // for unbiased estimator
    return cov;
}

cv::Mat1d ordered_diagonalization(const cv::Mat& A) {
    cv::Mat eigenvalues;
    cv::Mat eigenvectors;

    /*
    By a miracle we get exactly what we are looking for: a matrix with
    its rows containing the normalized eigenvectors arranged according
    to the descending order of their corresponding eigenvalues
    */
    cv::eigen(A, eigenvalues, eigenvectors);
    return eigenvectors;
}

std::tuple<cv::Mat1d, cv::Mat1d, cv::Mat1d, cv::Mat1d>
kl_transform(cv::Mat& img, uint block_size, uint top_k) {
    /*
    Returns 
    1. a matrix of size NUM_BLOCKS x top_k where each row defines
    the set of transform coefficients for each block of size block_size x block_size
    2. The covariance matrix of the original image
    3. The mu vector
    4. The gamma_topk matrix
    */

    if (top_k > static_cast<uint>(block_size * block_size))
        throw std::invalid_argument("top_k cannot exceed block_size^2");

    cv::Mat1d A;
    img.convertTo(A, CV_64F);

    int n = A.rows;
    int d = n / block_size;
    int num_blocks = (n / block_size) * (n / block_size);

    cv::Mat1d mu = find_block_mean(A, block_size);
    cv::Mat1d cov = find_img_cov(A, block_size);
    cv::Mat1d gamma = ordered_diagonalization(cov);

    cv::Mat1d gamma_topk = gamma.rowRange(0, top_k).clone();

    cv::Mat1d tf_coeffs = cv::Mat1d::zeros(num_blocks, top_k);
    int idx = 0;

    for(int i = 0; i < d; i++){
        for(int j = 0; j < d; j++){
            cv::Rect block(j * block_size, i * block_size,
                            block_size, block_size);
            cv::Mat1d submatrix = A(block);
            cv::Mat1d flattened = submatrix.clone().reshape(1, 1); 
            cv::Mat1d b = flattened - mu;
            cv::Mat1d y = gamma_topk * b.t();

            for(int k = 0; k < top_k; k++) {
                tf_coeffs(idx, k) = y.at<double>(k, 0);
            }
            idx++;
        }
    }
    return std::make_tuple(tf_coeffs, cov, mu, gamma_topk);
}

cv::Mat1b kl_reconstruct(uint img_dim, cv::Mat& coeffs, cv::Mat& mu, cv::Mat& gamma_topk, uint block_size) {
    // int n = static_cast<int>(std::sqrt(coeffs.rows)) * block_size;
    int n = img_dim;
    int d = n / block_size;
    int num_blocks = (n / block_size) * (n / block_size);

    cv::Mat1b recon = cv::Mat1b::zeros(n, n);
    cv::Mat1d reconstructed_block_matrix = gamma_topk.t() * coeffs.t();

    cv::Mat1d mu_col = mu.reshape(1, mu.total());
    cv::Mat repeated_mu = cv::repeat(mu_col, 1, reconstructed_block_matrix.cols);
    reconstructed_block_matrix += repeated_mu;

    for(int i = 0; i < d; i++){
        for(int j = 0; j < d; j++){
            int block_number = i * d + j;
            for(int u = 0; u < block_size; ++u){
                for(int v = 0; v < block_size; ++v){
                    int r = i * block_size + u;
                    int c = j * block_size + v;
                    
                    recon.at<uchar>(r, c) = static_cast<uchar>(
                                                    std::clamp(
                                                        std::round(
                                                            reconstructed_block_matrix(u * block_size + v, block_number)
                                                        ), 0.0, 255.0
                                                    )
                                            );
                }
            }
        }
    }

    return recon;
}

I would love any advice for good C++ practices, software design principles, or anything else that you may find valuable advice. The code works, and can be tested with:

#include <opencv2/opencv.hpp>
#include <iostream>
#include "../transform/kl_transform.hpp"

int main() {
    cv::Mat1b img = cv::imread("../test_imgs/<YOUR_IMAGE_NAME>.bmp", cv::IMREAD_GRAYSCALE);
    if (img.empty()) {
        std::cerr << "Error: could not load image\n";
        return -1;
    }

    std::cout << "Width: " << img.cols 
              << " Height: " << img.rows 
              << " Channels: " << img.channels() << std::endl;
    cv::imshow("Display", img);
    cv::waitKey(0);

    const int BLOCK_SIZE = 16;
    const int LEVELS = 40;

    std::tuple<cv::Mat1d, cv::Mat1d, cv::Mat1d, cv::Mat1d> tup = 
                        kl_transform(img, BLOCK_SIZE, LEVELS);
    cv::Mat1d coeffs = std::get<0>(tup);
    cv::Mat1b decoded = kl_reconstruct(img.rows, coeffs, std::get<2>(tup), std::get<3>(tup), BLOCK_SIZE);
    cv::imshow("Decoded", decoded);
    cv::waitKey(0);
}

toolic · Accepted Answer · 2026-05-21 14:34:21Z

Layout

It is great that the code uses consistent indentation and blank lines.

However, this line break seems unnecessary:

cv::Rect block(j * block_size, i * block_size,
                block_size, block_size);

This is more expected (on 1 line):

cv::Rect block(j * block_size, i * block_size, block_size, block_size);

This line is sufficiently short, and it is still shorter than other lines in your code.

The indentation of this code seems extreme, requiring unnecessary horizontal scrolling on a typical screen:

                recon.at<uchar>(r, c) = static_cast<uchar>(
                                                std::clamp(
                                                    std::round(
                                                        reconstructed_block_matrix(u * block_size + v, block_number)
                                                    ), 0.0, 255.0
                                                )
                                        );

Perhaps your editor automatically indents it that way, which is unfortunate.

DRY

This expression is repeated 12 times: n / block_size. It would be nice to reduce the repetition by assigning it to a variable.

Also, (n / block_size) * (n / block_size) can be replaced with a call to a squaring function.

Comments

Delete commented-out code to reduce clutter:

// int n = static_cast<int>(std::sqrt(coeffs.rows)) * block_size;

Naming

There are many variables with terse, vague names, such as n and d. Single-letter names are common for loop iterators, like i here:

for(int i = 0; i < d; i++){

But, d could be more descriptive.

As an opinion - I like the layout where x,y are separated from width,height. Each to one's own, of course! — Toby Speight
– Toby Speight, Commented 19 hours ago

Chris · Accepted Answer · 2026-05-22 02:41:09Z

5

Your use of transitive inclusion in your main source file is not ideal. Since you're using std::tuple, that file should include <tuple>. That header is already guarded against multiple inclusion, so there is no downside to doing this.

Rather than calling std::get several times you might use a structured binding to give names to the values returned from kl_transform and to avoid revert using the type std::tuple, rendering my first point moot.

Further reading: Explicit direct include vs non-contractual transitive include on Stack Overflow.

edited 7 hours ago

answered 20 hours ago

Chris

20k1 gold badge15 silver badges97 bronze badges

1

\$\begingroup\$ An alternative to including <tuple> is to use auto in main() to deduce tup without needing the type name. That has benefits and drawbacks - shorter code, but not necessarily clearer. A structured binding is almost certainly the best option, and also doesn't need the header to be included. \$\endgroup\$

Toby Speight
– Toby Speight

2026-05-21 15:34:20 +00:00
Commented 18 hours ago

Add a comment |

Toby Speight · Accepted Answer · 2026-05-21 15:30:59Z

Internal helper functions such as find_block_mean() should not have external linkage - put them in an anonymous namespace or use C-style static keyword.

When working with OpenCV in C++, it's helpful to use cv::Mat_<> template instead of plain cv::Mat if the type of the elements is fixed. I see that we do that (as cv::Mat1d) for our internal types, but it looks like we missed some opportunities. I might be wrong, given that I didn't get deep enough to be sure.

In this loop, we're comparing signed values u and v against unsigned block_size (and my favourite compiler options warn about that):

            // SINGLE BLOCK PROCESSING
            for(int u = 0; u < block_size; u++){
                for(int v = 0; v < block_size; v++){
                    int r = block_size * i + u;
                    int c = block_size * j + v;

                    mean_vec[u * block_size + v] += A.at<double>(r, c);
                }
            }

Better to be consistent:

            // SINGLE BLOCK PROCESSING
            for (uint u = 0;  u < block_size;  ++u) {
                auto const r = block_size * i + u;
                for (uint v = 0;  v < block_size;  ++v) {
                    auto const c = block_size * j + v;
                    mean_vec[u * block_size + v] += A.at<double>(r, c);
                }
            }

Here, I have also hoisted r to the outer loop. Although good compilers can do that for you, I think it helps the reader if we do it in the source.

This cast is unnecessary:

    if (top_k > static_cast<uint>(block_size * block_size))

block_size * block_size is already uint type; unnecessary casts add clutter to the code and shout "wolf" to readers.

This variable in kl_reconstruct() is never used and can be removed:

    int num_blocks = (n / block_size) * (n / block_size);

Similar variables in other functions might benefit from being declared const.

In main(), we return -1 on error. It's more portable to return small positive values (on Linux, for example, values outside the range 0-255 are not preserved). The constant EXIT_FAILURE (in <cstdlib>) is great for this, and makes the meaning more obvious to readers too.

Avoid std::endl, since it's often used unnecessarily - I would write a \n and explicit std::flush so that it's clear we want that output to be fully written before the next action. The statement where it's used appears to be more appropriate for std::clog stream than standard output.

Minor irritation in the layout: apart from giving keywords and operators more space to breathe, please get rid of the all trailing whitespace. To quote from another language's style guide:

Avoid trailing whitespace anywhere. Because itâ€™s usually invisible, it can be confusing: e.g. a backslash followed by a space and a newline does not count as a line continuation marker. Some editors donâ€™t preserve it and many projects have pre-commit hooks that reject it.

Oops. Tail end of this head cold has me more mixed up than I thought. — Chris
– Chris, Commented 11 hours ago
@Chris, I initially misinterpreted your remark and it turned out funnier than what you said. I thought you were going to make a joke about some pointer assignment confusing head and tail of a linked list; thank goodness we're not navigating through a cyclic graph here. — J_H
– J_H, Commented 9 hours ago
My last three days has felt like a cyclic graph involving fevers and chills. — Chris
– Chris, Commented 9 hours ago

Jerry Coffin · Accepted Answer · 2026-05-21 20:28:46Z

3

A few points it doesn't look like anybody's mentioned yet.

I'd start by defining something like:

void precondition(bool cond, char const *msg) { 
    if (!cond)
        throw std::invalid_argument(msg);
}

Then I'd replace code like:

if(A.rows != A.cols) throw std::invalid_argument("Input image should be square and grayscale!");
if(A.rows % block_size != 0) throw std::invalid_argument("Block size should be a factor of the image size");

...with:

precondition(A.rows == A.cols, "Input image must be square and grayscale!");
precondition(A.rows % block_size == 0, "Image size must be a multiple of block size");

Although the multiple occurrences of n / block_size were mentioned, I don't see any mention of the fact that at least in some cases, we have a sequence like:

int d = n / block_size;
int num_blocks = (n / block_size) * (n / block_size);

First, I'd try to come up with a somewhat more meaningful name than d, then I'd use it in defining num_blocks:

int blocks_per_row = n / block_size;
int num_blocks = blocks_per_row * blocks_per_row;

answered 13 hours ago

Jerry Coffin

34.7k4 gold badges78 silver badges146 bronze badges

\$\begingroup\$ Hi, thanks for your answer. I will get to the rest tomorrow, but would you recommend trying C++ contracts for the first point? I've only heard of them, not very familiar \$\endgroup\$

insipidintegrator
– insipidintegrator

2026-05-21 20:30:17 +00:00
Commented 13 hours ago
1

\$\begingroup\$ @insipidintegrator: I wouldn't use it yet unless this code is purely for your own use. Contracts are new with C++26. I believe both clang and gcc have at least partial implementations, but most people don't have it yet (about the only ones likely to have it are those who build the compiler from source on their own). \$\endgroup\$

Jerry Coffin
– Jerry Coffin

2026-05-21 20:36:52 +00:00
Commented 13 hours ago
\$\begingroup\$ The most recent GCC has full (not partial) contracts support. Donâ€™t know about Clang, but it is probably not far behind (it has supported contracts in forks/behind flags forever). Donâ€™t know anything about MSVC. If your compiler supports contracts, and your project doesnâ€™t have legacy support requirements, there is really no good reason not to use the built-in language contracts, rather than rolling your own. \$\endgroup\$

indi
– indi

2026-05-22 09:40:15 +00:00
Commented 20 mins ago

Add a comment |

Stack Exchange Network

Karhunen-Loeve (KL) Transform using C++ and OpenCV

kl_transform.hpp

kl_transform.cpp

4 Answers 4

Layout

DRY

Comments

Naming

You must log in to answer this question.

Hot Network Questions

kl_transform.hpp

kl_transform.cpp

4 Answers 4

Layout

Comments

Naming

You must log in to answer this question.

Related