// prog3b_652/parallel2/main.cpp

#include <algorithm>
#include <chrono>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <vector>

#include <omp.h>

// =====================
// Matrix generation
// =====================

/// @brief Replaces A and B with n x n matrices of pseudo-random values.
///
/// Both matrices are overwritten completely: whatever size or content they had
/// before the call is discarded, so afterwards every row has exactly n entries.
///
/// @param n        Number of rows and columns. Values <= 0 produce empty matrices.
/// @param A        Output matrix, replaced by an n x n matrix.
/// @param B        Output matrix, replaced by an n x n matrix.
/// @param min_val  Value produced when the random draw is 0.
/// @param max_val  Value produced when the random draw is RAND_MAX.
///
/// @note std::rand is re-seeded from std::time on every call, so two calls made
///       within the same clock second generate the same matrices.
void generateRandomMatrices(int n,
                            std::vector<std::vector<double>>& A,
                            std::vector<std::vector<double>>& B,
                            double min_val = 0.0,
                            double max_val = 10.0)
{
    std::srand(static_cast<unsigned int>(std::time(nullptr)));

    // A negative n must not be converted to a huge unsigned size.
    const std::size_t dim = n > 0 ? static_cast<std::size_t>(n) : 0;

    // assign() rebuilds every row. resize() would keep already existing rows at
    // their old length, and the fill loop below would then write out of bounds.
    A.assign(dim, std::vector<double>(dim));
    B.assign(dim, std::vector<double>(dim));

    for (std::size_t i = 0; i < dim; ++i)
        for (std::size_t j = 0; j < dim; ++j)
        {
            // Two draws per cell: the first feeds A, the second feeds B.
            const double r1 = static_cast<double>(std::rand()) / RAND_MAX;
            const double r2 = static_cast<double>(std::rand()) / RAND_MAX;
            A[i][j] = min_val + r1 * (max_val - min_val);
            B[i][j] = min_val + r2 * (max_val - min_val);
        }
}

// =====================
// Serial multiplication
// =====================

/// @brief Computes the matrix product C = A * B on the calling thread.
///
/// The result has A.size() rows and B[0].size() columns. If B has no rows the
/// result has A.size() empty rows, because the column count cannot be derived.
///
/// @param A  Left operand. Every row must hold at least B.size() entries.
/// @param B  Right operand. Every row must hold at least B[0].size() entries.
/// @return   The product matrix.
///
/// @note Row lengths are not validated; rows shorter than required lead to
///       out-of-bounds reads.
std::vector<std::vector<double>> matmul_serial(const std::vector<std::vector<double>>& A,
                                               const std::vector<std::vector<double>>& B)
{
    const int n = static_cast<int>(A.size());
    const int p = static_cast<int>(B.size());
    // B[0] only exists when B has at least one row.
    const int m = B.empty() ? 0 : static_cast<int>(B[0].size());

    std::vector<std::vector<double>> C(n, std::vector<double>(m, 0.0));

    for (int i = 0; i < n; ++i)
        for (int j = 0; j < m; ++j)
            for (int k = 0; k < p; ++k)
                C[i][j] += A[i][k] * B[k][j];

    return C;
}

// =====================
// Parallel row decomposition
// =====================

/// @brief Computes C = A * B with the rows of C distributed over OpenMP threads.
///
/// The outermost loop over the rows of C is the parallel loop, so each element
/// of C is written by exactly one thread.
///
/// The result has A.size() rows and B[0].size() columns. If B has no rows the
/// result has A.size() empty rows.
///
/// @param A           Left operand. Every row must hold at least B.size() entries.
/// @param B           Right operand. Every row must hold at least B[0].size() entries.
/// @param numThreads  Requested team size. Values <= 0 are treated as 1.
/// @return            The product matrix.
///
/// @note Row lengths are not validated; rows shorter than required lead to
///       out-of-bounds reads.
std::vector<std::vector<double>> matmul_row_parallel(const std::vector<std::vector<double>>& A,
                                                     const std::vector<std::vector<double>>& B,
                                                     int numThreads)
{
    const int n = static_cast<int>(A.size());
    const int p = static_cast<int>(B.size());
    // B[0] only exists when B has at least one row.
    const int m = B.empty() ? 0 : static_cast<int>(B[0].size());

    std::vector<std::vector<double>> C(n, std::vector<double>(m, 0.0));

    // The num_threads clause sizes the team for this region only, leaving the
    // process-wide OpenMP default untouched. The clause requires a positive value.
    #pragma omp parallel for num_threads(numThreads > 0 ? numThreads : 1)
    for (int i = 0; i < n; ++i)
        for (int j = 0; j < m; ++j)
            for (int k = 0; k < p; ++k)
                C[i][j] += A[i][k] * B[k][j];

    return C;
}

// =====================
// Parallel column decomposition
// =====================

/// @brief Computes C = A * B with the columns of C distributed over OpenMP threads.
///
/// The outermost loop over the columns of C is the parallel loop, so each
/// element of C is written by exactly one thread. Neighbouring elements within
/// one row of C may be written by different threads, which may cost performance
/// through cache-line sharing.
///
/// The result has A.size() rows and B[0].size() columns. If B has no rows the
/// result has A.size() empty rows.
///
/// @param A           Left operand. Every row must hold at least B.size() entries.
/// @param B           Right operand. Every row must hold at least B[0].size() entries.
/// @param numThreads  Requested team size. Values <= 0 are treated as 1.
/// @return            The product matrix.
///
/// @note Row lengths are not validated; rows shorter than required lead to
///       out-of-bounds reads.
std::vector<std::vector<double>> matmul_col_parallel(const std::vector<std::vector<double>>& A,
                                                     const std::vector<std::vector<double>>& B,
                                                     int numThreads)
{
    const int n = static_cast<int>(A.size());
    const int p = static_cast<int>(B.size());
    // B[0] only exists when B has at least one row.
    const int m = B.empty() ? 0 : static_cast<int>(B[0].size());

    std::vector<std::vector<double>> C(n, std::vector<double>(m, 0.0));

    // The num_threads clause sizes the team for this region only, leaving the
    // process-wide OpenMP default untouched. The clause requires a positive value.
    #pragma omp parallel for num_threads(numThreads > 0 ? numThreads : 1)
    for (int j = 0; j < m; ++j)
        for (int i = 0; i < n; ++i)
            for (int k = 0; k < p; ++k)
                C[i][j] += A[i][k] * B[k][j];

    return C;
}

// =====================
// Parallel block decomposition
// =====================

/// @brief Computes C = A * B with square tiles of C distributed over OpenMP threads.
///
/// C is cut into tiles of blockSize x blockSize elements (edge tiles may be
/// smaller). The two tile loops are collapsed into one parallel iteration
/// space, and each element of C belongs to exactly one tile.
///
/// The result has A.size() rows and B[0].size() columns. If B has no rows the
/// result has A.size() empty rows.
///
/// @param A           Left operand. Every row must hold at least B.size() entries.
/// @param B           Right operand. Every row must hold at least B[0].size() entries.
/// @param blockSize   Tile edge length. Values <= 0 are treated as 1, since a
///                    non-positive step would never advance the tile loops.
/// @param numThreads  Requested team size. Values <= 0 are treated as 1.
/// @return            The product matrix.
///
/// @note Row lengths are not validated; rows shorter than required lead to
///       out-of-bounds reads.
std::vector<std::vector<double>> matmul_block_parallel(const std::vector<std::vector<double>>& A,
                                                       const std::vector<std::vector<double>>& B,
                                                       int blockSize,
                                                       int numThreads)
{
    const int n = static_cast<int>(A.size());
    const int p = static_cast<int>(B.size());
    // B[0] only exists when B has at least one row.
    const int m = B.empty() ? 0 : static_cast<int>(B[0].size());

    // A non-positive step would keep the tile loops from ever terminating.
    const int tile = blockSize > 0 ? blockSize : 1;

    std::vector<std::vector<double>> C(n, std::vector<double>(m, 0.0));

    // The num_threads clause sizes the team for this region only, leaving the
    // process-wide OpenMP default untouched. The clause requires a positive value.
    #pragma omp parallel for collapse(2) num_threads(numThreads > 0 ? numThreads : 1)
    for (int ii = 0; ii < n; ii += tile)
        for (int jj = 0; jj < m; jj += tile)
        {
            // Clip the tile at the matrix border. Computed inside the inner
            // tile loop so the two collapsed loops stay perfectly nested.
            const int iEnd = std::min(ii + tile, n);
            const int jEnd = std::min(jj + tile, m);

            for (int i = ii; i < iEnd; ++i)
                for (int j = jj; j < jEnd; ++j)
                    for (int k = 0; k < p; ++k)
                        C[i][j] += A[i][k] * B[k][j];
        }

    return C;
}

// =====================
// Main
// =====================

/// Runs the serial multiplication and the three OpenMP variants on the same
/// pair of random N x N matrices and prints the elapsed wall-clock time of each.
int main()
{
    int N = 500;        // matrix dimension; adjust as needed
    int numThreads = 4; // thread count handed to the OpenMP variants
    int blockSize = 64; // tile edge length for the block variant

    std::vector<std::vector<double>> A, B;
    generateRandomMatrices(N, A, B);

    using Clock = std::chrono::steady_clock;

    // Prints "<label><seconds> s" for the interval between the two time points.
    const auto printElapsed = [](const char* label,
                                 Clock::time_point from,
                                 Clock::time_point to)
    {
        std::cout << label << std::chrono::duration<double>(to - from).count() << " s\n";
    };

    // ---------- Serial ----------
    Clock::time_point tic = Clock::now();
    auto C_serial = matmul_serial(A, B);
    Clock::time_point toc = Clock::now();
    printElapsed("Seriell: ", tic, toc);

    // ---------- Row-parallel ----------
    tic = Clock::now();
    auto C_row = matmul_row_parallel(A, B, numThreads);
    toc = Clock::now();
    printElapsed("Zeilenparallel: ", tic, toc);

    // ---------- Column-parallel ----------
    tic = Clock::now();
    auto C_col = matmul_col_parallel(A, B, numThreads);
    toc = Clock::now();
    printElapsed("Spaltenparallel: ", tic, toc);

    // ---------- Block-parallel ----------
    tic = Clock::now();
    auto C_block = matmul_block_parallel(A, B, blockSize, numThreads);
    toc = Clock::now();
    printElapsed("Blockparallel: ", tic, toc);

    return 0;
}