// prog3b_652/parallel2/main.cpp

#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

#include <omp.h>


// Generate matrices
//
// Fills A and B with n x n matrices of pseudo-random doubles.
//
// Each entry is computed as min_val + r * (max_val - min_val), where
// r = rand() / RAND_MAX lies in [0, 1].
//
// Parameters:
//   n        - dimension of both square matrices; values <= 0 yield empty
//              matrices.
//   A, B     - output matrices; any previous content is discarded.
//   min_val  - value produced for r == 0 (default 0.0).
//   max_val  - value produced for r == 1 (default 10.0).
void generateRandomMatrices(int n,
                            std::vector<std::vector<double>>& A,
                            std::vector<std::vector<double>>& B,
                            double min_val = 0.0,
                            double max_val = 10.0)
{
    // Seed the generator only once per process. Reseeding with time() on every
    // call would produce identical matrices for calls within the same second.
    static const bool seeded = [] {
        std::srand(static_cast<unsigned int>(std::time(nullptr)));
        return true;
    }();
    (void)seeded;

    // A negative int would otherwise wrap to a huge unsigned element count.
    const std::size_t dim = n > 0 ? static_cast<std::size_t>(n) : 0;

    // assign() replaces every row. resize() would keep already existing rows
    // at their old length, and the fill loop below would then write past them.
    A.assign(dim, std::vector<double>(dim));
    B.assign(dim, std::vector<double>(dim));

    const double span = max_val - min_val;
    for (std::size_t i = 0; i < dim; ++i)
        for (std::size_t j = 0; j < dim; ++j)
        {
            const double r1 = static_cast<double>(std::rand()) / RAND_MAX;
            const double r2 = static_cast<double>(std::rand()) / RAND_MAX;
            A[i][j] = min_val + r1 * span;
            B[i][j] = min_val + r2 * span;
        }
}


// Serial
//
// Computes the matrix product C = A * B on a single thread.
//
// Parameters:
//   A - left operand, n rows of p entries each.
//   B - right operand, p rows of m entries each.
// Returns:
//   The n x m product. Empty A yields an empty result.
// Throws:
//   std::invalid_argument if a row of A does not have B.size() entries or if
//   the rows of B differ in length.
std::vector<std::vector<double>> matmul_serial(const std::vector<std::vector<double>>& A,
                                               const std::vector<std::vector<double>>& B)
{
    const std::size_t rows  = A.size();
    const std::size_t inner = B.size();
    // B.front() must not be touched when B has no rows.
    const std::size_t cols  = B.empty() ? 0 : B.front().size();

    // Validate shapes up front so the index loops below cannot read out of bounds.
    for (const auto& r : A)
        if (r.size() != inner)
            throw std::invalid_argument("matmul_serial: every row of A must have B.size() entries");
    for (const auto& r : B)
        if (r.size() != cols)
            throw std::invalid_argument("matmul_serial: B must be rectangular");

    std::vector<std::vector<double>> C(rows, std::vector<double>(cols, 0.0));

    for (std::size_t i = 0; i < rows; ++i)
        for (std::size_t j = 0; j < cols; ++j)
        {
            // Accumulate in a local, in ascending k order.
            double sum = 0.0;
            for (std::size_t k = 0; k < inner; ++k)
                sum += A[i][k] * B[k][j];
            C[i][j] = sum;
        }

    return C;
}


// Parallel row decomposition
//
// Computes C = A * B with the rows of C distributed over OpenMP threads.
//
// Parameters:
//   A          - left operand, n rows of p entries each.
//   B          - right operand, p rows of m entries each.
//   numThreads - requested thread count; values <= 0 fall back to 1 because
//                the num_threads clause requires a positive value.
// Returns:
//   The n x m product. Empty A yields an empty result.
// Throws:
//   std::invalid_argument if a row of A does not have B.size() entries or if
//   the rows of B differ in length.
std::vector<std::vector<double>> matmul_parallel_rows(const std::vector<std::vector<double>>& A,
                                                      const std::vector<std::vector<double>>& B,
                                                      int numThreads)
{
    // B.front() must not be touched when B has no rows.
    const std::size_t innerCount = B.size();
    const std::size_t colCount   = B.empty() ? 0 : B.front().size();

    // Validate shapes before entering the parallel region, so nothing throws
    // inside it and no index below can run out of bounds.
    for (const auto& r : A)
        if (r.size() != innerCount)
            throw std::invalid_argument("matmul_parallel_rows: every row of A must have B.size() entries");
    for (const auto& r : B)
        if (r.size() != colCount)
            throw std::invalid_argument("matmul_parallel_rows: B must be rectangular");

    // Signed int loop counters keep the loops in OpenMP canonical form.
    const int n = static_cast<int>(A.size());
    const int m = static_cast<int>(colCount);
    const int p = static_cast<int>(innerCount);
    const int threads = numThreads > 0 ? numThreads : 1;

    std::vector<std::vector<double>> C(n, std::vector<double>(m, 0.0));

    #pragma omp parallel for num_threads(threads)
    for (int i = 0; i < n; ++i)
    {
        // Each iteration writes only to row i of C, so writing in place is
        // race-free and no temporary row buffer is needed.
        const std::vector<double>& a_row = A[i];
        std::vector<double>& c_row = C[i];
        for (int j = 0; j < m; ++j)
        {
            double sum = 0.0;
            for (int k = 0; k < p; ++k)
                sum += a_row[k] * B[k][j];
            c_row[j] = sum;
        }
    }

    return C;
}


// Parallel column decomposition
//
// Computes C = A * B with the columns of C distributed over OpenMP threads.
//
// Parameters:
//   A          - left operand, n rows of p entries each.
//   B          - right operand, p rows of m entries each.
//   numThreads - requested thread count; values <= 0 fall back to 1 because
//                the num_threads clause requires a positive value.
// Returns:
//   The n x m product. Empty A yields an empty result.
// Throws:
//   std::invalid_argument if a row of A does not have B.size() entries or if
//   the rows of B differ in length.
std::vector<std::vector<double>> matmul_parallel_cols(const std::vector<std::vector<double>>& A,
                                                      const std::vector<std::vector<double>>& B,
                                                      int numThreads)
{
    // B.front() must not be touched when B has no rows.
    const std::size_t innerCount = B.size();
    const std::size_t colCount   = B.empty() ? 0 : B.front().size();

    // Validate shapes before entering the parallel region, so nothing throws
    // inside it and no index below can run out of bounds.
    for (const auto& r : A)
        if (r.size() != innerCount)
            throw std::invalid_argument("matmul_parallel_cols: every row of A must have B.size() entries");
    for (const auto& r : B)
        if (r.size() != colCount)
            throw std::invalid_argument("matmul_parallel_cols: B must be rectangular");

    // Signed int loop counters keep the loops in OpenMP canonical form.
    const int n = static_cast<int>(A.size());
    const int m = static_cast<int>(colCount);
    const int p = static_cast<int>(innerCount);
    const int threads = numThreads > 0 ? numThreads : 1;

    std::vector<std::vector<double>> C(n, std::vector<double>(m, 0.0));

    #pragma omp parallel for num_threads(threads)
    for (int j = 0; j < m; ++j)
    {
        // Each iteration writes only to column j of C, so iterations never
        // touch the same element.
        for (int i = 0; i < n; ++i)
        {
            // Accumulate in a local and store the finished value once.
            double sum = 0.0;
            for (int k = 0; k < p; ++k)
                sum += A[i][k] * B[k][j];
            C[i][j] = sum;
        }
    }

    return C;
}


// Block decomposition
//
// Computes C = A * B with C split into blockSize x blockSize tiles; the tiles
// are distributed over OpenMP threads (both tile loops are collapsed).
//
// Parameters:
//   A          - left operand, n rows of p entries each.
//   B          - right operand, p rows of m entries each.
//   blockSize  - edge length of a tile; values <= 0 fall back to 1, because a
//                non-positive step would never advance the tile loops.
//   numThreads - requested thread count; values <= 0 fall back to 1 because
//                the num_threads clause requires a positive value.
// Returns:
//   The n x m product. Empty A yields an empty result.
// Throws:
//   std::invalid_argument if a row of A does not have B.size() entries or if
//   the rows of B differ in length.
std::vector<std::vector<double>> matmul_block(const std::vector<std::vector<double>>& A,
                                              const std::vector<std::vector<double>>& B,
                                              int blockSize,
                                              int numThreads)
{
    // B.front() must not be touched when B has no rows.
    const std::size_t innerCount = B.size();
    const std::size_t colCount   = B.empty() ? 0 : B.front().size();

    // Validate shapes before entering the parallel region, so nothing throws
    // inside it and no index below can run out of bounds.
    for (const auto& r : A)
        if (r.size() != innerCount)
            throw std::invalid_argument("matmul_block: every row of A must have B.size() entries");
    for (const auto& r : B)
        if (r.size() != colCount)
            throw std::invalid_argument("matmul_block: B must be rectangular");

    // Signed int loop counters keep the loops in OpenMP canonical form.
    const int n = static_cast<int>(A.size());
    const int m = static_cast<int>(colCount);
    const int p = static_cast<int>(innerCount);
    const int tile = blockSize > 0 ? blockSize : 1;
    const int threads = numThreads > 0 ? numThreads : 1;

    std::vector<std::vector<double>> C(n, std::vector<double>(m, 0.0));

    #pragma omp parallel for collapse(2) num_threads(threads)
    for (int ii = 0; ii < n; ii += tile)
        for (int jj = 0; jj < m; jj += tile)
        {
            // Clip the tile at the matrix border; tiles never overlap, so each
            // element of C is written by exactly one iteration.
            const int iEnd = std::min(ii + tile, n);
            const int jEnd = std::min(jj + tile, m);
            for (int i = ii; i < iEnd; ++i)
                for (int j = jj; j < jEnd; ++j)
                {
                    double sum = 0.0;
                    for (int k = 0; k < p; ++k)
                        sum += A[i][k] * B[k][j];
                    C[i][j] = sum;
                }
        }

    return C;
}


// Print matrix
//
// Writes a header line "<name> (<rows>x<cols>):" to std::cout, where <cols> is
// the length of the first row (0 for a matrix without rows). Each row follows
// on its own line with a tab after every value, then one blank line.
void printMatrix(const std::vector<std::vector<double>>& M, const std::string& name)
{
    const std::size_t rowCount = M.size();
    std::size_t colCount = 0;
    if (rowCount != 0)
        colCount = M[0].size();

    std::cout << name << " (" << rowCount << "x" << colCount << "):\n";

    for (std::size_t r = 0; r < rowCount; ++r)
    {
        const std::vector<double>& line = M[r];
        for (std::size_t c = 0; c < line.size(); ++c)
            std::cout << line[c] << "\t";
        std::cout << "\n";
    }

    std::cout << "\n";
}


// Matrix comparison (element-wise)
//
// Returns true when A and B have the same number of rows, every pair of
// corresponding rows has the same length, and every pair of corresponding
// entries is either exactly equal or differs by at most 1e-9 in absolute value.
// An entry pair involving NaN never matches. Two empty matrices compare equal.
bool compareMatrices(const std::vector<std::vector<double>>& A,
                     const std::vector<std::vector<double>>& B)
{
    const double tolerance = 1e-9;

    if (A.size() != B.size())
        return false;

    for (std::size_t i = 0; i < A.size(); ++i)
    {
        const std::vector<double>& rowA = A[i];
        const std::vector<double>& rowB = B[i];

        // Check every row, not only the first, so ragged input cannot cause
        // an out-of-bounds read below.
        if (rowA.size() != rowB.size())
            return false;

        for (std::size_t j = 0; j < rowA.size(); ++j)
        {
            // Exact equality also covers equal infinities, whose difference
            // would be NaN.
            if (rowA[j] == rowB[j])
                continue;
            // Written as !(diff <= tol) so that a NaN difference is a mismatch.
            if (!(std::abs(rowA[j] - rowB[j]) <= tolerance))
                return false;
        }
    }
    return true;
}


// Program entry point: generates two random N x N matrices, multiplies them
// with the serial routine and with the row-, column- and block-parallel
// routines, and prints the elapsed time of each run. For every parallel run it
// also prints whether the result matches the serial one. The matrices
// themselves are printed only while N is below 5.
int main()
{
    using Clock  = std::chrono::steady_clock;
    using Matrix = std::vector<std::vector<double>>;

    const int  N = 4;                  // small enough for the matrices to be printed
    const bool showMatrices = (N < 5);

    // Thread count handed to the parallel variants; falls back to 4 if the
    // OpenMP runtime reports 0.
    int numThreads = omp_get_max_threads();
    if (numThreads == 0)
        numThreads = 4;

    Matrix A;
    Matrix B;
    generateRandomMatrices(N, A, B);

    if (showMatrices)
    {
        printMatrix(A, "Matrix A");
        printMatrix(B, "Matrix B");
    }

    // Seconds elapsed between `begin` and the moment of the call.
    const auto secondsSince = [](const Clock::time_point& begin) -> double {
        const Clock::time_point finish = Clock::now();
        return std::chrono::duration<double>(finish - begin).count();
    };

    // --- serial reference run ---
    Clock::time_point t0 = Clock::now();
    const Matrix C_serial = matmul_serial(A, B);
    const double timeSerial = secondsSince(t0);
    std::cout << "Seriell: " << timeSerial << " s\n";

    if (showMatrices)
        printMatrix(C_serial, "C_serial");

    // Label describing whether a result agrees with the serial reference.
    const auto verdict = [&C_serial](const Matrix& candidate) -> const char* {
        if (compareMatrices(C_serial, candidate))
            return "Match";
        return "Mismatch";
    };

    // --- row-parallel run ---
    t0 = Clock::now();
    const Matrix C_rows = matmul_parallel_rows(A, B, numThreads);
    const double timeRows = secondsSince(t0);
    std::cout << "Parallel Zeilen: " << timeRows << " s | "
              << verdict(C_rows) << "\n";

    if (showMatrices)
        printMatrix(C_rows, "C_rows");

    // --- column-parallel run ---
    t0 = Clock::now();
    const Matrix C_cols = matmul_parallel_cols(A, B, numThreads);
    const double timeCols = secondsSince(t0);
    std::cout << "Parallel Spalten: " << timeCols << " s | "
              << verdict(C_cols) << "\n";

    if (showMatrices)
        printMatrix(C_cols, "C_cols");

    // --- block-parallel run ---
    const int blockSize = 2;           // suits the small N used here
    t0 = Clock::now();
    const Matrix C_block = matmul_block(A, B, blockSize, numThreads);
    const double timeBlock = secondsSince(t0);
    std::cout << "Parallel Block (" << blockSize << "x" << blockSize << "): "
              << timeBlock << " s | "
              << verdict(C_block) << "\n";

    if (showMatrices)
        printMatrix(C_block, "C_block");

    return 0;
}