164 lines
5.3 KiB
C++
164 lines
5.3 KiB
C++
#include <algorithm>
#include <chrono>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <vector>

#ifdef _OPENMP
#include <omp.h>
#endif
|
|
|
|
// =====================
|
|
// Matrizen erzeugen
|
|
// =====================
|
|
/**
 * @brief Fills A and B with n x n matrices of pseudo-random values.
 *
 * Any previous contents of A and B are discarded. Each element is drawn with
 * std::rand() and mapped linearly onto [min_val, max_val].
 *
 * @param n        Number of rows and columns; values <= 0 produce empty matrices.
 * @param A        Output matrix, overwritten.
 * @param B        Output matrix, overwritten.
 * @param min_val  Value produced when std::rand() returns 0.
 * @param max_val  Value produced when std::rand() returns RAND_MAX.
 *
 * @note The generator is re-seeded from std::time() on every call, so two
 *       calls within the same clock second produce identical matrices.
 */
void generateRandomMatrices(int n,
                            std::vector<std::vector<double>>& A,
                            std::vector<std::vector<double>>& B,
                            double min_val = 0.0,
                            double max_val = 10.0)
{
    std::srand(static_cast<unsigned int>(std::time(nullptr)));

    // A negative n would otherwise wrap to a huge unsigned size.
    const std::size_t dim = n > 0 ? static_cast<std::size_t>(n) : 0;

    // assign() rebuilds every row. resize() would keep rows that already
    // exist at their old length, and the fill loop below would then write
    // past their end.
    A.assign(dim, std::vector<double>(dim));
    B.assign(dim, std::vector<double>(dim));

    const double span = max_val - min_val;

    for (std::size_t i = 0; i < dim; ++i)
    {
        for (std::size_t j = 0; j < dim; ++j)
        {
            // Draw order (A first, then B) is kept so a given seed yields
            // the same matrices as before.
            const double r1 = static_cast<double>(std::rand()) / RAND_MAX;
            const double r2 = static_cast<double>(std::rand()) / RAND_MAX;
            A[i][j] = min_val + r1 * span;
            B[i][j] = min_val + r2 * span;
        }
    }
}
|
|
|
|
// =====================
|
|
// Serielle Multiplikation
|
|
// =====================
|
|
/**
 * @brief Single-threaded matrix product C = A * B.
 *
 * @param A  Left operand with A.size() rows. Every row must hold at least
 *           B.size() elements (not checked).
 * @param B  Right operand. Every row must hold at least as many elements as
 *           B's first row (not checked).
 * @return   Matrix with A.size() rows and B[0].size() columns. When B has no
 *           rows the result has A.size() empty rows.
 */
std::vector<std::vector<double>> matmul_serial(const std::vector<std::vector<double>>& A,
                                               const std::vector<std::vector<double>>& B)
{
    const int n = static_cast<int>(A.size());
    const int p = static_cast<int>(B.size());
    // B[0] only exists when B has at least one row.
    const int m = B.empty() ? 0 : static_cast<int>(B.front().size());

    std::vector<std::vector<double>> C(n, std::vector<double>(m, 0.0));

    for (int i = 0; i < n; ++i)
    {
        const std::vector<double>& rowA = A[i];
        std::vector<double>& rowC = C[i];

        for (int j = 0; j < m; ++j)
        {
            // Accumulate locally; terms are added in the same k order as a
            // direct "C[i][j] +=" would add them.
            double sum = 0.0;
            for (int k = 0; k < p; ++k)
                sum += rowA[k] * B[k][j];
            rowC[j] = sum;
        }
    }

    return C;
}
|
|
|
|
// =====================
|
|
// Parallele Zeilenzerlegung
|
|
// =====================
|
|
/**
 * @brief Matrix product C = A * B with the rows of C distributed over threads.
 *
 * The outer loop over row index i carries the OpenMP work-sharing pragma, so
 * each iteration writes only to row i of C. When the translation unit is
 * built without OpenMP the pragma is ignored and the loop runs serially.
 *
 * @param A           Left operand with A.size() rows. Every row must hold at
 *                    least B.size() elements (not checked).
 * @param B           Right operand. Every row must hold at least as many
 *                    elements as B's first row (not checked).
 * @param numThreads  Passed to omp_set_num_threads() when positive; other
 *                    values leave the OpenMP runtime setting untouched.
 * @return            Matrix with A.size() rows and B[0].size() columns. When
 *                    B has no rows the result has A.size() empty rows.
 */
std::vector<std::vector<double>> matmul_row_parallel(const std::vector<std::vector<double>>& A,
                                                     const std::vector<std::vector<double>>& B,
                                                     int numThreads)
{
    const int n = static_cast<int>(A.size());
    const int p = static_cast<int>(B.size());
    // B[0] only exists when B has at least one row.
    const int m = B.empty() ? 0 : static_cast<int>(B.front().size());

    std::vector<std::vector<double>> C(n, std::vector<double>(m, 0.0));

#ifdef _OPENMP
    // The OpenMP API requires a positive thread count.
    if (numThreads > 0)
        omp_set_num_threads(numThreads);
#else
    (void)numThreads;
#endif

    #pragma omp parallel for
    for (int i = 0; i < n; ++i)
    {
        const std::vector<double>& rowA = A[i];
        std::vector<double>& rowC = C[i];

        for (int j = 0; j < m; ++j)
        {
            double sum = 0.0;
            for (int k = 0; k < p; ++k)
                sum += rowA[k] * B[k][j];
            rowC[j] = sum;
        }
    }

    return C;
}
|
|
|
|
// =====================
|
|
// Parallele Spaltenzerlegung
|
|
// =====================
|
|
/**
 * @brief Matrix product C = A * B with the columns of C distributed over threads.
 *
 * The outer loop over column index j carries the OpenMP work-sharing pragma,
 * so each iteration writes only to column j of C. When the translation unit
 * is built without OpenMP the pragma is ignored and the loop runs serially.
 *
 * @param A           Left operand with A.size() rows. Every row must hold at
 *                    least B.size() elements (not checked).
 * @param B           Right operand. Every row must hold at least as many
 *                    elements as B's first row (not checked).
 * @param numThreads  Passed to omp_set_num_threads() when positive; other
 *                    values leave the OpenMP runtime setting untouched.
 * @return            Matrix with A.size() rows and B[0].size() columns. When
 *                    B has no rows the result has A.size() empty rows.
 */
std::vector<std::vector<double>> matmul_col_parallel(const std::vector<std::vector<double>>& A,
                                                     const std::vector<std::vector<double>>& B,
                                                     int numThreads)
{
    const int n = static_cast<int>(A.size());
    const int p = static_cast<int>(B.size());
    // B[0] only exists when B has at least one row.
    const int m = B.empty() ? 0 : static_cast<int>(B.front().size());

    std::vector<std::vector<double>> C(n, std::vector<double>(m, 0.0));

#ifdef _OPENMP
    // The OpenMP API requires a positive thread count.
    if (numThreads > 0)
        omp_set_num_threads(numThreads);
#else
    (void)numThreads;
#endif

    #pragma omp parallel for
    for (int j = 0; j < m; ++j)
    {
        for (int i = 0; i < n; ++i)
        {
            const std::vector<double>& rowA = A[i];

            // One store per element of C: neighbouring columns belong to
            // other iterations, so repeated "+=" into C[i][j] would touch
            // memory next to their elements on every k step.
            double sum = 0.0;
            for (int k = 0; k < p; ++k)
                sum += rowA[k] * B[k][j];
            C[i][j] = sum;
        }
    }

    return C;
}
|
|
|
|
// =====================
|
|
// Parallele Blockzerlegung
|
|
// =====================
|
|
/**
 * @brief Matrix product C = A * B with square tiles of C distributed over threads.
 *
 * C is cut into tiles of at most blockSize x blockSize elements. The two tile
 * loops are collapsed into one OpenMP work-sharing loop, and each tile is
 * written by exactly one iteration. When the translation unit is built
 * without OpenMP the pragma is ignored and the loops run serially.
 *
 * @param A           Left operand with A.size() rows. Every row must hold at
 *                    least B.size() elements (not checked).
 * @param B           Right operand. Every row must hold at least as many
 *                    elements as B's first row (not checked).
 * @param blockSize   Tile edge length; values below 1 are treated as 1.
 * @param numThreads  Passed to omp_set_num_threads() when positive; other
 *                    values leave the OpenMP runtime setting untouched.
 * @return            Matrix with A.size() rows and B[0].size() columns. When
 *                    B has no rows the result has A.size() empty rows.
 */
std::vector<std::vector<double>> matmul_block_parallel(const std::vector<std::vector<double>>& A,
                                                       const std::vector<std::vector<double>>& B,
                                                       int blockSize,
                                                       int numThreads)
{
    const int n = static_cast<int>(A.size());
    const int p = static_cast<int>(B.size());
    // B[0] only exists when B has at least one row.
    const int m = B.empty() ? 0 : static_cast<int>(B.front().size());

    // A step of zero or less would never advance the tile loops.
    const int step = std::max(blockSize, 1);

    std::vector<std::vector<double>> C(n, std::vector<double>(m, 0.0));

#ifdef _OPENMP
    // The OpenMP API requires a positive thread count.
    if (numThreads > 0)
        omp_set_num_threads(numThreads);
#else
    (void)numThreads;
#endif

    #pragma omp parallel for collapse(2)
    for (int ii = 0; ii < n; ii += step)
        for (int jj = 0; jj < m; jj += step)
        {
            // Tile limits written as "start + min(step, remaining)" so a
            // very large step cannot overflow the addition.
            const int iEnd = ii + std::min(step, n - ii);
            const int jEnd = jj + std::min(step, m - jj);

            for (int i = ii; i < iEnd; ++i)
            {
                const std::vector<double>& rowA = A[i];
                std::vector<double>& rowC = C[i];

                for (int j = jj; j < jEnd; ++j)
                {
                    double sum = 0.0;
                    for (int k = 0; k < p; ++k)
                        sum += rowA[k] * B[k][j];
                    rowC[j] = sum;
                }
            }
        }

    return C;
}
|
|
|
|
// =====================
|
|
// Main
|
|
// =====================
|
|
int main()
|
|
{
|
|
int N = 500; // Matrixgröße anpassen
|
|
int numThreads = 4; // Threads für OpenMP
|
|
int blockSize = 64; // Blockgröße für Blockmultiplikation
|
|
|
|
std::vector<std::vector<double>> A, B;
|
|
generateRandomMatrices(N, A, B);
|
|
|
|
// ---------- Seriell ----------
|
|
auto start = std::chrono::steady_clock::now();
|
|
auto C_serial = matmul_serial(A, B);
|
|
auto end = std::chrono::steady_clock::now();
|
|
std::cout << "Seriell: " << std::chrono::duration<double>(end-start).count() << " s\n";
|
|
|
|
// ---------- Zeilenparallel ----------
|
|
start = std::chrono::steady_clock::now();
|
|
auto C_row = matmul_row_parallel(A, B, numThreads);
|
|
end = std::chrono::steady_clock::now();
|
|
std::cout << "Zeilenparallel: " << std::chrono::duration<double>(end-start).count() << " s\n";
|
|
|
|
// ---------- Spaltenparallel ----------
|
|
start = std::chrono::steady_clock::now();
|
|
auto C_col = matmul_col_parallel(A, B, numThreads);
|
|
end = std::chrono::steady_clock::now();
|
|
std::cout << "Spaltenparallel: " << std::chrono::duration<double>(end-start).count() << " s\n";
|
|
|
|
// ---------- Blockparallel ----------
|
|
start = std::chrono::steady_clock::now();
|
|
auto C_block = matmul_block_parallel(A, B, blockSize, numThreads);
|
|
end = std::chrono::steady_clock::now();
|
|
std::cout << "Blockparallel: " << std::chrono::duration<double>(end-start).count() << " s\n";
|
|
|
|
return 0;
|
|
}
|