.NET 4 并行计算 原理探索
记得去年的一个项目中,自己实现了一套挖掘算法。由于计算量非常的大,运行一次完整的过程差不多需要一个工作日。为了减少运行时间,主要从算法及实现上来提高它的性能,包括算法优化、缓存中间结果、化简计算公式等,但效果最明显的却是采用了多线程计算。现在.NET 4推出了并行计算,真是雪中送炭啊。赶紧一睹为快!
但究竟什么是并行计算?单个CPU无所谓并行。只有在多个CPU的情况下,操作系统将任务均匀分配给各个CPU,使多个CPU能同时协同工作。说到这里,我不禁产生一个疑惑:多线程与并行计算到底有啥区别呢?在实现挖掘算法时,我将数据进行分割,使每个线程计算一个数据分块。这样,速度的提高基本跟CPU的个数成正比。比如2个CPU,我开启两个线程,速度基本提高一倍,但开启3个或者4个线程时提高不是很明显(理论上应该提高不了,但的确有点提高,我想原因是线程多了,被分配到CPU的机会会大一点,所以稍有提高)。
现在.NET 4 的并行计算会不会是同样的原理实现的?即换汤不换药。为此做了以下实验验证(贴代码):
代码
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
namespace TPLDemo
{
class Program
{
#region Sequential loop
/// <summary>
/// Multiplies <paramref name="matA"/> by <paramref name="matB"/> on the calling thread,
/// adding every product term into <paramref name="result"/>.
/// </summary>
/// <param name="matA">Left operand; its row count bounds the output rows and its column count bounds the inner loop.</param>
/// <param name="matB">Right operand; its column count bounds the output columns.</param>
/// <param name="result">
/// Output matrix. Terms are accumulated with <c>+=</c>, so any values already stored in it are
/// added to rather than overwritten.
/// </param>
static void MultiplyMatricesSequential(double[,] matA, double[,] matB,
    double[,] result)
{
    int rowCount = matA.GetLength(0);
    int innerCount = matA.GetLength(1);
    int columnCount = matB.GetLength(1);

    for (int row = 0; row < rowCount; row++)
    {
        for (int column = 0; column < columnCount; column++)
        {
            for (int inner = 0; inner < innerCount; inner++)
            {
                result[row, column] += matA[row, inner] * matB[inner, column];
            }
        }
    }
}
#endregion
#region Parallel_Loop
/// <summary>
/// Multiplies <paramref name="matA"/> by <paramref name="matB"/>, handing each output row to
/// <c>Parallel.For</c> as one loop iteration.
/// </summary>
/// <param name="matA">Left operand; one parallel iteration is issued per row of this matrix.</param>
/// <param name="matB">Right operand; its column count bounds the output columns.</param>
/// <param name="result">
/// Output matrix. Terms are accumulated with <c>+=</c>, so any values already stored in it are
/// added to rather than overwritten.
/// </param>
static void MultiplyMatricesParallel(double[,] matA, double[,] matB, double[,] result)
{
    int rowCount = matA.GetLength(0);
    int innerCount = matA.GetLength(1);
    int columnCount = matB.GetLength(1);

    // Each iteration writes only to its own row of result, so iterations never touch the same cell.
    Parallel.For(0, rowCount, row =>
    {
        for (int column = 0; column < columnCount; column++)
        {
            for (int inner = 0; inner < innerCount; inner++)
            {
                result[row, column] += matA[row, inner] * matB[inner, column];
            }
        }
    });
}
#endregion
#region ThreadPool_Loop
/// <summary>
/// Multiplies <paramref name="matA"/> by <paramref name="matB"/> by splitting the rows of
/// <paramref name="matA"/> into four contiguous ranges and queuing one thread-pool work item
/// per range. The call blocks until all four work items have finished, so
/// <paramref name="result"/> is complete when the method returns.
/// </summary>
/// <param name="matA">Left operand; its rows are partitioned across the work items.</param>
/// <param name="matB">Right operand, shared by all work items.</param>
/// <param name="result">Output matrix; each work item writes only the rows of its own range.</param>
static void MultiplyMatricesThreadPool(double[,] matA, double[,] matB, double[,] result)
{
    const int partitionCount = 4;

    int matARows = matA.GetLength(0);
    int rowsPerPartition = matARows / partitionCount;

    // QueueUserWorkItem only schedules the work. Without this latch the method would return
    // (and any caller's timer would stop) before a single row had been computed.
    using (CountdownEvent pending = new CountdownEvent(partitionCount))
    {
        for (int partition = 0; partition < partitionCount; partition++)
        {
            int start = partition * rowsPerPartition;

            // The last range absorbs the remainder when the row count is not a multiple of four.
            int end = (partition == partitionCount - 1) ? matARows : start + rowsPerPartition;

            DataParams dataParams = new DataParams(matA, matB, result, start, end);
            ThreadPool.QueueUserWorkItem(state =>
            {
                try
                {
                    PartMultiplyMatrices(state);
                }
                finally
                {
                    // Signal even on failure so the waiting thread is never left blocked.
                    pending.Signal();
                }
            }, dataParams);
        }

        pending.Wait();
    }
}
/// <summary>
/// Describes one slice of a matrix multiplication: the two operands, the output matrix and the
/// range of rows to process. Instances are passed as the state object of a thread-pool work
/// item, so the fields are read-only to guarantee a queued slice cannot be retargeted later.
/// </summary>
sealed class DataParams
{
    /// <summary>Left operand of the multiplication.</summary>
    public readonly double[,] matA;

    /// <summary>Right operand of the multiplication.</summary>
    public readonly double[,] matB;

    /// <summary>Matrix that receives the products.</summary>
    public readonly double[,] result;

    /// <summary>Index of the first row to process (inclusive).</summary>
    public readonly int start;

    /// <summary>Index at which processing stops (exclusive).</summary>
    public readonly int end;

    /// <summary>Captures the operands, the output matrix and the row range of one slice.</summary>
    /// <param name="matA">Left operand.</param>
    /// <param name="matB">Right operand.</param>
    /// <param name="result">Output matrix.</param>
    /// <param name="start">First row to process (inclusive).</param>
    /// <param name="end">Row at which to stop (exclusive).</param>
    public DataParams(double[,] matA, double[,] matB, double[,] result,
        int start, int end)
    {
        this.matA = matA;
        this.matB = matB;
        this.result = result;
        this.start = start;
        this.end = end;
    }
}
/// <summary>
/// Thread-pool callback that multiplies one slice of rows: for every row in
/// <c>[start, end)</c> of the supplied <see cref="DataParams"/> it accumulates the products of
/// <c>matA</c> and <c>matB</c> into <c>result</c>.
/// </summary>
/// <param name="state">The <see cref="DataParams"/> describing the slice to compute.</param>
/// <exception cref="ArgumentException">
/// <paramref name="state"/> is null or is not a <see cref="DataParams"/> instance.
/// </exception>
static void PartMultiplyMatrices(object state)
{
    DataParams dataParams = state as DataParams;
    if (dataParams == null)
    {
        // Fail with a descriptive error instead of a NullReferenceException on first use.
        throw new ArgumentException("state must be a non-null DataParams instance.", "state");
    }

    double[,] matA = dataParams.matA;
    double[,] matB = dataParams.matB;
    double[,] result = dataParams.result;
    int matACols = matA.GetLength(1);
    int matBCols = matB.GetLength(1);

    for (int i = dataParams.start; i < dataParams.end; i++)
    {
        for (int j = 0; j < matBCols; j++)
        {
            for (int k = 0; k < matACols; k++)
            {
                result[i, j] += matA[i, k] * matB[k, j];
            }
        }
    }
}
#endregion
#region Main
/// <summary>
/// Benchmark driver: builds two random matrices once, then repeatedly times the sequential,
/// Parallel.For and thread-pool multiplications until the user answers 'n'.
/// Every measurement starts from a zeroed stopwatch and a freshly allocated result matrix, so
/// no run inherits elapsed time or accumulated values from the run before it.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    int colCount = 500;
    int rowCount = 5000;
    int colCount2 = 500;
    double[,] m1 = InitializeMatrix(rowCount, colCount);
    double[,] m2 = InitializeMatrix(colCount, colCount2);
    double[,] result;

    while (true)
    {
        Stopwatch stopwatch = new Stopwatch();

        // First do the sequential version.
        // The multiply routines accumulate with +=, so each run needs its own zeroed matrix.
        result = new double[rowCount, colCount2];
        Console.WriteLine("Executing sequential loop...");
        stopwatch.Start();
        MultiplyMatricesSequential(m1, m2, result);
        stopwatch.Stop();
        Console.WriteLine("Sequential loop time in milliseconds: {0}", stopwatch.ElapsedMilliseconds);
        // For the skeptics.
        OfferToPrint(rowCount, colCount2, result);

        // Do the parallel loop on a fresh timer and results matrix.
        result = new double[rowCount, colCount2];
        Console.WriteLine("Executing parallel loop...");
        stopwatch.Reset();
        stopwatch.Start();
        MultiplyMatricesParallel(m1, m2, result);
        stopwatch.Stop();
        Console.WriteLine("Parallel loop time in milliseconds: {0}", stopwatch.ElapsedMilliseconds);
        // For the skeptics.
        OfferToPrint(rowCount, colCount2, result);

        // Do the ThreadPool loop. Without the reset the stopwatch would resume from the
        // parallel run's elapsed time and report the sum of both measurements.
        // The reading covers the time until MultiplyMatricesThreadPool returns.
        result = new double[rowCount, colCount2];
        Console.WriteLine("Executing threadpool loop...");
        stopwatch.Reset();
        stopwatch.Start();
        MultiplyMatricesThreadPool(m1, m2, result);
        stopwatch.Stop();
        Console.WriteLine("ThreadPool loop time in milliseconds: {0}", stopwatch.ElapsedMilliseconds);
        // For the skeptics.
        OfferToPrint(rowCount, colCount2, result);

        Console.WriteLine("Next computation? y/n");
        char c = Console.ReadKey().KeyChar;
        if (c == 'n' || c == 'N')
        {
            break;
        }
    }

    // Keep the console window open in debug mode.
    Console.WriteLine("Press any key to exit.");
    Console.ReadKey();
}
#endregion
#region Helper_Methods
// One generator shared by every InitializeMatrix call. A time-seeded Random created per call
// can receive the same seed as the previous call when both happen within one clock tick,
// which would fill two matrices with the identical value sequence.
// NOTE(review): Random is not thread-safe; this assumes InitializeMatrix is only called from
// a single thread, as Main does - confirm before calling it concurrently.
private static readonly Random s_random = new Random();

/// <summary>
/// Creates a <paramref name="rows"/> x <paramref name="cols"/> matrix whose cells hold random
/// whole numbers in the range [0, 100).
/// </summary>
/// <param name="rows">Number of rows to allocate.</param>
/// <param name="cols">Number of columns to allocate.</param>
/// <returns>The newly allocated and filled matrix.</returns>
static double[,] InitializeMatrix(int rows, int cols)
{
    double[,] matrix = new double[rows, cols];
    for (int i = 0; i < rows; i++)
    {
        for (int j = 0; j < cols; j++)
        {
            matrix[i, j] = s_random.Next(100);
        }
    }
    return matrix;
}
/// <summary>
/// Announces that a computation has finished and, if the answer is 'y' or 'Y', dumps the
/// matrix to the console row by row. The answer is currently fixed to 'n', so only the
/// announcement line is written.
/// </summary>
/// <param name="rowCount">Number of rows of <paramref name="matrix"/> to print.</param>
/// <param name="colCount">Number of columns of <paramref name="matrix"/> to print.</param>
/// <param name="matrix">Matrix whose cells are printed.</param>
private static void OfferToPrint(int rowCount, int colCount, double[,] matrix)
{
    Console.WriteLine("Computation complete. Print results? y/n");

    // The interactive read (Console.ReadKey().KeyChar) is switched off; the answer is
    // hard-wired so the benchmark runs unattended.
    char answer = 'n';
    bool printRequested = answer == 'y' || answer == 'Y';
    if (!printRequested)
    {
        return;
    }

    Console.WindowWidth = 80;
    Console.WriteLine();
    for (int row = 0; row < rowCount; row++)
    {
        Console.WriteLine("ROW {0}: ", row);
        for (int column = 0; column < colCount; column++)
        {
            Console.Write("{0:#.##} ", matrix[row, column]);
        }
        Console.WriteLine();
    }
}
#endregion
}
}
Executing sequential loop...
Sequential loop time in milliseconds: 33592
Computation complete. Print results? y/n
Executing parallel loop...
Parallel loop time in milliseconds: 17760
Computation complete. Print results? y/n
Executing threadpool loop...
ThreadPool loop time in milliseconds: 17761
Computation complete. Print results? y/n
Next computation? y/n
Executing sequential loop...
Sequential loop time in milliseconds: 33285
Computation complete. Print results? y/n
Executing parallel loop...
Parallel loop time in milliseconds: 17252
Computation complete. Print results? y/n
Executing threadpool loop...
ThreadPool loop time in milliseconds: 17252
Computation complete. Print results? y/n
Next computation? y/n
这段代码来源于微软http://msdn.microsoft.com/en-us/library/dd460713(v=VS.100).aspx
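我加了一段我的多线程计算。统计结果显示,顺序执行效率最低,而采用.NET 4并行库和多线程运算的耗时基本相等,速度约是顺序执行的两倍。(需要注意:上面输出中的 ThreadPool 耗时,是在秒表未重置、且 MultiplyMatricesThreadPool 把任务放入线程池后未等待其完成就返回的情况下记录的,所以该数值实际上是并行循环的耗时再加上任务排队的开销,并不能代表线程池版本真实的计算时间。)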
哈哈,所以还是换汤不换药!由此可推测:它是对多线程的包装啊!

浙公网安备 33010602011771号