[This documentation is for preview only, and is subject to change in later releases. Blank topics are included as placeholders.]
When a For loop has a small body, it might perform more slowly than the equivalent sequential loop. Slower performance is caused by the overhead involved in partitioning the data and the cost of invoking a delegate on each loop iteration. The following examples show two ways that you can speed up execution of loops with small bodies.
In the following example, you provide a sequential loop for the delegate body, so that the delegate is invoked only once per partition, instead of once per iteration.
Class ForRangeDemo
Public Shared Function ForRange(ByVal fromInclusive As Integer, ByVal toExclusive As Integer, ByVal body As Action(Of Integer, Integer)) As ParallelLoopResult
' ... argument checking omitted
Dim range = toExclusive - fromInclusive
' A good default is to partition based on processor count.
Dim numberOfPartitions = System.Environment.ProcessorCount
Dim stride = range / numberOfPartitions
If (range = 0) Then
numberOfPartitions = 0
End If
ForRange = Parallel.For(0, numberOfPartitions, Sub(i)
Dim start = i * stride
Dim last As Integer
If i = (numberOfPartitions - 1) Then
last = toExclusive
Else
last = start + stride
End If
body(start, last)
End Sub)
End Function
Shared Sub RangeMain()
Dim arr(1000000) As Integer
ForRange(0, arr.Length, Sub(start, last)
For x As Integer = start To last
' Small body.
arr(x) = x * x
Next
End Sub)
' Prove we correctly partitioned the source.
Dim errorCount = 0
For j As Integer = 0 To arr.Length
If (arr(j) <> j * j) Then
Console.WriteLine("error at element(j)")
errorCount = errorCount + 1
End If
Next
Console.WriteLine("Total errors: 0", errorCount)
Console.WriteLine("\nPress any key.")
Console.ReadKey()
End Sub
End Class
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
class ForRangeDemo
{
public static ParallelLoopResult ForRange(int fromInclusive, int toExclusive, Action<int, int> body)
{
// ... argument checking omitted
int range = toExclusive - fromInclusive;
// A good default is to partition based on processor count.
int numberOfPartitions = System.Environment.ProcessorCount;
int stride = range / numberOfPartitions;
if (range == 0) numberOfPartitions = 0;
return Parallel.For(0, numberOfPartitions, i =>
{
int start = i * stride;
int end = (i == numberOfPartitions - 1) ? toExclusive : start + stride;
body(start, end);
});
}
static void Main()
{
int[] arr = new int[1000000];
ForRange(0, arr.Length, (start, end) =>
{
for (int x = start; x < end; x++)
{
// Small body.
arr[x] = x * x;
}
});
// Prove we correctly partitioned the source.
int errorCount = 0;
for (int j = 0; j < arr.Length; j++)
{
if (arr[j] != j * j)
{
Console.WriteLine("error at elelemnt[j]");
errorCount++;
}
}
Console.WriteLine("Total errors: {0}\nPress any key.", errorCount);
Console.ReadKey();
}
}
In the following example, the CreateRanges method is implemented as an iterator, but it approaches the problem in a similar way as ForRange in the previous example. It performs a sequential loop for each partition inside the delegate.
Class CreateRangeDemo
Private Shared Function CreateRanges(ByVal fromInclusive As Integer, ByVal toExclusive As Integer) As IEnumerable(Of Tuple(Of Integer, Integer))
' argument checking omitted
Dim rangeSize As Integer = (toExclusive - fromInclusive) / System.Environment.ProcessorCount
Dim i As Integer = fromInclusive
While i < toExclusive
Dim from As Integer = i, [to] As Integer = Math.Min(i + rangeSize, toExclusive)
i += rangeSize
End While
Return (CreateRanges(i, rangeSize))
End Function
Private Shared Sub CreateRangeDemoMain()
Dim arr2 As Integer() = New Integer(999999) {}
Parallel.ForEach(CreateRanges(0, arr2.Length), Function(range)
For i As Integer = range.Item1 To range.Item2 - 1
' Small body.
arr2(i) = i * i
Next
Return arr2
End Function)
' Prove we correctly partitioned the source.
Dim errorCount As Integer = 0
For j As Integer = 0 To arr2.Length - 1
If arr2(j) <> j * j Then
Console.WriteLine("error at elelemnt[j]")
errorCount += 1
End If
Next
Console.WriteLine("Total errors: {0}" & vbLf & "Press any key.", errorCount)
Console.ReadKey()
End Sub
End Class
class CreateRangeDemo
{
private static IEnumerable<Tuple<int, int>> CreateRanges(int fromInclusive, int toExclusive)
{
// argument checking omitted
int rangeSize = (toExclusive - fromInclusive) / System.Environment.ProcessorCount;
for (int i = fromInclusive; i < toExclusive; i += rangeSize)
{
int from = i, to = Math.Min(i + rangeSize, toExclusive);
yield return Tuple.Create(from, to);
}
}
static void Main()
{
int[] arr2 = new int[1000000];
Parallel.ForEach(CreateRanges(0, arr2.Length), range =>
{
for (int i = range.Item1; i < range.Item2; i++)
{
// Small body.
arr2[i] = i * i;
}
});
// Prove we correctly partitioned the source.
int errorCount = 0;
for (int j = 0; j < arr2.Length; j++)
{
if (arr2[j] != j * j)
{
Console.WriteLine("error at elelemnt[j]");
errorCount++;
}
}
Console.WriteLine("Total errors: {0}\nPress any key.", errorCount);
Console.ReadKey();
}
}
These techniques work by partially bypassing the load-balancing logic inside the For method and are only useful when the loop performs a minimal amount of work. As the work becomes more computationally expensive, you will get better performance by using parallel For .
Reference
Concepts