Add new comment

Sorted map vs hashed map

In theory, hash map structure should be very fast on retrieving by key operations, close to O(1) like for an array. A sorted map (keys are sorted) should be O(log2 N) instead. The following test checks the difference.

Common scenario

We will use one tester class per type of tested structure. The keys are string[10] and are generated randomly in prepared array to avoid additional checks whether the key exists or not.

The test creates 1 million pairs <string[10], longint> then repeat N times (variable RunsCount) following tests including memory usage statistics (how to):

  • sequential access like array
  • random data access by randomly selected existing key

program HashVsSortedMap;
 
{$MODE OBJFPC}{$H+}
 
uses
  SysUtils,
  Classes,
  Math,
  Contnrs,
  FGL,
  DateUtils;
 
const
  MinTestValue = 1 - MaxInt;
  MaxTestValue = MaxInt;
 
type
  TKey = string[10]; // Max key "4294967296" is up to 10 characters
  TMySortedMap = specialize TFPGMap<TKey, longint>;
  TMyHashMap = TFPHashList;
 
  TTester = class(TObject)
  protected
    FTestItemsCount: integer;
    FUniqueKeysCount: integer;
    FKeys: array of TKey;
  public
    constructor Create(const ATestItemsCount, AUniqueKeysCount: integer); virtual;
    destructor Destroy; override;
    procedure CreateMap; virtual; abstract;
    procedure FreeMap; virtual; abstract;
    procedure Clear; virtual; abstract;
    procedure Fill; virtual; abstract;
    procedure Sort; virtual; abstract;
    function SequentialAccess: longint; virtual; abstract;
    function RandomAccess: longint; virtual; abstract;
    property TestItemsCount: integer read FTestItemsCount;
    property UniqueKeysCount: integer read FUniqueKeysCount;
  end;
 
  { TSortedMapTester }
 
  TSortedMapTester = class(TTester)
  private
    FMap: TMySortedMap;
  public
    procedure CreateMap; override;
    procedure FreeMap; override;
    procedure Clear; override;
    procedure Fill; override;
    procedure Sort; override;
    function SequentialAccess: longint; override;
    function RandomAccess: longint; override;
  end;
 
  { THashMapTester }
 
  THashMapTester = class(TTester)
  private
    FMap: TMyHashMap;
  public
    procedure CreateMap; override;
    procedure FreeMap; override;
    procedure Clear; override;
    procedure Fill; override;
    procedure Sort; override;
    function SequentialAccess: longint; override;
    function RandomAccess: longint; override;
  end;
 
  { TTester }
 
  constructor TTester.Create(const ATestItemsCount, AUniqueKeysCount: integer);
  var
    i: integer;
  begin
    inherited Create;
    FTestItemsCount := ATestItemsCount;
    FUniqueKeysCount := AUniqueKeysCount;
    SetLength(FKeys, FTestItemsCount);
    for i := 0 to FTestItemsCount - 1 do
      FKeys[i] := IntToStr(Math.RandomRange(1, FUniqueKeysCount));
  end;
 
  destructor TTester.Destroy;
  begin
    SetLength(FKeys, 0);
    inherited Destroy;
  end;
 
  { THashMapTester }
 
  procedure THashMapTester.CreateMap;
  begin
    FMap := TMyHashMap.Create;
  end;
 
  procedure THashMapTester.FreeMap;
  begin
    FMap.Free;
  end;
 
  procedure THashMapTester.Clear;
  var
    i: integer;
    p: ^longint;
  begin
    for i := 0 to FTestItemsCount - 1 do
    begin
      p := FMap.Items[i];
      Dispose(p);
    end;
    FMap.Clear;
  end;
 
  procedure THashMapTester.Fill;
  var
    i: integer;
    p: ^longint;
  begin
    for i := 1 to FTestItemsCount do
    begin
      New(p);
      p^ := Math.RandomRange(MinTestValue, MaxTestValue);
      FMap.Add(FKeys[i], p);
    end;
  end;
 
  procedure THashMapTester.Sort;
  begin
    // do nothing
  end;
 
  function THashMapTester.SequentialAccess: longint;
  var
    i: integer;
  begin
    Result := 0;
    for i := 0 to FMap.Count - 1 do
      Result := (Result + longint(FMap.Items[i]^)) div 2;
  end;
 
  function THashMapTester.RandomAccess: longint;
  var
    i: integer;
    Key: ShortString;
  begin
    Result := 0;
    for i := 1 to FTestItemsCount do
    begin
      Key := FKeys[Math.RandomRange(1, FTestItemsCount)];
      Result := (Result + longint(FMap.Find(Key)^)) div 2;
    end;
  end;
 
  { TSortedMapTester }
 
  procedure TSortedMapTester.CreateMap;
  begin
    FMap := TMySortedMap.Create;
  end;
 
  procedure TSortedMapTester.FreeMap;
  begin
    FMap.Free;
  end;
 
  procedure TSortedMapTester.Clear;
  begin
    FMap.Clear;
  end;
 
  procedure TSortedMapTester.Fill;
  var
    i: integer;
  begin
    for i := 1 to FTestItemsCount do
      FMap.Add(FKeys[i], Math.RandomRange(MinTestValue, MaxTestValue));
  end;
 
  procedure TSortedMapTester.Sort;
  begin
    FMap.Sorted := True;
  end;
 
  function TSortedMapTester.SequentialAccess: longint;
  var
    i: integer;
  begin
    Result := 0;
    for i := 0 to FMap.Count - 1 do
      Result := (Result + FMap.Data[i]) div 2;
  end;
 
  function TSortedMapTester.RandomAccess: longint;
  var
    i: integer;
    Key: TKey;
  begin
    Result := 0;
    for i := 1 to FTestItemsCount do
    begin
      Key := FKeys[Math.RandomRange(1, FTestItemsCount)];
      Result := (Result + FMap.KeyData[Key]) div 2;
    end;
  end;
 
  procedure RunTest(const Tester: TTester;
    const RunsCount: integer;
    const ShowDetails: boolean);
  var
    i, m1, m2, m3, m4, m5: integer;
    t1, t2: TDateTime;
    v, AvgSize, AvgSeqTime, AvgRndTime: longint;
    DiffMsec: int64;
  begin
    writeln('-------------------');
    writeln('Testing ', Tester.ClassName,
      ', length: ', Tester.TestItemsCount,
      ', unique keys: ', Tester.UniqueKeysCount,
      ', density: ', (Tester.UniqueKeysCount / Tester.TestItemsCount):4:2);
    writeln('-------------------');
    AvgSize := 0;
    AvgSeqTime := 0;
    AvgRndTime := 0;
    for i := 1 to RunsCount do
    begin
      writeln('Step ', i);
      m1 := GetHeapStatus.TotalAllocated;
      if ShowDetails then
        writeln('Total heap memory allocated, bytes: ', m1);
      Tester.CreateMap;
      m2 := GetHeapStatus.TotalAllocated;
      if ShowDetails then
        writeln('Empty. Size: ', m2 - m1);
 
      t1 := Now;
      Tester.Fill;
      t2 := Now;
      m3 := GetHeapStatus.TotalAllocated;
      if ShowDetails then
        writeln('Filled test data. Size: ', m3 - m1);
      AvgSize := AvgSize + m3 - m1;
      if ShowDetails then
        writeln('Elapsed time, msec: ', MilliSecondsBetween(t2, t1));
 
      t1 := Now;
      Tester.Sort;
      t2 := Now;
      m3 := GetHeapStatus.TotalAllocated;
      if ShowDetails then
      begin
        writeln('Sorted data. Size: ', m3 - m1);
        writeln('Sort elapsed time, msec: ', MilliSecondsBetween(t2, t1));
      end;
 
      t1 := Now;
      v := Tester.SequentialAccess;
      t2 := Now;
      DiffMsec := MilliSecondsBetween(t2, t1);
      if ShowDetails then
        writeln(Format(
          'Sequential access test. Elapsed, msec: %d. Average, seeks/msec: %d. Result: %d',
          [DiffMsec, Tester.TestItemsCount div DiffMsec, v]));
      AvgSeqTime := AvgSeqTime + DiffMsec;
 
      t1 := Now;
      v := Tester.RandomAccess;
      t2 := Now;
      DiffMsec := MilliSecondsBetween(t2, t1);
      if ShowDetails then
        writeln(Format(
          'Random access test. Elapsed, msec: %d. Average, seeks/msec: %d. Result: %d',
          [DiffMsec, Tester.TestItemsCount div DiffMsec, v]));
      AvgRndTime := AvgRndTime + DiffMsec;
 
      Tester.Clear;
      m4 := GetHeapStatus.TotalAllocated;
      if ShowDetails then
        writeln('Cleared. Size: ', m4 - m1);
      Tester.FreeMap;
    end;
    writeln('---');
    writeln('Avarage values. Step count: ', RunsCount);
    writeln('Memory used, bytes: ', AvgSize div RunsCount);
    writeln('Sequential access. Time elapsed, msec: ', AvgSeqTime div RunsCount);
    writeln('Sequential access. Seeks per msec: ', Tester.TestItemsCount / (AvgSeqTime / RunsCount):10:2);
    writeln('Random access. Time elapsed, msec: ', AvgRndTime div RunsCount);
    writeln('Random access. Seeks per msec: ', (Tester.TestItemsCount / (AvgRndTime / RunsCount)):10:2);
    Tester.Free;
    m5 := GetHeapStatus.TotalAllocated;
    if ShowDetails then
      writeln('Total heap memory (after disposing): ', m5);
  end;
 
var
  i, TestItemsCount, UniqueKeysCount, RunsCount, StepsCount: integer;
begin
  TestItemsCount := 1000000; // Total items count in map
  RunsCount := 10; // Number of same test runs to get average values
  StepsCount := 1; // Number of repeating with different unique keys count
  for i := 1 to StepsCount do
  begin
    UniqueKeysCount := Round(TestItemsCount * (1.0 / (i * i)));
    RunTest(TSortedMapTester.Create(TestItemsCount, UniqueKeysCount), RunsCount, false);
    RunTest(THashMapTester.Create(TestItemsCount, UniqueKeysCount), RunsCount, false);
  end;
end.

Test 1 description

Set up following values to simulate normal usage where all keys are unique (randomly generated keys are not guaranteed 100% unique values but close to it).

  RunsCount := 10;
  StepsCount := 1;

Results 1

In spite of 3.5 times more memory used, TFPHashList is about 20% faster than TFPGMap (sorted) in sequential access and is about 50% faster in random access.

-------------------
Testing TSortedMapTester, length: 1000000, unique keys: 1000000, density: 1.00
-------------------
Avarage values. Step count: 10
Memory used, bytes: 18305440
Sequential access. Time elapsed, msec: 7
Sequential access. Seeks per msec:  140845.07
Random access. Time elapsed, msec: 831
Random access. Seeks per msec:    1202.21
-------------------
Testing THashMapTester, length: 1000000, unique keys: 1000000, density: 1.00
-------------------
Avarage values. Step count: 10
Memory used, bytes: 69613088
Sequential access. Time elapsed, msec: 4
Sequential access. Seeks per msec:  204081.63
Random access. Time elapsed, msec: 456
Random access. Seeks per msec:    2191.54

Test 2

Now repeating the same test with different number of unique items. I set up following values

  RunsCount := 5;
  StepsCount := 5;

Results 2

Unique key density
Structure type
Memory used, bytes
Sequential access
Random access
Time elapsed, msec Seeks per msec Time elapsed, msec Seeks per msec
1,00 TSortedMapTester 18305440 7 135135 835 1197
1,00 THashMapTester 69599168 5 192307 454 2198

Difference 3,8 0,7 1,4 0,5 1,8
0,25 TSortedMapTester 18305440 7 142857 759 1318
0,25 THashMapTester 67889504 4 208333 374 2670

Difference 3,7 0,6 1,5 0,5 2,0
0,11 TSortedMapTester 18305440 7 142857 698 1431
0,11 THashMapTester 67878720 5 200000 333 2995

Difference 3,7 0,7 1,4 0,5 2,1
0,06 TSortedMapTester 18305440 7 138888 665 1503
0,06 THashMapTester 67832800 4 217391 255 3909

Difference 3,7 0,6 1,6 0,4 2,6
0,04 TSortedMapTester 18305440 7 142857 594 1681
0,04 THashMapTester 67833920 4 227272 223 4480

Difference 3,7 0,6 1,6 0,4 2,7

Testing platform

  • Quad core Intel(R) Core(TM) i5-3470 CPU @ 3.20GHz (only one core is used)
  • Linux Ubuntu 12.04 64 bits
  • FPC 2.6.4 with Lazarus 1.4.2
  • Code optimisation is "o1"

See also "Bencmark: array vs TList vs dynamic array".