Happy tickets benchmark

Some countries have integer numbers printed on the transport tickets, for example, in ex-USSR. The ticket is "happy" when the sum of the first half of digits equals to the last half one. Then you should eat it and make a wish.

Example:

123456 and 111222 are not happy tickets
123123 and 123222 are the happy tickets

The brute-force test should iterate through all number combinations and calculate the count of happy tickets.

No human-like optimization is required, it is only compiler test. The test compares compiler generated code quality, not the PC performance!

Assume the count of the digits in the number is 8. Then the count of happy tickets is 4816030 including "00000000".

Programs

Assembler

This non-optimized simple assembler code can be a reference for other programs as the expected minimum of the generated code quality. In other words, the "minimally poor code".

; Asm : nasm -f elf64 -l happytickets.lst  happytickets.asm
; Link: ld -s -o helloworld helloworld.o
 
section .data
  ticket_count	    dq 0
  start_time        dq 0
  stop_time         dq 0
  clock_per_msec    dq 0
  msec              dq 0
  msec_in_sec       dq 1000
  msg_result        db '. Elapsed time, msec: '
  msglen_result     equ $-msg_result
  buf               db '123546890'
  buflen            equ $-buf
  LF                db 0xA
  LFlen             equ $-LF
  timeval:
    tv_sec  dd 0
    tv_usec dd 0  
 
section .text
  global _start
 
_start:
  call get_clock_per_millisecond
  call get_start_time
 
  ; begin calculating
  mov rbx, 0 ; store count in register
  mov r8, 10
  loop_1:  
    mov r9, 10
    loop_2:  
      mov r10, 10
      loop_3:  
        mov r11, 10
        loop_4:  
          mov r12, 10
          loop_5:  
            mov r13, 10
            loop_6:  
              mov r14, 10
              loop_7:  
                mov r15, 10
                loop_8:
                  mov rax, r8
                  add rax, r9
                  add rax, r10
                  add rax, r11
                  sub rax, aboutr12
                  sub rax, r13
                  sub rax, r14
                  sub rax, r15
                  jnz not_happy
                    inc rbx
                  not_happy:  
                    dec r15
                jnz loop_8
                dec r14
              jnz loop_7
              dec r13
            jnz loop_6
            dec r12
          jnz loop_5
          dec r11
        jnz loop_4
        dec r10
      jnz loop_3
      dec r9
    jnz loop_2
    dec r8
  jnz loop_1
 
  mov [ticket_count], rbx
  ; end of calculations
 
  call get_stop_time
 
  ; elapsed time
  mov rax, [stop_time]
  sub rax, [start_time]
  mov rdx, 0                 ; load upper half of dividend with zero
  div qword [clock_per_msec] ; divide rdx:rax by value
  mov [msec], rax
 
  ; print results
  mov rax, [ticket_count]
  call print_number
  mov rcx, msg_result
  mov rdx, msglen_result
  call print_str
  mov rax, [msec]
  call print_number
  call print_newline
 
  ; exiting program
  mov rax, 1  ; the system call for exit (sys_exit)
  mov rbx, 0  ; exit with return code of 0 (no error)
  int 0x80    ; system call
  ret	
 
;------------------------
; subroutines
 
get_timestamp:
  xor rax, rax
  cpuid
  rdtsc
  ret
 
get_start_time:  
  call get_timestamp
  mov [start_time], eax
  mov [start_time + 4], edx
  ret
 
get_stop_time:  
  call get_timestamp
  mov [stop_time], eax
  mov [stop_time + 4], edx
  ret
 
get_clock_per_millisecond:  
  call get_start_time
  ; sleep for 1 seconds and 0 nanoseconds
  mov dword [tv_sec], 1
  mov dword [tv_usec], 0
  mov eax, 162
  mov ebx, timeval
  mov ecx, 0
  int 0x80
  ;
  call get_stop_time
  mov rax, [stop_time]
  sub rax, [start_time]
  mov rdx, 0
  div qword [msec_in_sec]
  mov [clock_per_msec], rax
  ret
 
print_number:
  mov rbx, buf + buflen - 1
  mov rcx, buflen
  mov rdi, 10
  next_digit:
    mov	rdx, 0
    div	rdi
    add	rdx, 48 ; '0' ASCII code
    mov	[rbx], dl
    dec	rbx
  loop next_digit
 
  mov rcx, buf
  mov rdx, buflen
  call print_str
  ret
 
print_newline:  
  mov rcx, LF
  mov rdx, LFlen
  call print_str
  ret
 
print_str:
  mov rax, 4
  mov rbx, 1
  int 0x80
  ret

Pascal

The program will compile with FreePascal, Delphi and other.

program HappyTickets;
 
{$IFDEF FPC}
{$MODE DELPHI} // Required for FPC
{$ELSE}
{$APPTYPE CONSOLE} // required for Delphi
{$ENDIF}
 
uses
  SysUtils, DateUtils;
 
var
  n1, n2, n3, n4, n5, n6, n7, n8: {$IFDEF FPC}0..9{$ELSE}integer{$ENDIF};
  TicketsCount: longint; // Required for FPC that uses int16 instead of int32 for integer type in some cases (see notes)
  d1, d2: TDateTime;
begin
  TicketsCount := 0;
  d1 := Now;
  for n1 := 0 to 9 do
    for n2 := 0 to 9 do
      for n3 := 0 to 9 do
        for n4 := 0 to 9 do
          for n5 := 0 to 9 do
            for n6 := 0 to 9 do
              for n7 := 0 to 9 do
		for n8 := 0 to 9 do
                  if n1 + n2 + n3 + n4 = n5 + n6 + n7 + n8 then
                  {$IFDEF FPC}
		    TicketsCount := TicketsCount + 1; // Inc may be slower in FPC
		  {$ELSE}
		    Inc(TicketsCount);
		  {$ENDIF}
  d2 := Now;
  writeln('Found ', TicketsCount, ' tickets. Elapsed time, msec: ', DateUtils.MilliSecondsBetween(d1, d2));
end.

To compile from command line with FPC

fpc -O2 -Cr- HappyTickets.pas

To compile from command line with Delphi 7

dcc32 -$O+ -$R- -$Q- -$I- -$C- -$D- happytickets.pas

To compile from command line with Delphi DX 10 (other XE version should be compiled too)

dcc32 -CC -VT -NSSystem -$O+ -$R- -$Q- -$I- -$C- -$D- happytickets.pas
...
dcc64 -CC -VT -NSSystem -$O+ -$R- -$Q- -$I- -$C- -$D- happytickets.pas

Comments

In FPC 3.0.0 64bits/Linux :

  • changing the type of n1-n8 from 0..9 to longint increases the time by 10%
  • changing the type of n1-n8 from 0..9 to integer doubles the time!!

In Delphi Inc() is faster until you turn on Overflow checking. See example.

About longint type requirement. With the integer type the FPC optimizer seems to replace type with shortint (16-bits) and I see 31902 instead of 4816030 in my configuration.

In Delphi XE 10 changing the type of n1-n8 from 0..9 to integer decreases the time by 60%.

C

The program will compile with GNU C, MSVC and Borland C.

#include <stdio.h>
#include <time.h>
 
int main()
{
  unsigned char n1, n2, n3, n4, n5, n6, n7, n8;
  long int tickets_count = 0;
  clock_t t1, t2;  // forward declaration to prevent MS VC 14.0 error
  double msec;
  t1 = clock();
  for (n1 = 0; n1 < 10; n1++)
    for (n2 = 0; n2 < 10; n2++)
      for (n3 = 0; n3 < 10; n3++)
        for (n4 = 0; n4 < 10; n4++)
          for (n5 = 0; n5 < 10; n5++)
            for (n6 = 0; n6 < 10; n6++)
              for (n7 = 0; n7 < 10; n7++)
                for (n8 = 0; n8 < 10; n8++)
		  if (n1 + n2 + n3 + n4 == n5 + n6 + n7 + n8)
		    tickets_count++;
  t2 = clock();
  msec = (double)(t2 - t1) / ((double)CLOCKS_PER_SEC / 1000);
  printf("Found %ld tickets. Time elapsed: %.0f msec\n", tickets_count, msec);
  return 0;
}

To compile from command line with GNU C

gcc happytickets.c -O2 -o happytickets

To compile from command line with Microsoft VC++ (change paths if need)

set PATH=C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\bin;C:\Program Files (x86)\Microsoft Visual Studio 9.0\Common7\IDE;%PATH%
set LIB=C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\lib;C:\Program Files\Microsoft SDKs\Windows\v6.0A\Lib
set INCLUDE=C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\include
cl.exe /O2 happytickets.c

To compile from command line with Borland C (change paths if need)

set PATH=C:\Borland\BCC55\Bin;%PATH%
bcc32.exe -O2 -I"C:\Borland\BCC55\Include" -L"C:\Borland\BCC55\Lib" happytickets.c

C#

The program will compile with Mono and Microsoft.NET.

using System;
 
namespace HappyTickets
{
  class Entry
  {
    public static void Main (string[] args)
    {
      byte n1, n2, n3, n4, n5, n6, n7, n8;
      int tickets_count = 0;
      DateTime d1 = DateTime.Now;
      for (n1 = 0; n1 < 10; n1++)
	for (n2 = 0; n2 < 10; n2++)
	  for (n3 = 0; n3 < 10; n3++)
	    for (n4 = 0; n4 < 10; n4++)
	      for (n5 = 0; n5 < 10; n5++)
		for (n6 = 0; n6 < 10; n6++)
		  for (n7 = 0; n7 < 10; n7++)
		    for (n8 = 0; n8 < 10; n8++)
		      if (n1 + n2 + n3 + n4 == n5 + n6 + n7 + n8)
			tickets_count++;
      DateTime d2 = DateTime.Now;
      int msec = (int)d2.Subtract(d1).TotalMilliseconds;
      Console.WriteLine("Found {0} tickets. Time elapsed: {1} msec", tickets_count, msec);
    }
  }
}

To compile with Mono from command line

mcs happytickets.cs -optimize

To compile with Microsoft .NET 4 from command line

rem set PATH=C:\Windows\Microsoft.NET\Framework\v4.0.30319;%PATH%
set PATH=C:\Windows\Microsoft.NET\Framework64\v4.0.30319;%PATH%
csc.exe happytickets.cs -optimize

Results

Series 1

HP ProBook 6550b, Intel(R) Core(TM) i5 CPU M 520, RAM 8 GB
Ubuntu 14.04 64 bits Linux 3.16.0-60-generic

Compiler Average time (for 10 runs), msec
NASM 2.10.09 64 bits 240
Free Pascal Compiler version 2.6.4 [2014/04/20] for x86_64 355
Free Pascal Compiler version 3.0.0 [2015/12/05] for x86_64 332
gcc version 4.8.4 (Ubuntu 4.8.4-2ubuntu1~14.04) 116
Mono C# compiler version 3.2.8.0 260

As we can see, FPC 3.0 looks about 7% better. Unfortunately, I cannot run Delphi test because of Linux so the results of your series are welcome.

Series 2

The results was gracefully provided by Norbert Saint Georges (tetrasys.fi).
Intel(R) Core(TM)i7 CPU 920 @2.67GHz
Win7 64bits

Compiler Average time (for 10 runs), msec
Pascal Code Typhon v 5500 32bits 371
Lazarus V1.4.4 Free Pascal Compiler v2.6.4 32bits 302
Lazarus V1.6.0 Free Pascal Compiler v3.0.0 32bits 260
Delphi 7 32bits 400 (with 0..9 type)
RemObject Oxygene 8.1.85.1801 (Pascal-like) 168
MS Visual C++ V10.0 64bits 205

Series 3

VirtualBox VM on Intel(R) Core(TM) i5-3470 CPU 3,2 GHz, 4 cores (2 cores for VM), RAM for VM 2 GB
Windows Server 2012 64 bits

Compiler Average time (for 10 runs), msec
Delphi 7 32 bits 155 (215 with 0..9 type)
Delphi XE 10.1 32 bits 187
Delphi XE 10.1 64 bits 203
Free Pascal Compiler version 2.6.4 [2015/07/11] for i386 189
Microsoft (R) Visual C# Compiler version 4.0.30319.33440 64bits 117
Microsoft (R) Visual C# Compiler version 4.0.30319.33440 32bits 125
Borland C++ 5.5.1 for Win32 188
Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 15.00.21022.08 for 80x86 125

Series 4

The results was gracefully provided by Steven Chesser (Delphi group in Facebook).
Dell M4800, Intel(R) Core(TM) i7-4900MQ CPU @2.8GHz
Win7 64bits

Compiler Average time (for 10 runs), msec
Delphi XE8 32bits 193 (with 0..9 type)
Delphi XE8 64bits 201 (with 0..9 type)
Delphi XE10 Seattle 32bits 181 (with 0..9 type)
Delphi XE10 Seattle 64bits 202 (with 0..9 type)
Delphi 7 32bits 203
Free Pascal Compiler version 3.1.1 32bit 232
Free Pascal Compiler version 3.1.1 64bits 151

Notes. FPC 3.1.1 32bit with -O3 option: 210 msec. FPC 3.1.1 64bit with -O3 option: 154 msec.

Series 5

The results was gracefully provided by OBones (nzn.fr.delphi newsgroup).
Intel Core i7 870 @ 2.93GHz, RAM 8 GB
Windows 7 x64

Compiler Average time (for 10 runs), msec
Delphi XE7 32bits 303 (with 0..9 type)
Delphi XE7 64bits 374 (with 0..9 type)
Free Pascal Compiler version 2.6.4 32bit 260
Free Pascal Compiler version 2.6.4 64bits 240