145 lines
3.0 KiB
Plaintext
145 lines
3.0 KiB
Plaintext
|
|
Used relations
|
||
|
|
==============
|
||
|
|
|
||
|
|
let plz100 = plz feed thousand head[100] feed product
|
||
|
|
extend[ran: randint(50000)] sortby[ran asc] remove[ran] consume
|
||
|
|
|
||
|
|
let plz100Even = plz100 feed extend[PLZE: .PLZ * 2]
|
||
|
|
project[PLZE, Ort, no] consume
|
||
|
|
|
||
|
|
let plz100Odd = plz100 feed extend[PLZO: (.PLZ * 2) + 1]
|
||
|
|
project[PLZO, Ort, no] consume
|
||
|
|
|
||
|
|
Tuples: 4126700 with ~25 bytes => 98 MBytes data
|
||
|
|
|
||
|
|
Constant t_result
|
||
|
|
=================
|
||
|
|
|
||
|
|
First perform a query that generates no result tuples.
|
||
|
|
Second perform a query that generates result tuples.
|
||
|
|
All tuples can be held in memory (internal case)
|
||
|
|
|
||
|
|
query plz100Even feed head[41267*10] {a} plz100Odd feed
|
||
|
|
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZO_b, 1000] count
|
||
|
|
|
||
|
|
=> Result 0
|
||
|
|
=> Times in sec
|
||
|
|
|
||
|
|
3.01
|
||
|
|
2.99
|
||
|
|
3.00
|
||
|
|
|
||
|
|
query plz100Even feed head[41267*10] {a} plz100Even feed
|
||
|
|
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] count
|
||
|
|
|
||
|
|
=> Result 25177798
|
||
|
|
=> Times in sec
|
||
|
|
|
||
|
|
112.79
|
||
|
|
112.54
|
||
|
|
113.00
|
||
|
|
|
||
|
|
Difference 112.78 - 3.00 = 109.78
|
||
|
|
per tuple 109.78 / 25177798 = 0.000004360 sec
|
||
|
|
|
||
|
|
Constant t_write
|
||
|
|
================
|
||
|
|
|
||
|
|
Internal case
|
||
|
|
|
||
|
|
query plz100Even feed head[41267*10] {a} plz100Even feed
|
||
|
|
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
|
||
|
|
|
||
|
|
=> Result 1
|
||
|
|
=> Times in sec
|
||
|
|
|
||
|
|
1.82
|
||
|
|
1.80
|
||
|
|
1.92
|
||
|
|
|
||
|
|
External case (each partition fits into memory)
|
||
|
|
|
||
|
|
query plz100Even feed head[41267*10] {a} plz100Even feed head[41267*10] {b}
|
||
|
|
hybridhashjoinParam[PLZE_a, PLZE_b, 1000,32,1024*1024,4096] head[1] count
|
||
|
|
|
||
|
|
=> Result 1
|
||
|
|
=> Times in sec
|
||
|
|
|
||
|
|
2.32
|
||
|
|
2.28
|
||
|
|
2.31
|
||
|
|
|
||
|
|
Difference 2.30 - 1.85 = 0.45
|
||
|
|
per tuple 0.45 / 412670 = 0.000001090 sec
|
||
|
|
|
||
|
|
This equals 9.8 MB / 0.4498103 sec (= 412670 * 0.000001090) => 21.78 MB/sec transfer rate of the disk.
|
||
|
|
|
||
|
|
Constant t_read
|
||
|
|
===============
|
||
|
|
|
||
|
|
Assumed to be symmetric to t_write => 0.000001090 sec
|
||
|
|
|
||
|
|
Constant t_hash
|
||
|
|
===============
|
||
|
|
|
||
|
|
The result stream is cut off after one result tuple. This is approximately the
|
||
|
|
case when the first tuple from stream A and partition 0 matches any tuple
|
||
|
|
of the corresponding partition in stream B. Then we simply adjust the size
|
||
|
|
of stream B to get a longer partitioning phase. The difference represents
|
||
|
|
the cost for hashing and write operations. With the above determined constant
|
||
|
|
t_write we can make an estimate for t_hash.
|
||
|
|
|
||
|
|
External case
|
||
|
|
|
||
|
|
query plz100Even feed {a} plz100Even feed head[41267*50] {b}
|
||
|
|
hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
|
||
|
|
|
||
|
|
=> Result 1
|
||
|
|
=> Times in sec
|
||
|
|
|
||
|
|
11.95
|
||
|
|
11.88
|
||
|
|
11.93
|
||
|
|
|
||
|
|
query plz100Even feed {a} plz100Even feed head[41267*100] {b}
|
||
|
|
hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
|
||
|
|
|
||
|
|
=> Result 1
|
||
|
|
=> Times in sec
|
||
|
|
|
||
|
|
22.55
|
||
|
|
22.96
|
||
|
|
22.76
|
||
|
|
|
||
|
|
|
||
|
|
Difference 22.76 - 11.92 = 10.84
|
||
|
|
per tuple 10.84 / ( 50 * 41267 ) - t_write = 0.000004163 sec
|
||
|
|
|
||
|
|
|
||
|
|
Constant t_probe
|
||
|
|
================
|
||
|
|
|
||
|
|
query plz100Even feed head[41267*20] {a} plz100Odd feed head[41267*20] {b}
|
||
|
|
hybridhashjoinParam[PLZE_a, PLZO_b, 1000,32,1024*1024,4096] count
|
||
|
|
|
||
|
|
=> Result 0
|
||
|
|
=> Times in sec
|
||
|
|
|
||
|
|
12.85
|
||
|
|
12.82
|
||
|
|
12.92
|
||
|
|
|
||
|
|
query plz100Even feed head[41267*30] {a} plz100Odd feed head[41267*20] {b}
|
||
|
|
hybridhashjoinParam[PLZE_a, PLZO_b, 1000,32,1024*1024,4096] count
|
||
|
|
|
||
|
|
=> Result 0
|
||
|
|
=> Times in sec
|
||
|
|
|
||
|
|
16.06
|
||
|
|
16.24
|
||
|
|
16.06
|
||
|
|
|
||
|
|
Difference 16.12 - 12.86 = 3.26
|
||
|
|
per tuple 3.26 / 412670 - t_write - t_hash - t_read = 0.000001557 sec
|
||
|
|
|