Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
package datadog.trace.util;

import java.util.HashMap;
import java.util.TreeMap;
import java.util.concurrent.ThreadLocalRandom;
import java.util.function.Supplier;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Threads;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;

/**
*
*
* <ul>
* Benchmark to illustrate the trade-offs around case-insensitive Map look-ups - using either...
* <li>(RECOMMENDED) TreeMap with Comparator of String::compareToIgnoreCase
* <li>HashMap with look-ups using String::to<X>Case
* </ul>
*
* <p>For case-insensitive lookups, TreeMap map creation is consistently faster because it avoids
* String::to<X>Case calls.
*
* <p>Despite calls to String::to<X>Case, HashMap lookups are faster in single threaded
* microbenchmark by 50% but are worse when frequently called in a multi-threaded system.
*
* <p>With many threads, the extra allocation from calling String::to<X>Case leads to frequent GCs
* which has adverse impacts on the whole system. <code>
* MacBook M1 with 1 thread (Java 21)
*
* Benchmark Mode Cnt Score Error Units
* CaseInsensitiveMapBenchmark.create_hashMap thrpt 6 994213.041 ± 15718.903 ops/s
* CaseInsensitiveMapBenchmark.create_treeMap thrpt 6 1522900.015 ± 21646.688 ops/s
*
* CaseInsensitiveMapBenchmark.get_hashMap thrpt 6 69149862.293 ± 9168648.566 ops/s
* CaseInsensitiveMapBenchmark.get_treeMap thrpt 6 42796699.230 ± 9029447.805 ops/s
* </code> <code>
* MacBook M1 with 8 threads (Java 21)
*
* Benchmark Mode Cnt Score Error Units
* CaseInsensitiveMapBenchmark.create_hashMap thrpt 6 6641003.483 ± 543210.409 ops/s
* CaseInsensitiveMapBenchmark.create_treeMap thrpt 6 10030191.764 ± 1308865.113 ops/s
*
* CaseInsensitiveMapBenchmark.get_hashMap thrpt 6 38748031.837 ± 9012072.804 ops/s
* CaseInsensitiveMapBenchmark.get_treeMap thrpt 6 173495470.789 ± 27824904.999 ops/s
* </code>
*/
@Fork(2)
@Warmup(iterations = 2)
@Measurement(iterations = 3)
@Threads(8)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've chosen to run the benchmarks with 8 threads because I found that the total system impact was often hidden when only using a single thread.

With 8 threads, solutions that perform more allocation tend to fall behind because of stop-the-world pauses caused by garbage collection.

The thinking is that in a real world system, the tracer is often running inside many application threads, so designing for many threads typically makes sense.

public class CaseInsensitiveMapBenchmark {
  /** Lower-case key prefixes; each is joined with "-" and a numeric suffix to form a map key. */
  static final String[] PREFIXES = {"foo", "bar", "baz", "quux"};

  /** Suffixes 0 (inclusive) through NUM_SUFFIXES (exclusive) are stored in the maps. */
  static final int NUM_SUFFIXES = 4;

  /**
   * Evaluates the supplier right away and hands back its result.
   *
   * <p>Used so that a static field can be initialized from a multi-statement lambda.
   *
   * @param supplier producer of the value
   * @return whatever {@code supplier.get()} returns
   */
  static <T> T init(Supplier<T> supplier) {
    return supplier.get();
  }

  /** Upper-cased counterparts of {@link #PREFIXES}, in the same order. */
  static final String[] UPPER_PREFIXES =
      init(
          () -> {
            String[] shouted = PREFIXES.clone();
            for (int idx = 0; idx < shouted.length; idx++) {
              shouted[idx] = shouted[idx].toUpperCase();
            }
            return shouted;
          });

  /**
   * 32 randomly generated look-up keys: random prefix, random case, and a suffix drawn from 0 to
   * NUM_SUFFIXES inclusive - so a suffix equal to NUM_SUFFIXES never matches a stored key.
   */
  static final String[] LOOKUP_KEYS =
      init(
          () -> {
            ThreadLocalRandom random = ThreadLocalRandom.current();

            String[] lookupKeys = new String[32];
            int slot = 0;
            while (slot < lookupKeys.length) {
              String chosenPrefix = PREFIXES[random.nextInt(PREFIXES.length)];
              boolean upperCased = random.nextBoolean();
              int chosenSuffix = random.nextInt(NUM_SUFFIXES + 1);

              String rawKey = chosenPrefix + "-" + chosenSuffix;
              if (upperCased) {
                lookupKeys[slot] = rawKey.toUpperCase();
              } else {
                lookupKeys[slot] = rawKey.toLowerCase();
              }
              slot++;
            }
            return lookupKeys;
          });

  /** Cursor into {@link #LOOKUP_KEYS}; a plain static field shared by all callers. */
  static int sharedLookupIndex = 0;

  /**
   * Advances the shared cursor and returns the key it lands on, wrapping to the first key once the
   * end of {@link #LOOKUP_KEYS} is passed.
   *
   * @return the next look-up key
   */
  static String nextLookupKey() {
    int cursor = ++sharedLookupIndex;
    if (cursor < LOOKUP_KEYS.length) {
      return LOOKUP_KEYS[cursor];
    }
    // ran off the end - restart from the beginning
    sharedLookupIndex = 0;
    return LOOKUP_KEYS[0];
  }

  /**
   * Baseline for the create_* benchmarks: builds the same key strings and boxed values as the map
   * builders, but only feeds them to the blackhole instead of storing them in a map.
   */
  @Benchmark
  public void create_baseline(Blackhole blackhole) {
    for (int n = 0; n < NUM_SUFFIXES; n++) {
      for (int p = 0; p < PREFIXES.length; p++) {
        blackhole.consume(PREFIXES[p] + "-" + n);
        blackhole.consume(Integer.valueOf(n));
      }
    }
    for (int n = 0; n < NUM_SUFFIXES; n += 2) {
      for (int p = 0; p < UPPER_PREFIXES.length; p++) {
        blackhole.consume(UPPER_PREFIXES[p] + "-" + n);
        blackhole.consume(Integer.valueOf(n + 1));
      }
    }
  }

  /** Baseline for the lookup_* benchmarks: only the cost of picking the next key. */
  @Benchmark
  public void lookup_baseline(Blackhole blackhole) {
    String key = nextLookupKey();
    blackhole.consume(key);
  }

  /** Measures building the lower-cased-key HashMap. */
  @Benchmark
  public HashMap<String, Integer> create_hashMap() {
    return _create_hashMap();
  }

  /**
   * Builds a HashMap whose keys are normalized to lower case on insertion.
   *
   * <p>Lower-case prefixes are inserted for every suffix, then upper-case prefixes for every even
   * suffix (with value suffix + 1); after normalization the latter overwrite the former.
   *
   * @return the populated map
   */
  static HashMap<String, Integer> _create_hashMap() {
    HashMap<String, Integer> byLowerKey = new HashMap<>();
    for (int n = 0; n < NUM_SUFFIXES; n++) {
      for (String lower : PREFIXES) {
        // arguable, but real caller probably doesn't know the case ahead-of-time
        String normalized = (lower + "-" + n).toLowerCase();
        byLowerKey.put(normalized, n);
      }
    }
    for (int n = 0; n < NUM_SUFFIXES; n += 2) {
      for (String upper : UPPER_PREFIXES) {
        String normalized = (upper + "-" + n).toLowerCase();
        byLowerKey.put(normalized, n + 1);
      }
    }
    return byLowerKey;
  }

  static final HashMap<String, Integer> HASH_MAP = _create_hashMap();

  /** Measures a HashMap look-up that lower-cases the key on every call. */
  @Benchmark
  public Integer lookup_hashMap() {
    // This benchmark is still "correct" in multi-threaded context,
    // Map is populated under the class initialization lock and not changed thereafter
    String normalized = nextLookupKey().toLowerCase();
    return HASH_MAP.get(normalized);
  }

  /** Measures building the case-insensitive TreeMap. */
  @Benchmark
  public TreeMap<String, Integer> create_treeMap() {
    return _create_treeMap();
  }

  /**
   * Builds a TreeMap ordered by {@link String#compareToIgnoreCase}, inserting keys as-is.
   *
   * <p>Insertion sequence mirrors {@link #_create_hashMap()}, minus the case normalization.
   *
   * @return the populated map
   */
  static TreeMap<String, Integer> _create_treeMap() {
    TreeMap<String, Integer> ignoringCase = new TreeMap<>(String::compareToIgnoreCase);
    for (int n = 0; n < NUM_SUFFIXES; n++) {
      for (String lower : PREFIXES) {
        String key = lower + "-" + n;
        ignoringCase.put(key, n);
      }
    }
    for (int n = 0; n < NUM_SUFFIXES; n += 2) {
      for (String upper : UPPER_PREFIXES) {
        String key = upper + "-" + n;
        ignoringCase.put(key, n + 1);
      }
    }
    return ignoringCase;
  }

  static final TreeMap<String, Integer> TREE_MAP = _create_treeMap();

  /** Measures a TreeMap look-up with the key passed through unchanged. */
  @Benchmark
  public Integer lookup_treeMap() {
    // This benchmark is still "correct" in multi-threaded context,
    // Map is populated under the initial class initialization lock and not changed thereafter
    String key = nextLookupKey();
    return TREE_MAP.get(key);
  }

  // TODO: Add ConcurrentSkipListMap & synchronized HashMap & TreeMap
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,18 @@
import org.openjdk.jmh.annotations.Warmup;

/**
* In contrast to java.util.Objects.hash, datadog.util.HashingUtils.hash has overrides for different
* parameter counts that allow most callers to avoid calling the var-arg version. This avoids the
* common situation where the JIT's escape analysis is unable to elide the var-arg array allocation.
*
*
* <ul>
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tweaked this benchmark's comment to adhere to the same format as the others

* Benchmark comparing HashingUtils.hash to Objects.hash
* <li>(RECOMMENDED) HashingUtils.hash - avoids var-arg creation
* <li>Objects.hash - high allocation overhead from var-args
* </ul>
*
* <p>In contrast to java.util.Objects.hash, datadog.util.HashingUtils.hash has overrides for
* different parameter counts that allow most callers to avoid calling the var-arg version. This
* avoids the common situation where the JIT's escape analysis is unable to elide the var-arg array
* allocation.
*
* <p>This results in 3-4x throughput, but more importantly no allocation as compared to GiBs / sec
* with var-args. <code>
Expand Down
128 changes: 128 additions & 0 deletions internal-api/src/jmh/java/datadog/trace/util/SetBenchmark.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
package datadog.trace.util;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.TreeSet;
import java.util.concurrent.ThreadLocalRandom;
import java.util.function.Supplier;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Threads;
import org.openjdk.jmh.annotations.Warmup;

/**
*
*
* <ul>
* Benchmark showing possible ways to represent and check if a set includes an element...
* <li>(RECOMMENDED) HashSet - on par with TreeSet - idiomatic
* <li>(RECOMMENDED) TreeSet - on par with HashSet - better solution if custom comparator is
* needed (see CaseInsensitiveMapBenchmark)
* <li>array - slower than HashSet
* <li>sortedArray - slowest - slower than array for common case of small arrays
* </ul>
*
* <code>
* MacBook M1 - 8 threads - Java 21
* 1/3 not found rate
*
* Benchmark Mode Cnt Score Error Units
* SetBenchmark.contains_array thrpt 6 645561886.327 ± 100781717.494 ops/s
* SetBenchmark.contains_hashSet thrpt 6 1536236680.235 ± 114966961.506 ops/s
* SetBenchmark.contains_sortedArray thrpt 6 571476939.441 ± 21334620.460 ops/s
* SetBenchmark.contains_treeSet thrpt 6 1557663759.411 ± 95343683.124 ops/s
* </code>
*/
@Fork(2)
@Warmup(iterations = 2)
@Measurement(iterations = 3)
@Threads(8)
public class SetBenchmark {
  /** The members of every set representation exercised by this benchmark. */
  static final String[] STRINGS =
      new String[] {
        "foo",
        "bar",
        "baz",
        "quux",
        "hello",
        "world",
        "service",
        "queryString",
        "lorem",
        "ipsum",
        "dolem",
        "sit"
      };

  /**
   * Evaluates the supplier immediately and returns its result.
   *
   * <p>Lets a static field be initialized from a multi-statement lambda.
   *
   * @param supplier producer of the value
   * @return whatever {@code supplier.get()} returns
   */
  static <T> T init(Supplier<T> supplier) {
    return supplier.get();
  }

  /**
   * Shuffled pool of look-up keys, ten times the size of {@link #STRINGS}.
   *
   * <p>Layout before shuffling: one tenth are the very same String instances as in STRINGS, one
   * tenth are equal-but-distinct copies, and the remaining eight tenths are keys that are not in
   * the set.
   */
  static final String[] LOOKUPS =
      init(
          () -> {
            // copyOf keeps the original instances in the first STRINGS.length slots
            String[] lookups = Arrays.copyOf(STRINGS, STRINGS.length * 10);

            // Equal contents but different instances - presumably so that hits cannot all be
            // resolved by a reference-identity shortcut
            for (int i = 0; i < STRINGS.length; ++i) {
              lookups[STRINGS.length + i] = new String(STRINGS[i]);
            }

            // the remaining 8 / 10 of the key look-ups miss the set
            for (int i = STRINGS.length * 2; i < lookups.length; ++i) {
              lookups[i] = "dne-" + ThreadLocalRandom.current().nextInt();
            }

            Collections.shuffle(Arrays.asList(lookups));
            return lookups;
          });

  /**
   * Cursor into {@link #LOOKUPS}. A plain (unsynchronized, non-volatile) static field that every
   * benchmark thread reads and writes.
   */
  static int sharedLookupIndex = 0;

  /**
   * Advances the shared cursor and returns the key at the new position, wrapping back to index 0
   * once the end of {@link #LOOKUPS} is reached.
   *
   * <p>Updates from concurrent callers may be lost because the cursor is not synchronized, but the
   * local copy is range-checked before it is used as an index.
   *
   * @return the next look-up key
   */
  static String nextString() {
    int localIndex = ++sharedLookupIndex;
    if (localIndex >= LOOKUPS.length) {
      sharedLookupIndex = localIndex = 0;
    }
    return LOOKUPS[localIndex];
  }

  /** Unsorted array representation; an alias of {@link #STRINGS}. */
  static final String[] ARRAY = STRINGS;

  /** Membership check by linear scan over the unsorted array. */
  @Benchmark
  public boolean contains_array() {
    String needle = nextString();
    for (String str : ARRAY) {
      if (needle.equals(str)) {
        return true;
      }
    }
    return false;
  }

  /** Sorted copy of {@link #STRINGS} (natural String ordering) for binary search. */
  static final String[] SORTED_ARRAY =
      init(
          () -> {
            String[] sorted = Arrays.copyOf(STRINGS, STRINGS.length);
            Arrays.sort(sorted);
            return sorted;
          });

  /** Membership check by binary search over the sorted array. */
  @Benchmark
  public boolean contains_sortedArray() {
    // binarySearch returns (-(insertion point) - 1) on a miss, i.e. any negative number - not
    // just -1 - so only a non-negative result means the key was found
    return Arrays.binarySearch(SORTED_ARRAY, nextString()) >= 0;
  }

  static final HashSet<String> HASH_SET = new HashSet<>(Arrays.asList(STRINGS));

  /** Membership check against the HashSet. */
  @Benchmark
  public boolean contains_hashSet() {
    return HASH_SET.contains(nextString());
  }

  static final TreeSet<String> TREE_SET = new TreeSet<>(Arrays.asList(STRINGS));

  /** Membership check against the TreeSet. */
  @Benchmark
  public boolean contains_treeSet() {
    return TREE_SET.contains(nextString());
  }
}
Loading