After further investigation, I think we found the issue. The ArraySet implementation's insert function does a linear search to check whether the key is already present. That is fine for a small application with few requests, because the GC runs frequently and keeps the array of objects small. For larger applications, however, the GC cycle is much longer and the array sometimes grows to 100k entries or more, so every request ends up scanning the entire array only to get a miss. This slows the server to a crawl and hurts scalability.
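For reference, this is roughly the kind of scan involved (a simplified, hypothetical reduction of the ArraySet lookup path, not the actual vibe.d code):

struct LinearSet(Key)
{
	Key[] m_entries;

	// every lookup walks the whole array -- O(n) per request once the
	// array has grown to 100k+ entries
	bool contains(Key key)
	{
		foreach (ref e; m_entries)
			if (e == key)
				return true;
		return false; // a miss is only detected after scanning everything
	}

	// every insert pays for the full containment scan first
	void insert(Key key)
	{
		if (!contains(key))
			m_entries ~= key;
	}
}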

The solution we came up with is a hash-set implementation that speeds up lookup, insertion, and removal. Below is the current implementation.

struct HashSet(Key)
{
	import std.algorithm.comparison : max;
	import std.experimental.allocator : IAllocator, makeArray, expandArray, dispose;
	import std.experimental.allocator.building_blocks.affix_allocator : AffixAllocator;
	import vibe.internal.allocator : vibeThreadAllocator;

	private {
		static if (__VERSION__ < 2074) {
			struct AW { // work around AffixAllocator limitations
				IAllocator alloc;
				alias alloc this;
				enum alignment = max(Key.alignof, int.alignof);
				void[] resolveInternalPointer(void* p) { void[] ret; alloc.resolveInternalPointer(p, ret); return ret; }
			}
			alias AllocatorType = AffixAllocator!(AW, int);
		} else {
			IAllocator AW(IAllocator a) { return a; }
			alias AllocatorType = AffixAllocator!(IAllocator, int);
		}
		//bool[Key] m_entries;
		AllocatorType m_allocator;

		static struct Bucket {
			//enum EmptyHash = hash_t.max;
			enum EmptyHash = 0;

			hash_t hash;
			Key key;
		}


		// open-addressed bucket array using linear probing; the table length is
		// kept at a power of two so that `hash & (length - 1)` selects a bucket
		Bucket[] m_entries;
		// number of occupied buckets
		size_t m_len;
	}

	~this()
	@trusted {
		static if (__VERSION__ <= 2071)
			scope (failure) assert(false);
		if (m_entries.ptr) {
			if (--allocator.prefix(m_entries) <= 0) {
				try allocator.dispose(m_entries);
				catch (Exception e) assert(false, e.msg); // should never happen
			}
		}
	}

	this(this)
	@trusted {
		static if (__VERSION__ <= 2071)
			scope (failure) assert(false);
		if (m_entries.ptr) {
			allocator.prefix(m_entries)++;
		}
	}

	@property HashSet dup()
	{
		static if (__VERSION__ <= 2071)
			scope (failure) assert(false);
		HashSet ret;
		ret.m_allocator = m_allocator;

		if (m_entries.length) {
			Bucket[] duped;
			() @trusted {
				try duped = allocator.makeArray!(Bucket)(m_entries.length);
				catch (Exception e) assert(false, e.msg);
				if (!duped.length)
					assert(false, "Failed to allocate memory for duplicated "~HashSet.stringof);
				allocator.prefix(duped) = 1;
			} ();
			duped[] = m_entries[]; // copy the bucket contents, not just the slice reference
			ret.m_entries = duped;
			ret.m_len = m_len;
		}

		return ret;
	}

	void setAllocator(IAllocator allocator)
	in { assert(!m_entries.ptr, "Cannot set the allocator after entries have been inserted."); }
	body {
		m_allocator = AllocatorType(AW(allocator));
	}

	bool opBinaryRight(string op)(Key key) if (op == "in") { return contains(key); }

	int opApply(int delegate(ref Key) @safe del)
	{
		foreach (ref b; m_entries)
			if (b.hash != Bucket.EmptyHash)
				if (auto ret = del(b.key))
					return ret;
		return 0;
	}

	void insert(Key key)
	{
		try{
			() @trusted {
				if (!m_entries.length) {
					m_entries = allocator.makeArray!Bucket(1024);
					assert(m_entries.length, "Failed to allocate memory for "~HashSet.stringof);
					allocator.prefix(m_entries) = 1;
				}

				if (m_len * 4 / 3 > m_entries.length) {
					// keep the load factor at roughly 75% or below; rehash doubles the table
					rehash();
				}

				auto mask = m_entries.length - 1;
				auto hash = hashOf(key);
				if (hash == Bucket.EmptyHash) hash = hash_t.max; // must not collide with the empty-bucket marker
				auto base = hash & mask;

				//writefln("Size %s Base %s", m_entries.length, m_entries[20]);

				while (true) {
					if (m_entries[base].hash == Bucket.EmptyHash) {

						m_entries[base].hash = hash;
						m_entries[base].key = key;
						++m_len;
						break;
					} else if ((m_entries[base].hash == hash) && (key == m_entries[base].key)) {
						break;
					}

					base += 1;
					if (base == m_entries.length)
						base = 0;
				}
			} ();
		} catch (Exception e){
			return;
		}
	}

	void remove(Key key) {
		if (!m_entries.length) return; // nothing has been inserted yet

		auto mask = m_entries.length - 1;
		auto hash = hashOf(key);
		if (hash == Bucket.EmptyHash) hash = hash_t.max; // same remapping as in insert()
		auto base = hash & mask;

		// locate the bucket holding the key
		while (true) {
			if (m_entries[base].hash == Bucket.EmptyHash)
				return; // not present
			if ((m_entries[base].hash == hash) && (key == m_entries[base].key))
				break;
			base = (base + 1) & mask;
		}

		--m_len;

		// backward-shift deletion: pull later entries of the same probe chain
		// into the hole so that lookups never hit a gap in their chain
		auto hole = base;
		auto idx = (hole + 1) & mask;
		while (m_entries[idx].hash != Bucket.EmptyHash) {
			auto ideal = m_entries[idx].hash & mask;
			if (((idx - ideal) & mask) >= ((idx - hole) & mask)) {
				m_entries[hole] = m_entries[idx];
				hole = idx;
			}
			idx = (idx + 1) & mask;
		}
		m_entries[hole].key = Key.init;
		m_entries[hole].hash = Bucket.EmptyHash;
	}

	bool contains(Key key) {
		if (!m_entries.length) return false; // nothing has been inserted yet

		auto mask = m_entries.length - 1;
		auto hash = hashOf(key);
		if (hash == Bucket.EmptyHash) hash = hash_t.max; // same remapping as in insert()
		auto base = hash & mask;

		while (true) {
			if (m_entries[base].hash == Bucket.EmptyHash) {
				return false;
			} else if ((m_entries[base].hash == hash) && (key == m_entries[base].key)) {
				return true;
			}

			base += 1;
			if (base == m_entries.length)
				base = 0;
		}
	}

	void rehash() {
		() @trusted {
			auto oldArray = m_entries;
			// grow by doubling so that the table length stays a power of two
			m_entries = allocator.makeArray!Bucket(max(64, oldArray.length * 2));
			assert(m_entries.length, "Failed to allocate memory for "~HashSet.stringof);
			allocator.prefix(m_entries) = 1;
			m_len = 0;
			auto mask = m_entries.length - 1;
			foreach (ref bucket; oldArray) {
				if (bucket.hash != Bucket.EmptyHash) { // re-insert only the occupied buckets
					auto base = bucket.hash & mask;

					while (true) {
						if (m_entries[base].hash == Bucket.EmptyHash) {
							m_entries[base].hash = bucket.hash;
							m_entries[base].key = bucket.key;
							++m_len;
							break;
						} else if ((m_entries[base].hash == bucket.hash) && (bucket.key == m_entries[base].key)) {
							break;
						}

						base += 1;
						if (base == m_entries.length)
							base = 0;
					}
				}
			}

			// drop our reference to the old table and free it if no other copy still uses it
			if (oldArray.ptr && --allocator.prefix(oldArray) <= 0)
				allocator.dispose(oldArray);
		} ();
	}

	ref allocator()
	nothrow @trusted {
		try {
			static if (__VERSION__ < 2074) auto palloc = m_allocator.parent;
			else auto palloc = m_allocator._parent;
			if (!palloc) {
				assert(vibeThreadAllocator !is null, "No theAllocator set!?");
				m_allocator = AllocatorType(AW(vibeThreadAllocator));
			}
		} catch (Exception e) assert(false, e.msg); // should never throw
		return m_allocator;
	}
}

@safe nothrow unittest {
	import std.experimental.allocator : allocatorObject;
	import std.experimental.allocator.mallocator : Mallocator;

	HashSet!(int) s;
	s.setAllocator(() @trusted { return Mallocator.instance.allocatorObject; } ());

	HashSet!(int) t;
	t = s;

	s.insert(1);
	s.insert(2);
	s.insert(3);
	s.insert(4);
	assert(s.contains(1));
	assert(s.contains(2));
	assert(s.contains(3));
	assert(s.contains(4));
	assert(!t.contains(1));

	s.insert(5);
	assert(s.contains(5));

	t = s;
	assert(t.contains(5));
	assert(t.contains(1));

	s.insert(6);
	assert(s.contains(6));
	assert(t.contains(6));

	s = HashSet!(int).init;
	assert(!s.contains(1));
	assert(t.contains(1));
	assert(t.contains(6));

	s = t.dup;
	assert(s.contains(1));
	assert(s.contains(6));

	t.remove(1);
	assert(!t.contains(1));
	assert(s.contains(1));
	assert(t.contains(2));
	assert(t.contains(6));

	t.remove(6);
	assert(!t.contains(6));
	assert(s.contains(6));
	assert(t.contains(5));
}

Then, in libevent2.d, change m_ownedObjects and s_threadObjects to use HashSet instead of ArraySet. With that change the problem gets considerably better; the improvement is even noticeable in small applications, though to a lesser degree.
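The driver-side change is essentially a type swap, roughly along these lines (the declarations below are written from memory and may not match libevent2.d exactly):

	// libevent2.d -- sketch only, verify against the actual field declarations
	//ArraySet!size_t m_ownedObjects;   // current: every lookup is a linear scan
	HashSet!size_t m_ownedObjects;      // proposed: hash-based lookup
	// ...and the same swap for s_threadObjects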

Any insights about this would help. It would also be great if this could be addressed in a new vibe.d release.