RejectedSoftware Forums

Sign up

Slow down after a few requests when large memory is allocated

Performance deteriorates with memory-heavy apps.

We have been running into severe performance issues with a particular server that allocates 3 GB of memory at startup and keeps it for the life of the server. The performance drop is visible when running an ab (ApacheBench) test against nothing but the ping endpoint.

  • First 10k requests (ping 6 ms) on same machine
  • Second 10k requests (ping 11 ms) on same machine
  • Third 10k requests (ping 20ms) on same machine

It then starts deteriorating. Sometimes hanging for 500ms for just pings.

I wrote a skeleton server to try to investigate the issue. The app does nothing but ping back on request. If I reserve a large amount of memory (2 GB), the issue is visible. If I do not reserve any memory, it works just fine with a consistent ping time of 1 ms.

/// Configures the HTTP server (host, bind addresses, port, error page,
/// access log) and starts listening with a router that only serves `/ping`.
void startServer()
{
	// On Linux, install the memory error handler if this runtime provides one.
	version(linux) {
		import etc.linux.memoryerror;
		static if (is(typeof(registerMemoryErrorHandler))) {
			registerMemoryErrorHandler();
		}
	}

	auto settings = new HTTPServerSettings;

	// Network endpoint; bind address and port come from the global `options`.
	settings.hostName = "searcher.ebookingservices.com";
	settings.bindAddresses = options.bindip;
	settings.port = options.port;

	// Error reporting: custom error page, stack traces enabled on top of the defaults.
	settings.errorPageHandler = toDelegate(&errorPage);
	settings.options = HTTPServerOption.defaults | HTTPServerOption.errorStackTraces;

	// Access log format and destination.
	settings.accessLogFormat = "%h - %u %t \"%r\" %s %b \"%{Referer}i\" \"%{User-Agent}i\" %D";
	settings.accessLogFile = options.accessLog;

	settings.disableDistHost = true;
	settings.useCompressionIfPossible = false;

	// The only route is the monitoring endpoint.
	auto routes = new URLRouter;
	auto handlers = routes.get("/ping", &pingHandler);

	listenHTTP(settings, handlers);
}

 /// Monitoring endpoint: answers every request with the fixed body "pong".
 void pingHandler(HTTPServerRequest req, HTTPServerResponse res)
 {
 	res.writeBody("pong");
 }



// Runtime options picked up by druntime at startup; here they configure the GC.
// Per the druntime GC configuration docs (sizes are in MB — confirm against the
// compiler version in use):
//   profile:0        - GC profiling disabled
//   minPoolSize:512  - initial and minimum pool size
//   initReserve:2048 - memory reserved up front (the "2GB" mentioned in the text)
//   heapSizeFactor:1 - target ratio of heap size to used memory
//   help             - print the list of GC options at startup
extern(C) __gshared string[] rt_options = [ "gcopt=profile:0 minPoolSize:512 initReserve:2048 heapSizeFactor:1 help" ];

version(VibeCustomMain) {
	/**
		Custom entry point used when vibe.d's built-in `main` is disabled.

		Processes the command line, starts the HTTP server, drops privileges
		and then runs the event loop until it exits.

		Returns:
			0 if command line processing requested an early exit, 1 if command
			line processing or the event loop failed, otherwise the exit code
			of `runEventLoop`.
	*/
	int main(string[] args) {
		import vibe.core.args : finalizeCommandLineOptions;
		import vibe.core.core : runEventLoop, lowerPrivileges;
		import vibe.core.log;
		import std.encoding : sanitize;

		try {
			if (!finalizeCommandLineOptions())
				return 0;
		} catch (Exception e) {
			// Report the reason instead of exiting with a bare status code.
			logError("Error processing command line: %s", e.msg);
			return 1;
		}

		startServer();

		lowerPrivileges();

		try {
			return runEventLoop();
		} catch (Throwable e) {
			// Last-resort handler: log what brought the event loop down before exiting.
			logError("Unhandled exception in event loop: %s", e.msg);
			logDiagnostic("Full exception: %s", e.toString().sanitize());
			return 1;
		}
	}
} else {
	// With vibe.d's default main, the server is registered from a module constructor.
	shared static this() {
		startServer();
	}
}

If you comment out this line:

// Quoted from the listing above: the GC runtime options (including initReserve:2048) under discussion.
extern(C) __gshared string[] rt_options = [ "gcopt=profile:0 minPoolSize:512 initReserve:2048 heapSizeFactor:1 help" ];

Then it won't reserve the memory up front, and the difference in performance is huge.

From running a profiler on the app, it looks like, with a large amount of memory allocated, vibe's array utilities are called a lot from inside the libevent driver.

Any help or insights would be appreciated.

Re: Slow down after a few requests when large memory is allocated

After further investigation, I think we found the issue. The insert function of the ArraySet implementation does a linear search to check whether the key is already there. This is fine with a small number of requests and a small application, because the GC will run frequently and keep the array of objects small. For larger applications, however, the GC cycle is much longer and the array sometimes grows to 100k entries or more. That means that for every request it will look through the entire array only to get a miss. This slows the server down to a crawl and hurts scalability.

The solution we found is to use a hash set implementation instead, which improves lookup, insertion and removal. Below is the current implementation.

/**
	Reference-counted hash set using open addressing with linear probing.

	Copies of a set share the same bucket table; the table carries a reference
	count in an `int` prefix managed by `AffixAllocator` and is released when
	the last copy goes away. Use `dup` to obtain an independent copy.

	Notes:
		- The bucket table length is always a power of two, so `hash & (length - 1)`
		  selects the home bucket.
		- A stored hash of 0 marks an empty bucket; real hashes that happen to be
		  0 are remapped to 1 (see `keyHash`).
		- The element count is kept per copy. When several copies share one
		  table, growing the table through one copy detaches it from the others,
		  and the counts of the other copies may lag behind. All probe loops are
		  bounded by the table length so that a stale count cannot cause an
		  endless loop.
*/
struct HashSet(Key)
{
	import std.experimental.allocator : makeArray, expandArray, dispose;
	import std.experimental.allocator.building_blocks.affix_allocator : AffixAllocator;

	private {
		static if (__VERSION__ < 2074) {
			struct AW { // work around AffixAllocator limitations
				IAllocator alloc;
				alias alloc this;
				enum alignment = max(Key.alignof, int.alignof);
				void[] resolveInternalPointer(void* p) { void[] ret; alloc.resolveInternalPointer(p, ret); return ret; }
			}
			alias AllocatorType = AffixAllocator!(AW, int);
		} else {
			IAllocator AW(IAllocator a) { return a; }
			alias AllocatorType = AffixAllocator!(IAllocator, int);
		}
		AllocatorType m_allocator;

		static struct Bucket {
			// Must stay 0 so that freshly allocated (default-initialized)
			// buckets are recognized as empty.
			enum hash_t EmptyHash = 0;

			hash_t hash;
			Key key;
		}

		// Number of buckets in the first table; must be a power of two.
		enum size_t initialCapacity = 1024;
		// Returned by `probe` when neither the key nor a free bucket was found.
		enum size_t notFound = size_t.max;

		Bucket[] m_entries;
		size_t m_len;
	}

	/// Drops this copy's reference to the shared table, freeing it if it was the last one.
	~this()
	@trusted {
		static if (__VERSION__ <= 2071)
			scope (failure) assert(false);
		releaseTable(m_entries);
	}

	/// Registers the new copy as an additional owner of the shared table.
	this(this)
	@trusted {
		static if (__VERSION__ <= 2071)
			scope (failure) assert(false);
		if (m_entries.ptr) {
			allocator.prefix(m_entries)++;
		}
	}

	/**
		Returns an independent copy of the set.

		The bucket table is copied element by element into a newly allocated
		table with its own reference count, so later modifications of either
		set do not affect the other.
	*/
	@property HashSet dup()
	{
		static if (__VERSION__ <= 2071)
			scope (failure) assert(false);
		HashSet ret;
		ret.m_allocator = m_allocator;

		if (m_entries.length) {
			Bucket[] duped;
			() @trusted {
				try duped = allocator.makeArray!(Bucket)(m_entries.length);
				catch (Exception e) assert(false, e.msg);
				if (!duped.length)
					assert(false, "Failed to allocate memory for duplicated "~HashSet.stringof);
				allocator.prefix(duped) = 1;
			} ();

			// Copy the contents; assigning the slice itself would merely alias
			// the original table and leak the new allocation.
			duped[] = m_entries[];

			// Count the occupied buckets instead of trusting m_len, which may
			// be stale if the table is shared with other copies.
			size_t count = 0;
			foreach (ref b; duped)
				if (b.hash != Bucket.EmptyHash)
					++count;

			ret.m_entries = duped;
			ret.m_len = count;
		}

		return ret;
	}

	/**
		Sets the allocator used for the bucket table.

		Must be called before the first element is inserted, because an
		existing table has to be released through the allocator that created it.
	*/
	void setAllocator(IAllocator allocator)
	{
		assert(!m_entries.length, "Cannot set allocator after elements have been inserted.");
		m_allocator = AllocatorType(AW(allocator));
	}

	/// Implements `key in set`.
	bool opBinaryRight(string op)(Key key) if (op == "in") { return contains(key); }

	/// Iterates over all stored keys in unspecified order.
	int opApply(int delegate(ref Key) @safe del)
	{
		foreach (ref b; m_entries)
			if (b.hash != Bucket.EmptyHash)
				if (auto ret = del(b.key))
					return ret;
		return 0;
	}

	/**
		Adds `key` to the set; does nothing if it is already present.

		The table is allocated on first use and doubled once it is about 75% full.
	*/
	void insert(Key key)
	{
		if (!m_entries.length)
			m_entries = allocateTable(initialCapacity);
		else if (m_len * 4 >= m_entries.length * 3)
			rehash();

		immutable hash = keyHash(key);
		auto idx = probe(m_entries, hash, key);
		if (idx == notFound) {
			// The table is completely full although m_len did not say so
			// (stale count due to sharing). Grow and retry.
			rehash();
			idx = probe(m_entries, hash, key);
			assert(idx != notFound, "No free bucket after growing "~HashSet.stringof);
		}

		if (m_entries[idx].hash != Bucket.EmptyHash)
			return; // key already present

		m_entries[idx].hash = hash;
		m_entries[idx].key = key;
		++m_len;
	}

	/**
		Removes `key` from the set; does nothing if it is not present.

		Uses backward-shift deletion: entries following the removed one in the
		probe sequence are moved back where necessary, so that no entry becomes
		unreachable behind the freed bucket.
	*/
	void remove(Key key)
	{
		immutable hash = keyHash(key);
		auto hole = probe(m_entries, hash, key);
		if (hole == notFound || m_entries[hole].hash == Bucket.EmptyHash)
			return; // not present (also covers the empty set)

		immutable n = m_entries.length;
		immutable mask = n - 1;
		auto scan = hole;

		// Visit each other bucket at most once; normally stops at the first empty one.
		foreach (_; 1 .. n) {
			if (++scan == n)
				scan = 0;
			if (m_entries[scan].hash == Bucket.EmptyHash)
				break;

			// An entry may stay where it is if its home bucket lies cyclically
			// within (hole, scan]; otherwise its probe sequence crosses the
			// hole and it has to be moved into it.
			immutable home = m_entries[scan].hash & mask;
			immutable stays = hole <= scan
				? (hole < home && home <= scan)
				: (hole < home || home <= scan);
			if (stays)
				continue;

			m_entries[hole] = m_entries[scan];
			hole = scan;
		}

		m_entries[hole].key = Key.init;
		m_entries[hole].hash = Bucket.EmptyHash;
		if (m_len)
			--m_len;
	}

	/// Returns `true` if `key` is stored in the set. Safe to call on an empty set.
	bool contains(Key key)
	{
		immutable hash = keyHash(key);
		immutable idx = probe(m_entries, hash, key);
		return idx != notFound && m_entries[idx].hash != Bucket.EmptyHash;
	}

	/**
		Moves all entries into a new table of twice the current size.

		This copy releases its reference to the old table; other copies that
		still reference the old table keep using it.
	*/
	void rehash()
	{
		auto old = m_entries;
		immutable capacity = old.length ? old.length * 2 : initialCapacity;
		auto fresh = allocateTable(capacity);

		size_t count = 0;
		foreach (ref bucket; old) {
			if (bucket.hash == Bucket.EmptyHash)
				continue;

			// The new table is larger than the old one, so a bucket is always found.
			immutable idx = probe(fresh, bucket.hash, bucket.key);
			if (fresh[idx].hash == Bucket.EmptyHash) {
				fresh[idx].hash = bucket.hash;
				fresh[idx].key = bucket.key;
				++count;
			}
		}

		m_entries = fresh;
		m_len = count;
		releaseTable(old);
	}

	/// Returns the allocator, lazily falling back to `vibeThreadAllocator` if none was set.
	ref allocator()
	nothrow @trusted {
		try {
			static if (__VERSION__ < 2074) auto palloc = m_allocator.parent;
			else auto palloc = m_allocator._parent;
			if (!palloc) {
				assert(vibeThreadAllocator !is null, "No theAllocator set!?");
				m_allocator = AllocatorType(AW(vibeThreadAllocator));
			}
		} catch (Exception e) assert(false, e.msg); // should never throw
		return m_allocator;
	}

	/// Hash of `key`, remapped so that it never equals the empty-bucket marker.
	private static hash_t keyHash(ref Key key)
	{
		immutable hash_t h = hashOf(key);
		return h == Bucket.EmptyHash ? cast(hash_t)1 : h;
	}

	/**
		Follows the probe sequence for `hash` in `table`.

		Returns: the index of the bucket holding `key`, or of the first empty
		bucket in the sequence, or `notFound` if the table has no buckets or
		is full without containing `key`.
	*/
	private static size_t probe(Bucket[] table, hash_t hash, ref Key key)
	{
		if (!table.length)
			return notFound;

		size_t idx = hash & (table.length - 1);
		foreach (_; 0 .. table.length) {
			if (table[idx].hash == Bucket.EmptyHash)
				return idx;
			if (table[idx].hash == hash && key == table[idx].key)
				return idx;
			if (++idx == table.length)
				idx = 0;
		}
		return notFound;
	}

	/// Allocates an all-empty table with `capacity` buckets and a reference count of 1.
	private Bucket[] allocateTable(size_t capacity)
	{
		Bucket[] table;
		() @trusted {
			try table = allocator.makeArray!Bucket(capacity);
			catch (Exception e) assert(false, e.msg);
			if (!table.length)
				assert(false, "Failed to allocate memory for "~HashSet.stringof);
			allocator.prefix(table) = 1;
		} ();
		return table;
	}

	/// Drops one reference to `table` and frees it when no references remain.
	private void releaseTable(Bucket[] table)
	{
		if (!table.length)
			return;
		() @trusted {
			if (--allocator.prefix(table) <= 0) {
				try allocator.dispose(table);
				catch (Exception e) assert(false, e.msg); // should never happen
			}
		} ();
	}
}

// Exercises insertion, lookup, shared-copy semantics, `dup` and removal.
@safe nothrow unittest {
	import std.experimental.allocator : allocatorObject;
	import std.experimental.allocator.mallocator : Mallocator;

	alias IntSet = HashSet!(int);

	IntSet s;
	s.setAllocator(() @trusted { return Mallocator.instance.allocatorObject; } ());

	// A copy taken while the set is still empty does not see later insertions.
	IntSet t;
	t = s;

	foreach (v; 1 .. 5)
		s.insert(v);
	foreach (v; 1 .. 5)
		assert(s.contains(v));
	assert(!t.contains(1));

	s.insert(5);
	assert(s.contains(5));

	// A copy taken after the table exists shares it with the original.
	t = s;
	assert(t.contains(5));
	assert(t.contains(1));

	s.insert(6);
	assert(s.contains(6));
	assert(t.contains(6));

	// Resetting one copy leaves the other one intact.
	s = IntSet.init;
	assert(!s.contains(1));
	foreach (v; [1, 6])
		assert(t.contains(v));

	// `dup` yields an independent set: removals from `t` must not show up in `s`.
	s = t.dup;
	foreach (v; [1, 6])
		assert(s.contains(v));

	t.remove(1);
	assert(!t.contains(1));
	assert(s.contains(1));
	assert(t.contains(2));
	assert(t.contains(6));

	t.remove(6);
	assert(!t.contains(6));
	assert(s.contains(6));
	assert(t.contains(5));
}

Then, in libevent2.d, change m_ownedObjects and s_threadObjects to use HashSet instead. The problem gets considerably better. The improvement is noticeable even on small applications, although not as much.

Any insights about this would help. It would also be great if this were adjusted in a new version of vibe.