I have optimized a DigitalOcean Droplet using Varnish in front of Apache to serve thousands of web requests per second. When I ssh into my droplet and run:
ab -n 1024 -c 128 FULL_URL
I achieve 2000-3000 requests per second, even with higher concurrency values. Varnish is performing excellently.
However, when I run the same ApacheBench command from my local machine, I encounter frequent timeouts whenever concurrency exceeds 20-30.
Why can my setup handle hundreds of concurrent requests locally but only 20-30 over the Internet?
I then tried to run a simple ExpressJS app on the server on port 8080. Then I created many concurrent requests to see what happened. I used this Go code to benchmark the endpoint.
package main
import (
"encoding/csv"
"fmt"
"net/http"
"os"
"os/exec"
"sync"
"time"
)
// stats accumulates the results of all benchmark requests. A single value is
// shared between goroutines, so mu must be held while reading or writing any
// other field. The struct contains a mutex and must be passed by pointer.
type stats struct {
	mu sync.Mutex // guards every field below

	// Request counters; totalRequests counts every recorded request, whether
	// it succeeded or failed.
	totalRequests, successCount, failCount int

	// Latency aggregates. minDuration uses 0 to mean "no sample recorded yet".
	totalDuration, minDuration, maxDuration time.Duration

	// responseTimes holds one latency sample per request, in milliseconds,
	// in the order the samples were recorded.
	responseTimes []int64
}
// benchClient is the HTTP client shared by every worker. http.Get goes through
// http.DefaultClient, which has no timeout, so a single stalled connection
// could keep the benchmark from ever finishing. With an explicit timeout such
// a request is recorded as a failure instead.
var benchClient = &http.Client{Timeout: 30 * time.Second}

// worker performs one GET request against url and records the outcome in stat.
//
// The recorded duration is the time spent in Get; the response body is not
// read. A request counts as a success only when Get returns no error and the
// status code is 200 OK; every other outcome (transport error, timeout, any
// other status) increments failCount. wg.Done is called when worker returns.
func worker(wg *sync.WaitGroup, url string, stat *stats) {
	defer wg.Done()

	start := time.Now()
	resp, err := benchClient.Get(url)
	duration := time.Since(start)

	succeeded := false
	if err == nil {
		succeeded = resp.StatusCode == http.StatusOK
		// Close the body before taking the lock so that network cleanup never
		// happens while other workers are waiting on stat.mu.
		resp.Body.Close()
	}

	stat.mu.Lock()
	defer stat.mu.Unlock()

	stat.totalRequests++
	stat.totalDuration += duration
	stat.responseTimes = append(stat.responseTimes, duration.Milliseconds())

	// minDuration == 0 means no sample has been recorded yet.
	if stat.minDuration == 0 || duration < stat.minDuration {
		stat.minDuration = duration
	}
	if duration > stat.maxDuration {
		stat.maxDuration = duration
	}

	if succeeded {
		stat.successCount++
	} else {
		stat.failCount++
	}
}
// saveResponseTimes writes responseTimes to fileName as CSV, creating the file
// or truncating it if it already exists.
//
// The first row is the header "RequestNumber,ResponseTime(ms)". Each following
// row holds the 1-based position of a sample in responseTimes and the sample
// value itself.
//
// It returns the first error encountered while creating, writing, flushing or
// closing the file, so a nil result means the complete CSV was handed to the
// operating system.
func saveResponseTimes(responseTimes []int64, fileName string) (err error) {
	file, err := os.Create(fileName)
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close can mean buffered data never reached the file, so
		// report it unless an earlier error is already being returned.
		if cerr := file.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	writer := csv.NewWriter(file)

	// Write header.
	if werr := writer.Write([]string{"RequestNumber", "ResponseTime(ms)"}); werr != nil {
		return werr
	}

	// Write data.
	for i, ms := range responseTimes {
		record := []string{fmt.Sprintf("%d", i+1), fmt.Sprintf("%d", ms)}
		if werr := writer.Write(record); werr != nil {
			return werr
		}
	}

	// csv.Writer buffers its output; Flush does not return an error itself,
	// so the result has to be fetched with Error afterwards.
	writer.Flush()
	return writer.Error()
}
// func runPythonScript(csvFileName, outputImage string) error {
// cmd := exec.Command("py", "plot_response_times.py", csvFileName, outputImage) // Use "py" for Windows.
// cmd.Stdout = os.Stdout
// cmd.Stderr = os.Stderr
// return cmd.Run()
// }
// main runs the benchmark: it launches totalRequests worker goroutines against
// url in batches of concurrency requests, pausing sleep milliseconds between
// batches, waits for all of them, and then writes two timestamped files to the
// current directory: a CSV with every response time and a log file with the
// aggregated results. Progress and errors are printed to standard output.
func main() {
	const totalRequests = 1024
	const concurrency = 128
	const sleep = 256
	url := "http://server_ip:8080/"

	var wg sync.WaitGroup
	stat := &stats{}
	start := time.Now()

	// Start benchmarking worker goroutines.
	for i := 0; i < totalRequests; i++ {
		wg.Add(1)
		go worker(&wg, url, stat)

		// Pause once a full batch of `concurrency` requests has been launched.
		// Counting launched requests (i+1) keeps every batch the same size;
		// testing i%concurrency == 0 would pause right after the very first
		// request. No pause follows the final batch, since it would only
		// inflate the measured total duration.
		if launched := i + 1; launched%concurrency == 0 && launched < totalRequests {
			time.Sleep(sleep * time.Millisecond) // Adjust delay for ramping if needed.
		}
	}
	wg.Wait()
	duration := time.Since(start)

	// Save response times to a CSV file.
	timestamp := time.Now().Format("2006-01-02_15-04-05")
	csvFileName := fmt.Sprintf("response_times-%s.csv", timestamp)
	err := saveResponseTimes(stat.responseTimes, csvFileName)
	if err != nil {
		fmt.Println("FAILED TO SAVE RESPONSE TIMES:", err)
		return
	}
	fmt.Println("RESPONSE TIMES SAVED TO:", csvFileName)

	// Log benchmark results.
	logFileName := fmt.Sprintf("bench-%s.log", timestamp)
	file, err := os.Create(logFileName)
	if err != nil {
		fmt.Println("FAILED TO CREATE LOG FILE:", err)
		return
	}
	defer file.Close()

	// All workers have finished (wg.Wait above), so stat can be read without
	// taking its mutex.
	logContent := fmt.Sprintf(`
TARGET: %s
TOTAL REQUESTS: %d
CONCURRENCY: %d
SLEEP: %d ms
BENCHMARK RESULTS:
DATE: %s
TOTAL REQUESTS: %d
SUCCESS COUNT: %d
FAIL COUNT: %d
TOTAL DURATION: %v
AVERAGE RESPONSE TIME: %v
MIN RESPONSE TIME: %v
MAX RESPONSE TIME: %v
THROUGHPUT: %.2f REQUESTS/SEC
`, url, totalRequests, concurrency, sleep,
		time.Now().Format("Monday, 02 January 2006 15:04:05"),
		stat.totalRequests, stat.successCount, stat.failCount,
		duration, stat.totalDuration/time.Duration(stat.totalRequests),
		stat.minDuration, stat.maxDuration, float64(stat.totalRequests)/duration.Seconds(),
	)
	_, err = file.WriteString(logContent)
	if err != nil {
		fmt.Println("FAILED TO WRITE TO LOG FILE:", err)
		return
	}
	fmt.Println("BENCHMARK RESULTS SAVED TO:", logFileName)

	// The graph step is disabled. The final Println stays commented out with
	// it: outputImage is only declared inside this disabled block, and no
	// graph is produced while the step is off.
	// // Run Python script to generate the graph.
	// outputImage := fmt.Sprintf("response_time_distribution-%s.png", timestamp)
	// err = runPythonScript(csvFileName, outputImage)
	// if err != nil {
	// fmt.Println("FAILED TO RUN PYTHON SCRIPT:", err)
	// return
	// }
	// fmt.Println("RESPONSE TIME DISTRIBUTION GRAPH SAVED TO:", outputImage)
}
I get the following results. What I don’t understand is why some requests failed, whereas when I ran the benchmark locally on the server itself every request was successful.
TARGET: http://server_ip:8080/ TOTAL REQUESTS: 1024 CONCURRENCY: 128 SLEEP: 256 ms
BENCHMARK RESULTS: DATE: Saturday, 25 January 2025 12:34:12 TOTAL REQUESTS: 1024 SUCCESS COUNT: 782 FAIL COUNT: 242
TOTAL DURATION: 23.1942974s AVERAGE RESPONSE TIME: 6.230784801s MIN RESPONSE TIME: 375.5399ms MAX RESPONSE TIME: 21.0692318s
THROUGHPUT: 44.15 REQUESTS/SEC
Can someone please help me?
This textbox defaults to using Markdown to format your answer.
You can type !ref in this text area to quickly search our full set of tutorials, documentation & marketplace offerings and insert the link!
These answers are provided by our Community. If you find them useful, show some love by clicking the heart. If you run into issues leave a comment, or add your own answer to help others.
Heya @bryanilman,
What do you mean when you say you optimized Apache? Did you change the default number of workers Apache comes with? Also, have you changed the MPM to event or worker?
So first, what are Apache Workers and what are they used for:
Imagine you are in a cinema, and there are 300 chairs inside. Those are the Apache Workers, and the movie is your website. If someone wants to watch the movie (your website), they need to sit in a chair (an Apache Worker). If all 300 chairs are taken, the 301st person needs to wait for someone to leave before they can sit down and watch the movie. That’s how Apache Workers work. Once all Apache Workers are taken, a new person who wants to open your website waits for a spot to open up, and only after that can they see your website.
Usually, these limits are set to 256 by default, but they can be increased.
You can open your Apache configuration file. In there you should see something similar to:
Increase both the ServerLimit and MaxClients options, restart Apache, and see if this would solve your issue.
It’s possible they do not exist (haven’t been added) and the default values are the ones above. Adding them to the prefork content of apache mods should be fine as well.
Don’t forget to restart Apache afterward.
Heya, @bryanilman
Varnish is likely handling most of the load, but it has its own limits. Check for bottlenecks there:
Check the Varnish configuration (/etc/varnish/default.vcl or varnish.params) to adjust thread settings.
Also, if your benchmarking script uses a domain instead of an IP, DNS lookups might be slowing it down. Use the IP directly in your tests and check whether this returns different results.
Hope that this helps!