mirror of
https://codeberg.org/forgejo/forgejo.git
synced 2025-01-10 23:52:16 +01:00
feat: add limited execution tracing support
- For every process that is spawned (every new non-trivial goroutine such as http requests, queues or tasks) start a [execution tracer](https://pkg.go.dev/runtime/trace). This allows very precise diagnosis of how each individual process over a time period. - It's safe and [fast](https://go.dev/blog/execution-traces-2024#low-overhead-tracing) to be run in production, hence no setting to disable this. There's only noticable overhead when tracing is actually performed and not continuous. - Proper tracing support would mean the codebase would be full of `trace.WithRegion` and `trace.Log`, which feels premature for this patch as there's no real-world usage yet to indicate which places would need this the most. So far only Git commands and SQL queries receive somewhat proper tracing support given that these are used throughout the codebase. - Make git commands a new process type. - Add tracing to diagnosis zip file.
This commit is contained in:
parent
a2e0dd829c
commit
3f44b97b5f
7 changed files with 82 additions and 24 deletions
|
@ -11,6 +11,7 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"reflect"
|
"reflect"
|
||||||
|
"runtime/trace"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
@ -163,6 +164,8 @@ func InitEngine(ctx context.Context) error {
|
||||||
Logger: errorLogger,
|
Logger: errorLogger,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
xormEngine.AddHook(&TracingHook{})
|
||||||
|
|
||||||
SetDefaultEngine(ctx, xormEngine)
|
SetDefaultEngine(ctx, xormEngine)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -318,6 +321,25 @@ func SetLogSQL(ctx context.Context, on bool) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type TracingHook struct{}
|
||||||
|
|
||||||
|
var _ contexts.Hook = &TracingHook{}
|
||||||
|
|
||||||
|
type sqlTask struct{}
|
||||||
|
|
||||||
|
func (TracingHook) BeforeProcess(c *contexts.ContextHook) (context.Context, error) {
|
||||||
|
ctx, task := trace.NewTask(c.Ctx, "sql")
|
||||||
|
ctx = context.WithValue(ctx, sqlTask{}, task)
|
||||||
|
trace.Log(ctx, "query", c.SQL)
|
||||||
|
trace.Logf(ctx, "args", "%v", c.Args)
|
||||||
|
return ctx, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (TracingHook) AfterProcess(c *contexts.ContextHook) error {
|
||||||
|
c.Ctx.Value(sqlTask{}).(*trace.Task).End()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
type SlowQueryHook struct {
|
type SlowQueryHook struct {
|
||||||
Treshold time.Duration
|
Treshold time.Duration
|
||||||
Logger log.Logger
|
Logger log.Logger
|
||||||
|
|
|
@ -13,6 +13,7 @@ import (
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
"runtime/trace"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
@ -317,12 +318,13 @@ func (c *Command) Run(opts *RunOpts) error {
|
||||||
var finished context.CancelFunc
|
var finished context.CancelFunc
|
||||||
|
|
||||||
if opts.UseContextTimeout {
|
if opts.UseContextTimeout {
|
||||||
ctx, cancel, finished = process.GetManager().AddContext(c.parentContext, desc)
|
ctx, cancel, finished = process.GetManager().AddTypedContext(c.parentContext, desc, process.GitProcessType, true)
|
||||||
} else {
|
} else {
|
||||||
ctx, cancel, finished = process.GetManager().AddContextTimeout(c.parentContext, timeout, desc)
|
ctx, cancel, finished = process.GetManager().AddTypedContextTimeout(c.parentContext, timeout, desc, process.GitProcessType, true)
|
||||||
}
|
}
|
||||||
defer finished()
|
defer finished()
|
||||||
|
|
||||||
|
trace.Log(ctx, "command", desc)
|
||||||
startTime := time.Now()
|
startTime := time.Now()
|
||||||
|
|
||||||
cmd := exec.CommandContext(ctx, c.prog, c.args...)
|
cmd := exec.CommandContext(ctx, c.prog, c.args...)
|
||||||
|
|
|
@ -7,6 +7,7 @@ package process
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"runtime/pprof"
|
"runtime/pprof"
|
||||||
|
"runtime/trace"
|
||||||
"strconv"
|
"strconv"
|
||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
|
@ -126,6 +127,27 @@ func (pm *Manager) AddTypedContext(parent context.Context, description, processT
|
||||||
return ctx, cancel, finished
|
return ctx, cancel, finished
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AddTypedContextTimeout creates a new context and adds it as a process. Once the process is finished, finished must be called
|
||||||
|
// to remove the process from the process table. It should not be called until the process is finished but must always be called.
|
||||||
|
//
|
||||||
|
// cancel should be used to cancel the returned context, however it will not remove the process from the process table.
|
||||||
|
// finished will cancel the returned context and remove it from the process table.
|
||||||
|
//
|
||||||
|
// Most processes will not need to use the cancel function but there will be cases whereby you want to cancel the process but not immediately remove it from the
|
||||||
|
// process table.
|
||||||
|
func (pm *Manager) AddTypedContextTimeout(parent context.Context, timeout time.Duration, description, processType string, currentlyRunning bool) (ctx context.Context, cancel context.CancelFunc, finished FinishedFunc) {
|
||||||
|
if timeout <= 0 {
|
||||||
|
// it's meaningless to use timeout <= 0, and it must be a bug! so we must panic here to tell developers to make the timeout correct
|
||||||
|
panic("the timeout must be greater than zero, otherwise the context will be cancelled immediately")
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx, cancel = context.WithTimeout(parent, timeout)
|
||||||
|
|
||||||
|
ctx, _, finished = pm.Add(ctx, description, cancel, processType, currentlyRunning)
|
||||||
|
|
||||||
|
return ctx, cancel, finished
|
||||||
|
}
|
||||||
|
|
||||||
// AddContextTimeout creates a new context and add it as a process. Once the process is finished, finished must be called
|
// AddContextTimeout creates a new context and add it as a process. Once the process is finished, finished must be called
|
||||||
// to remove the process from the process table. It should not be called until the process is finished but must always be called.
|
// to remove the process from the process table. It should not be called until the process is finished but must always be called.
|
||||||
//
|
//
|
||||||
|
@ -135,16 +157,7 @@ func (pm *Manager) AddTypedContext(parent context.Context, description, processT
|
||||||
// Most processes will not need to use the cancel function but there will be cases whereby you want to cancel the process but not immediately remove it from the
|
// Most processes will not need to use the cancel function but there will be cases whereby you want to cancel the process but not immediately remove it from the
|
||||||
// process table.
|
// process table.
|
||||||
func (pm *Manager) AddContextTimeout(parent context.Context, timeout time.Duration, description string) (ctx context.Context, cancel context.CancelFunc, finished FinishedFunc) {
|
func (pm *Manager) AddContextTimeout(parent context.Context, timeout time.Duration, description string) (ctx context.Context, cancel context.CancelFunc, finished FinishedFunc) {
|
||||||
if timeout <= 0 {
|
return pm.AddTypedContextTimeout(parent, timeout, description, NormalProcessType, true)
|
||||||
// it's meaningless to use timeout <= 0, and it must be a bug! so we must panic here to tell developers to make the timeout correct
|
|
||||||
panic("the timeout must be greater than zero, otherwise the context will be cancelled immediately")
|
|
||||||
}
|
|
||||||
|
|
||||||
ctx, cancel = context.WithTimeout(parent, timeout)
|
|
||||||
|
|
||||||
ctx, _, finished = pm.Add(ctx, description, cancel, NormalProcessType, true)
|
|
||||||
|
|
||||||
return ctx, cancel, finished
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add create a new process
|
// Add create a new process
|
||||||
|
@ -159,6 +172,8 @@ func (pm *Manager) Add(ctx context.Context, description string, cancel context.C
|
||||||
parentPID = ""
|
parentPID = ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ctx, traceTask := trace.NewTask(ctx, processType)
|
||||||
|
|
||||||
process := &process{
|
process := &process{
|
||||||
PID: pid,
|
PID: pid,
|
||||||
ParentPID: parentPID,
|
ParentPID: parentPID,
|
||||||
|
@ -166,6 +181,7 @@ func (pm *Manager) Add(ctx context.Context, description string, cancel context.C
|
||||||
Start: start,
|
Start: start,
|
||||||
Cancel: cancel,
|
Cancel: cancel,
|
||||||
Type: processType,
|
Type: processType,
|
||||||
|
TraceTrask: traceTask,
|
||||||
}
|
}
|
||||||
|
|
||||||
var finished FinishedFunc
|
var finished FinishedFunc
|
||||||
|
@ -218,6 +234,7 @@ func (pm *Manager) nextPID() (start time.Time, pid IDType) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (pm *Manager) remove(process *process) {
|
func (pm *Manager) remove(process *process) {
|
||||||
|
process.TraceTrask.End()
|
||||||
deleted := false
|
deleted := false
|
||||||
|
|
||||||
pm.mutex.Lock()
|
pm.mutex.Lock()
|
||||||
|
|
|
@ -5,12 +5,14 @@ package process
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"runtime/trace"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
SystemProcessType = "system"
|
SystemProcessType = "system"
|
||||||
RequestProcessType = "request"
|
RequestProcessType = "request"
|
||||||
|
GitProcessType = "git"
|
||||||
NormalProcessType = "normal"
|
NormalProcessType = "normal"
|
||||||
NoneProcessType = "none"
|
NoneProcessType = "none"
|
||||||
)
|
)
|
||||||
|
@ -23,6 +25,7 @@ type process struct {
|
||||||
Start time.Time
|
Start time.Time
|
||||||
Cancel context.CancelFunc
|
Cancel context.CancelFunc
|
||||||
Type string
|
Type string
|
||||||
|
TraceTrask *trace.Task
|
||||||
}
|
}
|
||||||
|
|
||||||
// ToProcess converts a process to a externally usable Process
|
// ToProcess converts a process to a externally usable Process
|
||||||
|
|
|
@ -6,6 +6,7 @@ package common
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"runtime/trace"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"code.gitea.io/gitea/modules/cache"
|
"code.gitea.io/gitea/modules/cache"
|
||||||
|
@ -43,6 +44,8 @@ func ProtocolMiddlewares() (handlers []any) {
|
||||||
return http.HandlerFunc(func(resp http.ResponseWriter, req *http.Request) {
|
return http.HandlerFunc(func(resp http.ResponseWriter, req *http.Request) {
|
||||||
ctx, _, finished := process.GetManager().AddTypedContext(req.Context(), fmt.Sprintf("%s: %s", req.Method, req.RequestURI), process.RequestProcessType, true)
|
ctx, _, finished := process.GetManager().AddTypedContext(req.Context(), fmt.Sprintf("%s: %s", req.Method, req.RequestURI), process.RequestProcessType, true)
|
||||||
defer finished()
|
defer finished()
|
||||||
|
trace.Log(ctx, "method", req.Method)
|
||||||
|
trace.Log(ctx, "url", req.RequestURI)
|
||||||
next.ServeHTTP(context.WrapResponseWriter(resp), req.WithContext(cache.WithCacheContext(ctx)))
|
next.ServeHTTP(context.WrapResponseWriter(resp), req.WithContext(cache.WithCacheContext(ctx)))
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
|
@ -7,6 +7,7 @@ import (
|
||||||
"archive/zip"
|
"archive/zip"
|
||||||
"fmt"
|
"fmt"
|
||||||
"runtime/pprof"
|
"runtime/pprof"
|
||||||
|
"runtime/trace"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"code.gitea.io/gitea/modules/httplib"
|
"code.gitea.io/gitea/modules/httplib"
|
||||||
|
@ -15,17 +16,12 @@ import (
|
||||||
|
|
||||||
func MonitorDiagnosis(ctx *context.Context) {
|
func MonitorDiagnosis(ctx *context.Context) {
|
||||||
seconds := ctx.FormInt64("seconds")
|
seconds := ctx.FormInt64("seconds")
|
||||||
if seconds <= 5 {
|
seconds = max(5, min(300, seconds))
|
||||||
seconds = 5
|
|
||||||
}
|
|
||||||
if seconds > 300 {
|
|
||||||
seconds = 300
|
|
||||||
}
|
|
||||||
|
|
||||||
httplib.ServeSetHeaders(ctx.Resp, &httplib.ServeHeaderOptions{
|
httplib.ServeSetHeaders(ctx.Resp, &httplib.ServeHeaderOptions{
|
||||||
ContentType: "application/zip",
|
ContentType: "application/zip",
|
||||||
Disposition: "attachment",
|
Disposition: "attachment",
|
||||||
Filename: fmt.Sprintf("gitea-diagnosis-%s.zip", time.Now().Format("20060102-150405")),
|
Filename: fmt.Sprintf("forgejo-diagnosis-%s.zip", time.Now().Format("20060102-150405")),
|
||||||
})
|
})
|
||||||
|
|
||||||
zipWriter := zip.NewWriter(ctx.Resp)
|
zipWriter := zip.NewWriter(ctx.Resp)
|
||||||
|
@ -44,14 +40,27 @@ func MonitorDiagnosis(ctx *context.Context) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
err = pprof.StartCPUProfile(f)
|
if err := pprof.StartCPUProfile(f); err != nil {
|
||||||
if err == nil {
|
|
||||||
time.Sleep(time.Duration(seconds) * time.Second)
|
|
||||||
pprof.StopCPUProfile()
|
|
||||||
} else {
|
|
||||||
_, _ = f.Write([]byte(err.Error()))
|
_, _ = f.Write([]byte(err.Error()))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
f, err = zipWriter.CreateHeader(&zip.FileHeader{Name: "trace.dat", Method: zip.Deflate, Modified: time.Now()})
|
||||||
|
if err != nil {
|
||||||
|
ctx.ServerError("Failed to create zip file", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := trace.Start(f); err != nil {
|
||||||
|
_, _ = f.Write([]byte(err.Error()))
|
||||||
|
}
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-time.After(time.Duration(seconds) * time.Second):
|
||||||
|
case <-ctx.Done():
|
||||||
|
}
|
||||||
|
pprof.StopCPUProfile()
|
||||||
|
trace.Stop()
|
||||||
|
|
||||||
f, err = zipWriter.CreateHeader(&zip.FileHeader{Name: "goroutine-after.txt", Method: zip.Deflate, Modified: time.Now()})
|
f, err = zipWriter.CreateHeader(&zip.FileHeader{Name: "goroutine-after.txt", Method: zip.Deflate, Modified: time.Now()})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
ctx.ServerError("Failed to create zip file", err)
|
ctx.ServerError("Failed to create zip file", err)
|
||||||
|
|
|
@ -7,6 +7,8 @@
|
||||||
{{svg "octicon-cpu" 16}}
|
{{svg "octicon-cpu" 16}}
|
||||||
{{else if eq .Process.Type "normal"}}
|
{{else if eq .Process.Type "normal"}}
|
||||||
{{svg "octicon-terminal" 16}}
|
{{svg "octicon-terminal" 16}}
|
||||||
|
{{else if eq .Process.Type "git"}}
|
||||||
|
{{svg "octicon-git-branch" 16}}
|
||||||
{{else}}
|
{{else}}
|
||||||
{{svg "octicon-code" 16}}
|
{{svg "octicon-code" 16}}
|
||||||
{{end}}
|
{{end}}
|
||||||
|
|
Loading…
Reference in a new issue