From 3f44b97b5f39ad3e8dd923528e8acd28a6f073b3 Mon Sep 17 00:00:00 2001 From: Gusted Date: Thu, 2 Jan 2025 03:43:58 +0100 Subject: [PATCH] feat: add limited execution tracing support - For every process that is spawned (every new non-trivial goroutine such as http requests, queues or tasks) start a [execution tracer](https://pkg.go.dev/runtime/trace). This allows very precise diagnosis of how each individual process over a time period. - It's safe and [fast](https://go.dev/blog/execution-traces-2024#low-overhead-tracing) to be run in production, hence no setting to disable this. There's only noticable overhead when tracing is actually performed and not continuous. - Proper tracing support would mean the codebase would be full of `trace.WithRegion` and `trace.Log`, which feels premature for this patch as there's no real-world usage yet to indicate which places would need this the most. So far only Git commands and SQL queries receive somewhat proper tracing support given that these are used throughout the codebase. - Make git commands a new process type. - Add tracing to diagnosis zip file. --- models/db/engine.go | 22 +++++++++++++++++ modules/git/command.go | 6 +++-- modules/process/manager.go | 37 +++++++++++++++++++++-------- modules/process/process.go | 3 +++ routers/common/middleware.go | 3 +++ routers/web/admin/diagnosis.go | 33 +++++++++++++++---------- templates/admin/stacktrace-row.tmpl | 2 ++ 7 files changed, 82 insertions(+), 24 deletions(-) diff --git a/models/db/engine.go b/models/db/engine.go index 61649592e7..822618a7e3 100755 --- a/models/db/engine.go +++ b/models/db/engine.go @@ -11,6 +11,7 @@ import ( "fmt" "io" "reflect" + "runtime/trace" "strings" "time" @@ -163,6 +164,8 @@ func InitEngine(ctx context.Context) error { Logger: errorLogger, }) + xormEngine.AddHook(&TracingHook{}) + SetDefaultEngine(ctx, xormEngine) return nil } @@ -318,6 +321,25 @@ func SetLogSQL(ctx context.Context, on bool) { } } +type TracingHook struct{} + +var _ contexts.Hook = &TracingHook{} + +type sqlTask struct{} + +func (TracingHook) BeforeProcess(c *contexts.ContextHook) (context.Context, error) { + ctx, task := trace.NewTask(c.Ctx, "sql") + ctx = context.WithValue(ctx, sqlTask{}, task) + trace.Log(ctx, "query", c.SQL) + trace.Logf(ctx, "args", "%v", c.Args) + return ctx, nil +} + +func (TracingHook) AfterProcess(c *contexts.ContextHook) error { + c.Ctx.Value(sqlTask{}).(*trace.Task).End() + return nil +} + type SlowQueryHook struct { Treshold time.Duration Logger log.Logger diff --git a/modules/git/command.go b/modules/git/command.go index a3d43aaec6..605816b7a2 100644 --- a/modules/git/command.go +++ b/modules/git/command.go @@ -13,6 +13,7 @@ import ( "os" "os/exec" "runtime" + "runtime/trace" "strings" "time" @@ -317,12 +318,13 @@ func (c *Command) Run(opts *RunOpts) error { var finished context.CancelFunc if opts.UseContextTimeout { - ctx, cancel, finished = process.GetManager().AddContext(c.parentContext, desc) + ctx, cancel, finished = process.GetManager().AddTypedContext(c.parentContext, desc, process.GitProcessType, true) } else { - ctx, cancel, finished = process.GetManager().AddContextTimeout(c.parentContext, timeout, desc) + ctx, cancel, finished = process.GetManager().AddTypedContextTimeout(c.parentContext, timeout, desc, process.GitProcessType, true) } defer finished() + trace.Log(ctx, "command", desc) startTime := time.Now() cmd := exec.CommandContext(ctx, c.prog, c.args...) diff --git a/modules/process/manager.go b/modules/process/manager.go index 37098ad92f..062ee1482f 100644 --- a/modules/process/manager.go +++ b/modules/process/manager.go @@ -7,6 +7,7 @@ package process import ( "context" "runtime/pprof" + "runtime/trace" "strconv" "sync" "sync/atomic" @@ -126,6 +127,27 @@ func (pm *Manager) AddTypedContext(parent context.Context, description, processT return ctx, cancel, finished } +// AddTypedContextTimeout creates a new context and adds it as a process. Once the process is finished, finished must be called +// to remove the process from the process table. It should not be called until the process is finished but must always be called. +// +// cancel should be used to cancel the returned context, however it will not remove the process from the process table. +// finished will cancel the returned context and remove it from the process table. +// +// Most processes will not need to use the cancel function but there will be cases whereby you want to cancel the process but not immediately remove it from the +// process table. +func (pm *Manager) AddTypedContextTimeout(parent context.Context, timeout time.Duration, description, processType string, currentlyRunning bool) (ctx context.Context, cancel context.CancelFunc, finished FinishedFunc) { + if timeout <= 0 { + // it's meaningless to use timeout <= 0, and it must be a bug! so we must panic here to tell developers to make the timeout correct + panic("the timeout must be greater than zero, otherwise the context will be cancelled immediately") + } + + ctx, cancel = context.WithTimeout(parent, timeout) + + ctx, _, finished = pm.Add(ctx, description, cancel, processType, currentlyRunning) + + return ctx, cancel, finished +} + // AddContextTimeout creates a new context and add it as a process. Once the process is finished, finished must be called // to remove the process from the process table. It should not be called until the process is finished but must always be called. // @@ -135,16 +157,7 @@ func (pm *Manager) AddTypedContext(parent context.Context, description, processT // Most processes will not need to use the cancel function but there will be cases whereby you want to cancel the process but not immediately remove it from the // process table. func (pm *Manager) AddContextTimeout(parent context.Context, timeout time.Duration, description string) (ctx context.Context, cancel context.CancelFunc, finished FinishedFunc) { - if timeout <= 0 { - // it's meaningless to use timeout <= 0, and it must be a bug! so we must panic here to tell developers to make the timeout correct - panic("the timeout must be greater than zero, otherwise the context will be cancelled immediately") - } - - ctx, cancel = context.WithTimeout(parent, timeout) - - ctx, _, finished = pm.Add(ctx, description, cancel, NormalProcessType, true) - - return ctx, cancel, finished + return pm.AddTypedContextTimeout(parent, timeout, description, NormalProcessType, true) } // Add create a new process @@ -159,6 +172,8 @@ func (pm *Manager) Add(ctx context.Context, description string, cancel context.C parentPID = "" } + ctx, traceTask := trace.NewTask(ctx, processType) + process := &process{ PID: pid, ParentPID: parentPID, @@ -166,6 +181,7 @@ func (pm *Manager) Add(ctx context.Context, description string, cancel context.C Start: start, Cancel: cancel, Type: processType, + TraceTrask: traceTask, } var finished FinishedFunc @@ -218,6 +234,7 @@ func (pm *Manager) nextPID() (start time.Time, pid IDType) { } func (pm *Manager) remove(process *process) { + process.TraceTrask.End() deleted := false pm.mutex.Lock() diff --git a/modules/process/process.go b/modules/process/process.go index 06a28c4a60..8947eb252f 100644 --- a/modules/process/process.go +++ b/modules/process/process.go @@ -5,12 +5,14 @@ package process import ( "context" + "runtime/trace" "time" ) var ( SystemProcessType = "system" RequestProcessType = "request" + GitProcessType = "git" NormalProcessType = "normal" NoneProcessType = "none" ) @@ -23,6 +25,7 @@ type process struct { Start time.Time Cancel context.CancelFunc Type string + TraceTrask *trace.Task } // ToProcess converts a process to a externally usable Process diff --git a/routers/common/middleware.go b/routers/common/middleware.go index 59e59b8d3f..ebc4d62d03 100644 --- a/routers/common/middleware.go +++ b/routers/common/middleware.go @@ -6,6 +6,7 @@ package common import ( "fmt" "net/http" + "runtime/trace" "strings" "code.gitea.io/gitea/modules/cache" @@ -43,6 +44,8 @@ func ProtocolMiddlewares() (handlers []any) { return http.HandlerFunc(func(resp http.ResponseWriter, req *http.Request) { ctx, _, finished := process.GetManager().AddTypedContext(req.Context(), fmt.Sprintf("%s: %s", req.Method, req.RequestURI), process.RequestProcessType, true) defer finished() + trace.Log(ctx, "method", req.Method) + trace.Log(ctx, "url", req.RequestURI) next.ServeHTTP(context.WrapResponseWriter(resp), req.WithContext(cache.WithCacheContext(ctx))) }) }) diff --git a/routers/web/admin/diagnosis.go b/routers/web/admin/diagnosis.go index 020554a35a..1c12259b06 100644 --- a/routers/web/admin/diagnosis.go +++ b/routers/web/admin/diagnosis.go @@ -7,6 +7,7 @@ import ( "archive/zip" "fmt" "runtime/pprof" + "runtime/trace" "time" "code.gitea.io/gitea/modules/httplib" @@ -15,17 +16,12 @@ import ( func MonitorDiagnosis(ctx *context.Context) { seconds := ctx.FormInt64("seconds") - if seconds <= 5 { - seconds = 5 - } - if seconds > 300 { - seconds = 300 - } + seconds = max(5, min(300, seconds)) httplib.ServeSetHeaders(ctx.Resp, &httplib.ServeHeaderOptions{ ContentType: "application/zip", Disposition: "attachment", - Filename: fmt.Sprintf("gitea-diagnosis-%s.zip", time.Now().Format("20060102-150405")), + Filename: fmt.Sprintf("forgejo-diagnosis-%s.zip", time.Now().Format("20060102-150405")), }) zipWriter := zip.NewWriter(ctx.Resp) @@ -44,14 +40,27 @@ func MonitorDiagnosis(ctx *context.Context) { return } - err = pprof.StartCPUProfile(f) - if err == nil { - time.Sleep(time.Duration(seconds) * time.Second) - pprof.StopCPUProfile() - } else { + if err := pprof.StartCPUProfile(f); err != nil { _, _ = f.Write([]byte(err.Error())) } + f, err = zipWriter.CreateHeader(&zip.FileHeader{Name: "trace.dat", Method: zip.Deflate, Modified: time.Now()}) + if err != nil { + ctx.ServerError("Failed to create zip file", err) + return + } + + if err := trace.Start(f); err != nil { + _, _ = f.Write([]byte(err.Error())) + } + + select { + case <-time.After(time.Duration(seconds) * time.Second): + case <-ctx.Done(): + } + pprof.StopCPUProfile() + trace.Stop() + f, err = zipWriter.CreateHeader(&zip.FileHeader{Name: "goroutine-after.txt", Method: zip.Deflate, Modified: time.Now()}) if err != nil { ctx.ServerError("Failed to create zip file", err) diff --git a/templates/admin/stacktrace-row.tmpl b/templates/admin/stacktrace-row.tmpl index 97c361ff90..048056cf4e 100644 --- a/templates/admin/stacktrace-row.tmpl +++ b/templates/admin/stacktrace-row.tmpl @@ -7,6 +7,8 @@ {{svg "octicon-cpu" 16}} {{else if eq .Process.Type "normal"}} {{svg "octicon-terminal" 16}} + {{else if eq .Process.Type "git"}} + {{svg "octicon-git-branch" 16}} {{else}} {{svg "octicon-code" 16}} {{end}}