mirror of
https://codeberg.org/forgejo/forgejo.git
synced 2025-01-25 14:53:15 +01:00
feat: improve Forgejo diagnostics (#6470)
Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/6470 Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org> Reviewed-by: 0ko <0ko@noreply.codeberg.org> Reviewed-by: Otto <otto@codeberg.org>
This commit is contained in:
commit
5bfae7241c
11 changed files with 194 additions and 40 deletions
|
@ -11,6 +11,7 @@ import (
|
|||
"fmt"
|
||||
"io"
|
||||
"reflect"
|
||||
"runtime/trace"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
|
@ -163,6 +164,8 @@ func InitEngine(ctx context.Context) error {
|
|||
Logger: errorLogger,
|
||||
})
|
||||
|
||||
xormEngine.AddHook(&TracingHook{})
|
||||
|
||||
SetDefaultEngine(ctx, xormEngine)
|
||||
return nil
|
||||
}
|
||||
|
@ -318,6 +321,25 @@ func SetLogSQL(ctx context.Context, on bool) {
|
|||
}
|
||||
}
|
||||
|
||||
type TracingHook struct{}
|
||||
|
||||
var _ contexts.Hook = &TracingHook{}
|
||||
|
||||
type sqlTask struct{}
|
||||
|
||||
func (TracingHook) BeforeProcess(c *contexts.ContextHook) (context.Context, error) {
|
||||
ctx, task := trace.NewTask(c.Ctx, "sql")
|
||||
ctx = context.WithValue(ctx, sqlTask{}, task)
|
||||
trace.Log(ctx, "query", c.SQL)
|
||||
trace.Logf(ctx, "args", "%v", c.Args)
|
||||
return ctx, nil
|
||||
}
|
||||
|
||||
func (TracingHook) AfterProcess(c *contexts.ContextHook) error {
|
||||
c.Ctx.Value(sqlTask{}).(*trace.Task).End()
|
||||
return nil
|
||||
}
|
||||
|
||||
type SlowQueryHook struct {
|
||||
Treshold time.Duration
|
||||
Logger log.Logger
|
||||
|
|
|
@ -13,6 +13,7 @@ import (
|
|||
"os"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"runtime/trace"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
|
@ -317,12 +318,13 @@ func (c *Command) Run(opts *RunOpts) error {
|
|||
var finished context.CancelFunc
|
||||
|
||||
if opts.UseContextTimeout {
|
||||
ctx, cancel, finished = process.GetManager().AddContext(c.parentContext, desc)
|
||||
ctx, cancel, finished = process.GetManager().AddTypedContext(c.parentContext, desc, process.GitProcessType, true)
|
||||
} else {
|
||||
ctx, cancel, finished = process.GetManager().AddContextTimeout(c.parentContext, timeout, desc)
|
||||
ctx, cancel, finished = process.GetManager().AddTypedContextTimeout(c.parentContext, timeout, desc, process.GitProcessType, true)
|
||||
}
|
||||
defer finished()
|
||||
|
||||
trace.Log(ctx, "command", desc)
|
||||
startTime := time.Now()
|
||||
|
||||
cmd := exec.CommandContext(ctx, c.prog, c.args...)
|
||||
|
|
|
@ -7,6 +7,7 @@ package process
|
|||
import (
|
||||
"context"
|
||||
"runtime/pprof"
|
||||
"runtime/trace"
|
||||
"strconv"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
@ -126,6 +127,27 @@ func (pm *Manager) AddTypedContext(parent context.Context, description, processT
|
|||
return ctx, cancel, finished
|
||||
}
|
||||
|
||||
// AddTypedContextTimeout creates a new context and adds it as a process. Once the process is finished, finished must be called
|
||||
// to remove the process from the process table. It should not be called until the process is finished but must always be called.
|
||||
//
|
||||
// cancel should be used to cancel the returned context, however it will not remove the process from the process table.
|
||||
// finished will cancel the returned context and remove it from the process table.
|
||||
//
|
||||
// Most processes will not need to use the cancel function but there will be cases whereby you want to cancel the process but not immediately remove it from the
|
||||
// process table.
|
||||
func (pm *Manager) AddTypedContextTimeout(parent context.Context, timeout time.Duration, description, processType string, currentlyRunning bool) (ctx context.Context, cancel context.CancelFunc, finished FinishedFunc) {
|
||||
if timeout <= 0 {
|
||||
// it's meaningless to use timeout <= 0, and it must be a bug! so we must panic here to tell developers to make the timeout correct
|
||||
panic("the timeout must be greater than zero, otherwise the context will be cancelled immediately")
|
||||
}
|
||||
|
||||
ctx, cancel = context.WithTimeout(parent, timeout)
|
||||
|
||||
ctx, _, finished = pm.Add(ctx, description, cancel, processType, currentlyRunning)
|
||||
|
||||
return ctx, cancel, finished
|
||||
}
|
||||
|
||||
// AddContextTimeout creates a new context and add it as a process. Once the process is finished, finished must be called
|
||||
// to remove the process from the process table. It should not be called until the process is finished but must always be called.
|
||||
//
|
||||
|
@ -135,16 +157,7 @@ func (pm *Manager) AddTypedContext(parent context.Context, description, processT
|
|||
// Most processes will not need to use the cancel function but there will be cases whereby you want to cancel the process but not immediately remove it from the
|
||||
// process table.
|
||||
func (pm *Manager) AddContextTimeout(parent context.Context, timeout time.Duration, description string) (ctx context.Context, cancel context.CancelFunc, finished FinishedFunc) {
|
||||
if timeout <= 0 {
|
||||
// it's meaningless to use timeout <= 0, and it must be a bug! so we must panic here to tell developers to make the timeout correct
|
||||
panic("the timeout must be greater than zero, otherwise the context will be cancelled immediately")
|
||||
}
|
||||
|
||||
ctx, cancel = context.WithTimeout(parent, timeout)
|
||||
|
||||
ctx, _, finished = pm.Add(ctx, description, cancel, NormalProcessType, true)
|
||||
|
||||
return ctx, cancel, finished
|
||||
return pm.AddTypedContextTimeout(parent, timeout, description, NormalProcessType, true)
|
||||
}
|
||||
|
||||
// Add create a new process
|
||||
|
@ -159,6 +172,8 @@ func (pm *Manager) Add(ctx context.Context, description string, cancel context.C
|
|||
parentPID = ""
|
||||
}
|
||||
|
||||
ctx, traceTask := trace.NewTask(ctx, processType)
|
||||
|
||||
process := &process{
|
||||
PID: pid,
|
||||
ParentPID: parentPID,
|
||||
|
@ -166,6 +181,7 @@ func (pm *Manager) Add(ctx context.Context, description string, cancel context.C
|
|||
Start: start,
|
||||
Cancel: cancel,
|
||||
Type: processType,
|
||||
TraceTrask: traceTask,
|
||||
}
|
||||
|
||||
var finished FinishedFunc
|
||||
|
@ -218,6 +234,7 @@ func (pm *Manager) nextPID() (start time.Time, pid IDType) {
|
|||
}
|
||||
|
||||
func (pm *Manager) remove(process *process) {
|
||||
process.TraceTrask.End()
|
||||
deleted := false
|
||||
|
||||
pm.mutex.Lock()
|
||||
|
|
|
@ -4,8 +4,8 @@
|
|||
package process
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"runtime/pprof"
|
||||
"sort"
|
||||
"time"
|
||||
|
@ -175,13 +175,12 @@ func (pm *Manager) ProcessStacktraces(flat, noSystem bool) ([]*Process, int, int
|
|||
// Now from within the lock we need to get the goroutines.
|
||||
// Why? If we release the lock then between between filling the above map and getting
|
||||
// the stacktraces another process could be created which would then look like a dead process below
|
||||
reader, writer := io.Pipe()
|
||||
defer reader.Close()
|
||||
go func() {
|
||||
err := pprof.Lookup("goroutine").WriteTo(writer, 0)
|
||||
_ = writer.CloseWithError(err)
|
||||
}()
|
||||
stacks, err = profile.Parse(reader)
|
||||
var buf bytes.Buffer
|
||||
if err := pprof.Lookup("goroutine").WriteTo(&buf, 0); err != nil {
|
||||
return nil, 0, 0, err
|
||||
}
|
||||
|
||||
stacks, err = profile.ParseData(buf.Bytes())
|
||||
if err != nil {
|
||||
return nil, 0, 0, err
|
||||
}
|
||||
|
|
93
modules/process/manager_stacktraces_test.go
Normal file
93
modules/process/manager_stacktraces_test.go
Normal file
|
@ -0,0 +1,93 @@
|
|||
// Copyright 2025 The Forgejo Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
package process
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestProcessStacktraces(t *testing.T) {
|
||||
_, _, finish := GetManager().AddContext(context.Background(), "Normal process")
|
||||
defer finish()
|
||||
parentCtx, _, finish := GetManager().AddContext(context.Background(), "Children normal process")
|
||||
defer finish()
|
||||
_, _, finish = GetManager().AddContext(parentCtx, "Children process")
|
||||
defer finish()
|
||||
_, _, finish = GetManager().AddTypedContext(context.Background(), "System process", SystemProcessType, true)
|
||||
defer finish()
|
||||
|
||||
t.Run("No flat with no system process", func(t *testing.T) {
|
||||
processes, processCount, _, err := GetManager().ProcessStacktraces(false, true)
|
||||
require.NoError(t, err)
|
||||
assert.EqualValues(t, 4, processCount)
|
||||
assert.Len(t, processes, 2)
|
||||
|
||||
assert.EqualValues(t, "Children normal process", processes[0].Description)
|
||||
assert.EqualValues(t, NormalProcessType, processes[0].Type)
|
||||
assert.Empty(t, processes[0].ParentPID)
|
||||
assert.Len(t, processes[0].Children, 1)
|
||||
|
||||
assert.EqualValues(t, "Children process", processes[0].Children[0].Description)
|
||||
assert.EqualValues(t, processes[0].PID, processes[0].Children[0].ParentPID)
|
||||
|
||||
assert.EqualValues(t, "Normal process", processes[1].Description)
|
||||
assert.EqualValues(t, NormalProcessType, processes[1].Type)
|
||||
assert.Empty(t, processes[1].ParentPID)
|
||||
assert.Empty(t, processes[1].Children)
|
||||
})
|
||||
|
||||
t.Run("Flat with no system process", func(t *testing.T) {
|
||||
processes, processCount, _, err := GetManager().ProcessStacktraces(true, true)
|
||||
require.NoError(t, err)
|
||||
assert.EqualValues(t, 4, processCount)
|
||||
assert.Len(t, processes, 3)
|
||||
|
||||
assert.EqualValues(t, "Children process", processes[0].Description)
|
||||
assert.EqualValues(t, NormalProcessType, processes[0].Type)
|
||||
assert.EqualValues(t, processes[1].PID, processes[0].ParentPID)
|
||||
assert.Empty(t, processes[0].Children)
|
||||
|
||||
assert.EqualValues(t, "Children normal process", processes[1].Description)
|
||||
assert.EqualValues(t, NormalProcessType, processes[1].Type)
|
||||
assert.Empty(t, processes[1].ParentPID)
|
||||
assert.Empty(t, processes[1].Children)
|
||||
|
||||
assert.EqualValues(t, "Normal process", processes[2].Description)
|
||||
assert.EqualValues(t, NormalProcessType, processes[2].Type)
|
||||
assert.Empty(t, processes[2].ParentPID)
|
||||
assert.Empty(t, processes[2].Children)
|
||||
})
|
||||
|
||||
t.Run("System process", func(t *testing.T) {
|
||||
processes, processCount, _, err := GetManager().ProcessStacktraces(false, false)
|
||||
require.NoError(t, err)
|
||||
assert.EqualValues(t, 4, processCount)
|
||||
assert.Len(t, processes, 4)
|
||||
|
||||
assert.EqualValues(t, "System process", processes[0].Description)
|
||||
assert.EqualValues(t, SystemProcessType, processes[0].Type)
|
||||
assert.Empty(t, processes[0].ParentPID)
|
||||
assert.Empty(t, processes[0].Children)
|
||||
|
||||
assert.EqualValues(t, "Children normal process", processes[1].Description)
|
||||
assert.EqualValues(t, NormalProcessType, processes[1].Type)
|
||||
assert.Empty(t, processes[1].ParentPID)
|
||||
assert.Len(t, processes[1].Children, 1)
|
||||
|
||||
assert.EqualValues(t, "Normal process", processes[2].Description)
|
||||
assert.EqualValues(t, NormalProcessType, processes[2].Type)
|
||||
assert.Empty(t, processes[2].ParentPID)
|
||||
assert.Empty(t, processes[2].Children)
|
||||
|
||||
// This is the "main" pid, testing code always runs in a goroutine.
|
||||
assert.EqualValues(t, "(unassociated)", processes[3].Description)
|
||||
assert.EqualValues(t, NoneProcessType, processes[3].Type)
|
||||
assert.Empty(t, processes[3].ParentPID)
|
||||
assert.Empty(t, processes[3].Children)
|
||||
})
|
||||
}
|
|
@ -5,12 +5,14 @@ package process
|
|||
|
||||
import (
|
||||
"context"
|
||||
"runtime/trace"
|
||||
"time"
|
||||
)
|
||||
|
||||
var (
|
||||
SystemProcessType = "system"
|
||||
RequestProcessType = "request"
|
||||
GitProcessType = "git"
|
||||
NormalProcessType = "normal"
|
||||
NoneProcessType = "none"
|
||||
)
|
||||
|
@ -23,6 +25,7 @@ type process struct {
|
|||
Start time.Time
|
||||
Cancel context.CancelFunc
|
||||
Type string
|
||||
TraceTrask *trace.Task
|
||||
}
|
||||
|
||||
// ToProcess converts a process to a externally usable Process
|
||||
|
|
|
@ -3464,6 +3464,7 @@ monitor.process = Running Processes
|
|||
monitor.stacktrace = Stacktrace
|
||||
monitor.processes_count = %d Processes
|
||||
monitor.download_diagnosis_report = Download diagnosis report
|
||||
monitor.duration = Duration (s)
|
||||
monitor.desc = Description
|
||||
monitor.start = Start Time
|
||||
monitor.execute_time = Execution Time
|
||||
|
@ -3921,6 +3922,4 @@ filepreview.lines = Lines %[1]d to %[2]d in %[3]s
|
|||
filepreview.truncated = Preview has been truncated
|
||||
|
||||
[translation_meta]
|
||||
test = This is a test string. It is not displayed in Forgejo UI but is used for testing purposes. Feel free to enter "ok" to save time (or a fun fact of your choice) to hit that sweet 100% completion mark :)
|
||||
|
||||
|
||||
test = This is a test string. It is not displayed in Forgejo UI but is used for testing purposes. Feel free to enter "ok" to save time (or a fun fact of your choice) to hit that sweet 100% completion mark :)
|
|
@ -6,6 +6,7 @@ package common
|
|||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"runtime/trace"
|
||||
"strings"
|
||||
|
||||
"code.gitea.io/gitea/modules/cache"
|
||||
|
@ -43,6 +44,8 @@ func ProtocolMiddlewares() (handlers []any) {
|
|||
return http.HandlerFunc(func(resp http.ResponseWriter, req *http.Request) {
|
||||
ctx, _, finished := process.GetManager().AddTypedContext(req.Context(), fmt.Sprintf("%s: %s", req.Method, req.RequestURI), process.RequestProcessType, true)
|
||||
defer finished()
|
||||
trace.Log(ctx, "method", req.Method)
|
||||
trace.Log(ctx, "url", req.RequestURI)
|
||||
next.ServeHTTP(context.WrapResponseWriter(resp), req.WithContext(cache.WithCacheContext(ctx)))
|
||||
})
|
||||
})
|
||||
|
|
|
@ -6,7 +6,9 @@ package admin
|
|||
import (
|
||||
"archive/zip"
|
||||
"fmt"
|
||||
"runtime"
|
||||
"runtime/pprof"
|
||||
"runtime/trace"
|
||||
"time"
|
||||
|
||||
"code.gitea.io/gitea/modules/httplib"
|
||||
|
@ -15,17 +17,12 @@ import (
|
|||
|
||||
func MonitorDiagnosis(ctx *context.Context) {
|
||||
seconds := ctx.FormInt64("seconds")
|
||||
if seconds <= 5 {
|
||||
seconds = 5
|
||||
}
|
||||
if seconds > 300 {
|
||||
seconds = 300
|
||||
}
|
||||
seconds = max(5, min(300, seconds))
|
||||
|
||||
httplib.ServeSetHeaders(ctx.Resp, &httplib.ServeHeaderOptions{
|
||||
ContentType: "application/zip",
|
||||
Disposition: "attachment",
|
||||
Filename: fmt.Sprintf("gitea-diagnosis-%s.zip", time.Now().Format("20060102-150405")),
|
||||
Filename: fmt.Sprintf("forgejo-diagnosis-%s.zip", time.Now().Format("20060102-150405")),
|
||||
})
|
||||
|
||||
zipWriter := zip.NewWriter(ctx.Resp)
|
||||
|
@ -44,14 +41,27 @@ func MonitorDiagnosis(ctx *context.Context) {
|
|||
return
|
||||
}
|
||||
|
||||
err = pprof.StartCPUProfile(f)
|
||||
if err == nil {
|
||||
time.Sleep(time.Duration(seconds) * time.Second)
|
||||
pprof.StopCPUProfile()
|
||||
} else {
|
||||
if err := pprof.StartCPUProfile(f); err != nil {
|
||||
_, _ = f.Write([]byte(err.Error()))
|
||||
}
|
||||
|
||||
f, err = zipWriter.CreateHeader(&zip.FileHeader{Name: "trace.dat", Method: zip.Deflate, Modified: time.Now()})
|
||||
if err != nil {
|
||||
ctx.ServerError("Failed to create zip file", err)
|
||||
return
|
||||
}
|
||||
|
||||
if err := trace.Start(f); err != nil {
|
||||
_, _ = f.Write([]byte(err.Error()))
|
||||
}
|
||||
|
||||
select {
|
||||
case <-time.After(time.Duration(seconds) * time.Second):
|
||||
case <-ctx.Done():
|
||||
}
|
||||
pprof.StopCPUProfile()
|
||||
trace.Stop()
|
||||
|
||||
f, err = zipWriter.CreateHeader(&zip.FileHeader{Name: "goroutine-after.txt", Method: zip.Deflate, Modified: time.Now()})
|
||||
if err != nil {
|
||||
ctx.ServerError("Failed to create zip file", err)
|
||||
|
@ -64,5 +74,8 @@ func MonitorDiagnosis(ctx *context.Context) {
|
|||
ctx.ServerError("Failed to create zip file", err)
|
||||
return
|
||||
}
|
||||
// To avoid showing memory that actually can be cleaned, run the garbage
|
||||
// collector.
|
||||
runtime.GC()
|
||||
_ = pprof.Lookup("heap").WriteTo(f, 0)
|
||||
}
|
||||
|
|
|
@ -7,6 +7,8 @@
|
|||
{{svg "octicon-cpu" 16}}
|
||||
{{else if eq .Process.Type "normal"}}
|
||||
{{svg "octicon-terminal" 16}}
|
||||
{{else if eq .Process.Type "git"}}
|
||||
{{svg "octicon-git-branch" 16}}
|
||||
{{else}}
|
||||
{{svg "octicon-code" 16}}
|
||||
{{end}}
|
||||
|
|
|
@ -8,11 +8,12 @@
|
|||
<a class="{{if eq .ShowGoroutineList "stacktrace"}}active {{end}}item" href="?show=stacktrace">{{ctx.Locale.Tr "admin.monitor.stacktrace"}}</a>
|
||||
</div>
|
||||
</div>
|
||||
<form target="_blank" action="{{AppSubUrl}}/admin/monitor/diagnosis" class="ui form">
|
||||
<div class="ui inline field">
|
||||
<button class="ui primary small button">{{ctx.Locale.Tr "admin.monitor.download_diagnosis_report"}}</button>
|
||||
<input name="seconds" size="3" maxlength="3" value="10"> {{ctx.Locale.Tr "tool.raw_seconds"}}
|
||||
</div>
|
||||
<form target="_blank" action="{{AppSubUrl}}/admin/monitor/diagnosis" class="ui form tw-flex tw-gap-3">
|
||||
<label class="tw-flex tw-gap-2 tw-items-center tw-whitespace-nowrap">
|
||||
{{ctx.Locale.Tr "admin.monitor.duration"}}
|
||||
<input type="number" name="seconds" max="300" min="0" value="10">
|
||||
</label>
|
||||
<button class="ui primary small button">{{ctx.Locale.Tr "admin.monitor.download_diagnosis_report"}}</button>
|
||||
</form>
|
||||
</div>
|
||||
|
||||
|
|
Loading…
Reference in a new issue