Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ require (
k8s.io/component-base v0.35.0 // indirect
k8s.io/klog/v2 v2.130.1 // indirect
k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 // indirect
k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 // indirect
k8s.io/utils v0.0.0-20251002143259-bc988d571ff4
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.33.0 // indirect
sigs.k8s.io/gateway-api v1.4.0 // indirect
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect
Expand Down
24 changes: 24 additions & 0 deletions internal/controller/onboarding_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/utils/clock"
ctrl "sigs.k8s.io/controller-runtime"
k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
logger "sigs.k8s.io/controller-runtime/pkg/log"
Expand All @@ -51,6 +52,7 @@ var errRequeue = errors.New("requeue requested")

const (
defaultWaitTime = 1 * time.Minute
smokeTestTimeout = 5 * time.Minute
testProjectName = "test"
testDomainName = "cc3test"
testImageName = "cirros-kvm"
Expand All @@ -63,6 +65,7 @@ type OnboardingController struct {
k8sclient.Client
Scheme *runtime.Scheme
TestFlavorID string
Clock clock.Clock
computeClient *gophercloud.ServiceClient
testComputeClient *gophercloud.ServiceClient
testImageClient *gophercloud.ServiceClient
Expand Down Expand Up @@ -286,6 +289,23 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis
}

if !strings.Contains(consoleOutput, server.Name) {
if !server.LaunchedAt.IsZero() && r.Clock.Now().After(server.LaunchedAt.Add(smokeTestTimeout)) {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Timeout threshold is exclusive at Line 292.

Using After(...) means the timeout does not fire exactly at launched_at + 5m; it fires on the next reconcile. Make the check inclusive.

⏱️ Proposed fix
-			if !server.LaunchedAt.IsZero() && r.Clock.Now().After(server.LaunchedAt.Add(smokeTestTimeout)) {
+			deadline := server.LaunchedAt.Add(smokeTestTimeout)
+			if !server.LaunchedAt.IsZero() && !r.Clock.Now().Before(deadline) {
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if !server.LaunchedAt.IsZero() && r.Clock.Now().After(server.LaunchedAt.Add(smokeTestTimeout)) {
deadline := server.LaunchedAt.Add(smokeTestTimeout)
if !server.LaunchedAt.IsZero() && !r.Clock.Now().Before(deadline) {
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@internal/controller/onboarding_controller.go` at line 292, The timeout check
at the line using r.Clock.Now().After(server.LaunchedAt.Add(smokeTestTimeout))
is exclusive and should be inclusive; change the condition to test inclusive
equality by replacing the After(...) check with a non-Before check (e.g.,
!r.Clock.Now().Before(server.LaunchedAt.Add(smokeTestTimeout))) or explicitly
include r.Clock.Now().Equal(...). Update the condition that references
server.LaunchedAt, r.Clock.Now(), and smokeTestTimeout in the reconcile logic so
the timeout fires exactly at launched_at + smokeTestTimeout rather than only
after it.

base := hv.DeepCopy()
meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{
Type: kvmv1.ConditionTypeOnboarding,
Status: metav1.ConditionTrue,
Reason: kvmv1.ConditionReasonTesting,
Message: fmt.Sprintf("timeout waiting for console output since %v", server.LaunchedAt),
})
if err := r.patchStatus(ctx, hv, base); err != nil {
return ctrl.Result{}, err
}
if err = servers.Delete(ctx, r.testComputeClient, server.ID).ExtractErr(); err != nil {
if !gophercloud.ResponseCodeIs(err, http.StatusNotFound) {
return ctrl.Result{}, fmt.Errorf("failed to delete timed out test instance %v: %w", server.ID, err)
}
}
}
return ctrl.Result{RequeueAfter: defaultWaitTime}, nil
}

Expand Down Expand Up @@ -613,6 +633,10 @@ func (r *OnboardingController) SetupWithManager(mgr ctrl.Manager) error {
}
r.testNetworkClient.ResourceBase = fmt.Sprintf("%vv2.0/", r.testNetworkClient.Endpoint)

if r.Clock == nil {
r.Clock = clock.RealClock{}
}

return ctrl.NewControllerManagedBy(mgr).
Named(OnboardingControllerName).
For(&kvmv1.Hypervisor{}).
Expand Down
104 changes: 99 additions & 5 deletions internal/controller/onboarding_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"fmt"
"net/http"
"os"
"time"

"github.com/gophercloud/gophercloud/v2/testhelper"
"github.com/gophercloud/gophercloud/v2/testhelper/client"
Expand All @@ -30,6 +31,8 @@ import (
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/utils/clock"
clocktesting "k8s.io/utils/clock/testing"
ctrl "sigs.k8s.io/controller-runtime"

kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
Expand Down Expand Up @@ -221,6 +224,7 @@ var _ = Describe("Onboarding Controller", func() {
Client: k8sClient,
Scheme: k8sClient.Scheme(),
TestFlavorID: "1",
Clock: clock.RealClock{},
computeClient: client.ServiceClient(fakeServer),
testComputeClient: client.ServiceClient(fakeServer),
testImageClient: client.ServiceClient(fakeServer),
Expand Down Expand Up @@ -340,6 +344,12 @@ var _ = Describe("Onboarding Controller", func() {
})

Context("running tests after initial setup", func() {
var (
serverActionHandler func(http.ResponseWriter, *http.Request)
serverDeleteHandler func(http.ResponseWriter, *http.Request)
serverDetailHandler func(http.ResponseWriter, *http.Request)
)

BeforeEach(func(ctx SpecContext) {
hv := &kvmv1.Hypervisor{}
Expect(k8sClient.Get(ctx, namespacedName, hv)).To(Succeed())
Expand Down Expand Up @@ -371,11 +381,14 @@ var _ = Describe("Onboarding Controller", func() {
Expect(err).NotTo(HaveOccurred())
})

serverDetailHandler = func(w http.ResponseWriter, _ *http.Request) {
_, err := fmt.Fprint(w, emptyServersBody)
Expect(err).NotTo(HaveOccurred())
}
fakeServer.Mux.HandleFunc("GET /servers/detail", func(w http.ResponseWriter, r *http.Request) {
w.Header().Add("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
_, err := fmt.Fprint(w, emptyServersBody)
Expect(err).NotTo(HaveOccurred())
serverDetailHandler(w, r)
})

fakeServer.Mux.HandleFunc("POST /instance-ha", func(w http.ResponseWriter, r *http.Request) {
Expand Down Expand Up @@ -406,15 +419,21 @@ var _ = Describe("Onboarding Controller", func() {
Expect(err).NotTo(HaveOccurred())
})

fakeServer.Mux.HandleFunc("POST /servers/server-id/action", func(w http.ResponseWriter, r *http.Request) {
serverActionHandler = func(w http.ResponseWriter, _ *http.Request) {
w.Header().Add("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
_, err := fmt.Fprintf(w, `{"output": "FAKE CONSOLE OUTPUT\nANOTHER\nLAST LINE\nohooc--%v-%v\n"}`, hv.Name, hv.UID)
Expect(err).NotTo(HaveOccurred())

}
fakeServer.Mux.HandleFunc("POST /servers/server-id/action", func(w http.ResponseWriter, r *http.Request) {
serverActionHandler(w, r)
})
fakeServer.Mux.HandleFunc("DELETE /servers/server-id", func(w http.ResponseWriter, r *http.Request) {

serverDeleteHandler = func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusAccepted)
}
fakeServer.Mux.HandleFunc("DELETE /servers/server-id", func(w http.ResponseWriter, r *http.Request) {
serverDeleteHandler(w, r)
})
})

Expand Down Expand Up @@ -571,5 +590,80 @@ var _ = Describe("Onboarding Controller", func() {
})
})

When("smoke test times out waiting for console output", func() {
var serverDeletedCalled bool

BeforeEach(func(ctx SpecContext) {
By("Overriding HV status to Testing state")
hv := &kvmv1.Hypervisor{}
Expect(k8sClient.Get(ctx, namespacedName, hv)).To(Succeed())
meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{
Type: kvmv1.ConditionTypeOnboarding,
Status: metav1.ConditionTrue,
Reason: kvmv1.ConditionReasonTesting,
})
Expect(k8sClient.Status().Update(ctx, hv)).To(Succeed())

// Construct the server name the controller will look for.
serverName := fmt.Sprintf("%s-%s-%s", testPrefixName, hypervisorName, string(hv.UID))
detailCallCount := 0

// On the first reconcile GET /servers/detail returns empty so the controller
// creates the server via POST (no launched_at → timeout cannot fire yet).
// On the second reconcile GET /servers/detail returns the ACTIVE server with a
// stale launched_at so createOrGetTestServer takes the "already-running" path and
// smokeTest fires the timeout.
serverDetailHandler = func(w http.ResponseWriter, _ *http.Request) {
if detailCallCount == 0 {
detailCallCount++
_, err := fmt.Fprint(w, emptyServersBody)
Expect(err).NotTo(HaveOccurred())
} else {
_, err := fmt.Fprintf(w,
`{"servers": [{"id": "server-id", "name": %q, "status": "ACTIVE", "OS-SRV-USG:launched_at": "2025-01-01T12:00:00.000000"}], "servers_links": []}`,
serverName)
Expect(err).NotTo(HaveOccurred())
}
}

// Set the clock to 6 minutes after the launched_at above (past the 5-minute deadline).
onboardingReconciler.Clock = clocktesting.NewFakeClock(time.Date(2025, 1, 1, 12, 6, 0, 0, time.UTC))
serverDeletedCalled = false

// Console output that does NOT contain the server name, so the timeout path is exercised.
serverActionHandler = func(w http.ResponseWriter, _ *http.Request) {
w.Header().Add("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
_, err := fmt.Fprint(w, `{"output": "some unrelated console output\n"}`)
Expect(err).NotTo(HaveOccurred())
}
serverDeleteHandler = func(w http.ResponseWriter, _ *http.Request) {
serverDeletedCalled = true
w.WriteHeader(http.StatusAccepted)
}
})

It("should delete the stalled server and record a timeout in the status", func(ctx SpecContext) {
By("First reconcile: controller creates the ACTIVE server; launched_at is absent so timeout does not fire yet")
_, err := onboardingReconciler.Reconcile(ctx, reconcileReq)
Expect(err).NotTo(HaveOccurred())

By("Second reconcile: GET /servers/detail returns the stale server; timeout fires and the server is deleted")
_, err = onboardingReconciler.Reconcile(ctx, reconcileReq)
Expect(err).NotTo(HaveOccurred())

By("Verifying the timed-out server was deleted")
Expect(serverDeletedCalled).To(BeTrue())

By("Verifying the onboarding condition message indicates a timeout")
hv := &kvmv1.Hypervisor{}
Expect(k8sClient.Get(ctx, namespacedName, hv)).To(Succeed())
onboardingCond := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding)
Expect(onboardingCond).NotTo(BeNil())
Expect(onboardingCond.Reason).To(Equal(kvmv1.ConditionReasonTesting))
Expect(onboardingCond.Message).To(ContainSubstring("timeout"))
})
})

})
})
Loading