1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313
|
//go:build !remote
package libpod
import (
"context"
"errors"
"fmt"
"github.com/containers/podman/v5/libpod/define"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
// Service describes a service, i.e. a group of one or more pods that is
// fronted by a service container. The service container is started before
// any of the pods and is stopped once the last pod has stopped. This makes
// it possible to track and manage the entire life cycle of a service, for
// instance one started via `podman-play-kube`.
type Service struct {
	// Pods lists the pods that run as part of the service.
	Pods []string `json:"servicePods"`
}
// hasServiceContainer reports whether the pod's configuration records a
// service container ID.
// The pod is expected to be updated and locked by the caller.
func (p *Pod) hasServiceContainer() bool {
	return len(p.config.ServiceContainerID) > 0
}
// serviceContainer looks up the pod's service container in the runtime
// state. If the pod's configuration has no service container ID, an error
// wrapping define.ErrNoSuchCtr is returned.
// The pod is expected to be updated and locked by the caller.
func (p *Pod) serviceContainer() (*Container, error) {
	if id := p.config.ServiceContainerID; id != "" {
		return p.runtime.state.Container(id)
	}
	return nil, fmt.Errorf("pod has no service container: %w", define.ErrNoSuchCtr)
}
// ServiceContainer returns the pod's service container. It takes the pod
// lock and refreshes the pod via updatePod before performing the lookup.
func (p *Pod) ServiceContainer() (*Container, error) {
	p.lock.Lock()
	defer p.lock.Unlock()

	err := p.updatePod()
	if err != nil {
		return nil, err
	}

	return p.serviceContainer()
}
// addServicePodLocked appends the pod with the given id to the list of pods
// tracked in the container's service state and saves the container.
// Note that the method acquires the container lock and syncs the container
// before modifying its state.
func (c *Container) addServicePodLocked(id string) error {
	c.lock.Lock()
	defer c.lock.Unlock()

	err := c.syncContainer()
	if err != nil {
		return err
	}

	service := &c.state.Service
	service.Pods = append(service.Pods, id)

	return c.save()
}
// IsService reports whether the container is configured as a
// "service container".
func (c *Container) IsService() bool {
	isService := c.config.IsService
	return isService
}
// serviceContainerReport is the result of checking whether a service
// container can be stopped.
type serviceContainerReport struct {
	// canBeStopped is true when the service container can be stopped.
	canBeStopped bool
	// numContainers counts all known containers below the service
	// container.
	numContainers int
	// failedContainers counts the containers below the service container
	// that exited non-zero.
	failedContainers int
}
// canStopServiceContainerLocked reports whether the service container can be
// stopped, see canStopServiceContainer for the details of the report.
// Note that the method acquires the container lock and syncs the container
// first. It returns an internal error if the container is not a service
// container.
func (c *Container) canStopServiceContainerLocked() (*serviceContainerReport, error) {
	c.lock.Lock()
	defer c.lock.Unlock()

	err := c.syncContainer()
	switch {
	case err != nil:
		return nil, err
	case !c.IsService():
		return nil, fmt.Errorf("internal error: checking service: container %s is not a service container", c.ID())
	}

	return c.canStopServiceContainer()
}
// canStopServiceContainer walks all pods of the service and returns a report
// stating whether the service container can be stopped. That is the case
// when every pod that can still be found is in the stopped, exited or
// errored state. For those pods, the report also counts the non-infra
// containers and how many of them have a non-zero exit code.
// Pods that cannot be found anymore are skipped.
// Note that the method expects the container to be locked.
func (c *Container) canStopServiceContainer() (*serviceContainerReport, error) {
	report := &serviceContainerReport{canBeStopped: true}

	for _, podID := range c.state.Service.Pods {
		pod, err := c.runtime.LookupPod(podID)
		if errors.Is(err, define.ErrNoSuchPod) {
			continue
		}
		if err != nil {
			return nil, err
		}

		status, err := pod.GetPodStatus()
		if errors.Is(err, define.ErrNoSuchPod) {
			continue
		}
		if err != nil {
			return nil, err
		}

		if status != define.PodStateStopped && status != define.PodStateExited && status != define.PodStateErrored {
			// The pod is in some other state, so the service
			// container cannot be stopped and we can return early.
			report.canBeStopped = false
			return report, nil
		}

		ctrs, err := c.runtime.state.PodContainers(pod)
		if err != nil {
			return nil, err
		}
		for _, ctr := range ctrs {
			if ctr.IsInfra() {
				// Infra containers are ignored.
				continue
			}
			code, err := c.runtime.state.GetContainerExitCode(ctr.ID())
			if err != nil {
				return nil, err
			}
			report.numContainers++
			if code != 0 {
				report.failedContainers++
			}
		}
	}

	return report, nil
}
// maybeStopServiceContainer queues a job that checks whether the pod's
// service container can be stopped and, if so, signals it according to the
// configured exit-code propagation policy. It is a no-op when the pod has no
// service container or when the service container cannot be found.
// Errors that occur inside the queued job are only logged.
func (p *Pod) maybeStopServiceContainer() error {
	if !p.hasServiceContainer() {
		return nil
	}

	serviceCtr, lookupErr := p.serviceContainer()
	if lookupErr != nil {
		if errors.Is(lookupErr, define.ErrNoSuchCtr) {
			return nil
		}
		return fmt.Errorf("getting pod's service container: %w", lookupErr)
	}

	// The check must run in the runtime's work queue to resolve ABBA
	// dead locks in the pod->container->servicePods hierarchy.
	p.runtime.queueWork(func() {
		logrus.Debugf("Pod %s has a service %s: checking if it can be stopped", p.ID(), serviceCtr.ID())

		report, checkErr := serviceCtr.canStopServiceContainerLocked()
		if checkErr != nil {
			logrus.Errorf("Checking whether service of container %s can be stopped: %v", serviceCtr.ID(), checkErr)
			return
		}
		if !report.canBeStopped {
			return
		}

		stop := func() {
			// Note that the service container runs catatonit which
			// will exit gracefully on SIGINT.
			logrus.Debugf("Stopping service container %s", serviceCtr.ID())
			err := serviceCtr.Kill(uint(unix.SIGINT))
			if err != nil && !errors.Is(err, define.ErrCtrStateInvalid) {
				logrus.Debugf("Error stopping service container %s: %v", serviceCtr.ID(), err)
			}
		}
		kill := func() {
			logrus.Debugf("Killing service container %s", serviceCtr.ID())
			err := serviceCtr.Kill(uint(unix.SIGKILL))
			if err != nil && !errors.Is(err, define.ErrCtrStateInvalid) {
				logrus.Debugf("Error killing service container %s: %v", serviceCtr.ID(), err)
			}
		}

		// Decide between killing and stopping the service container,
		// depending on the configured exit policy.
		policy := serviceCtr.config.KubeExitCodePropagation
		var useKill bool
		switch policy {
		case define.KubeExitCodePropagationNone:
			useKill = false
		case define.KubeExitCodePropagationAny:
			useKill = report.failedContainers > 0
		case define.KubeExitCodePropagationAll:
			useKill = report.failedContainers == report.numContainers
		default:
			logrus.Errorf("Internal error: cannot stop service container %s: unknown exit policy %q", serviceCtr.ID(), policy.String())
			return
		}

		if useKill {
			kill()
			return
		}
		stop()
	})

	return nil
}
// maybeStartServiceContainer makes sure the pod's service container is
// running. It is a no-op when the pod has no service container or when the
// service container is already in the running state. The service container
// is locked and synced while its state is checked.
func (p *Pod) maybeStartServiceContainer(ctx context.Context) error {
	if !p.hasServiceContainer() {
		return nil
	}

	svc, lookupErr := p.serviceContainer()
	if lookupErr != nil {
		return fmt.Errorf("getting pod's service container: %w", lookupErr)
	}

	svc.lock.Lock()
	defer svc.lock.Unlock()

	if syncErr := svc.syncContainer(); syncErr != nil {
		return syncErr
	}

	if svc.state.State != define.ContainerStateRunning {
		// Restart will reinit among other things.
		return svc.restartWithTimeout(ctx, 0)
	}
	return nil
}
// canRemoveServiceContainer returns true if none of the pods recorded in the
// service state can be looked up anymore, i.e. all pods of the service are
// removed. It returns an internal error if the container is not a service
// container.
// Note that the method does not take the container lock itself; its caller
// in this file invokes it with the lock held.
func (c *Container) canRemoveServiceContainer() (bool, error) {
	if !c.IsService() {
		return false, fmt.Errorf("internal error: checking service: container %s is not a service container", c.ID())
	}

	for _, podID := range c.state.Service.Pods {
		_, err := c.runtime.LookupPod(podID)
		switch {
		case err == nil:
			// The pod still exists, so the service must stay.
			return false, nil
		case !errors.Is(err, define.ErrNoSuchPod):
			return false, err
		}
	}

	return true, nil
}
// maybeRemoveServiceContainer queues a job that unlinks the pod from its
// service container and removes the service container once no pod of the
// service can be found anymore. It is a no-op when the pod has no service
// container or when the service container cannot be found.
// Errors that occur inside the queued job are only logged; the method itself
// only returns an error if the service container lookup fails.
func (p *Pod) maybeRemoveServiceContainer() error {
	if !p.hasServiceContainer() {
		return nil
	}

	serviceCtr, err := p.serviceContainer()
	if err != nil {
		if errors.Is(err, define.ErrNoSuchCtr) {
			return nil
		}
		return fmt.Errorf("getting pod's service container: %w", err)
	}

	// Checking whether the service can be removed must be done in
	// the runtime's work queue to resolve ABBA dead locks in the
	// pod->container->servicePods hierarchy.
	p.runtime.queueWork(func() {
		logrus.Debugf("Pod %s has a service %s: checking if it can be removed", p.ID(), serviceCtr.ID())

		canRemove, err := func() (bool, error) { // Anonymous func for easy locking
			serviceCtr.lock.Lock()
			defer serviceCtr.lock.Unlock()

			if err := serviceCtr.syncContainer(); err != nil {
				return false, err
			}

			// Unlink the pod from the service container.
			// The capacity is the current number of pods rather than
			// one less: an empty pod list would otherwise request a
			// negative capacity and make() would panic inside the
			// work queue.
			servicePods := make([]string, 0, len(serviceCtr.state.Service.Pods))
			for _, id := range serviceCtr.state.Service.Pods {
				if id != p.ID() {
					servicePods = append(servicePods, id)
				}
			}
			serviceCtr.state.Service.Pods = servicePods
			if err := serviceCtr.save(); err != nil {
				return false, err
			}

			return serviceCtr.canRemoveServiceContainer()
		}()
		if err != nil {
			// The service container may have been removed in the
			// meantime, which is not worth an error message.
			if !errors.Is(err, define.ErrNoSuchCtr) {
				logrus.Errorf("Checking whether service container %s can be removed: %v", serviceCtr.ID(), err)
			}
			return
		}

		if !canRemove {
			return
		}

		logrus.Debugf("Removing service container %s", serviceCtr.ID())
		if err := p.runtime.RemoveContainer(context.Background(), serviceCtr, true, false, nil); err != nil {
			if !errors.Is(err, define.ErrNoSuchCtr) {
				logrus.Errorf("Removing service container %s: %v", serviceCtr.ID(), err)
			}
		}
	})

	return nil
}
|