Skip to content
Merged
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems in other parts of the SDK, e.g. parent close policy, we just expose the raw enum from proto. Should we do the same here? It has "unrecognized", automatically gets new values, etc.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't notice that; I have changed it to use the raw proto enum.

Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved.
*
* Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Modifications copyright (C) 2017 Uber Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this material except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.temporal.failure;

/**
 * Classifies an application failure, for example so that benign errors can be told apart from all
 * other errors.
 *
 * @see io.temporal.api.enums.v1.ApplicationErrorCategory
 */
public enum ApplicationErrorCategory {
  /** Category used when no specific category has been assigned to the failure. */
  UNSPECIFIED,

  /** Expected application error with little/no severity. */
  BENIGN
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,15 @@
* <li>nonRetryable is set to false
* <li>details are set to null
* <li>stack trace is copied from the original exception
* <li>category is set to {@link ApplicationErrorCategory#UNSPECIFIED}
* </ul>
*/
public final class ApplicationFailure extends TemporalFailure {
private final String type;
private final Values details;
private boolean nonRetryable;
private Duration nextRetryDelay;
private final ApplicationErrorCategory category;

/**
* New ApplicationFailure with {@link #isNonRetryable()} flag set to false.
Expand Down Expand Up @@ -92,7 +94,14 @@ public static ApplicationFailure newFailure(String message, String type, Object.
*/
public static ApplicationFailure newFailureWithCause(
String message, String type, @Nullable Throwable cause, Object... details) {
return new ApplicationFailure(message, type, false, new EncodedValues(details), cause, null);
return new ApplicationFailure(
message,
type,
false,
new EncodedValues(details),
cause,
null,
ApplicationErrorCategory.UNSPECIFIED);
}

/**
Expand All @@ -118,7 +127,13 @@ public static ApplicationFailure newFailureWithCauseAndDelay(
Duration nextRetryDelay,
Object... details) {
return new ApplicationFailure(
message, type, false, new EncodedValues(details), cause, nextRetryDelay);
message,
type,
false,
new EncodedValues(details),
cause,
nextRetryDelay,
ApplicationErrorCategory.UNSPECIFIED);
}

/**
Expand Down Expand Up @@ -153,7 +168,40 @@ public static ApplicationFailure newNonRetryableFailure(
*/
public static ApplicationFailure newNonRetryableFailureWithCause(
String message, String type, @Nullable Throwable cause, Object... details) {
return new ApplicationFailure(message, type, true, new EncodedValues(details), cause, null);
return new ApplicationFailure(
message,
type,
true,
new EncodedValues(details),
cause,
null,
ApplicationErrorCategory.UNSPECIFIED);
}

/**
 * Creates an ApplicationFailure that carries the given category and has the {@link
 * #isNonRetryable()} flag set to false.
 *
 * <p>Note that this exception still may not be retried by the service if its type is included in
 * the doNotRetry property of the correspondent retry policy.
 *
 * @param message optional error message
 * @param type error type
 * @param category the category of the application failure; {@code null} is treated as {@link
 *     ApplicationErrorCategory#UNSPECIFIED}
 * @param cause failure cause. Each element of the cause chain will be converted to
 *     ApplicationFailure for transmission across the network if it doesn't extend {@link
 *     TemporalFailure}
 * @param details optional details about the failure. They are serialized using the same approach
 *     as arguments and results.
 * @return a new retryable ApplicationFailure with no next-retry delay and the resolved category
 */
public static ApplicationFailure newFailureWithCategory(
    String message,
    String type,
    ApplicationErrorCategory category,
    @Nullable Throwable cause,
    Object... details) {
  // Normalize a missing category so failures built through this factory never carry null.
  ApplicationErrorCategory effectiveCategory =
      category == null ? ApplicationErrorCategory.UNSPECIFIED : category;
  return new ApplicationFailure(
      message, type, false, new EncodedValues(details), cause, null, effectiveCategory);
}

static ApplicationFailure newFromValues(
Expand All @@ -162,8 +210,10 @@ static ApplicationFailure newFromValues(
boolean nonRetryable,
Values details,
Throwable cause,
Duration nextRetryDelay) {
return new ApplicationFailure(message, type, nonRetryable, details, cause, nextRetryDelay);
Duration nextRetryDelay,
ApplicationErrorCategory category) {
return new ApplicationFailure(
message, type, nonRetryable, details, cause, nextRetryDelay, category);
}

ApplicationFailure(
Expand All @@ -172,12 +222,14 @@ static ApplicationFailure newFromValues(
boolean nonRetryable,
Values details,
Throwable cause,
Duration nextRetryDelay) {
Duration nextRetryDelay,
ApplicationErrorCategory category) {
super(getMessage(message, Objects.requireNonNull(type), nonRetryable), message, cause);
this.type = type;
this.details = details;
this.nonRetryable = nonRetryable;
this.nextRetryDelay = nextRetryDelay;
this.category = category;
}

public String getType() {
Expand Down Expand Up @@ -210,6 +262,10 @@ public void setNextRetryDelay(Duration nextRetryDelay) {
this.nextRetryDelay = nextRetryDelay;
}

/**
* Returns the category that was assigned to this failure when it was constructed.
*
* @return the value of the {@code category} field
*/
public ApplicationErrorCategory getApplicationErrorCategory() {
return category;
}

private static String getMessage(String message, String type, boolean nonRetryable) {
return (Strings.isNullOrEmpty(message) ? "" : "message='" + message + "', ")
+ "type='"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import io.temporal.common.converter.EncodedValues;
import io.temporal.common.converter.FailureConverter;
import io.temporal.internal.activity.ActivityTaskHandlerImpl;
import io.temporal.internal.common.FailureUtils;
import io.temporal.internal.common.ProtobufTimeUtils;
import io.temporal.internal.sync.POJOWorkflowImplementationFactory;
import io.temporal.serviceclient.CheckedExceptionWrapper;
Expand Down Expand Up @@ -106,7 +107,8 @@ private RuntimeException failureToExceptionImpl(Failure failure, DataConverter d
cause,
info.hasNextRetryDelay()
? ProtobufTimeUtils.toJavaDuration(info.getNextRetryDelay())
: null);
: null,
FailureUtils.categoryFromProto(info.getCategory()));
}
case TIMEOUT_FAILURE_INFO:
{
Expand Down Expand Up @@ -146,13 +148,14 @@ private RuntimeException failureToExceptionImpl(Failure failure, DataConverter d
info.hasLastHeartbeatDetails()
? Optional.of(info.getLastHeartbeatDetails())
: Optional.empty();
return new ApplicationFailure(
return ApplicationFailure.newFromValues(
failure.getMessage(),
"ResetWorkflow",
false,
new EncodedValues(details, dataConverter),
cause,
null);
null,
ApplicationErrorCategory.UNSPECIFIED);
}
case ACTIVITY_FAILURE_INFO:
{
Expand Down Expand Up @@ -214,7 +217,8 @@ private RuntimeException failureToExceptionImpl(Failure failure, DataConverter d
false,
new EncodedValues(Optional.empty(), dataConverter),
cause,
null);
null,
ApplicationErrorCategory.UNSPECIFIED);
}
}

Expand Down Expand Up @@ -260,7 +264,8 @@ private Failure exceptionToFailure(Throwable throwable) {
ApplicationFailureInfo.Builder info =
ApplicationFailureInfo.newBuilder()
.setType(ae.getType())
.setNonRetryable(ae.isNonRetryable());
.setNonRetryable(ae.isNonRetryable())
.setCategory(FailureUtils.categoryToProto(ae.getApplicationErrorCategory()));
Optional<Payloads> details = ((EncodedValues) ae.getDetails()).toPayloads();
if (details.isPresent()) {
info.setDetails(details.get());
Expand Down Expand Up @@ -352,7 +357,10 @@ private Failure exceptionToFailure(Throwable throwable) {
ApplicationFailureInfo.Builder info =
ApplicationFailureInfo.newBuilder()
.setType(throwable.getClass().getName())
.setNonRetryable(false);
.setNonRetryable(false)
.setCategory(
io.temporal.api.enums.v1.ApplicationErrorCategory
.APPLICATION_ERROR_CATEGORY_UNSPECIFIED);
failure.setApplicationFailureInfo(info);
}
return failure.build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import io.temporal.common.interceptors.ActivityInboundCallsInterceptor.ActivityOutput;
import io.temporal.common.interceptors.Header;
import io.temporal.common.interceptors.WorkerInterceptor;
import io.temporal.internal.common.FailureUtils;
import io.temporal.internal.worker.ActivityTaskHandler;
import io.temporal.payload.context.ActivitySerializationContext;
import io.temporal.serviceclient.CheckedExceptionWrapper;
Expand Down Expand Up @@ -122,6 +123,14 @@ public ActivityTaskHandler.Result execute(ActivityInfoInternal info, Scope metri
info.getActivityId(),
info.getActivityType(),
info.getAttempt());
} else if (FailureUtils.isBenignApplicationFailure(ex)) {
log.debug(
"{} failure. ActivityId={}, activityType={}, attempt={}",
local ? "Local activity" : "Activity",
info.getActivityId(),
info.getActivityType(),
info.getAttempt(),
ex);
} else {
log.warn(
"{} failure. ActivityId={}, activityType={}, attempt={}",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import io.temporal.common.metadata.POJOActivityImplMetadata;
import io.temporal.common.metadata.POJOActivityMethodMetadata;
import io.temporal.internal.activity.ActivityTaskExecutors.ActivityTaskExecutor;
import io.temporal.internal.common.FailureUtils;
import io.temporal.internal.common.env.ReflectionUtils;
import io.temporal.internal.worker.ActivityTask;
import io.temporal.internal.worker.ActivityTaskHandler;
Expand Down Expand Up @@ -209,11 +210,13 @@ static ActivityTaskHandler.Result mapToActivityFailure(
Scope ms =
metricsScope.tagged(
ImmutableMap.of(MetricsTag.EXCEPTION, exception.getClass().getSimpleName()));
if (isLocalActivity) {
ms.counter(MetricsType.LOCAL_ACTIVITY_EXEC_FAILED_COUNTER).inc(1);
ms.counter(MetricsType.LOCAL_ACTIVITY_FAILED_COUNTER).inc(1);
} else {
ms.counter(MetricsType.ACTIVITY_EXEC_FAILED_COUNTER).inc(1);
if (!FailureUtils.isBenignApplicationFailure(exception)) {
if (isLocalActivity) {
ms.counter(MetricsType.LOCAL_ACTIVITY_EXEC_FAILED_COUNTER).inc(1);
ms.counter(MetricsType.LOCAL_ACTIVITY_FAILED_COUNTER).inc(1);
} else {
ms.counter(MetricsType.ACTIVITY_EXEC_FAILED_COUNTER).inc(1);
}
}
Failure failure = dataConverter.exceptionToFailure(exception);
RespondActivityTaskFailedRequest.Builder result =
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved.
*
* Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Modifications copyright (C) 2017 Uber Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this material except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.temporal.internal.common;

import io.temporal.api.failure.v1.Failure;
import io.temporal.failure.ApplicationErrorCategory;
import io.temporal.failure.ApplicationFailure;
import javax.annotation.Nullable;

public class FailureUtils {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pedantic, but would add a private constructor here

private FailureUtils() {}

/**
 * Tells whether the given throwable is an {@link ApplicationFailure} categorized as benign.
 *
 * @param t throwable to inspect; may be {@code null}
 * @return {@code true} only when {@code t} is an ApplicationFailure whose category is {@link
 *     ApplicationErrorCategory#BENIGN}
 */
public static boolean isBenignApplicationFailure(@Nullable Throwable t) {
  // instanceof is false for null, so a null argument falls out here as well.
  if (!(t instanceof ApplicationFailure)) {
    return false;
  }
  ApplicationFailure applicationFailure = (ApplicationFailure) t;
  return applicationFailure.getApplicationErrorCategory() == ApplicationErrorCategory.BENIGN;
}

/**
 * Tells whether the given protobuf failure is an application failure categorized as benign.
 *
 * @param failure protobuf failure to inspect; may be {@code null}
 * @return {@code true} only when {@code failure} carries application failure info whose category
 *     maps to {@link ApplicationErrorCategory#BENIGN}
 */
public static boolean isBenignApplicationFailure(@Nullable Failure failure) {
  // Protobuf message getters never return null (an unset field yields the default instance),
  // so presence has to be tested with the generated has-method rather than a null comparison.
  if (failure == null || !failure.hasApplicationFailureInfo()) {
    return false;
  }
  return FailureUtils.categoryFromProto(failure.getApplicationFailureInfo().getCategory())
      == ApplicationErrorCategory.BENIGN;
}

/**
 * Converts the protobuf category enum to the SDK's {@link ApplicationErrorCategory}.
 *
 * @param protoCategory protobuf category; may be {@code null}
 * @return {@link ApplicationErrorCategory#BENIGN} for the benign proto value, and {@link
 *     ApplicationErrorCategory#UNSPECIFIED} for everything else (null, unspecified, unrecognized
 *     or any other value)
 */
public static ApplicationErrorCategory categoryFromProto(
    io.temporal.api.enums.v1.ApplicationErrorCategory protoCategory) {
  // Only one proto value has a dedicated mapping; every other input, including null,
  // falls back to UNSPECIFIED.
  boolean benign =
      protoCategory
          == io.temporal.api.enums.v1.ApplicationErrorCategory.APPLICATION_ERROR_CATEGORY_BENIGN;
  return benign ? ApplicationErrorCategory.BENIGN : ApplicationErrorCategory.UNSPECIFIED;
}

/**
 * Converts the SDK's application error category to its protobuf counterpart.
 *
 * @param category SDK category; {@code null} is treated like {@code UNSPECIFIED}
 * @return the benign proto value for {@code BENIGN}, otherwise the unspecified proto value
 */
public static io.temporal.api.enums.v1.ApplicationErrorCategory categoryToProto(
    io.temporal.failure.ApplicationErrorCategory category) {
  // Switching on a null enum reference throws NullPointerException; guard first so a failure
  // without a category converts the same way categoryFromProto treats a null input.
  if (category == null) {
    return io.temporal.api.enums.v1.ApplicationErrorCategory
        .APPLICATION_ERROR_CATEGORY_UNSPECIFIED;
  }
  switch (category) {
    case BENIGN:
      return io.temporal.api.enums.v1.ApplicationErrorCategory.APPLICATION_ERROR_CATEGORY_BENIGN;
    case UNSPECIFIED:
    default:
      // Fallback to UNSPECIFIED for unknown values
      return io.temporal.api.enums.v1.ApplicationErrorCategory
          .APPLICATION_ERROR_CATEGORY_UNSPECIFIED;
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import io.temporal.api.update.v1.Input;
import io.temporal.api.update.v1.Request;
import io.temporal.failure.CanceledFailure;
import io.temporal.internal.common.FailureUtils;
import io.temporal.internal.common.ProtobufTimeUtils;
import io.temporal.internal.common.UpdateMessage;
import io.temporal.internal.statemachines.WorkflowStateMachines;
Expand Down Expand Up @@ -153,7 +154,9 @@ private void completeWorkflow(@Nullable WorkflowExecutionException failure) {
metricsScope.counter(MetricsType.WORKFLOW_CANCELED_COUNTER).inc(1);
} else if (failure != null) {
workflowStateMachines.failWorkflow(failure.getFailure());
metricsScope.counter(MetricsType.WORKFLOW_FAILED_COUNTER).inc(1);
if (!FailureUtils.isBenignApplicationFailure(failure.getFailure())) {
metricsScope.counter(MetricsType.WORKFLOW_FAILED_COUNTER).inc(1);
}
} else {
ContinueAsNewWorkflowExecutionCommandAttributes attributes =
context.getContinueAsNewOnCompletion();
Expand Down
Loading
Loading