Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/file-filter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ yml: &yml
- '.github/workflows/phoenix/**'
- '.github/workflows/frontier/**'
- '.github/workflows/frontier_amd/**'
- '.github/workflows/common/**'
- '.github/scripts/**'
- '.github/workflows/bench.yml'
- '.github/workflows/test.yml'
Expand All @@ -37,3 +38,6 @@ checkall: &checkall
- *tests
- *scripts
- *yml

# Matches changes to toolchain/mfc/test/cases.py. The file-changes job in
# test.yml exposes the result as its `cases_py` output, which the
# rebuild-cache job checks when deciding whether to run.
cases_py:
- 'toolchain/mfc/test/cases.py'
1 change: 1 addition & 0 deletions .github/scripts/submit-slurm-job.sh
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ job_device="$device"
job_interface="$interface"
job_shard="$shard"
job_cluster="$cluster"
export GITHUB_EVENT_NAME="$GITHUB_EVENT_NAME"

. ./mfc.sh load -c $compiler_flag -m $module_mode

Expand Down
23 changes: 23 additions & 0 deletions .github/workflows/common/rebuild-cache.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/bin/bash
set -e

# Test parallelism: take the CPU count SLURM reports for this node, or 24 when
# that variable is unset or empty. Values above 64 are clamped so concurrent
# launches do not overwhelm OpenMPI daemons or OS process limits.
NJOBS="${SLURM_CPUS_ON_NODE:-24}"
if [ "$NJOBS" -gt 64 ]; then
    NJOBS=64
fi

# Start from a clean tree. The self-hosted runner may still hold a cached GPU
# build (e.g. --gpu mp) whose CMake flags are incompatible with gcov.
./mfc.sh clean

# Bring retry_build() into this shell. It retries the build (3 attempts) to
# ride out NFS stale file handle errors.
. .github/scripts/retry-build.sh

# CPU-only gfortran build of MFC with gcov coverage instrumentation.
retry_build ./mfc.sh build --gcov -j 8

# Run the whole suite in parallel and record per-test coverage:
#   * every test writes to its own GCOV_PREFIX directory, so .gcda files
#     never collide;
#   * coverage is gathered per test once all tests have finished;
#   * --gcov must be repeated here so the internal build step keeps the
#     instrumentation.
./mfc.sh test --build-coverage-cache --gcov -j "$NJOBS"
8 changes: 7 additions & 1 deletion .github/workflows/common/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,10 @@ if [ -n "${job_shard:-}" ]; then
shard_opts="--shard $job_shard"
fi

./mfc.sh test -v --max-attempts 3 -a -j $n_test_threads $rdma_opts $device_opts $build_opts $shard_opts -- -c $job_cluster
# Only prune tests on PRs; master pushes must run the full suite.
prune_flag=""
if [ "${GITHUB_EVENT_NAME:-}" = "pull_request" ]; then
prune_flag="--only-changes"
fi

./mfc.sh test -v --max-attempts 3 $prune_flag -a -j $n_test_threads $rdma_opts $device_opts $build_opts $shard_opts -- -c $job_cluster
140 changes: 133 additions & 7 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,22 +56,118 @@ jobs:
file-changes:
name: Detect File Changes
runs-on: 'ubuntu-latest'
outputs:
outputs:
checkall: ${{ steps.changes.outputs.checkall }}
cases_py: ${{ steps.changes.outputs.cases_py }}
dep_changed: ${{ steps.dep-check.outputs.dep_changed }}
steps:
- name: Clone
uses: actions/checkout@v4

- name: Detect Changes
uses: dorny/paths-filter@v3
id: changes
with:
with:
filters: ".github/file-filter.yml"

- name: Check for Fortran dependency changes
  id: dep-check
  env:
    GH_TOKEN: ${{ github.token }}
    # Event data is passed through the environment rather than spliced into
    # the script text, so the shell only ever sees it as quoted variables.
    EVENT_NAME: ${{ github.event_name }}
    PR_NUMBER: ${{ github.event.pull_request.number }}
    BEFORE: ${{ github.event.before }}
    AFTER: ${{ github.event.after }}
  run: |
    # Detect added/removed use/include statements that change the
    # Fortran dependency graph, which would make the coverage cache stale.
    # Sets the step output dep_changed to "true" or "false". Any failure to
    # obtain the diff is reported as "true" (safe fallback).
    if [ "$EVENT_NAME" = "pull_request" ]; then
      # Default to dep_changed=true if gh pr diff fails (safe fallback).
      DIFF=$(gh pr diff "$PR_NUMBER" 2>/dev/null) || {
        echo "gh pr diff failed — defaulting to dep_changed=true for safety."
        echo "dep_changed=true" >> "$GITHUB_OUTPUT"
        exit 0
      }
    elif [ "$EVENT_NAME" = "push" ]; then
      # The Clone step is a default (depth 1) checkout, so the pre-push
      # commit is not available locally. Fetch both endpoints before
      # diffing; otherwise git diff always fails and every push falls
      # through to dep_changed=true.
      DIFF=$(git fetch --quiet --no-tags --depth=1 origin "$BEFORE" "$AFTER" 2>/dev/null \
        && git diff "$BEFORE".."$AFTER" 2>/dev/null) || {
        echo "git diff failed for push event — defaulting to dep_changed=true for safety."
        echo "dep_changed=true" >> "$GITHUB_OUTPUT"
        exit 0
      }
    else
      DIFF=""
    fi
    # -i: Fortran keywords are case-insensitive, so USE/INCLUDE must match too.
    if echo "$DIFF" | \
      grep -qiE '^[+-][[:space:]]*(use[[:space:],]+[a-zA-Z_]|#:include[[:space:]]|include[[:space:]]+['"'"'"])'; then
      echo "dep_changed=true" >> "$GITHUB_OUTPUT"
      echo "Fortran dependency change detected — will rebuild coverage cache."
    else
      echo "dep_changed=false" >> "$GITHUB_OUTPUT"
    fi

# Regenerates the gcov coverage cache on the phoenix runner group.
# Runs only in the MFlowCode/MFC repository, and only when either
#   * the event is pull_request or push and the file-changes job reported
#     cases_py == 'true' or dep_changed == 'true', or
#   * the workflow was started via workflow_dispatch.
# NOTE(review): the pull_request and push arms of the condition test the same
# two outputs and could be merged into a single clause.
rebuild-cache:
name: Rebuild Coverage Cache
needs: [lint-gate, file-changes]
if: >-
github.repository == 'MFlowCode/MFC' &&
(
(github.event_name == 'pull_request' &&
(needs.file-changes.outputs.cases_py == 'true' ||
needs.file-changes.outputs.dep_changed == 'true')) ||
(github.event_name == 'push' &&
(needs.file-changes.outputs.cases_py == 'true' ||
needs.file-changes.outputs.dep_changed == 'true')) ||
github.event_name == 'workflow_dispatch'
)
timeout-minutes: 240
runs-on:
group: phoenix
labels: gt
permissions:
contents: write # Required for Commit Cache to Master (push and workflow_dispatch on master)
steps:
# Check out the PR head commit for pull_request events, otherwise github.sha.
# clean: false skips checkout's workspace cleanup, so files left by an
# earlier run stay in place.
- name: Clone
uses: actions/checkout@v4
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
clean: false

# Hand rebuild-cache.sh to the SLURM submission wrapper.
# NOTE(review): the trailing arguments look like device=cpu, interface=none,
# cluster=phoenix — confirm against submit-slurm-job.sh.
- name: Rebuild Cache via SLURM
run: bash .github/scripts/submit-slurm-job.sh .github/workflows/common/rebuild-cache.sh cpu none phoenix

# Always dump the job log, even when the previous step failed.
# NOTE(review): the file name is presumably derived by submit-slurm-job.sh
# from the script name and its arguments — confirm.
- name: Print Logs
if: always()
run: cat rebuild-cache-cpu-none.out

# Pull requests only: publish the compressed cache as the `coverage-cache`
# artifact (kept for one day), which the test jobs in this workflow download.
- name: Upload Cache Artifact
if: github.event_name == 'pull_request'
uses: actions/upload-artifact@v4
with:
name: coverage-cache
path: toolchain/mfc/test/test_coverage_cache.json.gz
retention-days: 1

# push / workflow_dispatch on master only: commit the regenerated cache back
# to master. Nothing is committed when the staged file is unchanged. The
# commit message carries [skip ci] so the push does not start another run.
- name: Commit Cache to Master
if: (github.event_name == 'push' || github.event_name == 'workflow_dispatch') && github.ref == 'refs/heads/master'
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
git add toolchain/mfc/test/test_coverage_cache.json.gz
if git diff --cached --quiet; then
echo "Coverage cache unchanged."
else
git commit -m "Regenerate gcov coverage cache [skip ci]"
git push origin HEAD:refs/heads/master
fi

github:
name: Github
if: needs.file-changes.outputs.checkall == 'true'
needs: [lint-gate, file-changes]
needs: [lint-gate, file-changes, rebuild-cache]
if: >-
!cancelled() &&
needs.lint-gate.result == 'success' &&
needs.file-changes.result == 'success' &&
needs.rebuild-cache.result != 'cancelled' &&
needs.file-changes.outputs.checkall == 'true'
strategy:
matrix:
os: ['ubuntu', 'macos']
Expand All @@ -98,6 +194,20 @@ jobs:
- name: Clone
uses: actions/checkout@v4

- name: Fetch master for coverage diff
run: |
git fetch origin master:master --depth=1
git fetch --deepen=200
continue-on-error: true

- name: Download Coverage Cache
if: needs.rebuild-cache.result == 'success'
uses: actions/download-artifact@v4
with:
name: coverage-cache
path: toolchain/mfc/test
continue-on-error: true

- name: Setup MacOS
if: matrix.os == 'macos'
run: |
Expand Down Expand Up @@ -140,15 +250,23 @@ jobs:

- name: Test
run: |
/bin/bash mfc.sh test -v --max-attempts 3 -j $(nproc) $TEST_ALL $TEST_PCT
/bin/bash mfc.sh test -v --max-attempts 3 -j $(nproc) $ONLY_CHANGES $TEST_ALL $TEST_PCT
env:
TEST_ALL: ${{ matrix.mpi == 'mpi' && '--test-all' || '' }}
TEST_PCT: ${{ matrix.debug == 'debug' && '-% 20' || '' }}
ONLY_CHANGES: ${{ github.event_name == 'pull_request' && '--only-changes' || '' }}

self:
name: "${{ matrix.cluster_name }} (${{ matrix.device }}${{ matrix.interface != 'none' && format('-{0}', matrix.interface) || '' }}${{ matrix.shard != '' && format(' [{0}]', matrix.shard) || '' }})"
if: github.repository == 'MFlowCode/MFC' && needs.file-changes.outputs.checkall == 'true' && github.event.pull_request.draft != true
needs: [lint-gate, file-changes]
needs: [lint-gate, file-changes, rebuild-cache]
if: >-
!cancelled() &&
needs.lint-gate.result == 'success' &&
needs.file-changes.result == 'success' &&
needs.rebuild-cache.result != 'cancelled' &&
github.repository == 'MFlowCode/MFC' &&
needs.file-changes.outputs.checkall == 'true' &&
github.event.pull_request.draft != true
# Frontier CCE compiler is periodically broken by toolchain updates (e.g.
# cpe/25.03 introduced an IPA SIGSEGV in CCE 19.0.0). Allow Frontier to
# fail without blocking PR merges; Phoenix remains a hard gate.
Expand Down Expand Up @@ -234,6 +352,14 @@ jobs:
# submit-slurm-job.sh can detect and cancel stale SLURM jobs on retry.
clean: false

- name: Download Coverage Cache
if: needs.rebuild-cache.result == 'success'
uses: actions/download-artifact@v4
with:
name: coverage-cache
path: toolchain/mfc/test
continue-on-error: true

- name: Build (login node)
if: matrix.cluster != 'phoenix'
timeout-minutes: 60
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ __pycache__
# Auto-generated version file
toolchain/mfc/_version.py

# Raw coverage cache — legacy, not tracked (the .json.gz version IS committed)
toolchain/mfc/test/test_coverage_cache.json

# Auto-generated toolchain files (regenerate with: ./mfc.sh generate)
toolchain/completions/mfc.bash
toolchain/completions/_mfc
Expand Down
21 changes: 17 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ endif()
# debug builds. These include optimization and debug flags, as well as some that
# are required for a successful build of MFC.

set(FYPP_GCOV_OPTS "")

if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
add_compile_options(
$<$<COMPILE_LANGUAGE:Fortran>:-ffree-line-length-none>
Expand All @@ -131,13 +133,20 @@ if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
add_compile_options(
$<$<COMPILE_LANGUAGE:Fortran>:-fprofile-arcs>
$<$<COMPILE_LANGUAGE:Fortran>:-ftest-coverage>
$<$<COMPILE_LANGUAGE:Fortran>:-O1>
)
)

add_link_options(
$<$<COMPILE_LANGUAGE:Fortran>:-lgcov>
$<$<COMPILE_LANGUAGE:Fortran>:--coverage>
)

# Override Release -O3 with -O1 for gcov: coverage instrumentation is
# inaccurate at -O3, and aggressive codegen (e.g. AVX-512 FP16 on
# Granite Rapids) can emit instructions that older assemblers reject.
set(CMAKE_Fortran_FLAGS_RELEASE "-O1 -DNDEBUG" CACHE STRING "" FORCE)

# Use gfortran5 line markers so gcov can map coverage to .fpp sources.
set(FYPP_GCOV_OPTS "--line-marker-format=gfortran5")
endif()

if (CMAKE_BUILD_TYPE STREQUAL "Debug")
Expand Down Expand Up @@ -245,8 +254,11 @@ if (CMAKE_BUILD_TYPE STREQUAL "Release")
endif()
endif()

# Enable LTO/IPO if supported
if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC")
# Enable LTO/IPO if supported (skip for gcov — LTO interferes with coverage
# instrumentation and can trigger assembler errors on newer architectures).
if (MFC_GCov)
message(STATUS "LTO/IPO disabled for gcov build")
elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC")
if (MFC_Unified)
message(STATUS "LTO/IPO is not available with NVHPC using Unified Memory")
elseif (CMAKE_Fortran_COMPILER_VERSION VERSION_GREATER "24.11" AND CMAKE_Fortran_COMPILER_VERSION VERSION_LESS "25.9")
Expand Down Expand Up @@ -392,6 +404,7 @@ macro(HANDLE_SOURCES target useCommon)
--no-folding
--line-length=999
--line-numbering-mode=nocontlines
${FYPP_GCOV_OPTS}
"${fpp}" "${f90}"
DEPENDS "${fpp};${${target}_incs}"
COMMENT "Preprocessing (Fypp) ${fpp_filename}"
Expand Down
25 changes: 25 additions & 0 deletions toolchain/mfc/cli/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,27 @@
type=str,
default=None,
),
Argument(
name="build-coverage-cache",
help="Run all tests with gcov instrumentation to build the file-level coverage cache. Pass --gcov to enable coverage instrumentation in the internal build step.",
action=ArgAction.STORE_TRUE,
default=False,
dest="build_coverage_cache",
),
Argument(
name="only-changes",
help="Only run tests whose covered files overlap with files changed since branching from master (uses file-level gcov coverage cache).",
action=ArgAction.STORE_TRUE,
default=False,
dest="only_changes",
),
Argument(
name="changes-branch",
help="Branch to compare against for --only-changes (default: master).",
type=str,
default="master",
dest="changes_branch",
),
],
mutually_exclusive=[
MutuallyExclusiveGroup(arguments=[
Expand Down Expand Up @@ -488,13 +509,17 @@
Example("./mfc.sh test -j 4", "Run with 4 parallel jobs"),
Example("./mfc.sh test --only 3D", "Run only 3D tests"),
Example("./mfc.sh test --generate", "Regenerate golden files"),
Example("./mfc.sh test --only-changes -j 4", "Run tests affected by changed files"),
Example("./mfc.sh build --gcov -j 8 && ./mfc.sh test --build-coverage-cache", "One-time: build file-coverage cache"),
],
key_options=[
("-j, --jobs N", "Number of parallel test jobs"),
("-o, --only PROP", "Run tests matching property"),
("-f, --from UUID", "Start from specific test"),
("--generate", "Generate/update golden files"),
("--no-build", "Skip rebuilding MFC"),
("--build-coverage-cache", "Build file-level gcov coverage cache (one-time)"),
("--only-changes", "Run tests affected by changed files (requires cache)"),
],
)

Expand Down
Loading
Loading