Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/file-filter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,6 @@ checkall: &checkall
- *tests
- *scripts
- *yml

cases_py:
- 'toolchain/mfc/test/cases.py'
10 changes: 8 additions & 2 deletions .github/workflows/frontier/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,18 @@ if [ -n "$job_shard" ]; then
shard_opts="--shard $job_shard"
fi

# Only prune tests on PRs; master pushes must run the full suite.
prune_flag=""
if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
prune_flag="--only-changes"
fi

if [ "$job_device" = "gpu" ]; then
rdma_opts=""
if [ "$job_cluster" = "frontier" ]; then
rdma_opts="--rdma-mpi"
fi
./mfc.sh test -v -a $rdma_opts --max-attempts 3 -j $ngpus $device_opts $shard_opts -- -c $job_cluster
./mfc.sh test -v -a $rdma_opts --max-attempts 3 $prune_flag -j $ngpus $device_opts $shard_opts -- -c $job_cluster
else
./mfc.sh test -v -a --max-attempts 3 -j 32 --no-gpu $shard_opts -- -c $job_cluster
./mfc.sh test -v -a --max-attempts 3 $prune_flag -j 32 --no-gpu $shard_opts -- -c $job_cluster
fi
10 changes: 8 additions & 2 deletions .github/workflows/frontier_amd/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,18 @@ if [ -n "$job_shard" ]; then
shard_opts="--shard $job_shard"
fi

# Only prune tests on PRs; master pushes must run the full suite.
prune_flag=""
if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
prune_flag="--only-changes"
fi

if [ "$job_device" = "gpu" ]; then
rdma_opts=""
if [ "$job_cluster" = "frontier" ]; then
rdma_opts="--rdma-mpi"
fi
./mfc.sh test -v -a $rdma_opts --max-attempts 3 -j $ngpus $device_opts $shard_opts -- -c $job_cluster
./mfc.sh test -v -a $rdma_opts --max-attempts 3 $prune_flag -j $ngpus $device_opts $shard_opts -- -c $job_cluster
else
./mfc.sh test -v -a --max-attempts 3 -j 32 --no-gpu $shard_opts -- -c $job_cluster
./mfc.sh test -v -a --max-attempts 3 $prune_flag -j 32 --no-gpu $shard_opts -- -c $job_cluster
fi
21 changes: 21 additions & 0 deletions .github/workflows/phoenix/rebuild-cache.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/bash
set -e

# Rebuild the per-test gcov coverage cache.
#
# Steps: clean the tree, build with coverage instrumentation, then run the
# test suite with --build-coverage-cache so coverage is recorded per test.

# Upper bound on concurrent test jobs. Too many simultaneous launches can
# overwhelm MPI's ORTE daemons, so the job count is never allowed above this.
job_cap=64

# Take the job count from the SLURM allocation when available; fall back to
# 24 when SLURM_CPUS_ON_NODE is unset or empty.
NJOBS="${SLURM_CPUS_ON_NODE:-24}"
if [ "$NJOBS" -gt "$job_cap" ]; then
    NJOBS="$job_cap"
fi

# Start from a clean tree: the self-hosted runner may still hold a cached
# GPU build (e.g. --gpu mp) whose CMake flags are incompatible with gcov.
./mfc.sh clean

# Build MFC with gcov coverage instrumentation (CPU-only, gfortran).
# Compilation is memory-heavy and gains little from extra cores, so it is
# limited to 8 jobs regardless of NJOBS.
./mfc.sh build --gcov -j 8

# Run the whole test suite in parallel and record coverage per test.
# Each test gets an isolated GCOV_PREFIX directory so .gcda files do not
# collide; coverage is collected per test once all tests have finished.
# --gcov must be passed again here so that the internal build step
# preserves the instrumentation.
./mfc.sh test --build-coverage-cache --gcov -j "$NJOBS"
6 changes: 3 additions & 3 deletions .github/workflows/phoenix/submit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ case "$script_basename" in
esac

sbatch_cpu_opts="\
#SBATCH -p cpu-small # partition
#SBATCH --ntasks-per-node=24 # Number of cores per node required
#SBATCH --mem-per-cpu=2G # Memory per core\
#SBATCH -p cpu-gnr # partition (full Granite Rapids node)
#SBATCH --exclusive # exclusive access to all cores
#SBATCH -C graniterapids # constrain to GNR architecture\
"

if [ "$job_type" = "bench" ]; then
Expand Down
12 changes: 10 additions & 2 deletions .github/workflows/phoenix/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,9 @@ while [ $attempt -le $max_attempts ]; do
attempt=$((attempt + 1))
done

n_test_threads=8
# Use up to 64 parallel test threads on CPU (GNR nodes have 192 cores).
# Cap at 64 to avoid overwhelming MPI's ORTE daemons with concurrent launches.
n_test_threads=$(( SLURM_CPUS_ON_NODE > 64 ? 64 : ${SLURM_CPUS_ON_NODE:-8} ))

if [ "$job_device" = "gpu" ]; then
gpu_count=$(nvidia-smi -L | wc -l) # number of GPUs on node
Expand All @@ -60,4 +62,10 @@ if [ "$job_device" = "gpu" ]; then
n_test_threads=`expr $gpu_count \* 2`
fi

./mfc.sh test -v --max-attempts 3 -a -j $n_test_threads $device_opts -- -c phoenix
# Only prune tests on PRs; master pushes must run the full suite.
prune_flag=""
if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
prune_flag="--only-changes"
fi

./mfc.sh test -v --max-attempts 3 $prune_flag -a -j $n_test_threads $device_opts -- -c phoenix
130 changes: 122 additions & 8 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,22 +56,106 @@ jobs:
file-changes:
name: Detect File Changes
runs-on: 'ubuntu-latest'
outputs:
outputs:
checkall: ${{ steps.changes.outputs.checkall }}
cases_py: ${{ steps.changes.outputs.cases_py }}
dep_changed: ${{ steps.dep-check.outputs.dep_changed }}
steps:
- name: Clone
uses: actions/checkout@v4

- name: Detect Changes
uses: dorny/paths-filter@v3
id: changes
with:
with:
filters: ".github/file-filter.yml"

- name: Check for Fortran dependency changes
id: dep-check
env:
GH_TOKEN: ${{ github.token }}
run: |
# Detect added/removed use/include statements that change the
# Fortran dependency graph, which would make the coverage cache stale.
if [ "${{ github.event_name }}" = "pull_request" ]; then
DIFF=$(gh pr diff ${{ github.event.pull_request.number }})
elif [ "${{ github.event_name }}" = "push" ]; then
DIFF=$(git diff ${{ github.event.before }}..${{ github.event.after }} 2>/dev/null || echo "")
else
DIFF=""
fi
if echo "$DIFF" | \
grep -qP '^\+\s*(use[\s,]+\w|#:include\s|include\s+['"'"'"])'; then
echo "dep_changed=true" >> "$GITHUB_OUTPUT"
echo "Fortran dependency change detected — will rebuild coverage cache."
else
echo "dep_changed=false" >> "$GITHUB_OUTPUT"
fi

rebuild-cache:
name: Rebuild Coverage Cache
needs: [lint-gate, file-changes]
if: >-
github.repository == 'MFlowCode/MFC' &&
(
(github.event_name == 'pull_request' &&
(needs.file-changes.outputs.cases_py == 'true' ||
needs.file-changes.outputs.dep_changed == 'true')) ||
(github.event_name == 'push' &&
(needs.file-changes.outputs.cases_py == 'true' ||
needs.file-changes.outputs.dep_changed == 'true')) ||
github.event_name == 'workflow_dispatch'
)
timeout-minutes: 240
runs-on:
group: phoenix
labels: gt
permissions:
contents: write
steps:
- name: Clone
uses: actions/checkout@v4
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
clean: false

- name: Rebuild Cache via SLURM
run: bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/rebuild-cache.sh cpu none

- name: Print Logs
if: always()
run: cat rebuild-cache-cpu-none.out

- name: Upload Cache Artifact
if: github.event_name == 'pull_request'
uses: actions/upload-artifact@v4
with:
name: coverage-cache
path: toolchain/mfc/test/test_coverage_cache.json.gz
retention-days: 1

- name: Commit Cache to Master
if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
git add toolchain/mfc/test/test_coverage_cache.json.gz
if git diff --cached --quiet; then
echo "Coverage cache unchanged."
else
git commit -m "Regenerate gcov coverage cache [skip ci]"
git push
fi

github:
name: Github
if: needs.file-changes.outputs.checkall == 'true'
needs: [lint-gate, file-changes]
needs: [lint-gate, file-changes, rebuild-cache]
if: >-
always() &&
needs.lint-gate.result == 'success' &&
needs.file-changes.result == 'success' &&
needs.rebuild-cache.result != 'cancelled' &&
needs.file-changes.outputs.checkall == 'true'
strategy:
matrix:
os: ['ubuntu', 'macos']
Expand All @@ -91,13 +175,26 @@ jobs:
intel: false

fail-fast: false
continue-on-error: true
runs-on: ${{ matrix.os }}-latest

steps:
- name: Clone
uses: actions/checkout@v4

- name: Fetch master for coverage diff
run: |
git fetch origin master:master --depth=1
git fetch --deepen=200
continue-on-error: true

- name: Download Coverage Cache
if: needs.rebuild-cache.result == 'success'
uses: actions/download-artifact@v4
with:
name: coverage-cache
path: toolchain/mfc/test
continue-on-error: true

- name: Setup MacOS
if: matrix.os == 'macos'
run: |
Expand Down Expand Up @@ -159,7 +256,7 @@ jobs:
run: |
rm -f tests/failed_uuids.txt
TEST_EXIT=0
/bin/bash mfc.sh test -v --max-attempts 3 -j "$(nproc)" $TEST_ALL $TEST_PCT || TEST_EXIT=$?
/bin/bash mfc.sh test -v --max-attempts 3 -j "$(nproc)" $ONLY_CHANGES $TEST_ALL $TEST_PCT || TEST_EXIT=$?

# Retry only if a small number of tests failed (sporadic failures)
if [ -s tests/failed_uuids.txt ]; then
Expand All @@ -180,11 +277,19 @@ jobs:
env:
TEST_ALL: ${{ matrix.mpi == 'mpi' && '--test-all' || '' }}
TEST_PCT: ${{ matrix.debug == 'debug' && '-% 20' || '' }}
ONLY_CHANGES: ${{ github.event_name == 'pull_request' && '--only-changes' || '' }}

self:
name: "${{ matrix.cluster_name }} (${{ matrix.device }}${{ matrix.interface != 'none' && format('-{0}', matrix.interface) || '' }}${{ matrix.shard != '' && format(' [{0}]', matrix.shard) || '' }})"
if: github.repository == 'MFlowCode/MFC' && needs.file-changes.outputs.checkall == 'true' && github.event.pull_request.draft != true
needs: [lint-gate, file-changes]
needs: [lint-gate, file-changes, rebuild-cache]
if: >-
always() &&
needs.lint-gate.result == 'success' &&
needs.file-changes.result == 'success' &&
needs.rebuild-cache.result != 'cancelled' &&
github.repository == 'MFlowCode/MFC' &&
needs.file-changes.outputs.checkall == 'true' &&
github.event.pull_request.draft != true
continue-on-error: false
timeout-minutes: 480
strategy:
Expand Down Expand Up @@ -265,6 +370,14 @@ jobs:
with:
clean: false

- name: Download Coverage Cache
if: needs.rebuild-cache.result == 'success'
uses: actions/download-artifact@v4
with:
name: coverage-cache
path: toolchain/mfc/test
continue-on-error: true

- name: Build
if: matrix.cluster != 'phoenix'
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # v3
Expand Down Expand Up @@ -299,3 +412,4 @@ jobs:
with:
name: logs-${{ strategy.job-index }}-${{ steps.log.outputs.slug }}
path: ${{ steps.log.outputs.slug }}.out

3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ __pycache__
# Auto-generated version file
toolchain/mfc/_version.py

# Raw coverage cache — legacy, not tracked (the .json.gz version IS committed)
toolchain/mfc/test/test_coverage_cache.json

# Auto-generated toolchain files (regenerate with: ./mfc.sh generate)
toolchain/completions/mfc.bash
toolchain/completions/_mfc
Expand Down
37 changes: 26 additions & 11 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -131,13 +131,20 @@ if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
add_compile_options(
$<$<COMPILE_LANGUAGE:Fortran>:-fprofile-arcs>
$<$<COMPILE_LANGUAGE:Fortran>:-ftest-coverage>
$<$<COMPILE_LANGUAGE:Fortran>:-O1>
)
)

add_link_options(
$<$<COMPILE_LANGUAGE:Fortran>:-lgcov>
$<$<COMPILE_LANGUAGE:Fortran>:--coverage>
)

# Override Release -O3 with -O1 for gcov: coverage instrumentation is
# inaccurate at -O3, and aggressive codegen (e.g. AVX-512 FP16 on
# Granite Rapids) can emit instructions that older assemblers reject.
set(CMAKE_Fortran_FLAGS_RELEASE "-O1 -DNDEBUG" CACHE STRING "" FORCE)

# Use gfortran5 line markers so gcov can map coverage to .fpp sources.
set(FYPP_GCOV_OPTS "--line-marker-format=gfortran5")
endif()

if (CMAKE_BUILD_TYPE STREQUAL "Debug")
Expand Down Expand Up @@ -224,18 +231,25 @@ endif()

if (CMAKE_BUILD_TYPE STREQUAL "Release")
# Processor tuning: Check if we can target the host's native CPU's ISA.
CHECK_FORTRAN_COMPILER_FLAG("-march=native" SUPPORTS_MARCH_NATIVE)
if (SUPPORTS_MARCH_NATIVE)
add_compile_options($<$<COMPILE_LANGUAGE:Fortran>:-march=native>)
else()
CHECK_FORTRAN_COMPILER_FLAG("-mcpu=native" SUPPORTS_MCPU_NATIVE)
if (SUPPORTS_MCPU_NATIVE)
add_compile_options($<$<COMPILE_LANGUAGE:Fortran>:-mcpu=native>)
# Skip for gcov builds — -march=native on newer CPUs (e.g. Granite Rapids)
# can emit instructions the system assembler doesn't support.
if (NOT MFC_GCov)
CHECK_FORTRAN_COMPILER_FLAG("-march=native" SUPPORTS_MARCH_NATIVE)
if (SUPPORTS_MARCH_NATIVE)
add_compile_options($<$<COMPILE_LANGUAGE:Fortran>:-march=native>)
else()
CHECK_FORTRAN_COMPILER_FLAG("-mcpu=native" SUPPORTS_MCPU_NATIVE)
if (SUPPORTS_MCPU_NATIVE)
add_compile_options($<$<COMPILE_LANGUAGE:Fortran>:-mcpu=native>)
endif()
endif()
endif()

# Enable LTO/IPO if supported
if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC")
# Enable LTO/IPO if supported (skip for gcov — LTO interferes with coverage
# instrumentation and can trigger assembler errors on newer architectures).
if (MFC_GCov)
message(STATUS "LTO/IPO disabled for gcov build")
elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC")
if (MFC_Unified)
message(STATUS "LTO/IPO is not available with NVHPC using Unified Memory")
elseif (CMAKE_Fortran_COMPILER_VERSION VERSION_GREATER "24.11" AND CMAKE_Fortran_COMPILER_VERSION VERSION_LESS "25.9")
Expand Down Expand Up @@ -381,6 +395,7 @@ macro(HANDLE_SOURCES target useCommon)
--no-folding
--line-length=999
--line-numbering-mode=nocontlines
${FYPP_GCOV_OPTS}
"${fpp}" "${f90}"
DEPENDS "${fpp};${${target}_incs}"
COMMENT "Preprocessing (Fypp) ${fpp_filename}"
Expand Down
1 change: 1 addition & 0 deletions src/simulation/m_bubbles.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ module m_bubbles
use m_variables_conversion !< State variables type conversion procedures

use m_helper_basic !< Functions to compare floating point numbers
use m_helper_basic

implicit none

Expand Down
Loading
Loading