
Commit 96b96a9

mjkravetz authored and torvalds committed
mm/hugetlb: fix huge page reservation leak in private mapping error paths
Error paths in hugetlb_cow() and hugetlb_no_page() may free a newly allocated huge page.

If a reservation was associated with the huge page, alloc_huge_page() consumed the reservation while allocating. When the newly allocated page is freed in free_huge_page(), it will increment the global reservation count. However, the reservation entry in the reserve map will remain. This is not an issue for shared mappings, as an entry in the reserve map indicates a reservation exists. But an entry in a private mapping reserve map indicates the reservation was consumed and no longer exists. This results in an inconsistency between the reserve map and the global reservation count. This 'leaks' a reserved huge page.

Create a new routine restore_reserve_on_error() to restore the reserve entry in these specific error paths. This routine makes use of a new function vma_add_reservation() which will add a reserve entry for a specific address/page.

In general, these error paths were rarely (if ever) taken on most architectures. However, powerpc contained arch-specific code that resulted in an extra fault and execution of these error paths on all private mappings.

Fixes: 67961f9 ("mm/hugetlb: fix huge page reserve accounting for private mappings")
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Mike Kravetz <[email protected]>
Reported-by: Jan Stancek <[email protected]>
Tested-by: Jan Stancek <[email protected]>
Reviewed-by: Aneesh Kumar K.V <[email protected]>
Acked-by: Hillf Danton <[email protected]>
Cc: Naoya Horiguchi <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Kirill A. Shutemov <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
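To make the leak concrete, here is a minimal, self-contained userspace C sketch of the bookkeeping described above. It is illustrative only, not kernel code: resv_count, map_entry_present, model_alloc() and model_free_on_error() are made-up stand-ins for the hstate's global reservation count, a private-mapping reserve map entry, alloc_huge_page(), and the error-path free of the newly allocated page.

/*
 * Simplified model of the private-mapping accounting described above.
 * All names here are illustrative stand-ins, not kernel data structures.
 */
#include <stdbool.h>
#include <stdio.h>

static long resv_count = 1;        /* one huge page reserved for the VMA  */
static bool map_entry_present;     /* no entry yet: reservation not used  */

/* Models alloc_huge_page() consuming the reservation (private mapping). */
static void model_alloc(void)
{
        resv_count--;              /* global count: reservation used up   */
        map_entry_present = true;  /* private map entry means "consumed"  */
}

/* Models freeing the page on an error path; 'fixed' adds the behaviour
 * of restore_reserve_on_error() from this commit. */
static void model_free_on_error(bool fixed)
{
        resv_count++;              /* PagePrivate set: count given back   */
        if (fixed)
                map_entry_present = false;  /* reserve map kept in sync   */
}

int main(void)
{
        model_alloc();
        model_free_on_error(false);
        printf("before fix: count=%ld entry=%d -> count says a reserve exists, "
               "map says it was consumed (leaked)\n",
               resv_count, (int)map_entry_present);

        resv_count = 1;
        map_entry_present = false;
        model_alloc();
        model_free_on_error(true);
        printf("after fix:  count=%ld entry=%d -> consistent, page can be "
               "faulted in later\n", resv_count, (int)map_entry_present);
        return 0;
}

In the kernel, this consistency is what restore_reserve_on_error() provides: for a private mapping it reaches region_del() via vma_add_reservation(), removing the stale "consumed" entry before free_huge_page() gives the reservation back to the global count.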
Parent: d006c71 · Commit: 96b96a9

1 file changed: +66 −0 lines


mm/hugetlb.c

Lines changed: 66 additions & 0 deletions
@@ -1826,11 +1826,17 @@ static void return_unused_surplus_pages(struct hstate *h,
  * is not the case is if a reserve map was changed between calls. It
  * is the responsibility of the caller to notice the difference and
  * take appropriate action.
+ *
+ * vma_add_reservation is used in error paths where a reservation must
+ * be restored when a newly allocated huge page must be freed. It is
+ * to be called after calling vma_needs_reservation to determine if a
+ * reservation exists.
  */
 enum vma_resv_mode {
         VMA_NEEDS_RESV,
         VMA_COMMIT_RESV,
         VMA_END_RESV,
+        VMA_ADD_RESV,
 };
 static long __vma_reservation_common(struct hstate *h,
                                 struct vm_area_struct *vma, unsigned long addr,
@@ -1856,6 +1862,14 @@ static long __vma_reservation_common(struct hstate *h,
                 region_abort(resv, idx, idx + 1);
                 ret = 0;
                 break;
+        case VMA_ADD_RESV:
+                if (vma->vm_flags & VM_MAYSHARE)
+                        ret = region_add(resv, idx, idx + 1);
+                else {
+                        region_abort(resv, idx, idx + 1);
+                        ret = region_del(resv, idx, idx + 1);
+                }
+                break;
         default:
                 BUG();
         }
@@ -1903,6 +1917,56 @@ static void vma_end_reservation(struct hstate *h,
         (void)__vma_reservation_common(h, vma, addr, VMA_END_RESV);
 }
 
+static long vma_add_reservation(struct hstate *h,
+                        struct vm_area_struct *vma, unsigned long addr)
+{
+        return __vma_reservation_common(h, vma, addr, VMA_ADD_RESV);
+}
+
+/*
+ * This routine is called to restore a reservation on error paths. In the
+ * specific error paths, a huge page was allocated (via alloc_huge_page)
+ * and is about to be freed. If a reservation for the page existed,
+ * alloc_huge_page would have consumed the reservation and set PagePrivate
+ * in the newly allocated page. When the page is freed via free_huge_page,
+ * the global reservation count will be incremented if PagePrivate is set.
+ * However, free_huge_page can not adjust the reserve map. Adjust the
+ * reserve map here to be consistent with global reserve count adjustments
+ * to be made by free_huge_page.
+ */
+static void restore_reserve_on_error(struct hstate *h,
+                        struct vm_area_struct *vma, unsigned long address,
+                        struct page *page)
+{
+        if (unlikely(PagePrivate(page))) {
+                long rc = vma_needs_reservation(h, vma, address);
+
+                if (unlikely(rc < 0)) {
+                        /*
+                         * Rare out of memory condition in reserve map
+                         * manipulation. Clear PagePrivate so that
+                         * global reserve count will not be incremented
+                         * by free_huge_page. This will make it appear
+                         * as though the reservation for this page was
+                         * consumed. This may prevent the task from
+                         * faulting in the page at a later time. This
+                         * is better than inconsistent global huge page
+                         * accounting of reserve counts.
+                         */
+                        ClearPagePrivate(page);
+                } else if (rc) {
+                        rc = vma_add_reservation(h, vma, address);
+                        if (unlikely(rc < 0))
+                                /*
+                                 * See above comment about rare out of
+                                 * memory condition.
+                                 */
+                                ClearPagePrivate(page);
+                } else
+                        vma_end_reservation(h, vma, address);
+        }
+}
+
 struct page *alloc_huge_page(struct vm_area_struct *vma,
                                     unsigned long addr, int avoid_reserve)
 {
@@ -3498,6 +3562,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
         spin_unlock(ptl);
         mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 out_release_all:
+        restore_reserve_on_error(h, vma, address, new_page);
         put_page(new_page);
 out_release_old:
         put_page(old_page);
@@ -3680,6 +3745,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
         spin_unlock(ptl);
 backout_unlocked:
         unlock_page(page);
+        restore_reserve_on_error(h, vma, address, page);
         put_page(page);
         goto out;
 }
