diff -u linux-2.6.18.ovz/include/linux/mm.h linux-2.6.18.ovz/include/linux/mm.h --- linux-2.6.18.ovz/include/linux/mm.h 2016-01-25 00:11:40.595109950 -0800 +++ linux-2.6.18.ovz/include/linux/mm.h 2016-10-26 01:59:40.000394819 -0700 @@ -1200,6 +1200,8 @@ #define FOLL_TOUCH 0x02 /* mark page accessed */ #define FOLL_GET 0x04 /* do get_page on page */ #define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */ +#define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ +#define FOLL_COW 0x800 /* internal GUP flag */ #ifdef CONFIG_XEN typedef int (*pte_fn_t)(pte_t *pte, struct page *pmd_page, unsigned long addr, diff -u linux-2.6.18.ovz/mm/memory.c linux-2.6.18.ovz/mm/memory.c --- linux-2.6.18.ovz/mm/memory.c 2016-01-25 00:11:40.702109950 -0800 +++ linux-2.6.18.ovz/mm/memory.c 2016-10-26 01:59:40.169394819 -0700 @@ -1131,6 +1131,22 @@ return NULL; } +static inline bool can_follow_write_pte(pte_t pte, struct page *page, + unsigned int flags) +{ + if (pte_write(pte)) + return true; + + /* + * Make sure that we are really following CoWed page. We do not really + * have to care about exclusiveness of the page because we only want + * to ensure that once COWed page hasn't disappeared in the meantime. + */ + if ((flags & FOLL_FORCE) && (flags & FOLL_COW)) + return page && PageAnon(page); + + return false; +} /* * Do a quick page-table lookup for a single page. @@ -1180,10 +1196,14 @@ if (!pte_present(pte)) goto no_page; - if ((flags & FOLL_WRITE) && !pte_write(pte)) - goto unlock; + page = vm_normal_page(vma, address, pte); if (unlikely(!page)) goto bad_page; + if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, page, flags)) { + page = NULL; + goto unlock; + } + if (flags & FOLL_GET) get_page(page); @@ -1350,6 +1370,8 @@ foll_flags |= FOLL_GET; if (!write && use_zero_page(vma)) foll_flags |= FOLL_ANON; + if (force) + foll_flags |= FOLL_FORCE; do { struct page *page; @@ -1365,11 +1387,12 @@ /* * The VM_FAULT_WRITE bit tells us that do_wp_page has * broken COW when necessary, even if maybe_mkwrite - * decided not to set pte_write. We can thus safely do - * subsequent page lookups as if they were reads. + * decided not to set pte_write. We cannot simply drop + * FOLL_WRITE here because the COWed page might be gone by + * the time we do the subsequent page lookups. */ if (ret & VM_FAULT_WRITE) - foll_flags &= ~FOLL_WRITE; + foll_flags |= FOLL_COW; switch (ret & ~VM_FAULT_WRITE) { case VM_FAULT_MINOR: @@ -1401,6 +1424,7 @@ i++; start += PAGE_SIZE; len--; + foll_flags &= ~FOLL_COW; } while (len && start < vma->vm_end); } while (len); return i;