Discussion:
[PATCH 0/9] SMB2.1 multicredit requests for writing
Pavel Shilovsky
2014-06-23 14:58:28 UTC
Permalink
The goal of this patchset is to add large MTU/multicredits support for writing from the CIFS client to servers that support SMB 2.1 and higher versions of the protocol.

First three patches simplify the writepages code. Patches #4 and #5 both fix the problem in writepages when a server changes wsize value after reconnects from the client. Patches #6 and #7 simplify the iovec write code, #8 fixes the wsize changing problem there. Finally, patch #9 introduces multicredits support for writing.

Pavel Shilovsky (9):
CIFS: Separate page processing from writepages
CIFS: Separate page sending from writepages
CIFS: Separate pages initialization from writepages
CIFS: Fix wsize usage in writepages
CIFS: Fix cifs_writev_requeue when wsize changes
CIFS: Separate filling pages from iovec write
CIFS: Separate writing from iovec write
CIFS: Fix wsize usage in iovec write
CIFS: Use multicredits for writes

fs/cifs/cifsglob.h | 17 ++
fs/cifs/cifsproto.h | 3 +
fs/cifs/cifssmb.c | 84 ++++++--
fs/cifs/file.c | 540 ++++++++++++++++++++++++++++-------------------
fs/cifs/smb1ops.c | 8 +
fs/cifs/smb2ops.c | 62 +++++-
fs/cifs/smb2pdu.c | 24 ++-
fs/cifs/smb2transport.c | 5 +
fs/cifs/transport.c | 25 ++-
9 files changed, 517 insertions(+), 251 deletions(-)
--
1.7.10.4
Pavel Shilovsky
2014-06-23 14:58:29 UTC
Permalink
Signed-off-by: Pavel Shilovsky <pshilovsky-***@public.gmane.org>
---
fs/cifs/file.c | 154 ++++++++++++++++++++++++++++++--------------------------
1 file changed, 83 insertions(+), 71 deletions(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 208f56e..216959e 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1878,6 +1878,86 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
return rc;
}

+static unsigned int
+wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
+ struct address_space *mapping,
+ struct writeback_control *wbc,
+ pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
+{
+ unsigned int nr_pages = 0, i;
+ struct page *page;
+
+ for (i = 0; i < found_pages; i++) {
+ page = wdata->pages[i];
+ /*
+ * At this point we hold neither mapping->tree_lock nor
+ * lock on the page itself: the page may be truncated or
+ * invalidated (changing page->mapping to NULL), or even
+ * swizzled back from swapper_space to tmpfs file
+ * mapping
+ */
+
+ if (nr_pages == 0)
+ lock_page(page);
+ else if (!trylock_page(page))
+ break;
+
+ if (unlikely(page->mapping != mapping)) {
+ unlock_page(page);
+ break;
+ }
+
+ if (!wbc->range_cyclic && page->index > end) {
+ *done = true;
+ unlock_page(page);
+ break;
+ }
+
+ if (*next && (page->index != *next)) {
+ /* Not next consecutive page */
+ unlock_page(page);
+ break;
+ }
+
+ if (wbc->sync_mode != WB_SYNC_NONE)
+ wait_on_page_writeback(page);
+
+ if (PageWriteback(page) ||
+ !clear_page_dirty_for_io(page)) {
+ unlock_page(page);
+ break;
+ }
+
+ /*
+ * This actually clears the dirty bit in the radix tree.
+ * See cifs_writepage() for more commentary.
+ */
+ set_page_writeback(page);
+ if (page_offset(page) >= i_size_read(mapping->host)) {
+ *done = true;
+ unlock_page(page);
+ end_page_writeback(page);
+ break;
+ }
+
+ wdata->pages[i] = page;
+ *next = page->index + 1;
+ ++nr_pages;
+ }
+
+ /* reset index to refind any pages skipped */
+ if (nr_pages == 0)
+ *index = wdata->pages[0]->index + 1;
+
+ /* put any pages we aren't going to use */
+ for (i = nr_pages; i < found_pages; i++) {
+ page_cache_release(wdata->pages[i]);
+ wdata->pages[i] = NULL;
+ }
+
+ return nr_pages;
+}
+
static int cifs_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
@@ -1886,7 +1966,6 @@ static int cifs_writepages(struct address_space *mapping,
pgoff_t end, index;
struct cifs_writedata *wdata;
struct TCP_Server_Info *server;
- struct page *page;
int rc = 0;

/*
@@ -1908,7 +1987,7 @@ static int cifs_writepages(struct address_space *mapping,
}
retry:
while (!done && index <= end) {
- unsigned int i, nr_pages, found_pages;
+ unsigned int nr_pages, found_pages;
pgoff_t next = 0, tofind;
struct page **pages;

@@ -1944,75 +2023,8 @@ retry:
break;
}

- nr_pages = 0;
- for (i = 0; i < found_pages; i++) {
- page = wdata->pages[i];
- /*
- * At this point we hold neither mapping->tree_lock nor
- * lock on the page itself: the page may be truncated or
- * invalidated (changing page->mapping to NULL), or even
- * swizzled back from swapper_space to tmpfs file
- * mapping
- */
-
- if (nr_pages == 0)
- lock_page(page);
- else if (!trylock_page(page))
- break;
-
- if (unlikely(page->mapping != mapping)) {
- unlock_page(page);
- break;
- }
-
- if (!wbc->range_cyclic && page->index > end) {
- done = true;
- unlock_page(page);
- break;
- }
-
- if (next && (page->index != next)) {
- /* Not next consecutive page */
- unlock_page(page);
- break;
- }
-
- if (wbc->sync_mode != WB_SYNC_NONE)
- wait_on_page_writeback(page);
-
- if (PageWriteback(page) ||
- !clear_page_dirty_for_io(page)) {
- unlock_page(page);
- break;
- }
-
- /*
- * This actually clears the dirty bit in the radix tree.
- * See cifs_writepage() for more commentary.
- */
- set_page_writeback(page);
-
- if (page_offset(page) >= i_size_read(mapping->host)) {
- done = true;
- unlock_page(page);
- end_page_writeback(page);
- break;
- }
-
- wdata->pages[i] = page;
- next = page->index + 1;
- ++nr_pages;
- }
-
- /* reset index to refind any pages skipped */
- if (nr_pages == 0)
- index = wdata->pages[0]->index + 1;
-
- /* put any pages we aren't going to use */
- for (i = nr_pages; i < found_pages; i++) {
- page_cache_release(wdata->pages[i]);
- wdata->pages[i] = NULL;
- }
+ nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
+ end, &index, &next, &done);

/* nothing to write? */
if (nr_pages == 0) {
--
1.7.10.4
Pavel Shilovsky
2014-06-23 14:58:31 UTC
Permalink
Signed-off-by: Pavel Shilovsky <pshilovsky-***@public.gmane.org>
---
fs/cifs/file.c | 56 ++++++++++++++++++++++++++++++++++++--------------------
1 file changed, 36 insertions(+), 20 deletions(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index f143a4f..2c7fbd7 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1878,6 +1878,40 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
return rc;
}

+static struct cifs_writedata *
+wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
+ pgoff_t end, pgoff_t *index,
+ unsigned int *found_pages)
+{
+ unsigned int nr_pages;
+ struct page **pages;
+ struct cifs_writedata *wdata;
+
+ wdata = cifs_writedata_alloc((unsigned int)tofind,
+ cifs_writev_complete);
+ if (!wdata)
+ return NULL;
+
+ /*
+ * find_get_pages_tag seems to return a max of 256 on each
+ * iteration, so we must call it several times in order to
+ * fill the array or the wsize is effectively limited to
+ * 256 * PAGE_CACHE_SIZE.
+ */
+ *found_pages = 0;
+ pages = wdata->pages;
+ do {
+ nr_pages = find_get_pages_tag(mapping, index,
+ PAGECACHE_TAG_DIRTY, tofind,
+ pages);
+ *found_pages += nr_pages;
+ tofind -= nr_pages;
+ pages += nr_pages;
+ } while (nr_pages && tofind && *index <= end);
+
+ return wdata;
+}
+
static unsigned int
wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
struct address_space *mapping,
@@ -2040,35 +2074,17 @@ retry:
while (!done && index <= end) {
unsigned int nr_pages, found_pages;
pgoff_t next = 0, tofind;
- struct page **pages;

tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
end - index) + 1;

- wdata = cifs_writedata_alloc((unsigned int)tofind,
- cifs_writev_complete);
+ wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
+ &found_pages);
if (!wdata) {
rc = -ENOMEM;
break;
}

- /*
- * find_get_pages_tag seems to return a max of 256 on each
- * iteration, so we must call it several times in order to
- * fill the array or the wsize is effectively limited to
- * 256 * PAGE_CACHE_SIZE.
- */
- found_pages = 0;
- pages = wdata->pages;
- do {
- nr_pages = find_get_pages_tag(mapping, &index,
- PAGECACHE_TAG_DIRTY,
- tofind, pages);
- found_pages += nr_pages;
- tofind -= nr_pages;
- pages += nr_pages;
- } while (nr_pages && tofind && index <= end);
-
if (found_pages == 0) {
kref_put(&wdata->refcount, cifs_writedata_release);
break;
--
1.7.10.4
Pavel Shilovsky
2014-06-23 14:58:30 UTC
Permalink
Signed-off-by: Pavel Shilovsky <pshilovsky-***@public.gmane.org>
---
fs/cifs/file.c | 98 ++++++++++++++++++++++++++++++--------------------------
1 file changed, 53 insertions(+), 45 deletions(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 216959e..f143a4f 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1958,6 +1958,58 @@ wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
return nr_pages;
}

+static int
+wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
+ struct address_space *mapping, struct writeback_control *wbc)
+{
+ int rc = 0;
+ struct TCP_Server_Info *server;
+ unsigned int i;
+
+ wdata->sync_mode = wbc->sync_mode;
+ wdata->nr_pages = nr_pages;
+ wdata->offset = page_offset(wdata->pages[0]);
+ wdata->pagesz = PAGE_CACHE_SIZE;
+ wdata->tailsz = min(i_size_read(mapping->host) -
+ page_offset(wdata->pages[nr_pages - 1]),
+ (loff_t)PAGE_CACHE_SIZE);
+ wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz;
+
+ do {
+ if (wdata->cfile != NULL)
+ cifsFileInfo_put(wdata->cfile);
+ wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
+ if (!wdata->cfile) {
+ cifs_dbg(VFS, "No writable handles for inode\n");
+ rc = -EBADF;
+ break;
+ }
+ wdata->pid = wdata->cfile->pid;
+ server = tlink_tcon(wdata->cfile->tlink)->ses->server;
+ rc = server->ops->async_writev(wdata, cifs_writedata_release);
+ } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
+
+ for (i = 0; i < nr_pages; ++i)
+ unlock_page(wdata->pages[i]);
+
+ if (!rc)
+ return rc;
+
+ /* send failure -- clean up the mess */
+ for (i = 0; i < nr_pages; ++i) {
+ if (rc == -EAGAIN)
+ redirty_page_for_writepage(wbc, wdata->pages[i]);
+ else
+ SetPageError(wdata->pages[i]);
+ end_page_writeback(wdata->pages[i]);
+ page_cache_release(wdata->pages[i]);
+ }
+ if (rc != -EAGAIN)
+ mapping_set_error(mapping, rc);
+
+ return rc;
+}
+
static int cifs_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
@@ -1965,7 +2017,6 @@ static int cifs_writepages(struct address_space *mapping,
bool done = false, scanned = false, range_whole = false;
pgoff_t end, index;
struct cifs_writedata *wdata;
- struct TCP_Server_Info *server;
int rc = 0;

/*
@@ -2032,50 +2083,7 @@ retry:
continue;
}

- wdata->sync_mode = wbc->sync_mode;
- wdata->nr_pages = nr_pages;
- wdata->offset = page_offset(wdata->pages[0]);
- wdata->pagesz = PAGE_CACHE_SIZE;
- wdata->tailsz =
- min(i_size_read(mapping->host) -
- page_offset(wdata->pages[nr_pages - 1]),
- (loff_t)PAGE_CACHE_SIZE);
- wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
- wdata->tailsz;
-
- do {
- if (wdata->cfile != NULL)
- cifsFileInfo_put(wdata->cfile);
- wdata->cfile = find_writable_file(CIFS_I(mapping->host),
- false);
- if (!wdata->cfile) {
- cifs_dbg(VFS, "No writable handles for inode\n");
- rc = -EBADF;
- break;
- }
- wdata->pid = wdata->cfile->pid;
- server = tlink_tcon(wdata->cfile->tlink)->ses->server;
- rc = server->ops->async_writev(wdata,
- cifs_writedata_release);
- } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
-
- for (i = 0; i < nr_pages; ++i)
- unlock_page(wdata->pages[i]);
-
- /* send failure -- clean up the mess */
- if (rc != 0) {
- for (i = 0; i < nr_pages; ++i) {
- if (rc == -EAGAIN)
- redirty_page_for_writepage(wbc,
- wdata->pages[i]);
- else
- SetPageError(wdata->pages[i]);
- end_page_writeback(wdata->pages[i]);
- page_cache_release(wdata->pages[i]);
- }
- if (rc != -EAGAIN)
- mapping_set_error(mapping, rc);
- }
+ rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
kref_put(&wdata->refcount, cifs_writedata_release);

wbc->nr_to_write -= nr_pages;
--
1.7.10.4
Pavel Shilovsky
2014-06-23 14:58:32 UTC
Permalink
If a server change maximum buffer size for write (wsize) requests
on reconnect we can fail on repeating with a big size buffer on
-EAGAIN error in writepages. Fix this by checking wsize all the
time before repeating request in writepages.

Signed-off-by: Pavel Shilovsky <pshilovsky-***@public.gmane.org>
---
fs/cifs/file.c | 25 ++++++++++++++-----------
1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 2c7fbd7..e9dc57a 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2009,19 +2009,17 @@ wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
(loff_t)PAGE_CACHE_SIZE);
wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz;

- do {
- if (wdata->cfile != NULL)
- cifsFileInfo_put(wdata->cfile);
- wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
- if (!wdata->cfile) {
- cifs_dbg(VFS, "No writable handles for inode\n");
- rc = -EBADF;
- break;
- }
+ if (wdata->cfile != NULL)
+ cifsFileInfo_put(wdata->cfile);
+ wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
+ if (!wdata->cfile) {
+ cifs_dbg(VFS, "No writable handles for inode\n");
+ rc = -EBADF;
+ } else {
wdata->pid = wdata->cfile->pid;
server = tlink_tcon(wdata->cfile->tlink)->ses->server;
rc = server->ops->async_writev(wdata, cifs_writedata_release);
- } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
+ }

for (i = 0; i < nr_pages; ++i)
unlock_page(wdata->pages[i]);
@@ -2073,7 +2071,7 @@ static int cifs_writepages(struct address_space *mapping,
retry:
while (!done && index <= end) {
unsigned int nr_pages, found_pages;
- pgoff_t next = 0, tofind;
+ pgoff_t next = 0, tofind, saved_index = index;

tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
end - index) + 1;
@@ -2102,6 +2100,11 @@ retry:
rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
kref_put(&wdata->refcount, cifs_writedata_release);

+ if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
+ index = saved_index;
+ continue;
+ }
+
wbc->nr_to_write -= nr_pages;
if (wbc->nr_to_write <= 0)
done = true;
--
1.7.10.4
Pavel Shilovsky
2014-06-23 14:58:36 UTC
Permalink
If a server change maximum buffer size for write (wsize) requests
on reconnect we can fail on repeating with a big size buffer on
-EAGAIN error in iovec write. Fix this by checking wsize all the
time before repeating request in iovec write.

Signed-off-by: Pavel Shilovsky <pshilovsky-***@public.gmane.org>
---
fs/cifs/file.c | 62 +++++++++++++++++++++++++++++++-------------------------
1 file changed, 34 insertions(+), 28 deletions(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 887c18c..4ba59cd 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2401,28 +2401,6 @@ cifs_uncached_writev_complete(struct work_struct *work)
kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}

-/* attempt to send write to server, retry on any -EAGAIN errors */
-static int
-cifs_uncached_retry_writev(struct cifs_writedata *wdata)
-{
- int rc;
- struct TCP_Server_Info *server;
-
- server = tlink_tcon(wdata->cfile->tlink)->ses->server;
-
- do {
- if (wdata->cfile->invalidHandle) {
- rc = cifs_reopen_file(wdata->cfile, false);
- if (rc != 0)
- continue;
- }
- rc = server->ops->async_writev(wdata,
- cifs_uncached_writedata_release);
- } while (rc == -EAGAIN);
-
- return rc;
-}
-
static int
wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *it,
size_t *len, unsigned long nr_pages)
@@ -2472,23 +2450,28 @@ wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *it,

static int
cifs_write_from_iovec(loff_t offset, size_t len, const struct iovec *iov,
- unsigned long nr_segs, struct cifsFileInfo *open_file,
+ unsigned long nr_segs, loff_t written,
+ struct cifsFileInfo *open_file,
struct cifs_sb_info *cifs_sb,
struct list_head *wdata_list)
{
int rc = 0;
- size_t cur_len;
+ size_t cur_len, saved_len = len;
unsigned long nr_pages, i;
struct cifs_writedata *wdata;
struct iov_iter it;
+ loff_t saved_offset = offset;
pid_t pid;
+ struct TCP_Server_Info *server;

if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
pid = open_file->pid;
else
pid = current->tgid;

- iov_iter_init(&it, iov, nr_segs, len, 0);
+ server = tlink_tcon(open_file->tlink)->ses->server;
+
+ iov_iter_init(&it, iov, nr_segs, len, written);
do {
nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
wdata = cifs_writedata_alloc(nr_pages,
@@ -2520,10 +2503,19 @@ cifs_write_from_iovec(loff_t offset, size_t len, const struct iovec *iov,
wdata->bytes = cur_len;
wdata->pagesz = PAGE_SIZE;
wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
- rc = cifs_uncached_retry_writev(wdata);
+
+ if (!wdata->cfile->invalidHandle ||
+ !cifs_reopen_file(wdata->cfile, false))
+ rc = server->ops->async_writev(wdata,
+ cifs_uncached_writedata_release);
if (rc) {
kref_put(&wdata->refcount,
cifs_uncached_writedata_release);
+ if (rc == -EAGAIN) {
+ iov_iter_init(&it, iov, nr_segs, saved_len,
+ written + offset - saved_offset);
+ continue;
+ }
break;
}

@@ -2564,7 +2556,7 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
if (!tcon->ses->server->ops->async_writev)
return -ENOSYS;

- rc = cifs_write_from_iovec(*poffset, len, iov, nr_segs, open_file,
+ rc = cifs_write_from_iovec(*poffset, len, iov, nr_segs, 0, open_file,
cifs_sb, &wdata_list);

/*
@@ -2595,7 +2587,21 @@ restart_loop:

/* resend call if it's a retryable error */
if (rc == -EAGAIN) {
- rc = cifs_uncached_retry_writev(wdata);
+ struct list_head tmp_list;
+
+ INIT_LIST_HEAD(&tmp_list);
+ list_del_init(&wdata->list);
+
+ rc = cifs_write_from_iovec(wdata->offset,
+ wdata->bytes, iov, nr_segs,
+ wdata->offset - *poffset, open_file,
+ cifs_sb, &tmp_list);
+
+ if (!list_empty(&tmp_list))
+ list_splice(&tmp_list, &wdata_list);
+
+ kref_put(&wdata->refcount,
+ cifs_uncached_writedata_release);
goto restart_loop;
}
}
--
1.7.10.4
Pavel Shilovsky
2014-06-23 14:58:34 UTC
Permalink
Signed-off-by: Pavel Shilovsky <pshilovsky-***@public.gmane.org>
---
fs/cifs/file.c | 88 ++++++++++++++++++++++++++++++++------------------------
1 file changed, 50 insertions(+), 38 deletions(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e9dc57a..89d647b 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2423,12 +2423,59 @@ cifs_uncached_retry_writev(struct cifs_writedata *wdata)
return rc;
}

+static int
+wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *it,
+ size_t *len, unsigned long nr_pages)
+{
+ int rc = 0;
+ size_t save_len, copied, bytes, cur_len = *len;
+ unsigned long i;
+
+ save_len = cur_len;
+ for (i = 0; i < nr_pages; i++) {
+ bytes = min_t(const size_t, cur_len, PAGE_SIZE);
+ copied = iov_iter_copy_from_user(wdata->pages[i], it,
+ 0, bytes);
+ cur_len -= copied;
+ iov_iter_advance(it, copied);
+ /*
+ * If we didn't copy as much as we expected, then that
+ * may mean we trod into an unmapped area. Stop copying
+ * at that point. On the next pass through the big
+ * loop, we'll likely end up getting a zero-length
+ * write and bailing out of it.
+ */
+ if (copied < bytes)
+ break;
+ }
+ cur_len = save_len - cur_len;
+ *len = cur_len;
+
+ /*
+ * If we have no data to send, then that probably means that
+ * the copy above failed altogether. That's most likely because
+ * the address in the iovec was bogus. Return -EFAULT and let
+ * the caller free anything we allocated and bail out.
+ */
+ if (!cur_len)
+ return -EFAULT;
+
+ /*
+ * i + 1 now represents the number of pages we actually used in
+ * the copy phase above. Bring nr_pages down to that, and free
+ * any pages that we didn't use.
+ */
+ for ( ; nr_pages > i + 1; nr_pages--)
+ put_page(wdata->pages[nr_pages - 1]);
+ return rc;
+}
+
static ssize_t
cifs_iovec_write(struct file *file, const struct iovec *iov,
unsigned long nr_segs, loff_t *poffset)
{
unsigned long nr_pages, i;
- size_t bytes, copied, len, cur_len;
+ size_t len, cur_len;
ssize_t total_written = 0;
loff_t offset;
struct iov_iter it;
@@ -2465,8 +2512,6 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,

iov_iter_init(&it, iov, nr_segs, len, 0);
do {
- size_t save_len;
-
nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
wdata = cifs_writedata_alloc(nr_pages,
cifs_uncached_writev_complete);
@@ -2481,47 +2526,14 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
break;
}

- save_len = cur_len;
- for (i = 0; i < nr_pages; i++) {
- bytes = min_t(const size_t, cur_len, PAGE_SIZE);
- copied = iov_iter_copy_from_user(wdata->pages[i], &it,
- 0, bytes);
- cur_len -= copied;
- iov_iter_advance(&it, copied);
- /*
- * If we didn't copy as much as we expected, then that
- * may mean we trod into an unmapped area. Stop copying
- * at that point. On the next pass through the big
- * loop, we'll likely end up getting a zero-length
- * write and bailing out of it.
- */
- if (copied < bytes)
- break;
- }
- cur_len = save_len - cur_len;
-
- /*
- * If we have no data to send, then that probably means that
- * the copy above failed altogether. That's most likely because
- * the address in the iovec was bogus. Set the rc to -EFAULT,
- * free anything we allocated and bail out.
- */
- if (!cur_len) {
+ rc = wdata_fill_from_iovec(wdata, &it, &cur_len, nr_pages);
+ if (rc) {
for (i = 0; i < nr_pages; i++)
put_page(wdata->pages[i]);
kfree(wdata);
- rc = -EFAULT;
break;
}

- /*
- * i + 1 now represents the number of pages we actually used in
- * the copy phase above. Bring nr_pages down to that, and free
- * any pages that we didn't use.
- */
- for ( ; nr_pages > i + 1; nr_pages--)
- put_page(wdata->pages[nr_pages - 1]);
-
wdata->sync_mode = WB_SYNC_ALL;
wdata->nr_pages = nr_pages;
wdata->offset = (__u64)offset;
--
1.7.10.4
Steve French
2014-06-26 05:46:23 UTC
Permalink
FYI - merge conflict in current cifs-2.6.git for-next (and current
mainline kernel) due to Al Viro's read_iter and write_iter changes.

Presumably due to changeset
https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/fs/cifs?id=16b9057804c02e2d351e9c8f606e909b43cbd9e7

Would you rebase on current cifs-2.6.git or current mainline from this patch on?
Post by Pavel Shilovsky
---
fs/cifs/file.c | 88 ++++++++++++++++++++++++++++++++------------------------
1 file changed, 50 insertions(+), 38 deletions(-)
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e9dc57a..89d647b 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2423,12 +2423,59 @@ cifs_uncached_retry_writev(struct cifs_writedata *wdata)
return rc;
}
+static int
+wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *it,
+ size_t *len, unsigned long nr_pages)
+{
+ int rc = 0;
+ size_t save_len, copied, bytes, cur_len = *len;
+ unsigned long i;
+
+ save_len = cur_len;
+ for (i = 0; i < nr_pages; i++) {
+ bytes = min_t(const size_t, cur_len, PAGE_SIZE);
+ copied = iov_iter_copy_from_user(wdata->pages[i], it,
+ 0, bytes);
+ cur_len -= copied;
+ iov_iter_advance(it, copied);
+ /*
+ * If we didn't copy as much as we expected, then that
+ * may mean we trod into an unmapped area. Stop copying
+ * at that point. On the next pass through the big
+ * loop, we'll likely end up getting a zero-length
+ * write and bailing out of it.
+ */
+ if (copied < bytes)
+ break;
+ }
+ cur_len = save_len - cur_len;
+ *len = cur_len;
+
+ /*
+ * If we have no data to send, then that probably means that
+ * the copy above failed altogether. That's most likely because
+ * the address in the iovec was bogus. Return -EFAULT and let
+ * the caller free anything we allocated and bail out.
+ */
+ if (!cur_len)
+ return -EFAULT;
+
+ /*
+ * i + 1 now represents the number of pages we actually used in
+ * the copy phase above. Bring nr_pages down to that, and free
+ * any pages that we didn't use.
+ */
+ for ( ; nr_pages > i + 1; nr_pages--)
+ put_page(wdata->pages[nr_pages - 1]);
+ return rc;
+}
+
static ssize_t
cifs_iovec_write(struct file *file, const struct iovec *iov,
unsigned long nr_segs, loff_t *poffset)
{
unsigned long nr_pages, i;
- size_t bytes, copied, len, cur_len;
+ size_t len, cur_len;
ssize_t total_written = 0;
loff_t offset;
struct iov_iter it;
@@ -2465,8 +2512,6 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
iov_iter_init(&it, iov, nr_segs, len, 0);
do {
- size_t save_len;
-
nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
wdata = cifs_writedata_alloc(nr_pages,
cifs_uncached_writev_complete);
@@ -2481,47 +2526,14 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
break;
}
- save_len = cur_len;
- for (i = 0; i < nr_pages; i++) {
- bytes = min_t(const size_t, cur_len, PAGE_SIZE);
- copied = iov_iter_copy_from_user(wdata->pages[i], &it,
- 0, bytes);
- cur_len -= copied;
- iov_iter_advance(&it, copied);
- /*
- * If we didn't copy as much as we expected, then that
- * may mean we trod into an unmapped area. Stop copying
- * at that point. On the next pass through the big
- * loop, we'll likely end up getting a zero-length
- * write and bailing out of it.
- */
- if (copied < bytes)
- break;
- }
- cur_len = save_len - cur_len;
-
- /*
- * If we have no data to send, then that probably means that
- * the copy above failed altogether. That's most likely because
- * the address in the iovec was bogus. Set the rc to -EFAULT,
- * free anything we allocated and bail out.
- */
- if (!cur_len) {
+ rc = wdata_fill_from_iovec(wdata, &it, &cur_len, nr_pages);
+ if (rc) {
for (i = 0; i < nr_pages; i++)
put_page(wdata->pages[i]);
kfree(wdata);
- rc = -EFAULT;
break;
}
- /*
- * i + 1 now represents the number of pages we actually used in
- * the copy phase above. Bring nr_pages down to that, and free
- * any pages that we didn't use.
- */
- for ( ; nr_pages > i + 1; nr_pages--)
- put_page(wdata->pages[nr_pages - 1]);
-
wdata->sync_mode = WB_SYNC_ALL;
wdata->nr_pages = nr_pages;
wdata->offset = (__u64)offset;
--
1.7.10.4
--
To unsubscribe from this list: send the line "unsubscribe linux-cifs" in
More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Thanks,

Steve
Pavel Shilovsky
2014-06-26 09:33:13 UTC
Permalink
Post by Steve French
FYI - merge conflict in current cifs-2.6.git for-next (and current
mainline kernel) due to Al Viro's read_iter and write_iter changes.
Presumably due to changeset
https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/fs/cifs?id=16b9057804c02e2d351e9c8f606e909b43cbd9e7
Would you rebase on current cifs-2.6.git or current mainline from this patch on?
Yes, I started to do it but noticed that the existing code (without my
patches) has data coherency problem on reconnect during reading when
mounting with cache=none. Now, I am investigating this.
--
Best regards,
Pavel Shilovsky.
Pavel Shilovsky
2014-06-26 12:39:37 UTC
Permalink
Post by Pavel Shilovsky
Post by Steve French
FYI - merge conflict in current cifs-2.6.git for-next (and current
mainline kernel) due to Al Viro's read_iter and write_iter changes.
Presumably due to changeset
https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/fs/cifs?id=16b9057804c02e2d351e9c8f606e909b43cbd9e7
Would you rebase on current cifs-2.6.git or current mainline from this patch on?
Yes, I started to do it but noticed that the existing code (without my
patches) has data coherency problem on reconnect during reading when
mounting with cache=none. Now, I am investigating this.
Figured it out:

started to receive read data (16 iovs are expected):
Jun 26 14:46:03 adventure kernel: [11874.820889]
/home/piastry/git/cifs-2.6/fs/cifs/cifssmb.c: cifs_readv_receive:
mid=4938 offset=25308416 bytes=65536
Jun 26 14:46:03 adventure kernel: [11874.820893]
/home/piastry/git/cifs-2.6/fs/cifs/cifssmb.c: cifs_readv_receive:
total_read=84 data_offset=84
Jun 26 14:46:03 adventure kernel: [11874.820895]
/home/piastry/git/cifs-2.6/fs/cifs/cifssmb.c: 0: iov_base=f43e5340
iov_len=84
Jun 26 14:46:03 adventure kernel: [11874.820898]
/home/piastry/git/cifs-2.6/fs/cifs/file.c: 0: iov_base=ffd10000
iov_len=4096
Jun 26 14:46:03 adventure kernel: [11874.821739]
/home/piastry/git/cifs-2.6/fs/cifs/file.c: 1: iov_base=ffd11000
iov_len=4096
Jun 26 14:46:03 adventure kernel: [11874.821745]
/home/piastry/git/cifs-2.6/fs/cifs/file.c: 2: iov_base=ffd12000
iov_len=4096
Jun 26 14:46:03 adventure kernel: [11874.821750]
/home/piastry/git/cifs-2.6/fs/cifs/file.c: 3: iov_base=ffd13000
iov_len=4096
Jun 26 14:46:03 adventure kernel: [11874.823222]
/home/piastry/git/cifs-2.6/fs/cifs/file.c: 4: iov_base=ffd14000
iov_len=4096
Jun 26 14:46:03 adventure kernel: [11874.823229]
/home/piastry/git/cifs-2.6/fs/cifs/file.c: 5: iov_base=ffd15000
iov_len=4096
Jun 26 14:46:03 adventure kernel: [11874.850629]
/home/piastry/git/cifs-2.6/fs/cifs/file.c: 6: iov_base=ffd16000
iov_len=4096
Jun 26 14:46:03 adventure kernel: [11874.850647]
/home/piastry/git/cifs-2.6/fs/cifs/file.c: 7: iov_base=ffd17000
iov_len=4096
Jun 26 14:46:03 adventure kernel: [11874.850653]
/home/piastry/git/cifs-2.6/fs/cifs/file.c: 8: iov_base=ffd18000
iov_len=4096

stop the link here (got only 8 iovs) ...

Jun 26 14:47:26 adventure kernel: [11957.724068]
/home/piastry/git/cifs-2.6/fs/cifs/smb2pdu.c: In echo request
Jun 26 14:47:26 adventure kernel: [11957.724078]
/home/piastry/git/cifs-2.6/fs/cifs/transport.c: Sending smb:
smb_len=68
Jun 26 14:48:09 adventure kernel: [12000.850119] CIFS VFS: Server
192.168.1.34 has not responded in 120 seconds. Reconnecting...
Jun 26 14:48:09 adventure kernel: [12000.850125]
/home/piastry/git/cifs-2.6/fs/cifs/connect.c: Reconnecting tcp session
Jun 26 14:48:09 adventure kernel: [12000.850128]
/home/piastry/git/cifs-2.6/fs/cifs/connect.c: cifs_reconnect: marking
sessions and tcons for reconnect
Jun 26 14:48:09 adventure kernel: [12000.850131]
/home/piastry/git/cifs-2.6/fs/cifs/connect.c: cifs_reconnect: tearing
down socket
Jun 26 14:48:09 adventure kernel: [12000.850133]
/home/piastry/git/cifs-2.6/fs/cifs/connect.c: State: 0x3 Flags: 0x0
Jun 26 14:48:09 adventure kernel: [12000.850186]
/home/piastry/git/cifs-2.6/fs/cifs/connect.c: Post shutdown state: 0x3
Flags: 0x0
Jun 26 14:48:09 adventure kernel: [12000.850195]
/home/piastry/git/cifs-2.6/fs/cifs/connect.c: cifs_reconnect: moving
mids to private list
Jun 26 14:48:09 adventure kernel: [12000.850197]
/home/piastry/git/cifs-2.6/fs/cifs/connect.c: cifs_reconnect: issuing
mid callbacks
Jun 26 14:48:09 adventure kernel: [12000.850219]
/home/piastry/git/cifs-2.6/fs/cifs/smb2pdu.c: smb2_readv_callback:
mid=4938 state=8 result=0 bytes=65536

state=8 means -EAGAIN error that is set to rdata->result

Jun 26 14:49:09 adventure kernel: [12060.888067]
/home/piastry/git/cifs-2.6/fs/cifs/connect.c: Socket created
Jun 26 14:49:09 adventure kernel: [12060.888074]
/home/piastry/git/cifs-2.6/fs/cifs/connect.c: sndbuf 16384 rcvbuf
87380 rcvtimeo 0x6d6
Jun 26 14:49:12 adventure kernel: [12063.898765]
/home/piastry/git/cifs-2.6/fs/cifs/cifssmb.c: total_read=32852
buflen=65620 remaining=65536
Jun 26 14:49:12 adventure kernel: [12063.898811]
/home/piastry/git/cifs-2.6/fs/cifs/smb2pdu.c: Negotiate protocol
Jun 26 14:49:12 adventure kernel: [12063.898819]
/home/piastry/git/cifs-2.6/fs/cifs/transport.c: Sending smb:
smb_len=102
Jun 26 14:49:47 adventure kernel: [12098.271408]
/home/piastry/git/cifs-2.6/fs/cifs/connect.c: Received no data or
error: expecting 16052
Jun 26 14:49:47 adventure kernel: [12098.271408] got -104
Jun 26 14:49:47 adventure kernel: [12098.271413]
/home/piastry/git/cifs-2.6/fs/cifs/connect.c: Reconnecting tcp session
Jun 26 14:49:47 adventure kernel: [12098.271417]
/home/piastry/git/cifs-2.6/fs/cifs/connect.c: cifs_reconnect: marking
sessions and tcons for reconnect
Jun 26 14:49:47 adventure kernel: [12098.271420]
/home/piastry/git/cifs-2.6/fs/cifs/connect.c: cifs_reconnect: tearing
down socket
Jun 26 14:49:47 adventure kernel: [12098.271422]
/home/piastry/git/cifs-2.6/fs/cifs/connect.c: State: 0x3 Flags: 0x0
Jun 26 14:49:47 adventure kernel: [12098.271425]
/home/piastry/git/cifs-2.6/fs/cifs/connect.c: Post shutdown state: 0x3
Flags: 0x0
Jun 26 14:49:47 adventure kernel: [12098.271436]
/home/piastry/git/cifs-2.6/fs/cifs/connect.c: cifs_reconnect: moving
mids to private list
Jun 26 14:49:47 adventure kernel: [12098.271439]
/home/piastry/git/cifs-2.6/fs/cifs/connect.c: cifs_reconnect: issuing
mid callbacks
Jun 26 14:49:47 adventure kernel: [12098.271456]
/home/piastry/git/cifs-2.6/fs/cifs/transport.c: cifs_sync_mid_result:
cmd=0 mid=0 state=8
Jun 26 14:49:47 adventure kernel: [12098.271458]
/home/piastry/git/cifs-2.6/fs/cifs/connect.c: Socket created
Jun 26 14:49:47 adventure kernel: [12098.271461]
/home/piastry/git/cifs-2.6/fs/cifs/connect.c: sndbuf 16384 rcvbuf
87380 rcvtimeo 0x6d6
Jun 26 14:49:47 adventure kernel: [12098.271465]
/home/piastry/git/cifs-2.6/fs/cifs/misc.c: Null buffer passed to
cifs_small_buf_release
Jun 26 14:49:47 adventure kernel: [12098.271491]
/home/piastry/git/cifs-2.6/fs/cifs/smb2pdu.c: smb2_async_readv:
offset=25439488 bytes=65536
Jun 26 14:49:57 adventure kernel: [12108.268065]
/home/piastry/git/cifs-2.6/fs/cifs/smb2pdu.c: Negotiate protocol
Jun 26 14:49:57 adventure kernel: [12108.268075]
/home/piastry/git/cifs-2.6/fs/cifs/transport.c: Sending smb:
smb_len=102

Then connection is establed and we resend read request ...

Jun 26 14:49:58 adventure kernel: [12108.958891]
/home/piastry/git/cifs-2.6/fs/cifs/smb2pdu.c: smb2_async_readv:
offset=25308416 bytes=32768 (!!!!!!!!!!!!!!!!!!!!)
Jun 26 14:49:58 adventure kernel: [12108.958897]
/home/piastry/git/cifs-2.6/fs/cifs/transport.c: Sending smb:
smb_len=113

... with wrong length (32768 instead of 65536 as it was before). The
rdata->bytes field is set to this value here:
1515 >-------length = rdata->read_into_pages(server, rdata, data_len);
1516 >-------if (length < 0)
1517 >------->-------return length;
1518
1519 >-------server->total_read += length;
1520 >-------rdata->bytes = length;

So, we got short read, update rdata->bytes and then retry the request
with this new length value. I suggest we need to add a check like this
(tested and ok):

if (rdata->result != -EAGAIN)
rdata->bytes = length;

Thoughts?
--
Best regards,
Pavel Shilovsky.
Pavel Shilovsky
2014-06-23 14:58:35 UTC
Permalink
Signed-off-by: Pavel Shilovsky <pshilovsky-***@public.gmane.org>
---
fs/cifs/file.c | 75 +++++++++++++++++++++++++++++++++-----------------------
1 file changed, 44 insertions(+), 31 deletions(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 89d647b..887c18c 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2470,41 +2470,19 @@ wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *it,
return rc;
}

-static ssize_t
-cifs_iovec_write(struct file *file, const struct iovec *iov,
- unsigned long nr_segs, loff_t *poffset)
+static int
+cifs_write_from_iovec(loff_t offset, size_t len, const struct iovec *iov,
+ unsigned long nr_segs, struct cifsFileInfo *open_file,
+ struct cifs_sb_info *cifs_sb,
+ struct list_head *wdata_list)
{
+ int rc = 0;
+ size_t cur_len;
unsigned long nr_pages, i;
- size_t len, cur_len;
- ssize_t total_written = 0;
- loff_t offset;
+ struct cifs_writedata *wdata;
struct iov_iter it;
- struct cifsFileInfo *open_file;
- struct cifs_tcon *tcon;
- struct cifs_sb_info *cifs_sb;
- struct cifs_writedata *wdata, *tmp;
- struct list_head wdata_list;
- int rc;
pid_t pid;

- len = iov_length(iov, nr_segs);
- if (!len)
- return 0;
-
- rc = generic_write_checks(file, poffset, &len, 0);
- if (rc)
- return rc;
-
- INIT_LIST_HEAD(&wdata_list);
- cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
- open_file = file->private_data;
- tcon = tlink_tcon(open_file->tlink);
-
- if (!tcon->ses->server->ops->async_writev)
- return -ENOSYS;
-
- offset = *poffset;
-
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
pid = open_file->pid;
else
@@ -2549,11 +2527,46 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
break;
}

- list_add_tail(&wdata->list, &wdata_list);
+ list_add_tail(&wdata->list, wdata_list);
offset += cur_len;
len -= cur_len;
} while (len > 0);

+ return rc;
+}
+
+static ssize_t
+cifs_iovec_write(struct file *file, const struct iovec *iov,
+ unsigned long nr_segs, loff_t *poffset)
+{
+ size_t len;
+ ssize_t total_written = 0;
+ struct cifsFileInfo *open_file;
+ struct cifs_tcon *tcon;
+ struct cifs_sb_info *cifs_sb;
+ struct cifs_writedata *wdata, *tmp;
+ struct list_head wdata_list;
+ int rc;
+
+ len = iov_length(iov, nr_segs);
+ if (!len)
+ return 0;
+
+ rc = generic_write_checks(file, poffset, &len, 0);
+ if (rc)
+ return rc;
+
+ INIT_LIST_HEAD(&wdata_list);
+ cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+ open_file = file->private_data;
+ tcon = tlink_tcon(open_file->tlink);
+
+ if (!tcon->ses->server->ops->async_writev)
+ return -ENOSYS;
+
+ rc = cifs_write_from_iovec(*poffset, len, iov, nr_segs, open_file,
+ cifs_sb, &wdata_list);
+
/*
* If at least one write was successfully sent, then discard any rc
* value from the later writes. If the other write succeeds, then
--
1.7.10.4
Pavel Shilovsky
2014-06-23 14:58:33 UTC
Permalink
If wsize changes on reconnect we need to use new writedata structure
that for retrying.

Signed-off-by: Pavel Shilovsky <pshilovsky-***@public.gmane.org>
---
fs/cifs/cifsglob.h | 2 ++
fs/cifs/cifssmb.c | 84 ++++++++++++++++++++++++++++++++++++++++++----------
fs/cifs/smb1ops.c | 7 +++++
fs/cifs/smb2ops.c | 10 +++++++
4 files changed, 87 insertions(+), 16 deletions(-)

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index de6aed8..8c53f20 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -404,6 +404,8 @@ struct smb_version_operations {
const struct cifs_fid *, u32 *);
int (*set_acl)(struct cifs_ntsd *, __u32, struct inode *, const char *,
int);
+ /* writepages retry size */
+ unsigned int (*wp_retry_size)(struct inode *);
};

struct smb_version_values {
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 6ce4e09..610f9cf 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1902,27 +1902,79 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
int i, rc;
struct inode *inode = wdata->cfile->dentry->d_inode;
struct TCP_Server_Info *server;
+ unsigned int rest_len;

- for (i = 0; i < wdata->nr_pages; i++) {
- lock_page(wdata->pages[i]);
- clear_page_dirty_for_io(wdata->pages[i]);
- }
-
+ server = tlink_tcon(wdata->cfile->tlink)->ses->server;
+ i = 0;
+ rest_len = wdata->bytes;
do {
- server = tlink_tcon(wdata->cfile->tlink)->ses->server;
- rc = server->ops->async_writev(wdata, cifs_writedata_release);
- } while (rc == -EAGAIN);
+ struct cifs_writedata *wdata2;
+ unsigned int j, nr_pages, wsize, tailsz, cur_len;
+
+ wsize = server->ops->wp_retry_size(inode);
+ if (wsize < rest_len) {
+ nr_pages = wsize / PAGE_CACHE_SIZE;
+ if (!nr_pages) {
+ rc = -ENOTSUPP;
+ break;
+ }
+ cur_len = nr_pages * PAGE_CACHE_SIZE;
+ tailsz = PAGE_CACHE_SIZE;
+ } else {
+ nr_pages = DIV_ROUND_UP(rest_len, PAGE_CACHE_SIZE);
+ cur_len = rest_len;
+ tailsz = rest_len - (nr_pages - 1) * PAGE_CACHE_SIZE;
+ }

- for (i = 0; i < wdata->nr_pages; i++) {
- unlock_page(wdata->pages[i]);
- if (rc != 0) {
- SetPageError(wdata->pages[i]);
- end_page_writeback(wdata->pages[i]);
- page_cache_release(wdata->pages[i]);
+ wdata2 = cifs_writedata_alloc(nr_pages, cifs_writev_complete);
+ if (!wdata2) {
+ rc = -ENOMEM;
+ break;
}
- }

- mapping_set_error(inode->i_mapping, rc);
+ for (j = 0; j < nr_pages; j++) {
+ wdata2->pages[j] = wdata->pages[i + j];
+ lock_page(wdata2->pages[j]);
+ clear_page_dirty_for_io(wdata2->pages[j]);
+ }
+
+ wdata2->sync_mode = wdata->sync_mode;
+ wdata2->nr_pages = nr_pages;
+ wdata2->offset = page_offset(wdata2->pages[0]);
+ wdata2->pagesz = PAGE_CACHE_SIZE;
+ wdata2->tailsz = tailsz;
+ wdata2->bytes = cur_len;
+
+ wdata2->cfile = find_writable_file(CIFS_I(inode), false);
+ if (!wdata2->cfile) {
+ cifs_dbg(VFS, "No writable handles for inode\n");
+ rc = -EBADF;
+ break;
+ }
+ wdata2->pid = wdata2->cfile->pid;
+ rc = server->ops->async_writev(wdata2, cifs_writedata_release);
+
+ for (j = 0; j < nr_pages; j++) {
+ unlock_page(wdata2->pages[j]);
+ if (rc != 0 && rc != -EAGAIN) {
+ SetPageError(wdata2->pages[j]);
+ end_page_writeback(wdata2->pages[j]);
+ page_cache_release(wdata2->pages[j]);
+ }
+ }
+
+ if (rc) {
+ kref_put(&wdata2->refcount, cifs_writedata_release);
+ if (rc == -EAGAIN)
+ continue;
+ mapping_set_error(inode->i_mapping, rc);
+ break;
+ }
+
+ rest_len -= cur_len;
+ i += nr_pages;
+ } while (i < wdata->nr_pages);
+
kref_put(&wdata->refcount, cifs_writedata_release);
}

diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index d1fdfa8..8a96342 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -1009,6 +1009,12 @@ cifs_is_read_op(__u32 oplock)
return oplock == OPLOCK_READ;
}

+static unsigned int
+cifs_wp_retry_size(struct inode *inode)
+{
+ return CIFS_SB(inode->i_sb)->wsize;
+}
+
struct smb_version_operations smb1_operations = {
.send_cancel = send_nt_cancel,
.compare_fids = cifs_compare_fids,
@@ -1078,6 +1084,7 @@ struct smb_version_operations smb1_operations = {
.query_mf_symlink = cifs_query_mf_symlink,
.create_mf_symlink = cifs_create_mf_symlink,
.is_read_op = cifs_is_read_op,
+ .wp_retry_size = cifs_wp_retry_size,
#ifdef CONFIG_CIFS_XATTR
.query_all_EAs = CIFSSMBQAllEAs,
.set_EA = CIFSSMBSetEA,
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 787844b..e35ce5b 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1104,6 +1104,13 @@ smb3_parse_lease_buf(void *buf, unsigned int *epoch)
return le32_to_cpu(lc->lcontext.LeaseState);
}

+static unsigned int
+smb2_wp_retry_size(struct inode *inode)
+{
+ return min_t(unsigned int, CIFS_SB(inode->i_sb)->wsize,
+ SMB2_MAX_BUFFER_SIZE);
+}
+
struct smb_version_operations smb20_operations = {
.compare_fids = smb2_compare_fids,
.setup_request = smb2_setup_request,
@@ -1177,6 +1184,7 @@ struct smb_version_operations smb20_operations = {
.create_lease_buf = smb2_create_lease_buf,
.parse_lease_buf = smb2_parse_lease_buf,
.clone_range = smb2_clone_range,
+ .wp_retry_size = smb2_wp_retry_size,
};

struct smb_version_operations smb21_operations = {
@@ -1252,6 +1260,7 @@ struct smb_version_operations smb21_operations = {
.create_lease_buf = smb2_create_lease_buf,
.parse_lease_buf = smb2_parse_lease_buf,
.clone_range = smb2_clone_range,
+ .wp_retry_size = smb2_wp_retry_size,
};

struct smb_version_operations smb30_operations = {
@@ -1330,6 +1339,7 @@ struct smb_version_operations smb30_operations = {
.parse_lease_buf = smb3_parse_lease_buf,
.clone_range = smb2_clone_range,
.validate_negotiate = smb3_validate_negotiate,
+ .wp_retry_size = smb2_wp_retry_size,
};

struct smb_version_values smb20_values = {
--
1.7.10.4
Pavel Shilovsky
2014-06-23 14:58:37 UTC
Permalink
If we negotiate SMB2.1 and higher version of the protocol and
a server supports large write buffer size, we need to consume 1
credit per 65536 bytes. So, we need to know how many credits
we have and obtain the required number of them before constructing
a writedata structure in writepages and iovec write.

Signed-off-by: Pavel Shilovsky <pshilovsky-***@public.gmane.org>
---
fs/cifs/cifsglob.h | 15 ++++++++++++++
fs/cifs/cifsproto.h | 3 +++
fs/cifs/file.c | 34 +++++++++++++++++++++++++++----
fs/cifs/smb1ops.c | 1 +
fs/cifs/smb2ops.c | 52 +++++++++++++++++++++++++++++++++++++++++++++--
fs/cifs/smb2pdu.c | 24 ++++++++++++++++++----
fs/cifs/smb2transport.c | 5 +++++
fs/cifs/transport.c | 25 ++++++++++++++++-------
8 files changed, 142 insertions(+), 17 deletions(-)

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 8c53f20..95d6867 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -406,6 +406,9 @@ struct smb_version_operations {
int);
/* writepages retry size */
unsigned int (*wp_retry_size)(struct inode *);
+ /* get mtu credits */
+ int (*wait_mtu_credits)(struct TCP_Server_Info *, unsigned int,
+ unsigned int *, unsigned int *);
};

struct smb_version_values {
@@ -642,6 +645,16 @@ add_credits(struct TCP_Server_Info *server, const unsigned int add,
}

static inline void
+add_credits_and_wake(struct TCP_Server_Info *server, const unsigned int add,
+ const int optype)
+{
+ if (add) {
+ server->ops->add_credits(server, add, optype);
+ wake_up(&server->request_q);
+ }
+}
+
+static inline void
set_credits(struct TCP_Server_Info *server, const int val)
{
server->ops->set_credits(server, val);
@@ -1075,6 +1088,7 @@ struct cifs_writedata {
int result;
unsigned int pagesz;
unsigned int tailsz;
+ unsigned int credits;
unsigned int nr_pages;
struct page *pages[];
};
@@ -1400,6 +1414,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
#define CIFS_OBREAK_OP 0x0100 /* oplock break request */
#define CIFS_NEG_OP 0x0200 /* negotiate request */
#define CIFS_OP_MASK 0x0380 /* mask request type */
+#define CIFS_HAS_CREDITS 0x0400 /* already has credits */

/* Security Flags: indicate type of session setup needed */
#define CIFSSEC_MAY_SIGN 0x00001
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index ca7980a..009e5cb 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -89,6 +89,9 @@ extern struct mid_q_entry *cifs_setup_async_request(struct TCP_Server_Info *,
struct smb_rqst *);
extern int cifs_check_receive(struct mid_q_entry *mid,
struct TCP_Server_Info *server, bool log_error);
+extern int cifs_wait_mtu_credits(struct TCP_Server_Info *server,
+ unsigned int size, unsigned int *num,
+ unsigned int *credits);
extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *,
struct kvec *, int /* nvec to send */,
int * /* type of buf returned */ , const int flags);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 4ba59cd..d8ff8b6 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2046,6 +2046,7 @@ static int cifs_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
+ struct TCP_Server_Info *server;
bool done = false, scanned = false, range_whole = false;
pgoff_t end, index;
struct cifs_writedata *wdata;
@@ -2068,23 +2069,30 @@ static int cifs_writepages(struct address_space *mapping,
range_whole = true;
scanned = true;
}
+ server = cifs_sb_master_tcon(cifs_sb)->ses->server;
retry:
while (!done && index <= end) {
- unsigned int nr_pages, found_pages;
+ unsigned int nr_pages, found_pages, wsize, credits;
pgoff_t next = 0, tofind, saved_index = index;

- tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
- end - index) + 1;
+ rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
+ &wsize, &credits);
+ if (rc)
+ break;
+
+ tofind = min((wsize / PAGE_CACHE_SIZE) - 1, end - index) + 1;

wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
&found_pages);
if (!wdata) {
rc = -ENOMEM;
+ add_credits_and_wake(server, credits, 0);
break;
}

if (found_pages == 0) {
kref_put(&wdata->refcount, cifs_writedata_release);
+ add_credits_and_wake(server, credits, 0);
break;
}

@@ -2094,10 +2102,16 @@ retry:
/* nothing to write? */
if (nr_pages == 0) {
kref_put(&wdata->refcount, cifs_writedata_release);
+ add_credits_and_wake(server, credits, 0);
continue;
}

+ wdata->credits = credits;
+
rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
+ if (rc)
+ add_credits_and_wake(server, wdata->credits, 0);
+
kref_put(&wdata->refcount, cifs_writedata_release);

if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
@@ -2473,17 +2487,26 @@ cifs_write_from_iovec(loff_t offset, size_t len, const struct iovec *iov,

iov_iter_init(&it, iov, nr_segs, len, written);
do {
- nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
+ unsigned int wsize, credits;
+
+ rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
+ &wsize, &credits);
+ if (rc)
+ break;
+
+ nr_pages = get_numpages(wsize, len, &cur_len);
wdata = cifs_writedata_alloc(nr_pages,
cifs_uncached_writev_complete);
if (!wdata) {
rc = -ENOMEM;
+ add_credits_and_wake(server, credits, 0);
break;
}

rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
if (rc) {
kfree(wdata);
+ add_credits_and_wake(server, credits, 0);
break;
}

@@ -2492,6 +2515,7 @@ cifs_write_from_iovec(loff_t offset, size_t len, const struct iovec *iov,
for (i = 0; i < nr_pages; i++)
put_page(wdata->pages[i]);
kfree(wdata);
+ add_credits_and_wake(server, credits, 0);
break;
}

@@ -2503,12 +2527,14 @@ cifs_write_from_iovec(loff_t offset, size_t len, const struct iovec *iov,
wdata->bytes = cur_len;
wdata->pagesz = PAGE_SIZE;
wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
+ wdata->credits = credits;

if (!wdata->cfile->invalidHandle ||
!cifs_reopen_file(wdata->cfile, false))
rc = server->ops->async_writev(wdata,
cifs_uncached_writedata_release);
if (rc) {
+ add_credits_and_wake(server, wdata->credits, 0);
kref_put(&wdata->refcount,
cifs_uncached_writedata_release);
if (rc == -EAGAIN) {
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 8a96342..5e8c22d 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -1025,6 +1025,7 @@ struct smb_version_operations smb1_operations = {
.set_credits = cifs_set_credits,
.get_credits_field = cifs_get_credits_field,
.get_credits = cifs_get_credits,
+ .wait_mtu_credits = cifs_wait_mtu_credits,
.get_next_mid = cifs_get_next_mid,
.read_data_offset = cifs_read_data_offset,
.read_data_length = cifs_read_data_length,
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index e35ce5b..fecc2de 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -112,6 +112,53 @@ smb2_get_credits(struct mid_q_entry *mid)
return le16_to_cpu(((struct smb2_hdr *)mid->resp_buf)->CreditRequest);
}

+static int
+smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
+ unsigned int *num, unsigned int *credits)
+{
+ int rc = 0;
+ unsigned int scredits;
+
+ spin_lock(&server->req_lock);
+ while (1) {
+ if (server->credits <= 0) {
+ spin_unlock(&server->req_lock);
+ cifs_num_waiters_inc(server);
+ rc = wait_event_killable(server->request_q,
+ has_credits(server, &server->credits));
+ cifs_num_waiters_dec(server);
+ if (rc)
+ return rc;
+ spin_lock(&server->req_lock);
+ } else {
+ if (server->tcpStatus == CifsExiting) {
+ spin_unlock(&server->req_lock);
+ return -ENOENT;
+ }
+
+ scredits = server->credits;
+ /* can deadlock with reopen */
+ if (scredits == 1) {
+ *num = SMB2_MAX_BUFFER_SIZE;
+ *credits = 0;
+ break;
+ }
+
+ /* leave one credit for a possible reopen */
+ scredits--;
+ *num = min_t(unsigned int, size,
+ scredits * SMB2_MAX_BUFFER_SIZE);
+
+ *credits = DIV_ROUND_UP(*num, SMB2_MAX_BUFFER_SIZE);
+ server->credits -= *credits;
+ server->in_flight++;
+ break;
+ }
+ }
+ spin_unlock(&server->req_lock);
+ return rc;
+}
+
static __u64
smb2_get_next_mid(struct TCP_Server_Info *server)
{
@@ -182,8 +229,6 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
/* start with specified wsize, or default */
wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE;
wsize = min_t(unsigned int, wsize, server->max_write);
- /* set it to the maximum buffer size value we can send with 1 credit */
- wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE);

return wsize;
}
@@ -1120,6 +1165,7 @@ struct smb_version_operations smb20_operations = {
.set_credits = smb2_set_credits,
.get_credits_field = smb2_get_credits_field,
.get_credits = smb2_get_credits,
+ .wait_mtu_credits = cifs_wait_mtu_credits,
.get_next_mid = smb2_get_next_mid,
.read_data_offset = smb2_read_data_offset,
.read_data_length = smb2_read_data_length,
@@ -1196,6 +1242,7 @@ struct smb_version_operations smb21_operations = {
.set_credits = smb2_set_credits,
.get_credits_field = smb2_get_credits_field,
.get_credits = smb2_get_credits,
+ .wait_mtu_credits = smb2_wait_mtu_credits,
.get_next_mid = smb2_get_next_mid,
.read_data_offset = smb2_read_data_offset,
.read_data_length = smb2_read_data_length,
@@ -1272,6 +1319,7 @@ struct smb_version_operations smb30_operations = {
.set_credits = smb2_set_credits,
.get_credits_field = smb2_get_credits_field,
.get_credits = smb2_get_credits,
+ .wait_mtu_credits = smb2_wait_mtu_credits,
.get_next_mid = smb2_get_next_mid,
.read_data_offset = smb2_read_data_offset,
.read_data_length = smb2_read_data_length,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index b0b260d..56bf028 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1906,15 +1906,20 @@ int
smb2_async_writev(struct cifs_writedata *wdata,
void (*release)(struct kref *kref))
{
- int rc = -EACCES;
+ int rc = -EACCES, flags = 0;
struct smb2_write_req *req = NULL;
struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
+ struct TCP_Server_Info *server = tcon->ses->server;
struct kvec iov;
struct smb_rqst rqst;

rc = small_smb2_init(SMB2_WRITE, tcon, (void **) &req);
- if (rc)
+ if (rc) {
+ if (rc == -EAGAIN && wdata->credits)
+ /* credits was reseted by reconnect */
+ wdata->credits = 0;
goto async_writev_out;
+ }

req->hdr.ProcessId = cpu_to_le32(wdata->cfile->pid);

@@ -1947,9 +1952,20 @@ smb2_async_writev(struct cifs_writedata *wdata,

inc_rfc1001_len(&req->hdr, wdata->bytes - 1 /* Buffer */);

+ if (wdata->credits) {
+ req->hdr.CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes,
+ SMB2_MAX_BUFFER_SIZE));
+ spin_lock(&server->req_lock);
+ server->credits += wdata->credits -
+ le16_to_cpu(req->hdr.CreditCharge);
+ spin_unlock(&server->req_lock);
+ wake_up(&server->request_q);
+ flags = CIFS_HAS_CREDITS;
+ }
+
kref_get(&wdata->refcount);
- rc = cifs_call_async(tcon->ses->server, &rqst, NULL,
- smb2_writev_callback, wdata, 0);
+ rc = cifs_call_async(server, &rqst, NULL, smb2_writev_callback, wdata,
+ flags);

if (rc) {
kref_put(&wdata->refcount, release);
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 59c748c..5111e72 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -466,7 +466,12 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
static inline void
smb2_seq_num_into_buf(struct TCP_Server_Info *server, struct smb2_hdr *hdr)
{
+ unsigned int i, num = le16_to_cpu(hdr->CreditCharge);
+
hdr->MessageId = get_next_mid64(server);
+ /* skip message numbers according to CreditCharge field */
+ for (i = 1; i < num; i++)
+ get_next_mid(server);
}

static struct mid_q_entry *
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 18cd565..0b5d5fc 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -448,6 +448,15 @@ wait_for_free_request(struct TCP_Server_Info *server, const int timeout,
return wait_for_free_credits(server, timeout, val);
}

+int
+cifs_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
+ unsigned int *num, unsigned int *credits)
+{
+ *num = size;
+ *credits = 0;
+ return 0;
+}
+
static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf,
struct mid_q_entry **ppmidQ)
{
@@ -531,20 +540,23 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
{
int rc, timeout, optype;
struct mid_q_entry *mid;
+ unsigned int credits = 0;

timeout = flags & CIFS_TIMEOUT_MASK;
optype = flags & CIFS_OP_MASK;

- rc = wait_for_free_request(server, timeout, optype);
- if (rc)
- return rc;
+ if ((flags & CIFS_HAS_CREDITS) == 0) {
+ rc = wait_for_free_request(server, timeout, optype);
+ if (rc)
+ return rc;
+ credits = 1;
+ }

mutex_lock(&server->srv_mutex);
mid = server->ops->setup_async_request(server, rqst);
if (IS_ERR(mid)) {
mutex_unlock(&server->srv_mutex);
- add_credits(server, 1, optype);
- wake_up(&server->request_q);
+ add_credits_and_wake(server, credits, optype);
return PTR_ERR(mid);
}

@@ -572,8 +584,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
return 0;

cifs_delete_mid(mid);
- add_credits(server, 1, optype);
- wake_up(&server->request_q);
+ add_credits_and_wake(server, credits, optype);
return rc;
}
--
1.7.10.4
Steve French
2014-06-26 05:52:27 UTC
Permalink
Partially merged (first 5 ie those which merged cleanly) into
cifs-2.6.git for-next (for further testing).
Post by Pavel Shilovsky
The goal of this patchset is to add large MTU/multicredits support for writing from the CIFS client to servers that support SMB 2.1 and higher versions of the protocol.
First three patches simplify the writepages code. Patches #4 and #5 both fix the problem in writepages when a server changes wsize value after reconnects from the client. Patches #6 and #7 simplify the iovec write code, #8 fixes the wsize changing problem there. Finally, patch #9 introduces multicredits support for writing.
CIFS: Separate page processing from writepages
CIFS: Separate page sending from writepages
CIFS: Separate pages initialization from writepages
CIFS: Fix wsize usage in writepages
CIFS: Fix cifs_writev_requeue when wsize changes
CIFS: Separate filling pages from iovec write
CIFS: Separate writing from iovec write
CIFS: Fix wsize usage in iovec write
CIFS: Use multicredits for writes
fs/cifs/cifsglob.h | 17 ++
fs/cifs/cifsproto.h | 3 +
fs/cifs/cifssmb.c | 84 ++++++--
fs/cifs/file.c | 540 ++++++++++++++++++++++++++++-------------------
fs/cifs/smb1ops.c | 8 +
fs/cifs/smb2ops.c | 62 +++++-
fs/cifs/smb2pdu.c | 24 ++-
fs/cifs/smb2transport.c | 5 +
fs/cifs/transport.c | 25 ++-
9 files changed, 517 insertions(+), 251 deletions(-)
--
1.7.10.4
--
To unsubscribe from this list: send the line "unsubscribe linux-cifs" in
More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Thanks,

Steve
Loading...