staging: lustre: osc: revise unstable pages accounting
This patch makes a few changes to unstable page tracking:

1. Remove kernel NFS unstable page tracking, because it killed
   performance
2. Track unstable pages as part of the LRU cache; otherwise Lustre
   can use much more memory than max_cached_mb
3. Remove obd_unstable_pages tracking to avoid a global atomic
   counter
4. Make unstable page tracking optional. It is turned off by
   default and can be controlled by llite.*.unstable_stats, as in
   the example below.
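
For example, tracking can be toggled per mount point (assuming the
standard lctl set_param/get_param interface; the parameter name comes
from the unstable_stats attribute added in lproc_llite.c below):

    lctl set_param llite.*.unstable_stats=1  # enable unstable page tracking
    lctl get_param llite.*.unstable_stats    # report unstable_check/pages/mb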

Signed-off-by: Jinshan Xiong <jinshan.xiong@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-4841
Reviewed-on: http://review.whamcloud.com/10003
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Jinshan Xiong authored and Greg Kroah-Hartman committed Aug 21, 2016
1 parent 96c5336 commit d806f30
Showing 8 changed files with 253 additions and 145 deletions.
35 changes: 24 additions & 11 deletions drivers/staging/lustre/lustre/include/cl_object.h
@@ -1039,23 +1039,32 @@ do { \
 } \
 } while (0)
 
-static inline int __page_in_use(const struct cl_page *page, int refc)
-{
-        if (page->cp_type == CPT_CACHEABLE)
-                ++refc;
-        LASSERT(atomic_read(&page->cp_ref) > 0);
-        return (atomic_read(&page->cp_ref) > refc);
-}
-
-#define cl_page_in_use(pg) __page_in_use(pg, 1)
-#define cl_page_in_use_noref(pg) __page_in_use(pg, 0)
-
 static inline struct page *cl_page_vmpage(struct cl_page *page)
 {
         LASSERT(page->cp_vmpage);
         return page->cp_vmpage;
 }
 
+/**
+ * Check if a cl_page is in use.
+ *
+ * Client cache holds a refcount, this refcount will be dropped when
+ * the page is taken out of cache, see vvp_page_delete().
+ */
+static inline bool __page_in_use(const struct cl_page *page, int refc)
+{
+        return (atomic_read(&page->cp_ref) > refc + 1);
+}
+
+/**
+ * Caller itself holds a refcount of cl_page.
+ */
+#define cl_page_in_use(pg) __page_in_use(pg, 1)
+/**
+ * Caller doesn't hold a refcount.
+ */
+#define cl_page_in_use_noref(pg) __page_in_use(pg, 0)
+
 /** @} cl_page */
 
 /** \addtogroup cl_lock cl_lock
@@ -2330,6 +2339,10 @@ struct cl_client_cache {
          * Lock to protect ccc_lru list
          */
         spinlock_t ccc_lru_lock;
+        /**
+         * Set if unstable check is enabled
+         */
+        unsigned int ccc_unstable_check:1;
         /**
          * # of unstable pages for this mount point
         */
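The rewritten __page_in_use() above drops the old CPT_CACHEABLE special
case and instead assumes the client cache always holds exactly one
reference, so a page is busy only when cp_ref exceeds the caller's
references plus that implicit cache reference. A minimal userspace
sketch of the check (toy code, not part of the patch; a plain int
stands in for the kernel atomic_t):

#include <assert.h>
#include <stdbool.h>

struct toy_page {
        int cp_ref;     /* stand-in for atomic_t cp_ref */
};

/* mirror of the new check: the caller's refs plus the cache's own ref */
static bool page_in_use(const struct toy_page *page, int refc)
{
        return page->cp_ref > refc + 1;
}

int main(void)
{
        struct toy_page pg = { .cp_ref = 2 };   /* cache ref + caller ref */

        assert(!page_in_use(&pg, 1));   /* cl_page_in_use(): nobody else */
        pg.cp_ref++;                    /* e.g. an I/O path takes a ref */
        assert(page_in_use(&pg, 1));    /* now genuinely in use */
        return 0;
}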
1 change: 0 additions & 1 deletion drivers/staging/lustre/lustre/include/obd_support.h
@@ -54,7 +54,6 @@ extern int at_early_margin;
 extern int at_extra;
 extern unsigned int obd_sync_filter;
 extern unsigned int obd_max_dirty_pages;
-extern atomic_t obd_unstable_pages;
 extern atomic_t obd_dirty_pages;
 extern atomic_t obd_dirty_transit_pages;
 extern char obd_jobid_var[];
41 changes: 38 additions & 3 deletions drivers/staging/lustre/lustre/llite/lproc_llite.c
@@ -828,10 +828,45 @@ static ssize_t unstable_stats_show(struct kobject *kobj,
         pages = atomic_read(&cache->ccc_unstable_nr);
         mb = (pages * PAGE_SIZE) >> 20;
 
-        return sprintf(buf, "unstable_pages: %8d\n"
-                       "unstable_mb:    %8d\n", pages, mb);
+        return sprintf(buf, "unstable_check: %8d\n"
+                       "unstable_pages: %8d\n"
+                       "unstable_mb:    %8d\n",
+                       cache->ccc_unstable_check, pages, mb);
 }
-LUSTRE_RO_ATTR(unstable_stats);
+
+static ssize_t unstable_stats_store(struct kobject *kobj,
+                                    struct attribute *attr,
+                                    const char *buffer,
+                                    size_t count)
+{
+        struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                              ll_kobj);
+        char kernbuf[128];
+        int val, rc;
+
+        if (!count)
+                return 0;
+        if (count < 0 || count >= sizeof(kernbuf))
+                return -EINVAL;
+
+        if (copy_from_user(kernbuf, buffer, count))
+                return -EFAULT;
+        kernbuf[count] = 0;
+
+        buffer += lprocfs_find_named_value(kernbuf, "unstable_check:", &count) -
+                  kernbuf;
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc < 0)
+                return rc;
+
+        /* borrow lru lock to set the value */
+        spin_lock(&sbi->ll_cache->ccc_lru_lock);
+        sbi->ll_cache->ccc_unstable_check = !!val;
+        spin_unlock(&sbi->ll_cache->ccc_lru_lock);
+
+        return count;
+}
+LUSTRE_RW_ATTR(unstable_stats);
 
 static ssize_t root_squash_show(struct kobject *kobj, struct attribute *attr,
                                 char *buf)
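The store handler accepts either a bare integer or an
"unstable_check: <val>" pair: lprocfs_find_named_value() skips past the
name when it is present and otherwise falls back to the start of the
buffer (my reading of the staging helper; treat it as an assumption). A
rough userspace sketch of that parsing, with a hypothetical
parse_named_int() standing in for the Lustre helpers:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* hypothetical stand-in for lprocfs_find_named_value + write_helper */
static int parse_named_int(const char *buf, const char *name)
{
        const char *p = strstr(buf, name);

        p = p ? p + strlen(name) : buf; /* name absent: parse whole buffer */
        return atoi(p);
}

int main(void)
{
        printf("%d\n", parse_named_int("unstable_check: 1", "unstable_check:")); /* 1 */
        printf("%d\n", parse_named_int("0", "unstable_check:"));                 /* 0 */
        return 0;
}

So, assuming the usual llite sysfs layout, either `echo 1` or
`echo "unstable_check: 1"` into /sys/fs/lustre/llite/*/unstable_stats
should toggle the flag.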
2 changes: 0 additions & 2 deletions drivers/staging/lustre/lustre/obdclass/class_obd.c
@@ -57,8 +57,6 @@ unsigned int obd_dump_on_eviction;
 EXPORT_SYMBOL(obd_dump_on_eviction);
 unsigned int obd_max_dirty_pages = 256;
 EXPORT_SYMBOL(obd_max_dirty_pages);
-atomic_t obd_unstable_pages;
-EXPORT_SYMBOL(obd_unstable_pages);
 atomic_t obd_dirty_pages;
 EXPORT_SYMBOL(obd_dirty_pages);
 unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT; /* seconds */
96 changes: 4 additions & 92 deletions drivers/staging/lustre/lustre/osc/osc_cache.c
@@ -1384,13 +1384,11 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap,
 #define OSC_DUMP_GRANT(lvl, cli, fmt, args...) do { \
         struct client_obd *__tmp = (cli); \
         CDEBUG(lvl, "%s: grant { dirty: %ld/%ld dirty_pages: %d/%d " \
-               "unstable_pages: %d/%d dropped: %ld avail: %ld, " \
-               "reserved: %ld, flight: %d } lru {in list: %d, " \
-               "left: %d, waiters: %d }" fmt, \
+               "dropped: %ld avail: %ld, reserved: %ld, flight: %d }" \
+               "lru {in list: %d, left: %d, waiters: %d }" fmt, \
                __tmp->cl_import->imp_obd->obd_name, \
                __tmp->cl_dirty, __tmp->cl_dirty_max, \
                atomic_read(&obd_dirty_pages), obd_max_dirty_pages, \
-               atomic_read(&obd_unstable_pages), obd_max_dirty_pages, \
                __tmp->cl_lost_grant, __tmp->cl_avail_grant, \
                __tmp->cl_reserved_grant, __tmp->cl_w_in_flight, \
                atomic_read(&__tmp->cl_lru_in_list), \
@@ -1542,8 +1540,7 @@ static int osc_enter_cache_try(struct client_obd *cli,
                 return 0;
 
         if (cli->cl_dirty + PAGE_SIZE <= cli->cl_dirty_max &&
-            atomic_read(&obd_unstable_pages) + 1 +
-            atomic_read(&obd_dirty_pages) <= obd_max_dirty_pages) {
+            atomic_read(&obd_dirty_pages) + 1 <= obd_max_dirty_pages) {
                 osc_consume_write_grant(cli, &oap->oap_brw_page);
                 if (transient) {
                         cli->cl_dirty_transit += PAGE_SIZE;
@@ -1671,8 +1668,7 @@ void osc_wake_cache_waiters(struct client_obd *cli)
                 ocw->ocw_rc = -EDQUOT;
                 /* we can't dirty more */
                 if ((cli->cl_dirty + PAGE_SIZE > cli->cl_dirty_max) ||
-                    (atomic_read(&obd_unstable_pages) + 1 +
-                     atomic_read(&obd_dirty_pages) > obd_max_dirty_pages)) {
+                    (atomic_read(&obd_dirty_pages) + 1 > obd_max_dirty_pages)) {
                         CDEBUG(D_CACHE, "no dirty room: dirty: %ld osc max %ld, sys max %d\n",
                                cli->cl_dirty,
                                cli->cl_dirty_max, obd_max_dirty_pages);
@@ -1843,84 +1839,6 @@ static void osc_process_ar(struct osc_async_rc *ar, __u64 xid,
                 ar->ar_force_sync = 0;
 }
 
-/**
- * Performs "unstable" page accounting. This function balances the
- * increment operations performed in osc_inc_unstable_pages. It is
- * registered as the RPC request callback, and is executed when the
- * bulk RPC is committed on the server. Thus at this point, the pages
- * involved in the bulk transfer are no longer considered unstable.
- */
-void osc_dec_unstable_pages(struct ptlrpc_request *req)
-{
-        struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
-        struct ptlrpc_bulk_desc *desc = req->rq_bulk;
-        int page_count = desc->bd_iov_count;
-        int i;
-
-        /* No unstable page tracking */
-        if (!cli->cl_cache)
-                return;
-
-        LASSERT(page_count >= 0);
-
-        for (i = 0; i < page_count; i++)
-                dec_node_page_state(desc->bd_iov[i].bv_page, NR_UNSTABLE_NFS);
-
-        atomic_sub(page_count, &cli->cl_cache->ccc_unstable_nr);
-        LASSERT(atomic_read(&cli->cl_cache->ccc_unstable_nr) >= 0);
-
-        atomic_sub(page_count, &cli->cl_unstable_count);
-        LASSERT(atomic_read(&cli->cl_unstable_count) >= 0);
-
-        atomic_sub(page_count, &obd_unstable_pages);
-        LASSERT(atomic_read(&obd_unstable_pages) >= 0);
-
-        wake_up_all(&cli->cl_cache->ccc_unstable_waitq);
-}
-
-/* "unstable" page accounting. See: osc_dec_unstable_pages. */
-void osc_inc_unstable_pages(struct ptlrpc_request *req)
-{
-        struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
-        struct ptlrpc_bulk_desc *desc = req->rq_bulk;
-        long page_count = desc->bd_iov_count;
-        int i;
-
-        /* No unstable page tracking */
-        if (!cli->cl_cache)
-                return;
-
-        LASSERT(page_count >= 0);
-
-        for (i = 0; i < page_count; i++)
-                inc_node_page_state(desc->bd_iov[i].bv_page, NR_UNSTABLE_NFS);
-
-        LASSERT(atomic_read(&cli->cl_cache->ccc_unstable_nr) >= 0);
-        atomic_add(page_count, &cli->cl_cache->ccc_unstable_nr);
-
-        LASSERT(atomic_read(&cli->cl_unstable_count) >= 0);
-        atomic_add(page_count, &cli->cl_unstable_count);
-
-        LASSERT(atomic_read(&obd_unstable_pages) >= 0);
-        atomic_add(page_count, &obd_unstable_pages);
-
-        /*
-         * If the request has already been committed (i.e. brw_commit
-         * called via rq_commit_cb), we need to undo the unstable page
-         * increments we just performed because rq_commit_cb wont be
-         * called again.
-         */
-        spin_lock(&req->rq_lock);
-        if (unlikely(req->rq_committed)) {
-                /* Drop lock before calling osc_dec_unstable_pages */
-                spin_unlock(&req->rq_lock);
-                osc_dec_unstable_pages(req);
-        } else {
-                req->rq_unstable = 1;
-                spin_unlock(&req->rq_lock);
-        }
-}
-
 /* this must be called holding the loi list lock to give coverage to exit_cache,
  * async_flag maintenance, and oap_request
  */
Expand All @@ -1932,9 +1850,6 @@ static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli,
__u64 xid = 0;

if (oap->oap_request) {
if (!rc)
osc_inc_unstable_pages(oap->oap_request);

xid = ptlrpc_req_xid(oap->oap_request);
ptlrpc_req_finished(oap->oap_request);
oap->oap_request = NULL;
@@ -2421,9 +2336,6 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
                 return rc;
         }
 
-        if (osc_over_unstable_soft_limit(cli))
-                brw_flags |= OBD_BRW_SOFT_SYNC;
-
         oap->oap_cmd = cmd;
         oap->oap_page_off = ops->ops_from;
         oap->oap_count = ops->ops_to - ops->ops_from;
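Note that osc_internal.h (below) still declares osc_inc_unstable_pages
and osc_dec_unstable_pages, so the mechanism itself survives outside
this file. The interesting part of the deleted code is the rq_committed
race: if the bulk RPC commits before the increments finish, the commit
callback will never run again, so the increment path must undo its own
accounting on the spot. A toy pthread sketch of that pattern
(illustration only, not kernel code):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t rq_lock = PTHREAD_MUTEX_INITIALIZER;
static bool rq_committed;       /* set once the bulk RPC commits */
static bool rq_unstable;        /* tells the commit callback to decrement */
static int unstable_pages;

static void dec_unstable(int n) { unstable_pages -= n; }

static void inc_unstable(int n)
{
        unstable_pages += n;

        pthread_mutex_lock(&rq_lock);
        if (rq_committed) {
                /* commit callback already ran; balance the increment now */
                pthread_mutex_unlock(&rq_lock);
                dec_unstable(n);
        } else {
                rq_unstable = true;     /* commit callback will decrement */
                pthread_mutex_unlock(&rq_lock);
        }
}

int main(void)
{
        inc_unstable(4);
        printf("unstable_pages=%d rq_unstable=%d\n",
               unstable_pages, rq_unstable);
        return 0;
}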
2 changes: 1 addition & 1 deletion drivers/staging/lustre/lustre/osc/osc_internal.h
@@ -197,7 +197,7 @@ int osc_quotacheck(struct obd_device *unused, struct obd_export *exp,
 int osc_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk);
 void osc_inc_unstable_pages(struct ptlrpc_request *req);
 void osc_dec_unstable_pages(struct ptlrpc_request *req);
-int osc_over_unstable_soft_limit(struct client_obd *cli);
+bool osc_over_unstable_soft_limit(struct client_obd *cli);
 
 struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env,
                                        struct osc_object *obj, pgoff_t index,
[diffs for the remaining 2 of the 8 changed files did not load]
