Index: 2.6.16-rc5/block/fingerprinting.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ 2.6.16-rc5/block/fingerprinting.c	2006-03-17 19:21:41.000000000 -0600
@@ -0,0 +1,196 @@
+/*
+ * block/fingerprinting.c
+ *
+ * Jake Moilanen
+ * Copyright (C) 2006 IBM
+ *
+ * I/O Workload Fingerprinting
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2 of
+ * the License.
+*/
+
+/* TODOS:
+ * - Abstract so it is not so I/O specific
+ * - Abstract types
+ */
+
+#include
+#include
+#include
+
+/*
+ * Fold the seek distance of a new request into the running average.
+ *
+ * head_pos is the start sector of the new request; this assumes that the
+ * request address matches up w/ the head position.  The caller must have
+ * already counted the request in ss->reads or ss->writes.
+ */
+static void update_avg_dist(struct fp_snapshot *ss, long head_pos)
+{
+	unsigned long total_ops = ss->reads + ss->writes;
+	long long tmp_dist;
+	long dummy;
+
+	/*
+	 * The first request only establishes the starting position.  Key this
+	 * off the op count rather than head_pos == 0: sector 0 is a valid
+	 * position, and this also keeps a zero divisor away from divll().
+	 */
+	if (total_ops <= 1) {
+		ss->head_pos = head_pos;
+		return;
+	}
+
+	/* widen before subtracting so the difference cannot wrap unsigned */
+	tmp_dist = (long long)ss->head_pos - (long long)head_pos;
+	if (tmp_dist < 0)
+		tmp_dist = -tmp_dist;
+
+	/* incremental mean: avg += (sample - avg) / n */
+	tmp_dist -= (long long)ss->avg_dist;
+	divll(&tmp_dist, total_ops, &dummy);
+	ss->avg_dist += tmp_dist;
+
+	ss->head_pos = head_pos;
+}
+
+/*
+ * Fold the size of a new request into the running average.  The caller
+ * must have already counted the request in ss->reads or ss->writes.
+ */
+static void update_avg_size(struct fp_snapshot *ss, unsigned long size)
+{
+	unsigned long total_ops = ss->reads + ss->writes;
+	long long tmp_size;
+	long dummy;
+
+	/* never hand divll() a zero divisor */
+	if (!total_ops)
+		return;
+
+	/*
+	 * Widen before subtracting: an unsigned long difference would wrap
+	 * to a huge positive value on 32-bit whenever size < avg_size.
+	 */
+	tmp_size = (long long)size - (long long)ss->avg_size;
+	divll(&tmp_size, total_ops, &dummy);
+	ss->avg_size += tmp_size;
+}
+
+/*
+ * Account one bio in the snapshot of the disk it targets: bump the read
+ * or write count, then update the running averages of seek distance
+ * (from bio->bi_sector) and request size (from bio_sectors()).
+ *
+ * NOTE(review): nothing here serializes updates to the snapshot; confirm
+ * that the callers' locking is sufficient.
+ */
+void update_fp_snapshot(struct bio *bio)
+{
+	struct fp_snapshot *ss = bio->bi_bdev->bd_disk->fp_ss;
+
+	/* nothing to account into if the disk has no snapshot */
+	if (!ss)
+		return;
+
+	/* update type */
+	if (bio_data_dir(bio) == READ)
+		ss->reads++;
+	else
+		ss->writes++;
+
+	/* update pattern */
+	update_avg_dist(ss, bio->bi_sector);
+
+	/* update size */
+	update_avg_size(ss, bio_sectors(bio));
+}
+
+/*
+ * Use this when there are multiple disks, and their snapshots need to be
+ * consolidated into a system wide fingerprint.  Merges instance into
+ * master, weighting each side's averages by its operation count.
+ */
+void consolidate_fp_snapshot(struct fp_snapshot *master, struct fp_snapshot *instance)
+{
+	unsigned long master_ops, instance_ops, total_ops;
+	long long total_dist;
+	long long total_size;
+	long dummy;
+
+	BUG_ON(!master);
+	BUG_ON(!instance);
+
+	master_ops = master->reads + master->writes;
+	instance_ops = instance->reads + instance->writes;
+
+	/* weight in 64 bits so the products cannot wrap an unsigned long */
+	total_dist = (long long)master->avg_dist * (long long)master_ops +
+		     (long long)instance->avg_dist * (long long)instance_ops;
+	total_size = (long long)master->avg_size * (long long)master_ops +
+		     (long long)instance->avg_size * (long long)instance_ops;
+
+	/* update operations */
+	master->reads += instance->reads;
+	master->writes += instance->writes;
+	total_ops = master->reads + master->writes;
+
+	/* nothing recorded on either side: the averages are zero */
+	if (!total_ops) {
+		master->avg_dist = 0;
+		master->avg_size = 0;
+		return;
+	}
+
+	/* update distance */
+	divll(&total_dist, total_ops, &dummy);
+	master->avg_dist = total_dist;
+
+	/* update size */
+	divll(&total_size, total_ops, &dummy);
+	master->avg_size = total_size;
+}
+
+/* Zero every counter and average in a snapshot */
+void reset_fp_snapshot(struct fp_snapshot *ss)
+{
+	memset(ss, 0, sizeof(*ss));
+}
+
+/* Zero a fingerprint (all-zero reads as read / sequential / small) */
+void reset_fp(struct fingerprint *fp)
+{
+	memset(fp, 0, sizeof(*fp));
+}
+
+/*
+ * Classify a snapshot into a fingerprint: read vs. write, sequential vs.
+ * random, and small vs. large, using the FP_CLASS_* thresholds.
+ */
+void calc_fp(struct fingerprint *fp, struct fp_snapshot *fp_ss)
+{
+	/* type */
+	if (fp_ss->reads > (fp_ss->writes * FP_CLASS_READ_WRITE_RATIO))
+		fp->type = FP_TYPE_READ;
+	else
+		fp->type = FP_TYPE_WRITE;
+
+	/*
+	 * pattern: avg_dist is accumulated from bio->bi_sector, so it is
+	 * already in sectors, the same unit as FP_CLASS_PATTERN_RAND.
+	 * Scaling the threshold by 512 would mix sectors with bytes.
+	 */
+	if (fp_ss->avg_dist >= FP_CLASS_PATTERN_RAND)
+		fp->pattern = FP_PATTERN_RAND;
+	else
+		fp->pattern = FP_PATTERN_SEQ;
+
+	/* size */
+	if (fp_ss->avg_size > FP_CLASS_SIZE_LARGE)
+		fp->size = FP_SIZE_LARGE;
+	else
+		fp->size = FP_SIZE_SMALL;
+}
Index: 2.6.16-rc5/block/ll_rw_blk.c
===================================================================
--- 2.6.16-rc5.orig/block/ll_rw_blk.c	2006-03-17 19:19:42.000000000 -0600
+++ 2.6.16-rc5/block/ll_rw_blk.c	2006-03-20 14:32:06.000000000 -0600
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include <linux/fingerprinting.h>
 
 /*
  * for max sense size
@@ -2791,6 +2792,10 @@
 	rw = bio_data_dir(bio);
 	sync = bio_sync(bio);
 
+#ifdef CONFIG_FINGERPRINTING
+	if (bio->bi_bdev->bd_disk->fp_ss)
+		update_fp_snapshot(bio);
+#endif
 	/*
 	 * low level driver can indicate that it wants pages above a
 	 * certain limit bounced to low memory (ie for highmem, or even
Index: 2.6.16-rc5/include/linux/fingerprinting.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ 2.6.16-rc5/include/linux/fingerprinting.h	2006-03-17 19:21:41.000000000 -0600
@@ -0,0 +1,107 @@
+#ifndef __LINUX_FINGERPRINTING_H
+#define __LINUX_FINGERPRINTING_H
+
+/*
+ * include/linux/fingerprinting.h
+ *
+ * Jake Moilanen
+ * Copyright (C) 2006 IBM
+ *
+ * I/O Workload Fingerprinting
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2 of
+ * the License.
+*/
+
+#include
+#include
+
+/* Classification values stored in struct fingerprint */
+#define FP_TYPE_READ 0
+#define FP_TYPE_WRITE 1
+#define FP_PATTERN_SEQ 0
+#define FP_PATTERN_RAND 1
+#define FP_SIZE_SMALL 0
+#define FP_SIZE_LARGE 1
+/* 2 types * 2 patterns * 2 sizes */
+#define FP_NUM_POINTS (2 * 2 * 2)
+
+/* Coarse classification of a workload; filled in by calc_fp() */
+struct fingerprint {
+	__u8 type;	/* FP_TYPE_* */
+	__u8 pattern;	/* FP_PATTERN_* */
+	__u8 size;	/* FP_SIZE_* */
+};
+
+/* Running I/O statistics that a fingerprint is derived from */
+struct fp_snapshot {
+	/* type */
+	unsigned long reads;
+	unsigned long writes;
+	/* pattern */
+	unsigned long head_pos;
+	unsigned long avg_dist;
+	/* size */
+	unsigned long avg_size;
+};
+
+/* Number of reads/writes before classified as read */
+#define FP_CLASS_READ_WRITE_RATIO 2
+
+/* Number of sectors before pattern is random */
+#define FP_CLASS_PATTERN_RAND 25
+
+/* Number of sectors before size is large */
+#define FP_CLASS_SIZE_LARGE 8
+
+extern void update_fp_snapshot(struct bio *bio);
+extern void calc_fp(struct fingerprint *fp, struct fp_snapshot *fp_ss);
+extern void reset_fp_snapshot(struct fp_snapshot *ss);
+extern void reset_fp(struct fingerprint *fp);
+extern void consolidate_fp_snapshot(struct fp_snapshot *master, struct fp_snapshot *instance);
+extern int fingerprint_read_proc(char *page, char **start, off_t off,
+				 int count, int *eof, void *data);
+extern int fingerprint_snapshot_read_proc(char *page, char **start, off_t off,
+				 int count, int *eof, void *data);
+
+/*
+ * divll - divide the signed 64-bit *n by div in place
+ *
+ * The quotient replaces *n and the remainder is stored in *rem, both
+ * truncated toward zero as with C's / and % operators.  div must be
+ * positive.
+ */
+#ifndef DIVLL_OP
+#define DIVLL_OP
+#if BITS_PER_LONG >= 64
+
+static inline void divll(long long *n, long div, long *rem)
+{
+	*rem = *n % div;
+	*n /= div;
+}
+
+#else
+
+#include <asm/div64.h>
+
+static inline void divll(long long *n, long div, long *rem)
+{
+	int negative = *n < 0;
+	/* negate as unsigned so the most negative value cannot overflow */
+	uint64_t mag = negative ? -(uint64_t)*n : (uint64_t)*n;
+	uint32_t r;
+
+	/* do_div() is unsigned only: divide the magnitude, then re-sign */
+	r = do_div(mag, div);
+	*n = (long long)(negative ? -mag : mag);
+	*rem = negative ? -(long)r : (long)r;
+}
+
+#endif /* BITS_PER_LONG >= 64 */
+
+#endif /* DIVLL_OP */
+
+#endif /* __LINUX_FINGERPRINTING_H */
Index: 2.6.16-rc5/include/linux/genhd.h
===================================================================
--- 2.6.16-rc5.orig/include/linux/genhd.h	2006-03-17 19:19:42.000000000 -0600
+++ 2.6.16-rc5/include/linux/genhd.h	2006-03-20 14:32:06.000000000 -0600
@@ -16,6 +16,7 @@
 #include
 #include
 #include
+#include <linux/fingerprinting.h>
 
 enum {
 /* These three have identical behaviour; use the second one if DOS FDISK gets
@@ -126,6 +127,7 @@
 #else
 	struct disk_stats dkstats;
 #endif
+	struct fp_snapshot *fp_ss;	/* fingerprinting statistics; may be NULL */
 };
 
 /* Structure for sysfs attributes on block devices */
Index: 2.6.16-rc5/block/genhd.c
===================================================================
--- 2.6.16-rc5.orig/block/genhd.c	2006-03-17 19:19:42.000000000 -0600
+++ 2.6.16-rc5/block/genhd.c	2006-03-20 14:32:06.000000000 -0600
@@ -469,6 +469,29 @@
 		jiffies_to_msecs(disk_stat_read(disk, io_ticks)),
 		jiffies_to_msecs(disk_stat_read(disk, time_in_queue)));
 }
+/*
+ * sysfs "fp" attribute: dump the disk's fingerprinting snapshot in hex.
+ * Shows nothing if the disk has no snapshot.
+ */
+static ssize_t disk_fp_read(struct gendisk *disk, char *page)
+{
+	struct fp_snapshot *ss = disk->fp_ss;
+
+	if (!ss)
+		return 0;
+
+	return sprintf(page, "reads: %llx\n"
+			"writes: %llx\n"
+			"head_pos: %llx\n"
+			"avg_dist: %llx\n"
+			"avg_size: %llx\n",
+		(unsigned long long)ss->reads,
+		(unsigned long long)ss->writes,
+		(unsigned long long)ss->head_pos,
+		(unsigned long long)ss->avg_dist,
+		(unsigned long long)ss->avg_size);
+}
+
 static struct disk_attribute disk_attr_uevent = {
 	.attr = {.name = "uevent", .mode = S_IWUSR },
 	.store = disk_uevent_store
@@ -493,6 +516,10 @@
 	.attr = {.name = "stat", .mode = S_IRUGO },
 	.show = disk_stats_read
 };
+static struct disk_attribute disk_attr_fp = {
+	.attr = {.name = "fp", .mode = S_IRUGO },
+	.show = disk_fp_read
+};
 
 static struct attribute * default_attrs[] = {
 	&disk_attr_uevent.attr,
@@ -501,6 +528,7 @@
 	&disk_attr_removable.attr,
 	&disk_attr_size.attr,
 	&disk_attr_stat.attr,
+	&disk_attr_fp.attr,
 	NULL,
 };
 
@@ -712,6 +740,17 @@
 		kobject_init(&disk->kobj);
 		rand_initialize_disk(disk);
 	}
+
+	/*
+	 * disk is NULL here if its own allocation failed.  The snapshot is
+	 * best effort: if this allocation fails fp_ss stays NULL.
+	 *
+	 * NOTE(review): nothing in this patch frees fp_ss; disk_release()
+	 * presumably needs a matching kfree().
+	 */
+	if (disk)
+		disk->fp_ss = kzalloc(sizeof(*disk->fp_ss), GFP_KERNEL);
+
 	return disk;
 }
 
Index: 2.6.16-rc5/block/Kconfig
===================================================================
--- 2.6.16-rc5.orig/block/Kconfig	2006-03-17 19:19:42.000000000 -0600
+++ 2.6.16-rc5/block/Kconfig	2006-03-17 19:21:41.000000000 -0600
@@ -12,3 +12,9 @@
 	  bigger than 2TB. Otherwise say N.
 
 source block/Kconfig.iosched
+
+config FINGERPRINTING
+	bool "I/O Workload Fingerprinting"
+	help
+	  Say Y here if you want workload data to be classified and
+	  used to tune the I/O schedulers. Otherwise say N.
Index: 2.6.16-rc5/block/Makefile
===================================================================
--- 2.6.16-rc5.orig/block/Makefile	2006-03-17 19:19:42.000000000 -0600
+++ 2.6.16-rc5/block/Makefile	2006-03-17 19:21:41.000000000 -0600
@@ -8,3 +8,5 @@
 obj-$(CONFIG_IOSCHED_AS)	+= as-iosched.o
 obj-$(CONFIG_IOSCHED_DEADLINE)	+= deadline-iosched.o
 obj-$(CONFIG_IOSCHED_CFQ)	+= cfq-iosched.o
+
+obj-$(CONFIG_FINGERPRINTING)	+= fingerprinting.o
Index: 2.6.16-rc5/fs/proc/proc_misc.c
===================================================================
--- 2.6.16-rc5.orig/fs/proc/proc_misc.c	2006-03-17 19:19:42.000000000 -0600
+++ 2.6.16-rc5/fs/proc/proc_misc.c	2006-03-20 14:32:08.000000000 -0600
@@ -53,6 +53,10 @@
 #include
 #include "internal.h"
 
+#ifdef CONFIG_FINGERPRINTING
+#include <linux/fingerprinting.h>
+#endif
+
 #define LOAD_INT(x) ((x) >> FSHIFT)
 #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
 /*
@@ -695,6 +699,49 @@
 };
 #endif
 
+#ifdef CONFIG_FINGERPRINTING
+/*
+ * read_proc handler printing a fingerprint as three words, one per line:
+ * read/write, sequential/random and small/large.  data must point to the
+ * struct fingerprint to show.
+ */
+int fingerprint_read_proc(char *page, char **start, off_t off,
+				 int count, int *eof, void *data)
+{
+	struct fingerprint *fp = data;
+	int n;
+
+	n = sprintf(page, "%s\n",
+		    fp->type == FP_TYPE_READ ? "read" : "write");
+	n += sprintf(page + n, "%s\n",
+		     fp->pattern == FP_PATTERN_SEQ ? "sequential" : "random");
+	n += sprintf(page + n, "%s\n",
+		     fp->size == FP_SIZE_SMALL ? "small" : "large");
+
+	return proc_calc_metrics(page, start, off, count, eof, n);
+}
+
+/*
+ * read_proc handler printing the counters and averages of a snapshot in
+ * decimal.  data must point to the struct fp_snapshot to show.
+ */
+int fingerprint_snapshot_read_proc(char *page, char **start, off_t off,
+				 int count, int *eof, void *data)
+{
+	struct fp_snapshot *ss = data;
+	int n;
+
+	/* the fields are unsigned long, so print them with %lu */
+	n = sprintf(page, "read: %lu\n", ss->reads);
+	n += sprintf(page + n, "write: %lu\n", ss->writes);
+
+	n += sprintf(page + n, "avg_dist: %lu\n", ss->avg_dist);
+	n += sprintf(page + n, "avg_size: %lu\n", ss->avg_size);
+
+	return proc_calc_metrics(page, start, off, count, eof, n);
+}
+#endif /* CONFIG_FINGERPRINTING */
+
 struct proc_dir_entry *proc_root_kcore;
 
 void create_seq_entry(char *name, mode_t mode, struct file_operations *f)