#!/usr/bin/env python
"""
Generate reports from raw comScore market-share data.

Requires that you have gnuplot installed.

-s = tabulate or plot marketshare trends
-u = tabulate or plot userbase trends
-d = tabulate or plot changes in userbase by month
-w = generate HTML table to stdout; without this, make a plot to a file
-t = generate text table to stdout
-m = write the last month covered by the data to stdout
-p PLATFORM = plot a fitted straight line for the single named platform
-T = include totals column
-n = suppress deletion of generated data file

The raw data is assumed to be in comscore.dat.
"""
import os
import sys
import getopt
import tempfile
import copy


class comScore:
    """A table of report data held as a list of rows.

    Row 0 is the header row and column 0 of every row is the row label
    (the month).  Every other cell is either a number or the string '-',
    which marks a missing value.
    """

    def __init__(self, data):
        self.data = data

    #
    # Framework code
    #
    def arithmetize(self):
        "Turn data to numeric, excluding top row, left column, and - entries."
        width = len(self.data[0])
        for row in self.data[1:]:
            for j in range(1, width):
                if row[j] != '-':
                    row[j] = float(row[j])

    def unarithmetize(self):
        "Turn numeric table data back to strings (two decimal places)."
        width = len(self.data[0])
        for row in self.data[1:]:
            for j in range(1, width):
                if row[j] != '-':
                    row[j] = "%.2f" % row[j]

    def emit(self):
        """Ship transformed self.data to a file for plotting.

        Returns the name of a freshly created temporary file holding the
        table as tab-separated values; the caller must delete it.
        """
        (handle, name) = tempfile.mkstemp()
        # Write through the descriptor mkstemp() handed back so that it is
        # closed when we are done instead of being leaked.
        with os.fdopen(handle, "w") as ofp:
            for row in self.data:
                ofp.write("\t".join(row) + "\n")
        return name

    def textize(self, ofp=sys.stdout):
        "Dump data as a tab-separated-values file."
        for row in self.data:
            ofp.write("\t".join(row) + "\n")

    def webize(self, ofp=sys.stdout):
        """Generate a table suitable for web display from self.data.

        The table is written transposed: each HTML row holds one column
        of self.data.  As a side effect a space is inserted after the
        first three characters of every row label.
        """
        # NOTE(review): the markup literals below were empty in the copy of
        # this file under review (apparently stripped as HTML).  Plain
        # table/tr/td tags are assumed; confirm against the canonical
        # source, including any attributes the tags may have carried.
        depth = len(self.data)
        width = len(self.data[0])
        for row in self.data:
            row[0] = row[0][:3] + " " + row[0][3:]
        ofp.write("<table>\n")
        for j in range(width):
            ofp.write("<tr>")
            for i in range(depth):
                ofp.write("<td>" + self.data[i][j] + "</td>")
            ofp.write("</tr>\n")
        ofp.write("</table>\n")

    def lastmonth(self):
        "Return last month for which report is valid."
        return self.data[-1][0]

    def select(self, platform):
        """Select out data for a single platform.

        Reduces every row to [label, value-for-platform] and drops the
        header row.  Raises ValueError if platform is not in the header.
        """
        column = self.data[0].index(platform)
        self.data = [[row[0], row[column]] for row in self.data]
        self.data.pop(0)

    #
    # Data reduction
    #
    def usercount(self):
        "Multiply market shares by smartphone userbase size (last column)."
        last = len(self.data[0]) - 1
        self.data[0][last] = "Total"
        for row in self.data[1:]:
            for j in range(1, last):
                if row[j] != '-':
                    # Shares are percentages, hence the division by 100.
                    row[j] *= row[last]
                    row[j] /= 100.0

    def deltas(self):
        "Turn self.data into a differences table."
        width = len(self.data[0])
        depth = len(self.data)
        differences = copy.deepcopy(self.data)
        for i in range(2, depth):
            for j in range(1, width):
                if self.data[i][j] == '-' or self.data[i-1][j] == '-':
                    differences[i][j] = '-'
                else:
                    differences[i][j] = self.data[i][j] - self.data[i-1][j]
        # Remove first row, for which there is no corresponding delta.
        self.data = differences[:1] + differences[2:]


def coreplot(n):
    """Return a gnuplot script template plotting data columns 2 through n.

    The result still contains the %(input)s placeholder; gnuplot() fills
    it in with the data file name.
    """
    # The trailing backslash-newline inside the literal is swallowed by
    # Python, so the series appended below continue the same plot command.
    plot = """
set terminal png nocrop enhanced
set key outside right top vertical Right noreverse noenhanced autotitles nobox
set datafile missing '-'
set style data linespoints
set xtics border in scale 1,0.5 nomirror rotate by -45  offset character 0, 0, 0
set xtics  norangelimit
set xtics   ()
plot '%(input)s' using 2:xtic(1) title columnheader(2), \
"""
    for i in range(3, n):
        plot += " '' using %d:xtic(1) title columnheader(%d), " % (i, i)
    plot += " '' using %d:xtic(1) title columnheader(%d)" % (n, n)
    return plot


# gnuplot script template for the single-platform plot: draws the points
# together with the line f(x) fitted to them.  Percent signs are doubled
# because the template goes through %-substitution in gnuplot().
predictive = """
set terminal png nocrop enhanced
set datafile missing '-'
set style data points
set xtics border in scale 1,0.5 nomirror rotate by -45  offset character 0, 0, 0
set xtics  norangelimit
set xtics   ()
set xdata time
set timefmt '%%b%%Y'
set xtics format '%%b%%Y'
unset key
set grid
offset=10*365*24*60*60
f(x)=1.e-7*m*(x-offset)+b
fit f(x) '%(input)s' using 1:2 via m,b
plot '%(input)s' using 1:2, f(x)
"""


def gnuplot(inputname, plot):
    "Generate a derived plot."
    plot = plot % {"input": inputname}
    # Echo the script that is about to be run.
    # NOTE(review): this goes to stdout, the same stream the gnuplot child
    # inherits -- confirm that is intended.
    sys.stdout.write(plot + "\n")
    ofp = os.popen("gnuplot -", "w")
    try:
        ofp.write(plot)
    finally:
        ofp.close()


def grab(filename):
    """Grab the contents of a data file.

    Returns a list of rows, each a list of tab-separated cells.  Lines
    starting with '#' are comments; blank lines are skipped as well.
    """
    lines = []
    with open(filename) as ifp:
        for line in ifp:
            if line.startswith('#'):
                continue
            stripped = line.strip()
            # A blank line would otherwise become the one-cell row [''].
            if not stripped:
                continue
            lines.append(stripped.split("\t"))
    return lines


def _plot_table(info, script, remove):
    "Write info to a scratch file, plot it, then delete or report the file."
    table = info.emit()
    gnuplot(table, script)
    if remove:
        os.remove(table)
    else:
        sys.stderr.write("Table at %s\n" % table)


def main():
    "Parse the command line and produce the requested report."
    try:
        (options, arguments) = getopt.getopt(sys.argv[1:], "suwdtmp:Tn")
    except getopt.GetoptError as err:
        sys.stderr.write("%s\n" % err)
        sys.exit(2)
    tabulate = False
    textdump = False
    datedump = False
    user = False
    deltas = False
    total = False
    predict = None
    remove = True
    basedata = grab('comscore.dat')
    info = comScore(basedata)
    for (opt, val) in options:
        if opt == '-s':
            pass        # Market share is what we report by default.
        elif opt == '-u':
            user = True
        elif opt == '-d':
            deltas = True
        elif opt == '-w':
            tabulate = True
        elif opt == '-t':
            textdump = True
        elif opt == '-m':
            datedump = True
        elif opt == '-p':
            predict = val
        elif opt == '-n':
            remove = False
        elif opt == '-T':
            total = True

    # Percent signs are doubled here because the finished script goes
    # through one more %-substitution inside gnuplot().
    if user:
        title = "Userbase by platform, "
    else:
        title = "Market-share per platform, "
    if deltas:
        title += "change per month, "
    if user:
        yformat = "set format y '%%.2fM'\n"
        title += "units of 1M users."
    else:
        yformat = "set format y '%%.0f%%%%'\n"
        title += "units of 1%%."

    # Only go numeric when there is arithmetic to do; formatting the raw
    # strings with %.2f would fail.
    if user or deltas:
        info.arithmetize()
        if user:
            info.usercount()
        if deltas:
            info.deltas()
        info.unarithmetize()

    if tabulate:
        info.webize()
    elif textdump:
        info.textize()
    elif datedump:
        sys.stdout.write(info.lastmonth())
    elif predict:
        info.select(predict)
        heading = 'set title "%s: %s (prediction)"\n' % (predict, title)
        _plot_table(info, heading + yformat + predictive, remove)
    else:
        heading = "set title '%s'\n" % title
        # One more column is plotted when the totals column is requested.
        if total:
            lastcolumn = 6
        else:
            lastcolumn = 5
        _plot_table(info, heading + yformat + coreplot(lastcolumn), remove)


if __name__ == '__main__':
    main()

# End