Source code for physalia.models

"""Models that require persistence."""

import csv
import os
import sys
from string import Template
from itertools import groupby
from collections import OrderedDict
import bisect
from scipy.stats import ttest_ind
import numpy
from physalia.utils.symbols import GREEK_ALPHABET

class Measurement(object):
    """Energy measurement information.

    Measurements are persisted as rows appended to the CSV file named by
    ``csv_storage``. The column order of a row is the positional order of
    the constructor arguments, which is why rows can be turned back into
    objects with ``Measurement(*row)``. Values read back from the CSV file
    are strings.

    Attributes:
        timestamp           When the execution started.
        use_case            Key identifier of the use case.
        app_pkg             Package of the app.
        app_version         Version of the app.
        device_model        Device where the measurements were performed.
        duration            Time it takes to execute the use case.
        energy_consumption  Mean of the measurements.
        power_meter         Name of the power meter used.
        persisted           Whether ``persist`` already stored this object.
    """

    # pylint: disable=too-many-instance-attributes
    # pylint: disable=too-many-arguments
    # Eight is reasonable in this case.

    csv_storage = "./db.csv"
    # Column indexes of a CSV row (same order as the __init__ arguments).
    COLUMN_APP_PKG = 2
    COLUMN_USE_CASE = 1

    def __init__(
            self,
            timestamp,
            use_case,
            app_pkg,
            app_version,
            device_model,
            duration,
            energy_consumption,
            power_meter="NA"
    ):  # noqa: D102
        self.persisted = False
        self.timestamp = timestamp
        self.use_case = use_case
        self.app_pkg = app_pkg
        self.app_version = app_version
        self.device_model = device_model
        self.duration = duration
        self.energy_consumption = energy_consumption
        self.power_meter = power_meter

    @staticmethod
    def _open_csv(filename, mode):
        """Open a CSV file the way the running interpreter's csv module needs.

        Python 2's csv module works on binary files, whereas Python 3's
        expects text files opened with ``newline=''``.

        Args:
            filename (string): path of the CSV file.
            mode (string): base mode, ``'r'`` or ``'a'``.

        Returns:
            An open file object, to be used as a context manager.
        """
        if sys.version_info[0] < 3:
            return open(filename, mode + 'b')
        return open(filename, mode, newline='')

    @classmethod
    def _read_rows(cls):
        """Read every non-empty row of the CSV database.

        Blank lines are skipped so that they cannot trigger an IndexError
        or a bad constructor call further down.

        Returns:
            List of rows, each row being a list of strings.

        Raises:
            IOError/OSError: if the database file cannot be opened.
        """
        with cls._open_csv(cls.csv_storage, 'r') as csvfile:
            return [row for row in csv.reader(csvfile) if row]

    def persist(self):
        """Store measurement in the database.

        Returns:
            True if the measurement was written, False if this object had
            already been persisted (nothing is written in that case).
        """
        if self.persisted:
            return False
        self.save_to_csv(self.csv_storage)
        self.persisted = True
        return True

    def save_to_csv(self, filename):
        """Append this measurement as one row of the given CSV file.

        Args:
            filename (string): path of the CSV file to append to.
        """
        with self._open_csv(filename, 'a') as csvfile:
            csv_writer = csv.writer(csvfile)
            csv_writer.writerow([
                self.timestamp,
                self.use_case,
                self.app_pkg,
                self.app_version,
                self.device_model,
                self.duration,
                self.energy_consumption,
                self.power_meter
            ])

    def __str__(self):
        """Get description of the measurement."""
        return (
            "Measurement for {}:\n"
            " {: <20}{:.4f}J\n"
            " {: <20}{}s\n"
            " {: <20}{}\n"
            " {: <20}{}"
        ).format(
            self.use_case,
            # Values loaded from the CSV file are strings; the ".4f" format
            # spec needs a number.
            "Energy consumption:", float(self.energy_consumption),
            "Duration:", self.duration,
            "Power meter:", self.power_meter,
            "Phone:", self.device_model
        )

    @classmethod
    def clear_database(cls):
        """Clear database.

        Deletes CSV data file. A file that cannot be removed (for instance
        because it does not exist) is silently ignored.
        """
        try:
            os.remove(cls.csv_storage)
        except OSError:
            pass

    @classmethod
    def _get_unique_from_column(cls, column_index):
        """Get unique values of the given column.

        Args:
            column_index (int): index of the CSV column to inspect.

        Returns:
            Set with the distinct values found in that column.
        """
        return {row[column_index] for row in cls._read_rows()}

    @classmethod
    def get_unique_apps(cls):
        """Get all unique apps existing in the database.

        Returns:
            Set of unique apps.
        """
        return cls._get_unique_from_column(cls.COLUMN_APP_PKG)

    @classmethod
    def get_unique_use_cases(cls):
        """Get all unique use cases.

        Returns:
            Set of unique use cases.
        """
        return cls._get_unique_from_column(cls.COLUMN_USE_CASE)

    @classmethod
    def get_all_entries_of_app(cls, app, use_case):
        """Get all entries that have a specific app and use case.

        If the use_case is None, all use_cases of the app are retrieved.

        Args:
            app (string): Application package.
            use_case (string): Name of the use case, or None for any.

        Returns:
            List of Measurement built from the matching rows.
        """
        # The parentheses matter: `and` binds tighter than `or`, so without
        # them the app filter would be skipped when the use case matches.
        return [
            Measurement(*row)
            for row in cls._read_rows()
            if row[cls.COLUMN_APP_PKG] == app and (
                use_case is None or row[cls.COLUMN_USE_CASE] == use_case
            )
        ]

    @classmethod
    def mean_energy_consumption(cls, measurements):
        """Get mean energy consumption from a set of measurements.

        Args:
            measurements (list of Measurement): sample to average.

        Returns:
            Mean of the energy consumption values, as a float.

        Raises:
            ValueError: if the sample is empty.
        """
        if not measurements:
            raise ValueError("Empty sample.")
        energy_consumptions = [
            float(measurement.energy_consumption)
            for measurement in measurements
        ]
        return sum(energy_consumptions) / len(energy_consumptions)

    @classmethod
    def describe(cls, measurements):
        """Descriptive statistics for a set of measurements.

        Get descriptive statistics for time and energy consumption
        for a set of measurements.

        Args:
            measurements (list of Measurement): sample to describe.

        Returns:
            Tuple of Energy consumption mean, std, Duration mean, std;
            or None when the sample is empty.
        """
        len_measurements = len(measurements)
        if len_measurements == 0:
            return None
        energy_consumptions = [
            float(measurement.energy_consumption)
            for measurement in measurements
        ]
        durations = [
            float(measurement.duration) for measurement in measurements
        ]
        return (
            sum(energy_consumptions) / len_measurements,
            numpy.std(energy_consumptions),
            sum(durations) / len_measurements,
            numpy.std(durations),
        )

    @classmethod
    def describe_app_use_case(cls, app, use_case):
        """Descriptive statistics for a stored App use case.

        Get descriptive statistics for time and energy consumption
        of an application use case.

        Args:
            app (string): Application package.
            use_case (string): Name of the use case

        Returns:
            Tuple of Energy consumption mean, std, Duration mean, std;
            or None when no stored entry matches.
        """
        measurements = cls.get_all_entries_of_app(app, use_case)
        return cls.describe(measurements)

    @classmethod
    def hypothesis_test(cls, sample_a, sample_b):
        """Perform hypothesis test over two samples of measurements.

        Uses Welch's t-test to check whether energy consumption
        is different in the populations of samples a and b.

        Args:
            sample_a (list of Measurement): measurements of sample a
            sample_b (list of Measurement): measurements of sample b

        Returns:
            t (float): The calculated t-statistic
            prob (float): The two-tailed p-value
        """
        # Convert explicitly: measurements loaded from the CSV file carry
        # their energy consumption as strings.
        return ttest_ind(
            [float(measurement.energy_consumption)
             for measurement in sample_a],
            [float(measurement.energy_consumption)
             for measurement in sample_b],
            equal_var=False
        )

    @classmethod
    def fancy_hypothesis_test(cls, sample_a, sample_b, name_a, name_b,
                              out=sys.stdout, alpha=0.05):
        """Perform and describe hypothesis testing of 2 samples.

        Args:
            sample_a (list of Measurement): measurements of sample a
            sample_b (list of Measurement): measurements of sample b
            name_a (String): population name of sample a
            name_b (String): population name of sample b
            out (file): data stream for output
            alpha (float): significance level of the test

        Returns:
            The result of ``hypothesis_test`` for the two samples
            (t-statistic and two-tailed p-value).
        """
        test_result = cls.hypothesis_test(sample_a, sample_b)
        _, pvalue = test_result
        # Written as "pvalue < alpha" so that a NaN p-value (undefined
        # test) counts as "not rejected" instead of as a difference.
        null_rejected = pvalue < alpha
        out.write(
            Template(
                "Hypothesis testing:\n"
                "\t$H0: $mu {name_a} = $mu {name_b}.\n"
                "\t$H1: $mu {name_a} $neq $mu {name_b}.\n"
                "\n"
            ).substitute(GREEK_ALPHABET).format(name_a=name_a, name_b=name_b)
        )
        out.write(
            u"Applying Welch's t-test with {alpha_letter}={alpha}, the null"
            " hypothesis is{negate} rejected (p-value={pvalue}).\n".format(
                negate="" if null_rejected else " not",
                pvalue="<0.001" if pvalue < 0.001 else "{:.3f}".format(pvalue),
                alpha_letter=GREEK_ALPHABET['alpha'],
                alpha=alpha
            )
        )
        if null_rejected:
            out.write("Thus, one can say that the means of populations"
                      " \"{name_a}\" and \"{name_b}\" are different.\n"
                      "".format(name_a=name_a, name_b=name_b))
        else:
            out.write("Thus, it was not possible to find evidence that"
                      " the means of populations {name_a} and {name_b}"
                      " are different.\n".format(name_a=name_a, name_b=name_b))
        return test_result

    @classmethod
    def get_energy_ranking(cls):
        """Ranking of the energy consumption of all apps.

        Get apps aggregated and sorted by mean energy consumption.

        Returns:
            OrderedDict with key=app_pkg and value=energy_consumption,
            ordered from the lowest to the highest mean consumption.
        """
        # groupby only merges adjacent items, hence the sort by the same key.
        data = sorted(
            (Measurement(*row) for row in cls._read_rows()),
            key=lambda msrmnt: msrmnt.app_pkg
        )
        grouped_data = {
            app_pkg: Measurement.mean_energy_consumption(list(group))
            for (app_pkg, group) in groupby(
                data, key=lambda msrmnt: msrmnt.app_pkg
            )
        }
        # Index-based key: tuple parameters in lambdas are Python 2 only.
        return OrderedDict(sorted(
            grouped_data.items(),
            key=lambda item: item[1]
        ))

    @classmethod
    def get_position_in_ranking(cls, measurements):
        """Get the position in ranking of a given sample of measurements.

        Args:
            measurements (list of Measurement): sample to be ranked.

        Returns:
            Tuple with the 1-based position the sample's mean energy
            consumption would take in the ranking, and the number of apps
            in the ranking.
        """
        energy_ranking = cls.get_energy_ranking()
        # bisect needs an indexable sequence; on Python 3 values() is a view.
        consumptions = list(energy_ranking.values())
        energy_consumption = cls.mean_energy_consumption(measurements)
        return (
            bisect.bisect_left(consumptions, energy_consumption) + 1,
            len(consumptions)
        )