diff options
author | Gabriel Burt <gabriel.burt@gmail.com> | 2010-03-13 13:11:36 -0800 |
---|---|---|
committer | Gabriel Burt <gabriel.burt@gmail.com> | 2010-03-13 13:13:53 -0800 |
commit | 04f1df53a00ddf89b20f516fa1eb2dba131af133 (patch) | |
tree | b2c3b3a0a5f3ff2bed5d563189fedd463b4f5e73 /extras | |
parent | c09ebdacc42bd74be416bbccc598e4bb380dde08 (diff) |
[extras/metrics] Support incremental download/load
The database now keeps track of the last report it imported and imports only
newer reports; the download now happens via rsync.
Diffstat (limited to 'extras')
-rw-r--r-- | extras/metrics/Database.cs | 181 | ||||
-rw-r--r-- | extras/metrics/Main.cs | 18 | ||||
-rw-r--r-- | extras/metrics/Makefile.am | 6 | ||||
-rw-r--r-- | extras/metrics/MetaMetrics.cs | 29 | ||||
-rw-r--r-- | extras/metrics/Metric.cs | 16 | ||||
-rw-r--r-- | extras/metrics/MultiUserSample.cs | 11 | ||||
-rw-r--r-- | extras/metrics/User.cs | 45 | ||||
-rwxr-xr-x | extras/metrics/fetch-metrics | 23 | ||||
-rw-r--r-- | extras/metrics/metrics.csproj | 6 |
9 files changed, 257 insertions, 78 deletions
diff --git a/extras/metrics/Database.cs b/extras/metrics/Database.cs index 8bad4addf..37e17b404 100644 --- a/extras/metrics/Database.cs +++ b/extras/metrics/Database.cs @@ -32,64 +32,163 @@ using Hyena.Data.Sqlite; using Hyena.Json; using Mono.Data.Sqlite; using System.Collections.Generic; +using System.Text.RegularExpressions; +using ICSharpCode.SharpZipLib.GZip; namespace metrics { - public class Database + public class Config { - const string db_path = "metrics.db"; + [DatabaseColumn (Constraints = DatabaseColumnConstraints.PrimaryKey)] + public long Id; - public static HyenaSqliteConnection Open () + [DatabaseColumn] + public string Key; + + [DatabaseColumn] + public string Value; + } + + public class Database : HyenaSqliteConnection + { + public Database (string db_path) : base (db_path) { HyenaSqliteCommand.LogAll = ApplicationContext.CommandLine.Contains ("debug-sql"); - var db = new HyenaSqliteConnection (db_path); - db.Execute ("PRAGMA cache_size = ?", 32768 * 2); - db.Execute ("PRAGMA synchronous = OFF"); - db.Execute ("PRAGMA temp_store = MEMORY"); - db.Execute ("PRAGMA count_changes = OFF"); - SampleProvider = new SqliteModelProvider<MultiUserSample> (db, "Samples", true); - return db; + Execute ("PRAGMA cache_size = ?", 32768 * 4); + Execute ("PRAGMA synchronous = OFF"); + Execute ("PRAGMA temp_store = MEMORY"); + Execute ("PRAGMA count_changes = OFF"); + + Config = new SqliteModelProvider<Config> (this, "Config", true); + SampleProvider = new SqliteModelProvider<MultiUserSample> (this, "Samples", true); + MetricProvider = new SqliteModelProvider<Metric> (this, "Metrics", true); + Users = new SqliteModelProvider<User> (this, "Users", true); + + Execute ("CREATE INDEX IF NOT EXISTS SampleUserMetricIndex ON Samples (UserID, MetricID)"); + } + + public SqliteModelProvider<Config> Config { get; private set; } + public SqliteModelProvider<MultiUserSample> SampleProvider { get; private set; } + public SqliteModelProvider<Metric> MetricProvider { get; private 
set; } + public SqliteModelProvider<User> Users { get; private set; } + + private const string collapse_source_metric = "Banshee/Configuration/sources."; + private static char [] collapse_source_chars = new char [] {'-', '/', '.', '_'}; + private Dictionary<string, Metric> metrics = new Dictionary<string, Metric> (); + public Metric GetMetric (string name) + { + Metric metric; + if (metrics.TryGetValue (name, out metric)) + return metric; + + metric = MetricProvider.FetchFirstMatching ("Name = ?", name); + if (metric == null) { + metric = new Metric () { Name = name }; + MetricProvider.Save (metric); + } + + metrics[name] = metric; + return metric; } - public static bool Exists { get { return System.IO.File.Exists (db_path); } } + private Dictionary<string, User> users = new Dictionary<string, User> (); + public User GetUser (string guid) + { + User user; + if (users.TryGetValue (guid, out user)) + return user; + + user = Users.FetchFirstMatching ("Guid = ?", guid); + if (user == null) { + user = new User () { Guid = guid }; + Users.Save (user); + } - public static SqliteModelProvider<MultiUserSample> SampleProvider { get; private set; } + users[guid] = user; + return user; + } - public static void Import () + public static bool Exists (string db_path) { - using (var db = Open ()) { - var sample_provider = SampleProvider; - db.BeginTransaction (); - foreach (var file in System.IO.Directory.GetFiles ("data")) { - Log.InformationFormat ("Importing {0}", file); - - try { - var o = new Deserializer (System.IO.File.ReadAllText (file)).Deserialize () as JsonObject; - - string user_id = (string) o["ID"]; - int format_version = (int) o["FormatVersion"]; - if (format_version != MetricsCollection.FormatVersion) { - Log.WarningFormat ("Ignoring user report with old FormatVersion: {0}", format_version); - continue; - } + return System.IO.File.Exists (db_path); + } + + private Config LastReportNumber { + get { + return Config.FetchFirstMatching ("Key = 'LastReportNumber'") ?? 
new Config () { Key = "LastReportNumber", Value = "0" }; + } + } - var metrics = o["Metrics"] as JsonObject; - try { - foreach (string metric_name in metrics.Keys) { - var samples = metrics[metric_name] as JsonArray; - foreach (JsonArray sample in samples) { - sample_provider.Save (MultiUserSample.Import (user_id, metric_name, (string)sample[0], (object)sample[1])); - } + private Regex report_number_regex = new Regex ("data/(.{24}).json.gz", RegexOptions.Compiled); + + public void Import () + { + var db = this; + var sample_provider = SampleProvider; + + var last_config = LastReportNumber; + long last_report_number = Int64.Parse (last_config.Value); + + db.BeginTransaction (); + foreach (var file in System.IO.Directory.GetFiles ("data")) { + var match = report_number_regex.Match (file); + if (!match.Success) { + continue; + } + + long num = Int64.Parse (match.Groups[1].Captures[0].Value); + if (num <= last_report_number) { + continue; + } + + last_report_number = num; + Log.DebugFormat ("Importing {0}", file); + + try { + JsonObject o = null; + using (var stream = System.IO.File.OpenRead (file)) { + using (var gzip_stream = new GZipInputStream (stream)) { + using (var txt_stream = new System.IO.StreamReader (gzip_stream)) { + o = new Deserializer (txt_stream.ReadToEnd ()).Deserialize () as JsonObject; } - } catch { - throw; } - } catch (Exception e) { - Log.Exception (String.Format ("Failed to read {0}", file), e); } + + if (o == null) + throw new Exception ("Unable to parse JSON; empty file, maybe?"); + + string user_id = (string) o["ID"]; + int format_version = (int) o["FormatVersion"]; + if (format_version != MetricsCollection.FormatVersion) { + Log.WarningFormat ("Ignoring user report with old FormatVersion: {0}", format_version); + continue; + } + + var metrics = o["Metrics"] as JsonObject; + foreach (string metric_name in metrics.Keys) { + var samples = metrics[metric_name] as JsonArray; + + string name = metric_name; + if (name.StartsWith 
(collapse_source_metric)) { + string [] pieces = name.Split ('/'); + var reduced_name = pieces[2].Substring (8, pieces[2].IndexOfAny (collapse_source_chars, 8) - 8); + name = String.Format ("{0}{1}/{2}", collapse_source_metric, reduced_name, pieces[pieces.Length - 1]); + } + + foreach (JsonArray sample in samples) { + sample_provider.Save (MultiUserSample.Import (db, user_id, name, (string)sample[0], (object)sample[1])); + } + } + } catch (Exception e) { + Log.Exception (String.Format ("Failed to read {0}", file), e); } - db.CommitTransaction (); } + db.CommitTransaction (); + + last_config.Value = last_report_number.ToString (); + Config.Save (last_config); + + Log.InformationFormat ("Done importing - last report # = {0}", last_report_number); } } @@ -133,4 +232,4 @@ namespace metrics return list[list.Count / 2]; } } -}
\ No newline at end of file +} diff --git a/extras/metrics/Main.cs b/extras/metrics/Main.cs index c762d88c8..b6edb41f0 100644 --- a/extras/metrics/Main.cs +++ b/extras/metrics/Main.cs @@ -33,15 +33,19 @@ namespace metrics { public class MainEntry { + const string db_path = "metrics.db"; + public static void Main (string [] args) { - if (!Database.Exists) { - Database.Import (); - } else { - using (var db = Database.Open ()) { - new MetaMetrics (db); - } + try { + using (var db = new Database (db_path)) { + db.Import (); + new MetaMetrics (db); + } + } catch (Exception e) { + Console.WriteLine ("Going down, got exception {0}", e); + throw; } } } -} +}
\ No newline at end of file diff --git a/extras/metrics/Makefile.am b/extras/metrics/Makefile.am index 037276679..1dc56d6b1 100644 --- a/extras/metrics/Makefile.am +++ b/extras/metrics/Makefile.am @@ -1,13 +1,17 @@ ASSEMBLY = Metrics TARGET = exe -LINK = $(LINK_HYENA_DEPS) +LINK = $(LINK_HYENA_DEPS) $(LINK_ICSHARP_ZIP_LIB) SOURCES = \ Database.cs \ Main.cs \ Metric.cs \ MetaMetrics.cs \ + User.cs \ MultiUserSample.cs RESOURCES = +copy: + cp $(top_srcdir)/bin/Metrics.exe* . + include $(top_srcdir)/build/build.mk diff --git a/extras/metrics/MetaMetrics.cs b/extras/metrics/MetaMetrics.cs index 41bc060bb..67d50b877 100644 --- a/extras/metrics/MetaMetrics.cs +++ b/extras/metrics/MetaMetrics.cs @@ -50,12 +50,12 @@ namespace metrics private static int id; - public SampleModel (string condition, HyenaSqliteConnection db, string aggregates) + public SampleModel (string condition, Database db, string aggregates) { Selection = new Hyena.Collections.Selection (); ReloadFragment = String.Format ("FROM Samples {0}", condition); SelectAggregates = aggregates; - Cache = new SqliteModelCache<MultiUserSample> (db, (id++).ToString (), this, Database.SampleProvider); + Cache = new SqliteModelCache<MultiUserSample> (db, (id++).ToString (), this, db.SampleProvider); } public void Reload () @@ -67,21 +67,24 @@ namespace metrics public class MetricSampleModel : SampleModel { + private Metric metric; public string MetricName { get; private set; } + public long MetricId { get { return metric.Id; } } private string condition; - public MetricSampleModel (SqliteModelCache<MultiUserSample> limiter, HyenaSqliteConnection db, string aggregates) : base (null, db, aggregates) + public MetricSampleModel (SqliteModelCache<MultiUserSample> limiter, Database db, string aggregates) : base (null, db, aggregates) { condition = String.Format ( - "FROM Samples, HyenaCache WHERE Samples.MetricName = '{0}' AND HyenaCache.ModelID = {1} AND Samples.ID = HyenaCache.ItemID", + "FROM Samples, HyenaCache WHERE 
Samples.MetricID = {0} AND HyenaCache.ModelID = {1} AND Samples.ID = HyenaCache.ItemID", "{0}", limiter.CacheId ); } - public void ChangeMetric (string metricName) + public void ChangeMetric (Database db, string metricName) { MetricName = metricName; - ReloadFragment = String.Format (condition, metricName); + metric = db.GetMetric (metricName); + ReloadFragment = String.Format (condition, metric.Id); Reload (); } } @@ -90,9 +93,9 @@ namespace metrics { string fmt = "{0,20}"; - public MetaMetrics (HyenaSqliteConnection db) + public MetaMetrics (Database db) { - var latest_samples = new SampleModel ("GROUP BY UserID, MetricName ORDER BY stamp desc", db, "COUNT(DISTINCT(UserID)), MIN(Stamp), MAX(Stamp)"); + var latest_samples = new SampleModel ("GROUP BY UserID, MetricID ORDER BY stamp desc", db, "COUNT(DISTINCT(UserID)), MIN(Stamp), MAX(Stamp)"); latest_samples.Cache.AggregatesUpdated += (reader) => { Console.WriteLine ("Total unique users for this time slice: {0}", reader[1]); Console.WriteLine ("First report was on {0}", SqliteUtils.FromDbFormat (typeof(DateTime), reader[2])); @@ -108,8 +111,8 @@ namespace metrics Console.WriteLine (String.Format (" Users: {0}", fmt), agg_reader[1]); using (var reader = new HyenaDataReader (db.Query ( @"SELECT COUNT(DISTINCT(UserId)) as users, Value FROM Samples, HyenaCache - WHERE MetricName = ? AND HyenaCache.ModelID = ? AND HyenaCache.ItemID = Samples.ID - GROUP BY Value ORDER BY users DESC", string_summary.MetricName, string_summary.Cache.CacheId))) { + WHERE MetricId = ? AND HyenaCache.ModelID = ? 
AND HyenaCache.ItemID = Samples.ID + GROUP BY Value ORDER BY users DESC", string_summary.MetricId, string_summary.Cache.CacheId))) { while (reader.Read ()) { Console.WriteLine (" {0,-5}: {1,-20}", reader.Get<long> (0), reader.Get<string> (1)); } @@ -131,19 +134,19 @@ namespace metrics Console.WriteLine (); }; - var metrics = db.QueryEnumerable<string> ("SELECT DISTINCT(MetricName) as name FROM Samples ORDER BY name ASC"); + var metrics = db.QueryEnumerable<string> ("SELECT Name FROM Metrics ORDER BY Name ASC"); foreach (var metric in metrics) { switch (GetMetricType (metric)) { case "string": Console.WriteLine ("{0}:", metric); - string_summary.ChangeMetric (metric); + string_summary.ChangeMetric (db, metric); break; //case "timespan" : SummarizeNumeric<TimeSpan> (metric); break; //case "datetime" : SummarizeNumeric<DateTime> (metric); break; case "float": Console.WriteLine ("{0}:", metric); //SummarizeNumeric<long> (metric_cache); - numeric_slice.ChangeMetric (metric); + numeric_slice.ChangeMetric (db, metric); break; //case "float": //SummarizeNumeric<double> (metric_cache); diff --git a/extras/metrics/Metric.cs b/extras/metrics/Metric.cs index 105cc2fe4..7514e1c74 100644 --- a/extras/metrics/Metric.cs +++ b/extras/metrics/Metric.cs @@ -39,6 +39,14 @@ namespace metrics { public class Metric { + [DatabaseColumn (Constraints = DatabaseColumnConstraints.PrimaryKey)] + public long Id { get; private set; } + + [DatabaseColumn (Index = "MetricNameIndex")] + public string Name { get; set; } + + public Metric () {} + static Metric () { var time = new Func<double, string> (d => String.Format ("{0:N0}", SqliteUtils.FromDbFormat (typeof(DateTime), d))); @@ -126,13 +134,13 @@ namespace metrics } } - private string key; + //private string key; private bool ends_with; private Func<double, string> func; public Metric (string key, Func<double, string> func) { - this.key = key; + Name = key; this.func = func; this.ends_with = key[0] == '/'; } @@ -149,9 +157,9 @@ namespace metrics 
public bool Matching (string key) { if (ends_with) { - return key.EndsWith (this.key); + return key.EndsWith (Name); } else { - return key == this.key; + return key == Name; } } } diff --git a/extras/metrics/MultiUserSample.cs b/extras/metrics/MultiUserSample.cs index 8fd2f0d13..1b015582f 100644 --- a/extras/metrics/MultiUserSample.cs +++ b/extras/metrics/MultiUserSample.cs @@ -36,7 +36,10 @@ namespace metrics public class MultiUserSample : Sample, Hyena.Data.ICacheableItem { [DatabaseColumn (Index = "SampleUserIdIndex")] - public string UserId; + public long UserId; + + [DatabaseColumn (Index = "SampleMetricIdIndex")] + public long MetricId; // ICacheableItem public object CacheEntryId { get; set; } @@ -48,13 +51,13 @@ namespace metrics static DateTime value_dt; static TimeSpan value_span; - public static MultiUserSample Import (string user_id, string metric_name, string stamp, object val) + public static MultiUserSample Import (Database db, string user_id, string metric_name, string stamp, object val) { var sample = new MultiUserSample (); - sample.UserId = user_id; + sample.UserId = db.GetUser (user_id).Id; // TODO collapse various DAP and DAAP library stats? - sample.MetricName = metric_name; + sample.MetricId = db.GetMetric (metric_name).Id; DateTime stamp_dt; if (!DateTimeUtil.TryParseInvariant (stamp, out stamp_dt)) { diff --git a/extras/metrics/User.cs b/extras/metrics/User.cs new file mode 100644 index 000000000..ae7d51c5c --- /dev/null +++ b/extras/metrics/User.cs @@ -0,0 +1,45 @@ +// +// User.cs +// +// Author: +// Gabriel Burt <gabriel.burt@gmail.com> +// +// Copyright (c) 2010 Novell, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +using System; + +using Hyena.Data.Sqlite; + +namespace metrics +{ + public class User + { + [DatabaseColumn (Constraints = DatabaseColumnConstraints.PrimaryKey)] + public long Id; + + [DatabaseColumn] + public string Guid; + + public User () + { + } + } +}
\ No newline at end of file diff --git a/extras/metrics/fetch-metrics b/extras/metrics/fetch-metrics index a1998aa42..3c8344c56 100755 --- a/extras/metrics/fetch-metrics +++ b/extras/metrics/fetch-metrics @@ -1,11 +1,18 @@ #!/bin/bash -rm -fr data -mkdir data/ -ssh bansheeweb@banshee-project.org "cd download.banshee-project.org/metrics/data; tar -cf metrics.tar *.gz" -scp bansheeweb@banshee-project.org:~/download.banshee-project.org/metrics/data/metrics.tar data/ -ssh bansheeweb@banshee-project.org "cd download.banshee-project.org/metrics/data; rm metrics.tar" +mkdir -p data/ cd data -tar -xvf metrics.tar -rm metrics.tar -gunzip *.gz + +echo "Updating remote tar file" +ssh bansheeweb@banshee-project.org "cd download.banshee-project.org/metrics/data; tar --append --remove-files -f metrics.tar *.gz 2>&1 | grep -v 'No such file or directory' | grep -v 'exit delayed from previous'" + +echo "Downloading changes with rsync" +rsync --progress bansheeweb@banshee-project.org:~/download.banshee-project.org/metrics/data/metrics.tar . + +echo "Untarring new records" +tar --keep-old-files -xf metrics.tar 2>&1 | grep -v "Cannot open: File exists" | grep -v "exit delayed from previous" + +#echo "Unzipping $(ls -l *.gz 2>/dev/null | wc -l) new records" +#gunzip -q *.gz + +echo "Done!" 
diff --git a/extras/metrics/metrics.csproj b/extras/metrics/metrics.csproj index d4893e67c..5191fd860 100644 --- a/extras/metrics/metrics.csproj +++ b/extras/metrics/metrics.csproj @@ -32,6 +32,7 @@ <Reference Include="System" /> <Reference Include="System.Core"> </Reference> + <Reference Include="ICSharpCode.SharpZipLib" /> </ItemGroup> <ItemGroup> <Compile Include="MultiUserSample.cs" /> @@ -39,6 +40,7 @@ <Compile Include="Main.cs" /> <Compile Include="Database.cs" /> <Compile Include="Metric.cs" /> + <Compile Include="User.cs" /> </ItemGroup> <ItemGroup> <ProjectReference Include="..\..\src\Libraries\Hyena\Hyena.csproj"> @@ -49,6 +51,10 @@ <Project>{BB1D1D81-7A74-4183-B7B1-3E78B32D42F1}</Project> <Name>Mono.Data.Sqlite</Name> </ProjectReference> + <ProjectReference Include="..\..\src\Core\Banshee.Services\Banshee.Services.csproj"> + <Project>{B28354F0-BA87-44E8-989F-B864A3C7C09F}</Project> + <Name>Banshee.Services</Name> + </ProjectReference> </ItemGroup> <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> </Project>
\ No newline at end of file |