Greatly improve performance. - reportable - Fork of reportable required by WarVox, from hdm/reportable.
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
---
(DIR) commit 5f29dfb16b4b34158a89f37283d31d2c42852f04
(DIR) parent 880e98fc581fd44bac7c4e9337a755be19c7ed24
(HTM) Author: Cristi Balan <evil@che.lu>
Date: Fri, 21 Dec 2012 17:55:51 +0200
Greatly improve performance.
Fetch only missing cached data if we're only missing a contiguous part at the end.
Diffstat:
M lib/saulabs/reportable/report_cache.rb | 50 +++++++++++++++++++++----------
M spec/classes/report_cache_spec.rb | 16 ++++++++++++----
M spec/classes/report_spec.rb | 136 +++++++++++++++++++++++++++++++
3 files changed, 182 insertions(+), 20 deletions(-)
---
(DIR) diff --git a/lib/saulabs/reportable/report_cache.rb b/lib/saulabs/reportable/report_cache.rb
@@ -69,6 +69,15 @@ module Saulabs
#
def self.process(report, options, &block)
raise ArgumentError.new('A block must be given') unless block_given?
+
+ # If end_date is in the middle of the current reporting period it means it requests live_data.
+ # Update the options hash to reflect reality.
+ current_reporting_period = ReportingPeriod.new(options[:grouping])
+ if options[:end_date] && options[:end_date] > current_reporting_period.date_time
+ options[:live_data] = true
+ options.delete(:end_date)
+ end
+
self.transaction do
cached_data = read_cached_data(report, options)
new_data = read_new_data(cached_data, options, &block)
@@ -140,11 +149,10 @@ module Saulabs
serialize_conditions(options[:conditions])
]
first_reporting_period = get_first_reporting_period(options)
- last_reporting_period = get_last_reporting_period(options)
- if last_reporting_period
+ if options[:end_date]
conditions.first << ' AND reporting_period BETWEEN ? AND ?'
conditions << first_reporting_period.date_time
- conditions << last_reporting_period.date_time
+ conditions << ReportingPeriod.new(options[:grouping], options[:end_date]).date_time
else
conditions.first << ' AND reporting_period >= ?'
conditions << first_reporting_period.date_time
@@ -157,16 +165,30 @@ module Saulabs
end
def self.read_new_data(cached_data, options, &block)
- if !options[:live_data] && cached_data.length == options[:limit]
- []
+ return [] if !options[:live_data] && cached_data.length == options[:limit]
+
+ first_reporting_period_to_read = get_first_reporting_period_to_read(cached_data, options)
+ last_reporting_period_to_read = options[:end_date] ? ReportingPeriod.new(options[:grouping], options[:end_date]).last_date_time : nil
+
+ yield(first_reporting_period_to_read.date_time, last_reporting_period_to_read)
+ end
+
+ def self.get_first_reporting_period_to_read(cached_data, options)
+ return get_first_reporting_period(options) if cached_data.empty?
+
+ last_cached_reporting_period = ReportingPeriod.new(options[:grouping], cached_data.last.reporting_period)
+ missing_reporting_periods = options[:limit] - cached_data.length
+ last_reporting_period = if !options[:live_data] && options[:end_date]
+ ReportingPeriod.new(options[:grouping], options[:end_date])
else
- first_reporting_period_to_read = if cached_data.length < options[:limit]
- get_first_reporting_period(options)
- else
- ReportingPeriod.new(options[:grouping], cached_data.last.reporting_period).next
- end
- last_reporting_period_to_read = options[:end_date] ? ReportingPeriod.new(options[:grouping], options[:end_date]).last_date_time : nil
- yield(first_reporting_period_to_read.date_time, last_reporting_period_to_read)
+ ReportingPeriod.new(options[:grouping]).previous
+ end
+
+ if missing_reporting_periods == 0 || last_cached_reporting_period.offset(missing_reporting_periods) == last_reporting_period
+ # cache only has missing data contiguously at the end
+ last_cached_reporting_period.next
+ else
+ get_first_reporting_period(options)
end
end
@@ -178,10 +200,6 @@ module Saulabs
end
end
- def self.get_last_reporting_period(options)
- return ReportingPeriod.new(options[:grouping], options[:end_date]) if options[:end_date]
- end
-
end
end
(DIR) diff --git a/spec/classes/report_cache_spec.rb b/spec/classes/report_cache_spec.rb
@@ -184,7 +184,7 @@ describe Saulabs::Reportable::ReportCache do
describe 'with :end_date = <some date>' do
before do
- @options = @report.options.merge(:end_date => Time.now)
+ @options = @report.options.merge(:end_date => Time.now - 1.send(@report.options[:grouping].identifier))
end
it 'should yield the last date and time of the reporting period for the specified end date' do
@@ -221,7 +221,7 @@ describe Saulabs::Reportable::ReportCache do
end
it 'should utilize the end_date in the conditions' do
- end_date = Time.now
+ end_date = Time.now - 1.send(@report.options[:grouping].identifier)
Saulabs::Reportable::ReportCache.should_receive(:all).once.with(
:conditions => [
%w(model_name report_name grouping aggregation conditions).map do |column_name|
@@ -232,7 +232,7 @@ describe Saulabs::Reportable::ReportCache do
@report.options[:grouping].identifier.to_s,
@report.aggregation.to_s,
'',
- Saulabs::Reportable::ReportingPeriod.first(@report.options[:grouping], 9).date_time,
+ Saulabs::Reportable::ReportingPeriod.first(@report.options[:grouping], 10).date_time,
Saulabs::Reportable::ReportingPeriod.new(@report.options[:grouping], end_date).date_time
],
:limit => 10,
@@ -271,7 +271,15 @@ describe Saulabs::Reportable::ReportCache do
end
end
end
-
+
+ describe '.get_first_reporting_period_to_read' do
+ it 'returns first reporting period if no cached data' do
+ Saulabs::Reportable::ReportCache.should_receive(:get_first_reporting_period).once.and_return('first')
+ result = Saulabs::Reportable::ReportCache.send(:get_first_reporting_period_to_read, [], {})
+ result.should == 'first'
+ end
+ end
+
describe '.serialize_conditions' do
it 'should serialize empty conditions correctly' do
(DIR) diff --git a/spec/classes/report_spec.rb b/spec/classes/report_spec.rb
@@ -77,6 +77,142 @@ describe Saulabs::Reportable::Report do
User.create!(:login => 'test 4', :created_at => Time.now - 3.send(grouping), :profile_visits => 3)
end
+ describe 'optimized querying with contiguously cached data' do
+ it "should be optimized with specified end_date" do
+ @end_date = DateTime.now - 1.send(grouping)
+ @report = Saulabs::Reportable::Report.new(User, :registrations,
+ :grouping => grouping,
+ :limit => 10,
+ :end_date => @end_date
+ )
+ @result = @report.run
+
+ Saulabs::Reportable::ReportCache.last.delete
+
+ grouping_instance = Saulabs::Reportable::Grouping.new(grouping)
+ reporting_period = Saulabs::Reportable::ReportingPeriod.new(grouping_instance, @end_date)
+
+ @report.should_receive(:read_data) do |begin_at, end_at, options|
+ begin_at.should == reporting_period.date_time
+ end_at.should == reporting_period.last_date_time
+ [] # without this rspec whines about an ambiguous return value
+ end
+
+ @result = @report.run
+ end
+
+ it "should be optimized without specific end_date and live_data" do
+ @report = Saulabs::Reportable::Report.new(User, :registrations,
+ :grouping => grouping,
+ :limit => 10,
+ :live_data => true
+ )
+ @result = @report.run.to_a
+
+ Saulabs::Reportable::ReportCache.last.delete
+
+ grouping_instance = Saulabs::Reportable::Grouping.new(grouping)
+ reporting_period = Saulabs::Reportable::ReportingPeriod.new(grouping_instance, DateTime.now).previous
+
+ @report.should_receive(:read_data) do |begin_at, end_at, options|
+ begin_at.should == reporting_period.date_time
+ end_at.should == nil
+ [] # without this rspec whines about an ambiguous return value
+ end
+
+ @result = @report.run
+ end
+
+ it "should be optimized without specific end_date and without live_data requested" do
+ @report = Saulabs::Reportable::Report.new(User, :registrations,
+ :grouping => grouping,
+ :limit => 10
+ )
+ @result = @report.run.to_a
+
+ Saulabs::Reportable::ReportCache.last.delete
+
+ grouping_instance = Saulabs::Reportable::Grouping.new(grouping)
+ reporting_period = Saulabs::Reportable::ReportingPeriod.new(grouping_instance, DateTime.now).previous
+
+ @report.should_receive(:read_data) do |begin_at, end_at, options|
+ begin_at.should == reporting_period.date_time
+ end_at.should == nil
+ [] # without this rspec whines about an ambiguous return value
+ end
+
+ @result = @report.run
+ end
+ end
+
+ describe 'non optimized querying when gaps present in cached data' do
+ it "should not be optimized with specified end_date" do
+ @end_date = DateTime.now - 1.send(grouping)
+ @report = Saulabs::Reportable::Report.new(User, :registrations,
+ :grouping => grouping,
+ :limit => 10,
+ :end_date => @end_date
+ )
+ @result = @report.run
+
+ Saulabs::Reportable::ReportCache.first.delete
+
+ grouping_instance = Saulabs::Reportable::Grouping.new(grouping)
+ reporting_period = Saulabs::Reportable::ReportingPeriod.new(grouping_instance, @end_date)
+
+ @report.should_receive(:read_data) do |begin_at, end_at, options|
+ begin_at.should == reporting_period.offset(-9).date_time
+ end_at.should == reporting_period.last_date_time
+ [] # without this rspec whines about an ambiguous return value
+ end
+
+ @result = @report.run
+ end
+
+ it "should not be optimized without specific end_date and live_data" do
+ @report = Saulabs::Reportable::Report.new(User, :registrations,
+ :grouping => grouping,
+ :limit => 10,
+ :live_data => true
+ )
+ @result = @report.run.to_a
+
+ Saulabs::Reportable::ReportCache.first.delete
+
+ grouping_instance = Saulabs::Reportable::Grouping.new(grouping)
+ reporting_period = Saulabs::Reportable::ReportingPeriod.new(grouping_instance, DateTime.now).previous
+
+ @report.should_receive(:read_data) do |begin_at, end_at, options|
+ begin_at.should == reporting_period.offset(-9).date_time
+ end_at.should == nil
+ [] # without this rspec whines about an ambiguous return value
+ end
+
+ @result = @report.run
+ end
+
+ it "should not be optimized without specific end_date and without live_data requested" do
+ @report = Saulabs::Reportable::Report.new(User, :registrations,
+ :grouping => grouping,
+ :limit => 10
+ )
+ @result = @report.run.to_a
+
+ Saulabs::Reportable::ReportCache.first.delete
+
+ grouping_instance = Saulabs::Reportable::Grouping.new(grouping)
+ reporting_period = Saulabs::Reportable::ReportingPeriod.new(grouping_instance, DateTime.now).previous
+
+ @report.should_receive(:read_data) do |begin_at, end_at, options|
+ begin_at.should == reporting_period.offset(-9).date_time
+ end_at.should == nil
+ [] # without this rspec whines about an ambiguous return value
+ end
+
+ @result = @report.run
+ end
+ end
+
describe 'when :end_date is specified' do
it 'should not raise a SQL duplicate key error after multiple runs' do