Hi,
We're running v0.37.4 on Heroku with Redshift as our primary database. We've run into issues on some queries that run longer than 30s with more than a few thousand rows. It seems like this might fundamentally be an issue with Heroku's timeouts. After the timeout, Metabase immediately tries to rerun the query (which inevitably leads to another Heroku timeout).
The problem is, even if we try to cancel the query from the Metabase GUI, the queries keep running in Redshift, leading to a logjam death spiral of heavy queries running at the same time:
Is this something to flag as an issue on Github?
If it's not a bug/in the meantime, does anyone have any recommendations on dealing with the Heroku errors or Redshift logjams of this sort?
Edit:
Logs from Heroku:
Jan 27 15:30:42 sawyer-metabase heroku/router at=error code=H12 desc="Request timeout" method=POST path="/api/card/2113/query" host=sawyer-metabase.herokuapp.com request_id=6d2bb018-600d-43b6-80bd-805206ad207f fwd="<<redacted ip>>" dyno=web.2 connect=1ms service=30000ms status=503 bytes=0 protocol=https
Jan 27 15:30:42 sawyer-metabase app/web.2 2021-01-27 20:30:42,426 DEBUG middleware.log :: POST /api/card/2113/query 202 [ASYNC: canceled] 30.1 s (5 DB calls) App DB connections: 0/15 Jetty threads: 4/50 (2 idle, 0 queued) (128 total active threads) Queries in flight: 0 (0 queued)
Jan 27 15:30:56 sawyer-metabase app/web.1 2021-01-27 20:30:56,542 ERROR middleware.catch-exceptions :: Error processing query: null
Jan 27 15:30:56 sawyer-metabase app/web.1 {:database_id 7,
Jan 27 15:30:56 sawyer-metabase app/web.1 :started_at #t "2021-01-27T20:23:59.891Z[Etc/UTC]",
Jan 27 15:30:56 sawyer-metabase app/web.1 :json_query
Jan 27 15:30:56 sawyer-metabase app/web.1 {:constraints {:max-results 10000, :max-results-bare-rows 2000},
Jan 27 15:30:56 sawyer-metabase app/web.1 :type :native,
Jan 27 15:30:56 sawyer-metabase app/web.1 :middleware {:js-int-to-string? true},
Jan 27 15:30:56 sawyer-metabase app/web.1 :native
Jan 27 15:30:56 sawyer-metabase app/web.1 {:query
Jan 27 15:30:56 sawyer-metabase app/web.1 "with activity_info as (\n\n\tselect distinct \n\t\tbookings.user_id,\n\t\tbookings.provider_id,\n\t\tmax (bookings.activity_session_date) as last_booked_activity,\n\t\tbookings.activity_id,\n\t\tmin (activity_sessions.activity_session_date) as next_available_activity_date,\n\t\trank () over (partition by bookings.user_id order by last_booked_activity desc) as activity_rank\n\tfrom analytics.bookings \n\tleft join analytics.activity_sessions on bookings.provider_id = activity_sessions.provider_id\n\twhere \t\t\n\t\tactivity_sessions.activity_session_date > getdate() \n\t\tand activity_sessions.is_discoverable_session is true \n\tgroup by 1,2,4\n\torder by 1,4 desc\n\n), provider_info as (\n\n\tselect \n\t\tid as provider_id,\n\t\tname as provider_name,\n\t\tcase \n\t\t\twhen primary_business_photo is not null \n\t\t\t\tthen 'https://cdn-p.thekidspassport.com/uploads/providers/'||providers.id||'/'||providers.primary_business_photo\n\t\t\twhen primary_business_photo is null \n\t\t\t\tthen 'https://mcusercontent.com/cb10cf7136f5bf717b1f8552d/images/68c8ec43-40b0-4186-ada0-64910e77da92.png' end as business_photo,\n\t\tminimum_age||'-'||maximum_age as age_range,\n\t\t'https://www.hisawyer.com/providers/'||providers.slug as live_link\n\tfrom sawyer.providers\n\twhere (provider_type = 'both' or provider_type = 'marketplace')\n\n), user_info as (\n\n\tselect \n\t\trow_number() over (partition by users.user_id order by child_id asc) as child_number,\n\t\tusers.user_id,\n\t\tprivate_users.email,\n\t\tprivate_users.user_name,\n\t\tsplit_part(trim(sawyer.children.name), ' ', 1) as child_first_name,\n\t\tchildren.id as child_id\n\tfrom analytics.users\n\tleft join private_analytics.private_users on users.user_id = private_users.user_id\n\tleft join sawyer.children on users.user_id = children.member_id\n\twhere\n\t\tusers.marketable_at_et is not null\n\t\tand trim(children.name) != ''\n and trim(regexp_replace(children.name, '[\\\\W]')) != ''\n and users.user_id in (select distinct activity_info.user_id from activity_info)\n order by 2 asc\n\n)\n\nselect distinct\n child_number,\n\tactivity_info.user_id,\n\tuser_name,\n\tchild_first_name,\n\temail,\n\tprovider_name,\n\tcoalesce ('Age Range: '||provider_info.age_range||' yrs', '') as age_range,\n\tbusiness_photo,\n\tlast_booked_activity,\n\tnext_available_activity_date,\n\tlive_link||'?utm_source=transactional&utm_medium=email&utm_campaign='||to_char(getdate(), 'YYYYMMDD')||'-repeat-booking' as live_link_with_utm\nfrom activity_info\nleft join provider_info using (provider_id)\nleft join user_info using (user_id)\nwhere \n\tactivity_rank = '1'\n\tand provider_name is not null\n\tand email is not null\n\tand last_booked_activity < next_available_activity_date\n\t\norder by 2 asc, 1 asc\n;\n",
Jan 27 15:30:56 sawyer-metabase app/web.1 :template-tags {}},
Jan 27 15:30:56 sawyer-metabase app/web.1 :database 7,
Jan 27 15:30:56 sawyer-metabase app/web.1 :parameters [],
Jan 27 15:30:56 sawyer-metabase app/web.1 :async? true,
Jan 27 15:30:56 sawyer-metabase app/web.1 :cache-ttl nil},
Jan 27 15:30:56 sawyer-metabase app/web.1 :status :failed,
Jan 27 15:30:56 sawyer-metabase app/web.1 :class org.eclipse.jetty.io.EofException,
Jan 27 15:30:56 sawyer-metabase app/web.1 :stacktrace
Jan 27 15:30:56 sawyer-metabase app/web.1 ["org.eclipse.jetty.server.HttpConnection$SendCallback.reset(HttpConnection.java:724)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "org.eclipse.jetty.server.HttpConnection$SendCallback.access$300(HttpConnection.java:683)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "org.eclipse.jetty.server.HttpConnection.send(HttpConnection.java:542)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "org.eclipse.jetty.server.HttpChannel.sendResponse(HttpChannel.java:833)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "org.eclipse.jetty.server.HttpChannel.write(HttpChannel.java:910)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "org.eclipse.jetty.server.HttpOutput.channelWrite(HttpOutput.java:284)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "org.eclipse.jetty.server.HttpOutput.channelWrite(HttpOutput.java:268)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "org.eclipse.jetty.server.HttpOutput.flush(HttpOutput.java:713)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "java.util.zip.DeflaterOutputStream.flush(DeflaterOutputStream.java:282)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "--> async.streaming_response$delay_output_stream$fn__24654.invoke(streaming_response.clj:120)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "async.streaming_response.proxy$java.io.OutputStream$ff19274a.flush(Unknown Source)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.streaming.json$fn$reify__52070.finish_BANG_(json.clj:83)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.streaming$streaming_reducedf$fn__52543.invoke(streaming.clj:40)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.context$reducedf.invokeStatic(context.clj:77)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.context$reducedf.invoke(context.clj:72)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.context.default$default_reducef.invokeStatic(default.clj:65)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.context.default$default_reducef.invoke(default.clj:49)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.context$reducef.invokeStatic(context.clj:69)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.context$reducef.invoke(context.clj:62)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.context.default$default_runf$respond_STAR___36406.invoke(default.clj:70)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "driver.sql_jdbc.execute$execute_reducible_query.invokeStatic(execute.clj:394)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "driver.sql_jdbc.execute$execute_reducible_query.invoke(execute.clj:376)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "driver.sql_jdbc.execute$execute_reducible_query.invokeStatic(execute.clj:385)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "driver.sql_jdbc.execute$execute_reducible_query.invoke(execute.clj:376)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "driver.sql_jdbc$fn__78125.invokeStatic(sql_jdbc.clj:49)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "driver.sql_jdbc$fn__78125.invoke(sql_jdbc.clj:47)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.context$executef.invokeStatic(context.clj:59)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.context$executef.invoke(context.clj:48)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.context.default$default_runf.invokeStatic(default.clj:69)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.context.default$default_runf.invoke(default.clj:67)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.context$runf.invokeStatic(context.clj:45)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.context$runf.invoke(context.clj:39)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.reducible$pivot.invokeStatic(reducible.clj:34)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.reducible$pivot.invoke(reducible.clj:31)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.mbql_to_native$mbql__GT_native$fn__44124.invoke(mbql_to_native.clj:26)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.check_features$check_features$fn__43400.invoke(check_features.clj:42)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.optimize_datetime_filters$optimize_datetime_filters$fn__44289.invoke(optimize_datetime_filters.clj:133)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.auto_parse_filter_values$auto_parse_filter_values$fn__42201.invoke(auto_parse_filter_values.clj:44)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.wrap_value_literals$wrap_value_literals$fn__45855.invoke(wrap_value_literals.clj:149)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.annotate$add_column_info$fn__41956.invoke(annotate.clj:575)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.permissions$check_query_permissions$fn__43275.invoke(permissions.clj:70)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.pre_alias_aggregations$pre_alias_aggregations$fn__44807.invoke(pre_alias_aggregations.clj:40)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.cumulative_aggregations$handle_cumulative_aggregations$fn__43473.invoke(cumulative_aggregations.clj:61)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.resolve_joined_fields$resolve_joined_fields$fn__45044.invoke(resolve_joined_fields.clj:36)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.resolve_joins$resolve_joins$fn__45363.invoke(resolve_joins.clj:183)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.add_implicit_joins$add_implicit_joins$fn__37681.invoke(add_implicit_joins.clj:254)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.large_int_id$convert_id_to_string$fn__44085.invoke(large_int_id.clj:44)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.limit$limit$fn__44110.invoke(limit.clj:38)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.format_rows$format_rows$fn__44065.invoke(format_rows.clj:75)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.desugar$desugar$fn__43539.invoke(desugar.clj:22)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.binning$update_binning_strategy$fn__42560.invoke(binning.clj:229)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.resolve_fields$resolve_fields$fn__43077.invoke(resolve_fields.clj:24)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.add_dimension_projections$add_remapping$fn__37225.invoke(add_dimension_projections.clj:316)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.add_implicit_clauses$add_implicit_clauses$fn__37432.invoke(add_implicit_clauses.clj:141)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.add_source_metadata$add_source_metadata_for_source_queries$fn__37830.invoke(add_source_metadata.clj:105)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.reconcile_breakout_and_order_by_bucketing$reconcile_breakout_and_order_by_bucketing$fn__45004.invoke(reconcile_breakout_and_order_by_bucketing.clj:98)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.auto_bucket_datetimes$auto_bucket_datetimes$fn__42148.invoke(auto_bucket_datetimes.clj:140)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.resolve_source_table$resolve_source_tables$fn__43124.invoke(resolve_source_table.clj:46)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.parameters$substitute_parameters$fn__44789.invoke(parameters.clj:114)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.resolve_referenced$resolve_referenced_card_resources$fn__43176.invoke(resolve_referenced.clj:80)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.expand_macros$expand_macros$fn__43795.invoke(expand_macros.clj:158)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.add_timezone_info$add_timezone_info$fn__37861.invoke(add_timezone_info.clj:15)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.splice_params_in_response$splice_params_in_response$fn__45725.invoke(splice_params_in_response.clj:32)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.resolve_database_and_driver$resolve_database_and_driver$fn__45015$fn__45019.invoke(resolve_database_and_driver.clj:33)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "driver$do_with_driver.invokeStatic(driver.clj:61)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "driver$do_with_driver.invoke(driver.clj:57)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.resolve_database_and_driver$resolve_database_and_driver$fn__45015.invoke(resolve_database_and_driver.clj:27)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.fetch_source_query$resolve_card_id_source_tables$fn__44013.invoke(fetch_source_query.clj:267)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.store$initialize_store$fn__45734$fn__45735.invoke(store.clj:11)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.store$do_with_store.invokeStatic(store.clj:46)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.store$do_with_store.invoke(store.clj:40)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.store$initialize_store$fn__45734.invoke(store.clj:10)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.cache$maybe_return_cached_results$fn__43053.invoke(cache.clj:214)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.validate$validate_query$fn__45743.invoke(validate.clj:10)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.normalize_query$normalize$fn__44137.invoke(normalize_query.clj:22)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.add_rows_truncated$add_rows_truncated$fn__37699.invoke(add_rows_truncated.clj:36)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.results_metadata$record_and_return_metadata_BANG_$fn__45710.invoke(results_metadata.clj:147)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.constraints$add_default_userland_constraints$fn__43416.invoke(constraints.clj:42)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.process_userland_query$process_userland_query$fn__44878.invoke(process_userland_query.clj:136)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.middleware.catch_exceptions$catch_exceptions$fn__43359.invoke(catch_exceptions.clj:174)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.reducible$async_qp$qp_STAR___36488$thunk__36489.invoke(reducible.clj:103)"
Jan 27 15:30:56 sawyer-metabase app/web.1 "query_processor.reducible$async_qp$qp_STAR___36488$fn__36491.invoke(reducible.clj:108)"],
Jan 27 15:30:56 sawyer-metabase app/web.1 :context :question,
Jan 27 15:30:56 sawyer-metabase app/web.1 :error nil,
Jan 27 15:30:56 sawyer-metabase app/web.1 :row_count 0,
Jan 27 15:30:56 sawyer-metabase app/web.1 :running_time 0,
Jan 27 15:30:56 sawyer-metabase app/web.1 :data {:rows [], :cols []}}
Jan 27 15:31:45 sawyer-metabase heroku/router at=error code=H12 desc="Request timeout" method=POST path="/api/card/2113/query" host=sawyer-metabase.herokuapp.com request_id=8e6b6c93-d7cc-4521-baea-f4696dd57f4b fwd="<<redacted ip>>" dyno=web.2 connect=0ms service=30001ms status=503 bytes=0 protocol=https
Jan 27 15:31:46 sawyer-metabase app/web.2 2021-01-27 20:31:45,843 DEBUG middleware.log :: POST /api/card/2113/query 202 [ASYNC: canceled] 30.1 s (5 DB calls) App DB connections: 0/15 Jetty threads: 5/50 (2 idle, 0 queued) (130 total active threads) Queries in flight: 0 (0 queued)