My Model has one row for each day and vehicle, and total milesDriven in that day:
I am filtering vehicles that ran less than 100miles daily, so I can create a "group by: milesDriven 10bins" which to my understanding should create 10 bins/rows (0-10, 10-20, til 90-100) for each VIN. Then I would count how many rows their drivenMiles have fell into each bucket. so here is the notebook query:
and the result come with more bins than I expected:
What am I doing wrong?
here is the SQL from the notebook editor, which uses other models:
SELECT
`source`.`vin` AS `vin`,
(
FLOOR((`source`.`milesDriven` - 116.0) / (1.0 / 10.0)) * (1.0 / 10.0)
) + 116.0 AS `milesDriven`,
COUNT(*) AS `count`
FROM
(
with all_vehicles_daily as (
WITH ice_data AS (
SELECT
`csv_utilization_metabase_mockup_v3_csv_20231124101901`.`vin` AS `vin`,
`csv_utilization_metabase_mockup_v3_csv_20231124101901`.`department` AS `department`,
`csv_utilization_metabase_mockup_v3_csv_20231124101901`.`unit__` AS `unit__`,
`csv_utilization_metabase_mockup_v3_csv_20231124101901`.`make` AS `make`,
`csv_utilization_metabase_mockup_v3_csv_20231124101901`.`model` AS `model`,
`csv_utilization_metabase_mockup_v3_csv_20231124101901`.`modelyear` AS `modelyear`,
`csv_utilization_metabase_mockup_v3_csv_20231124101901`.`trim` AS `trim`,
`csv_utilization_metabase_mockup_v3_csv_20231124101901`.`enrollmentstatus` AS `enrollmentstatus`,
`csv_utilization_metabase_mockup_v3_csv_20231124101901`.`date_utc` AS `date_utc`,
`csv_utilization_metabase_mockup_v3_csv_20231124101901`.`miles_driven` AS `miles_driven`,
`csv_utilization_metabase_mockup_v3_csv_20231124101901`.`fuel_type` AS `fuel_type`,
`csv_utilization_metabase_mockup_v3_csv_20231124101901`.`fuel_type__clean_` AS `fuel_type__clean_`,
`csv_utilization_metabase_mockup_v3_csv_20231124101901`.`vehicle_type` AS `vehicle_type`,
`csv_utilization_metabase_mockup_v3_csv_20231124101901`.`state` AS `state`
FROM
`csv_utilization_metabase_mockup_v3_csv_20231124101901`
LIMIT
1048575
), evs_data AS (
SELECT
`source`.`vin` AS `vin`,
DATE(`source`.`rds_ts`) AS `rds_ts`,
`source`.`miles_driven` AS `miles_driven`,
`source`.`driven_over_5_mi` AS `driven_over_5_mi`,
`source`.`Vehicles__c_code` AS `Vehicles__c_code`,
`source`.`Vehicles__s_site_name` AS `Vehicles__s_site_name`,
`source`.`state` AS `state`,
`source`.`Vehicles__v_vehicle_name` AS `Vehicles__v_vehicle_name`,
`source`.`fuelType` AS `fuelType`,
`source`.`Vehicles__v_asset_type` AS `Vehicles__v_asset_type`,
`source`.`Vehicles__v_make` AS `Vehicles__v_make`,
`source`.`Vehicles__v_model` AS `Vehicles__v_model`,
`source`.`Vehicles__v_model_year` AS `Vehicles__v_model_year`,
`source`.`Vehicles__V_Daily_Target_Mi_Or_100` AS `Vehicles__V_Daily_Target_Mi_Or_100`
FROM
(
SELECT
`source`.`vin` AS `vin`,
`source`.`rds_ts` AS `rds_ts`,
`source`.`driven_over_5_mi` AS `driven_over_5_mi`,
`source`.`miles_driven` AS `miles_driven`,
SUBSTRING(
`Vehicles`.`s_site_address`,
(1 + CHAR_LENGTH(`Vehicles`.`s_site_address`)) - 8,
2
) AS `state`,
CASE
WHEN (`Vehicles`.`v_vin` IS NOT NULL)
AND (
(`Vehicles`.`v_vin` <> '')
OR (`Vehicles`.`v_vin` IS NULL)
) THEN 'Electric'
END AS `fuelType`,
CASE
WHEN `Vehicles`.`v_daily_target_mi` IS NULL THEN 100
ELSE `Vehicles`.`v_daily_target_mi`
END AS `Vehicles__V_Daily_Target_Mi_Or_100`,
`Vehicles`.`v_vin` AS `Vehicles__v_vin`,
`Vehicles`.`s_site_address` AS `Vehicles__s_site_address`,
`Vehicles`.`v_daily_target_mi` AS `Vehicles__v_daily_target_mi`,
`Vehicles`.`c_code` AS `Vehicles__c_code`,
`Vehicles`.`s_site_name` AS `Vehicles__s_site_name`,
`Vehicles`.`v_vehicle_name` AS `Vehicles__v_vehicle_name`,
`Vehicles`.`v_asset_type` AS `Vehicles__v_asset_type`,
`Vehicles`.`v_make` AS `Vehicles__v_make`,
`Vehicles`.`v_model` AS `Vehicles__v_model`,
`Vehicles`.`v_model_year` AS `Vehicles__v_model_year`
FROM
(
SELECT
rdodometer.rds_vin as vin,
FROM_UNIXTIME(rdodometer.rds_ts / 1000) AS rds_ts,
CASE
WHEN MAX(rdodometer.rds_odometer) - MIN(rdodometer.rds_odometer) > 5 THEN true
ELSE false
END as driven_over_5_mi,
MAX(rdodometer.rds_odometer) - MIN(rdodometer.rds_odometer) as miles_driven
FROM
rdodometer
LEFT JOIN vehicles ON rdodometer.rds_vin = vehicles.v_vin
LEFT JOIN rdsites ON vehicles.s_site_name = rdsites.s_site_name
LEFT JOIN rdvendors ON rdsites.s_default_vendor = rdvendors.vn_vendor_name
WHERE
rdodometer.vn_id = rdvendors.vn_id
GROUP BY
rdodometer.rds_vin,
DATE(FROM_UNIXTIME(rdodometer.rds_ts / 1000))
) AS `source`
LEFT JOIN `vehicles` AS `Vehicles` ON `source`.`vin` = `Vehicles`.`v_vin`
) AS `source`
GROUP BY
`source`.`vin`,
DATE(`source`.`rds_ts`),
`source`.`miles_driven`,
`source`.`driven_over_5_mi`,
`source`.`Vehicles__c_code`,
`source`.`Vehicles__s_site_name`,
`source`.`state`,
`source`.`Vehicles__v_vehicle_name`,
`source`.`fuelType`,
`source`.`Vehicles__v_asset_type`,
`source`.`Vehicles__v_make`,
`source`.`Vehicles__v_model`,
`source`.`Vehicles__v_model_year`,
`source`.`Vehicles__V_Daily_Target_Mi_Or_100`
ORDER BY
`source`.`vin` ASC,
DATE(`source`.`rds_ts`) ASC,
`source`.`miles_driven` ASC,
`source`.`driven_over_5_mi` ASC,
`source`.`Vehicles__c_code` ASC,
`source`.`Vehicles__s_site_name` ASC,
`source`.`state` ASC,
`source`.`Vehicles__v_vehicle_name` ASC,
`source`.`fuelType` ASC,
`source`.`Vehicles__v_asset_type` ASC,
`source`.`Vehicles__v_make` ASC,
`source`.`Vehicles__v_model` ASC,
`source`.`Vehicles__v_model_year` ASC,
`source`.`Vehicles__V_Daily_Target_Mi_Or_100` ASC
LIMIT
1048575
)
SELECT
vin AS vin,
date_utc AS datetime,
miles_driven AS milesDriven,
case
WHEN (miles_driven > 5) THEN 1
ELSE 0
END AS drivenOver5mi,
'SPWR' AS orgName,
department AS site,
NULL AS state,
unit__ AS vehicleName,
fuel_type__clean_ AS fuelType,
vehicle_type AS vehicleType,
make,
model,
modelYear,
NULL as dailyTargetMiles
FROM
ice_data
where
fuel_type__clean_ != 'Electric'
UNION ALL
SELECT
vin,
rds_ts AS datetime,
miles_driven as milesDriven,
driven_over_5_mi as drivenOver5mi,
`Vehicles__c_code` AS orgName,
`Vehicles__s_site_name` AS site,
state,
`Vehicles__v_vehicle_name` AS vehicleName,
fuelType AS fuelType,
`Vehicles__v_asset_type` AS vehicleType,
`Vehicles__v_make` AS make,
`Vehicles__v_model` AS model,
`Vehicles__v_model_year` AS modelYear,
`Vehicles__V_Daily_Target_Mi_Or_100` AS dailyTargetMiles
FROM
evs_data
)
SELECT
vin,
STR_TO_DATE(datetime, '%Y-%m-%d') as dateTime,
milesDriven,
MAX(milesDriven) OVER (PARTITION BY vin) as maxMilesDriven,
drivenOver5mi,
dailyTargetMiles,
(milesDriven / dailyTargetMiles) AS contractedMileagePercentageUtilizationDaily,
orgName,
CASE
WHEN state = 'Alabama' THEN 'AL'
WHEN state = 'Alaska' THEN 'AK'
WHEN state = 'Arizona' THEN 'AZ'
WHEN state = 'Arkansas' THEN 'AR'
WHEN state = 'California' THEN 'CA'
WHEN state = 'Colorado' THEN 'CO'
WHEN state = 'Connecticut' THEN 'CT'
WHEN state = 'Delaware' THEN 'DE'
WHEN state = 'Florida' THEN 'FL'
WHEN state = 'Georgia' THEN 'GA'
WHEN state = 'Hawaii' THEN 'HI'
WHEN state = 'Idaho' THEN 'ID'
WHEN state = 'Illinois' THEN 'IL'
WHEN state = 'Indiana' THEN 'IN'
WHEN state = 'Iowa' THEN 'IA'
WHEN state = 'Kansas' THEN 'KS'
WHEN state = 'Kentucky' THEN 'KY'
WHEN state = 'Louisiana' THEN 'LA'
WHEN state = 'Maine' THEN 'ME'
WHEN state = 'Maryland' THEN 'MD'
WHEN state = 'Massachusetts' THEN 'MA'
WHEN state = 'Michigan' THEN 'MI'
WHEN state = 'Minnesota' THEN 'MN'
WHEN state = 'Mississippi' THEN 'MS'
WHEN state = 'Missouri' THEN 'MO'
WHEN state = 'Montana' THEN 'MT'
WHEN state = 'Nebraska' THEN 'NE'
WHEN state = 'Nevada' THEN 'NV'
WHEN state = 'New Hampshire' THEN 'NH'
WHEN state = 'New Jersey' THEN 'NJ'
WHEN state = 'New Mexico' THEN 'NM'
WHEN state = 'New York' THEN 'NY'
WHEN state = 'North Carolina' THEN 'NC'
WHEN state = 'North Dakota' THEN 'ND'
WHEN state = 'Ohio' THEN 'OH'
WHEN state = 'Oklahoma' THEN 'OK'
WHEN state = 'Oregon' THEN 'OR'
WHEN state = 'Pennsylvania' THEN 'PA'
WHEN state = 'Rhode Island' THEN 'RI'
WHEN state = 'South Carolina' THEN 'SC'
WHEN state = 'South Dakota' THEN 'SD'
WHEN state = 'Tennessee' THEN 'TN'
WHEN state = 'Texas' THEN 'TX'
WHEN state = 'Utah' THEN 'UT'
WHEN state = 'Vermont' THEN 'VT'
WHEN state = 'Virginia' THEN 'VA'
WHEN state = 'Washington' THEN 'WA'
WHEN state = 'West Virginia' THEN 'WV'
WHEN state = 'Wisconsin' THEN 'WI'
WHEN state = 'Wyoming' THEN 'WY'
ELSE state
END AS state,
site,
vehicleName,
CASE
WHEN fuelType = 'Flexible Fuel Vehicle (FFV)' THEN 'Flex Fuel'
WHEN fuelType = 'Gas/Electric Hybrid' THEN 'Hybrid'
WHEN fuelType = 'Plug-in Gas/Electric Hybrid' THEN 'Hybrid'
ELSE fuelType
END as fuelType,
CASE
WHEN fuelType = 'Electric' THEN 'EV'
ELSE 'ICE'
END as fuelCategory,
vehicleType,
make,
model,
modelYear
FROM
all_vehicles_daily
WHERE
DAYOFWEEK(STR_TO_DATE(datetime, '%Y-%m-%d')) NOT IN (1, 7)
ORDER by
vin,
dateTime
) AS `source`
WHERE
(`source`.`fuelCategory` = 'ICE')
AND (`source`.`maxMilesDriven` < 100)
GROUP BY
`source`.`vin`,
(
FLOOR((`source`.`milesDriven` - 116.0) / (1.0 / 10.0)) * (1.0 / 10.0)
) + 116.0
ORDER BY
`source`.`vin` ASC,
(
FLOOR((`source`.`milesDriven` - 116.0) / (1.0 / 10.0)) * (1.0 / 10.0)
) + 116.0 ASC
and diagnostic info:
{
"browser-info": {
"language": "en",
"platform": "MacIntel",
"userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
"vendor": "Google Inc."
},
"system-info": {
"file.encoding": "UTF-8",
"java.runtime.name": "OpenJDK Runtime Environment",
"java.runtime.version": "11.0.20.1+1",
"java.vendor": "Eclipse Adoptium",
"java.vendor.url": "https://adoptium.net/",
"java.version": "11.0.20.1",
"java.vm.name": "OpenJDK 64-Bit Server VM",
"java.vm.version": "11.0.20.1+1",
"os.name": "Linux",
"os.version": "4.14.322-170.538.amzn1.x86_64",
"user.language": "en",
"user.timezone": "US/Pacific"
},
"metabase-info": {
"databases": [
"h2",
"mysql",
"athena"
],
"hosting-env": "unknown",
"application-database": "mysql",
"application-database-details": {
"database": {
"name": "MySQL",
"version": "8.0.33"
},
"jdbc-driver": {
"name": "MariaDB Connector/J",
"version": "2.7.6"
}
},
"run-mode": "prod",
"version": {
"date": "2023-10-11",
"tag": "v0.47.4",
"branch": "?",
"hash": "c96dc65"
},
"settings": {
"report-timezone": null
}
}
}
Thanks!