-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathBigquery code.sql
76 lines (71 loc) · 1.8 KB
/
Bigquery code.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# Using NCAA Basketball data from bigquery public dataset
# Create table in your dataset with the data that you want to use
CREATE TABLE IF NOT EXISTS `bigdatagroup2v1.bigdata.main_data`
AS
SELECT
Game_id, field_goals_made,
turnovers, offensive_rebounds,
defensive_rebounds, free_throws_made,
assists, blocks, steals ,win,
CASE
WHEN row_num <= 20862 THEN 'training'
WHEN row_num <= 26822 THEN 'evaluation'
ELSE 'prediction'
END AS dataframe
FROM
(
SELECT
Game_id, field_goals_made,
turnovers, offensive_rebounds,
defensive_rebounds, free_throws_made,
assists, blocks, steals ,win, ROW_NUMBER() OVER (partition by win) as row_num
FROM `bigquery-public-data.ncaa_basketball.mbb_teams_games_sr`
WHERE win is NOT NULL AND
turnovers IS NOT NULL AND
offensive_rebounds IS NOT NULL AND
defensive_rebounds IS NOT NULL AND
free_throws_made IS NOT NULL AND
assists IS NOT NULL AND
blocks IS NOT NULL AND
steals IS NOT NULL AND
field_goals_made IS NOT NULL
)
;
# Create model to predict win
CREATE MODEL IF NOT EXISTS
`bigdatagroup2v1.bigdata.win_model1`
OPTIONS
(MODEL_TYPE = 'BOOSTED_TREE_CLASSIFIER',
learn_rate = 0.1,
early_stop = TRUE,
input_label_cols=['win']) AS
SELECT
field_goals_made,
turnovers, offensive_rebounds,
defensive_rebounds, free_throws_made,
assists, blocks, steals ,win
FROM `bigdatagroup2v1.bigdata.train_V1`
WHERE win is NOT NULL
AND dataframe <> 'evaluation'
;
# Evaluate the model
SELECT
*
FROM
ML.EVALUATE (MODEL `bigdatagroup2v1.bigdata.win_model1`,
(
SELECT
*
FROM
`bigdatagroup2v1.bigdata.main_data`
WHERE
dataframe = 'evaluation'
)
)
;
# check feature importance
SELECT *
FROM
ML.FEATURE_IMPORTANCE(MODEL `bigdatagroup2v1.bigdata.win_model1`)
ORDER BY importance_weight DESC
;