As featured in: Data Management Solutions Using SAS Hash Table Operations: A Business Intelligence Case Study , and built from this github repository.
generatepitchandpadata.sas
1 /* "generatepitchandpadata.sas" from the SAS Press book
2  Data Management Solutions Using SAS Hash Table Operations:
3  A Business Intelligence Case Study
4 */
5 
6 %macro generatePitchAndPAData
7  (from =
8  ,nweeks =
9  );
10 
11  %local to;
12  %let from = %sysfunc(inputn(&from,date9.));
13  %let to = %eval(&from + &nweeks*7 - 1);
14 
15  %do date = &from %to &to;
16 
17  data _null_;
18 
19  if 0 then set template.AtBats template.Pitches template.Runs;
20 
21  retain Inning 1 Pitcher_ID . Date &date;
22  length data_to_load $16;
23  array runners(*) onFirst onSecond onThird;
24 
25  if _n_ = 1 then
26  do; /* define the needed hash tables */
27 
28  declare hash pitch_dist(ordered:"A");
29  rc = pitch_dist.DefineKey("Index");
30  rc = pitch_dist.DefineData("Index","Result","AB_Done","Is_An_Ab","Is_An_Out","Is_A_Hit","Is_An_OnBase"
31  ,"Bases","Runners_Advance_Factor","Pitch_Distribution_SK");
32  rc = pitch_dist.DefineDone();
33  lr = 0;
34  do until(lr);
35  set bizarro.pitch_distribution end=lr;
36  do Index = From to To;
37  rc =pitch_dist.add();
38  end;
39  end;
40  lr=0;
41 
42  declare hash hit_distance(dataset:"bizarro.hit_distance"
43  || "(rename=(Pitch_Distribution_FK=Pitch_Distribution_SK))");
44  rc = hit_distance.DefineKey("Pitch_Distribution_SK");
45  rc = hit_distance.DefineData("MinDistance","MaxDistance");
46  rc = hit_distance.DefineDone();
47 
48 
49  declare hash batters(ordered:"A",multidata:"Y");
50  rc = batters.DefineKey("Top_Bot","Batting_Order");
51  rc = batters.DefineData("Team_SK","Top_Bot","Batter_ID","First_Name","Last_Name","Position_Code","Bats","Throws");
52  rc = batters.DefineDone();
53 
54  declare hash pitchers(ordered:"D",multidata:"Y");
55  rc = pitchers.DefineKey("Top_Bot");
56  rc = pitchers.DefineData("Team_SK","Top_Bot","Pitcher_ID","First_Name","Last_Name","Position_Code","Pitcher_Bats","Pitcher_Throws");
57  rc = pitchers.DefineDone();
58 
59  if exist("bizarro.Runs")
60  then data_to_load = "bizarro.Runs";
61  else data_to_load = "template.Runs";
62  declare hash facts_runs(dataset:data_to_load
63  ,ordered:"A"
64  ,multidata:"Y");
65  facts_runs.DefineKey("Date","Game_SK");
66  facts_runs.DefineData("Game_SK","Date","Batter_ID"
67  ,"Inning","Top_Bot"
68  ,"AB_Number","Runner_ID");
69  facts_runs.DefineDone();
70 
71  if exist("bizarro.Pitches") then data_to_load = "bizarro.Pitches";
72  else data_to_load = "template.Pitches";
73  declare hash facts_pitches(dataset:data_to_load,ordered:"A",multidata:"Y");
74  facts_pitches.DefineKey("Date","Game_SK");
75  facts_pitches.DefineData("Game_SK","Date","Team_SK","Pitcher_ID","Pitcher_First_Name","Pitcher_Last_Name"
76  ,"Pitcher_Bats","Pitcher_Throws","Pitcher_Type","Inning","Top_Bot","Result","AB_Number","Outs"
77  ,"Balls","Strikes","Pitch_Number","Is_A_Ball","Is_A_Strike","onBase");
78  facts_pitches.DefineDone();
79 
80  if exist("bizarro.AtBats") then data_to_load = "bizarro.AtBats";
81  else data_to_load = "template.AtBats";
82  declare hash facts_atbats(dataset:data_to_load,ordered:"A",multidata:"Y");
83  facts_atbats.DefineKey("Date","Game_SK");
84  facts_atbats.DefineData("Game_SK","Date","Time","League","Away_SK","Home_SK","Team_SK"
85  ,"Batter_ID","First_Name","Last_Name","Position_Code","Inning"
86  ,"Top_Bot","Bats","Throws","AB_Number","Result","Direction","Distance"
87  ,"Outs","Balls","Strikes","onFirst","onSecond","onThird","onBase"
88  ,"Left_On_Base","Runs","Is_An_AB","Is_An_Out","Is_A_Hit","Is_An_OnBase"
89  ,"Bases","Number_of_Pitches");
90  facts_atbats.DefineDone();
91 
92  end; /* define the needed hash tables */
93 
94  if lr then
95  do; /* output the updated fact tables */
96  facts_runs.output(dataset:"bizarro.Runs");
97  facts_pitches.output(dataset:"bizarro.Pitches");
98  facts_atbats.output(dataset:"bizarro.AtBats");
99  end; /* output the updated fact tables */
100 
101  set bizarro.games end=lr;
102  where date=&date;
103  League = League_SK; /* fix/hack for missed rename */
104 
105  if game_sk ne lag(game_sk) then
106  do; /* delete existing rows for this game */
107  if facts_runs.check() = 0 then facts_runs.remove();
108  if facts_pitches.check() = 0 then facts_pitches.remove();
109  if facts_atbats.check() = 0 then facts_atbats.remove();
110  end; /* delete existing rows for this game */
111 
112  /* load the batter data for this game */
113  rc = batters.clear();
114  rc = pitchers.clear();
115  Top_Bot = "T";
116  do team_sk = away_sk, home_sk;
117  do until(_iorc_ ne 0);
118  set bizarro.LineUps key = LineUp;
119  if _iorc_ = 0 then
120  do; /* row found and read */
121  rc = batters.add(Key:Top_Bot,Key:Batting_Order,Data:Team_SK,Data:Top_Bot,Data:Player_ID,Data:First_Name,Data:Last_Name,Data:Position_Code,Data:Bats,Data:Throws);
122  if Position_Code in ("SP" "RP") then rc = pitchers.add(Key:Top_Bot,Data:Team_SK,Data:Top_Bot,Data:Player_ID,Data:First_Name,Data:Last_Name,Data:Position_Code,Data:Bats,Data:Throws);
123  end; /* row found and read */
124  end;
125  Top_Bot = "B";
126  end;
127 
128  Team_SK = .;
129 
130  _error_ = 0; /* suppress the error message when the indexed read failed as expected */
131 
132  array _halfInning(2) ab_t ab_b;
133  call missing(ab_t,ab_b);
134 
135  do Inning = 1 to 9;
136  ab_index = 0;
137  do Top_Bot = "T", "B";
138  ab_index + 1;
139  rc = pitchers.find(key:Top_Bot);
140 
141  rc = pitchers.has_next(result:not_last);
142  if inning ge 6 and not_last then
143  do;
144  rc=pitchers.removeDup();
145  pitchers.find(key:Top_Bot);
146  end;
147 
148  Pitcher_First_Name = First_Name;
149  Pitcher_Last_Name = Last_Name;
150  Pitcher_Type = Position_Code;
151  outs = 0;
152  call missing(onFirst,onSecond,onThird);
153  do until(outs=3);
154  Is_An_Out = .;
155  Balls = 0;
156  Strikes = 0;
157  Pitch_Number = 0;
158  _halfInning(ab_index) + 1;
159  AB_Number = _halfInning(ab_index);
160  rc = batters.find(key:Top_Bot,Key:mod(AB_Number-1,&nBattersPerGame)+1);
161 
162  rc = batters.has_next(result:not_last);
163  if mod(AB_Number,&nBattersPerGame)= 0 and inning ge 6 and not_last then
164  do;
165  rc=batters.removeDup();
166  batters.find(key:Top_Bot,Key:mod(AB_Number-1,&nBattersPerGame)+1);
167  end;
168 
169  do until(AB_Done);
170  Pitch_Number+1;
171  Index = ceil(100*uniform(&seed6*&date));
172  rc = pitch_dist.find();
173  if not AB_Done then
174  do; /* continue the Plate Appearance */
175  call missing(Is_A_Ball,Is_A_Strike);
176  if Result = "Ball" then Is_A_Ball = 1;
177  else if find(Result,"strike","i") then Is_A_Strike = 1;
178  else if Result = "Foul" and Strikes lt 2 then Is_A_Strike = 1;
179  Balls + Is_A_Ball;
180  Strikes + Is_A_Strike;
181  facts_pitches.add();
182  AB_Done = (Balls = 4 or Strikes = 3);
183  if Balls = 4 then
184  do; /* set needed values for a walk */
185  Result = "Walk";
186  call missing(Is_An_AB,Is_An_Out,Is_A_Hit);
187  Bases = 1;
188  Runners_Advance_Factor = 1;
189  Is_An_OnBase = 1;
190  end; /* set needed values for a walk */
191  else if Strikes = 3 then
192  do; /* set needed values for a strikeout */
193  Result = "Strikeout";
194  Is_An_AB = 1;
195  Is_An_Out = 1;
196  Is_An_OnBase = 0;
197  call missing(Is_A_Hit,Bases);
198  end; /* set needed values for a walk */
199  end; /* continue the Plate Appearance */
200  else facts_pitches.add();
201  if ab_done then
202  do; /* create needed result fields */
203  call missing(MinDistance,MaxDistance,Direction,Distance);
204  if hit_distance.find()=0 then
205  do; /* calculate direction and distance */
206  Direction = ceil(18*uniform(&seed7*&date));
207  Distance = MinDistance + ceil((MaxDistance-MinDistance)*uniform(&seed8*&date));
208  end; /* calculate direction and distance */
209  onBase = 3 - nmiss(of runners(*));
210  Runs = 0;
211  if Bases then
212  do; /* advance runners */
213  Left_On_Base = 0;
214  Advance = Bases + (Runners_Advance_Factor > uniform(&seed9*Date));
215  do i = dim(runners) to 1 by -1;
216  if runners(i) then
217  do; /* advance runner on this base */
218  if i+Advance ge 4 then
219  do; /* runners scored */
220  Runs + 1;
221  Runner_ID = runners(i);
222  facts_runs.add();
223  end; /* runners scored */
224  else runners(i+Advance) = runners(i);
225  runners(i) = .;
226  end; /* advance runner on this base */
227  end;
228  if bases lt 4 then runners(bases) = batter_id;
229  else
230  do; /* runners scored */
231  Runs + 1;
232  Runner_ID = Batter_ID;
233  facts_runs.add();
234  end; /* runners scored */
235  end; /* advance runners */
236  else Left_On_Base = onBase;
237  Number_of_Pitches = Pitch_Number;
238  facts_atbats.add();
239  outs + Is_An_Out;
240  end; /* create needed result fields */
241  end; /* AB loop */
242  end; /* Outs Loop */
243  end; /* Innings Loop */
244  end; /* cheating a bit here - discuss with Paul */
245  run;
246  %end;
247 %mend generatePitchAndPAData;