As featured in: Data Management Solutions Using SAS Hash Table Operations: A Business Intelligence Case Study , and built from this github repository.
bizarroball.sas
Go to the documentation of this file.
1 /**
2  @file
3  @brief Auto-generated file
4  @details The `build.sh` file in the https://github.com/allanbowe/bizarroball repo
5  is used to create this file.
6  @author Allan Bowe (derivative of work by Don Henderson and Paul Dorfman)
7  ///@cond INTERNAL
8 **/
9 
10 %let root=%sysfunc(pathname(sasuser)); /* change to another path as desired */
11 %*let root = /folders/myfolders/BizarroBall; /* use this for the University Edition */
12 
13 options dlcreatedir;
14 libname bizarro "&root/Data";
15 libname DW "&root/DW";
16 libname template "&root/Data/Template";
17 
18 /* SCD End Date - Used in Chapter 7 */
19 %let SCD_End_Date = '31DEC9999'd;
20 
21 /* The following macro variables are only used in the programs/macros
22  to generate the sample Bizarro Ball data.
23 */
24 
25 /* Parameters for creating the data */
26 %let nTeamsPerLeague = 16;
27 %let seasonStartDate = 20MAR2017;
28 %let nWeeksSeason = %eval((&nTeamsPerLeague-1)*2);
29 %let nPlayersPerTeam = 25;
30 %let nBattersPerGame = 9;
31 
32 /* Random Number Seeds */
33 %let seed1 = 54321; /* used in S0100 GenerateTeams.sas */
34 %let seed2 = 98765; /* used in S0300 GeneratePlayerCandidates.sas */
35 %let seed3 = 76543; /* used in S0300 GeneratePlayerCandidates.sas */
36 %let seed4 = 11; /* used in S0500 GenerateSchedule.sas */
37 %let seed5 = 9887; /* used in macro generatelinesups.sas */
38 %let seed6 = 9973; /* used in macro generatepitchandpadata.sas */
39 %let seed7 = 101; /* used in macro generatepitchandpadata.sas */
40 %let seed8 = 10663; /* used in macro generatepitchandpadata.sas */
41 %let seed9 = 10753; /* used in macro generatepitchandpadata.sas */
42 %let seed10 = 98999; /* used in S0300 GeneratePlayerCandidates.sas */
43 %let seed11 = 99223; /* used in S0300 GeneratePlayerCandidates.sas */
44 
45 
46 /* now include macros & datalines */
47 /* "createhash.sas" from the SAS Press book
48  Data Management Solutions Using SAS Hash Table Operations:
49  A Business Intelligence Case Study
50 */
51 
52 %macro createHash
53  (lib = dw
54  ,hashTable = hashTable
55  ,metaData = template.Schema_Metadata
56  );
57 
58  if 0 then set template.&hashTable;
59  dcl hash _&hashTable(dataset:"&lib..&hashtable"
60  ,multidata:"Y"
61  ,ordered:"A");
62  lr = 0;
63  do while(lr=0);
64  set &metadata end=lr;
65  where upcase(hashTable) = "%upcase(&hashTable)";
66  if is_a_key then _&hashTable..DefineKey(Column);
67  _&hashTable..DefineData(Column);
68  end;
69  _&hashTable..DefineDone();
70 
71 %mend createHash;
72 /* "generatelineups.sas" from the SAS Press book
73  Data Management Solutions Using SAS Hash Table Operations:
74  A Business Intelligence Case Study
75 */
76 
77 %macro generateLineUps
78  (from =
79  ,nweeks =
80  );
81 
82  %local to;
83  %let from = %sysfunc(inputn(&from,date9.));
84  %let to = %eval(&from + &nweeks*7 - 1);
85 
86  %do date = &from %to &to;
87 
88  data _null_;
89 
90  retain Date &Date;
91  if 0 then
92  do; /* define vars to PDV */
93  set template.LineUps bizarro.Positions_Snowflake;
94  Away_SK = Team_SK;
95  Home_SK = Team_SK;
96  end; /* define vars to PDV */
97 
98  /* load hash table with number of starters for each position */
99  declare hash positions(dataset:"bizarro.Positions");
100  positions.defineKey("Position_Grp_SK");
101  positions.defineData("Position_Grp_SK","Position_Code","Count","Starters");
102  positions.defineDone();
103  declare hiter positionIter("positions");
104 
105  declare hash positions_snowflake(dataset:"bizarro.Positions_Snowflake",ordered:"A",multidata:"Y");
106  positions_snowflake.defineKey("Position_Grp_FK");
107  positions_snowflake.defineData("Position_Grp_FK","Position_Code");
108  positions_snowflake.defineDone();
109 
110  declare hash LineUp(multidata:"Y");
111  rc = LineUp.defineKey("Game_SK","Team_SK");
112  rc = LineUp.defineData("Game_SK","Team_SK","Date","Batting_Order","Player_ID","First_Name","Last_Name","Position_Code","Bats","Throws");
113  rc = LineUp.defineDone();
114 
115  declare hash players(dataset:"bizarro.Player_Candidates(where=(Team_SK))",ordered:"A",multidata:"Y");
116  rc = players.defineKey("Team_SK","Position_Code");
117  rc = players.defineData("Team_SK","Player_ID","First_Name","Last_Name","Bats","Throws");
118  rc = players.defineDone();
119 
120  do while (lr = 0);
121  set bizarro.trades end = lr;
122  where trade_date le &date;
123  do while(players.do_over(Key:_team_sk,Key:_Position_Code) = 0);
124  if player_id = traded_id then
125  do; /* player traded - delete and re-add */
126  players.removeDup();
127  team_sk = traded_to;
128  Position_Code = _Position_Code;
129  players.add();
130  leave;
131  end;
132  end;
133  end;
134 
135  declare hash games(dataset:"bizarro.Games(where=(date=&date))",multidata:"Y");
136  rc = games.defineKey("Date");
137  rc = games.defineData("Game_SK","Away_SK","Home_SK");
138  rc = games.defineDone();
139 
140  games_rc = games.find(Key:&date);
141  do while (games_rc = 0);
142  do team_sk = away_sk, home_sk;
143  rc = positionIter.first();
144  P_Grp = Position_Code;
145  grp_rc = positions_snowflake.find(Key:Position_Grp_SK);
146  do while(rc=0);
147  prc = players.find(Key:Team_SK,Key:P_grp);
148  do while(prc = 0);
149  Batting_Order = uniform(&seed5*&date);
150  if divide(Starters,Count) gt Batting_Order then
151  do; /* select this player */
152  if position_code = "SP" then Batting_Order = 9;
153  rc = LineUp.add();
154  if grp_rc = 0 then grp_rc = positions_snowflake.find_next(Key:Position_Grp_SK);
155  Starters + (-1); /* need one less player */
156  end; /* select this player */
157  else if position_code ne "SP" then
158  do; /* pinch hitters and relief pitchers */
159  _position_code = position_code;
160  if position_code = 'RP' then Batting_Order = 1e6;
161  else
162  do;/* assign as a pinch hitter for the pitcher */
163  Batting_Order + 9;
164  Position_Code = 'PH';
165  end; /* assign as a pinch hitter for the pitcher */
166  LineUp.add();
167  position_code = _position_code;
168  end; /* pinch hitters and relief pitchers */
169  Count + (-1); /* regardless have one less player */
170  prc = players.find_next(Key:Team_SK,Key:P_grp);
171  end;
172  *if Position_Code = 'UT' then
173  do; /* add utility players as PHers */;
174  * Batting_Order = 9 + uniform(&seed5*&date);
175  *Position_Code = 'PH';
176  * LineUp.add();
177  *end; /* add utility players as PHers */;
178  rc=positionIter.next();
179  P_Grp = Position_Code;
180  grp_rc = positions_snowflake.find(Key:Position_Grp_SK);
181  end;
182  end;
183  games_rc = games.find_next();
184  end;
185  LineUp.output(dataset:"Lineups");
186  stop;
187  run;
188 
189  proc sort data = lineups out = lineups;
190  by game_sk team_sk batting_order;
191  run;
192 
193  %if %sysfunc(exist(bizarro.LineUps)) %then
194  %do; /* delete existing rows for these games */
195  proc sql;
196  delete from bizarro.Lineups
197  where Game_SK in (select distinct Game_SK from Lineups);
198  quit;
199  %end; /* delete existing rows for these games */
200  %else
201  %do; /* create the initial data set */
202  data bizarro.LineUps;
203  set Lineups(obs=0);
204  run;
205  %end; /* create initial data set */
206 
207  data bizarro.LineUps(index=(LineUp=(Game_SK Team_SK)));
208  set bizarro.Lineups
209  LineUps(in = new);
210  drop Order;
211  by game_sk team_sk;
212  if first.team_sk then Order=0;
213  Order+1;
214  if batting_order ne int(batting_order) and new then batting_order = min(Order,9);
215  run;
216  %end;
217 %mend generateLineUps;
218 /* "generatepitchandpadata.sas" from the SAS Press book
219  Data Management Solutions Using SAS Hash Table Operations:
220  A Business Intelligence Case Study
221 */
222 
223 %macro generatePitchAndPAData
224  (from =
225  ,nweeks =
226  );
227 
228  %local to;
229  %let from = %sysfunc(inputn(&from,date9.));
230  %let to = %eval(&from + &nweeks*7 - 1);
231 
232  %do date = &from %to &to;
233 
234  data _null_;
235 
236  if 0 then set template.AtBats template.Pitches template.Runs;
237 
238  retain Inning 1 Pitcher_ID . Date &date;
239  length data_to_load $16;
240  array runners(*) onFirst onSecond onThird;
241 
242  if _n_ = 1 then
243  do; /* define the needed hash tables */
244 
245  declare hash pitch_dist(ordered:"A");
246  rc = pitch_dist.DefineKey("Index");
247  rc = pitch_dist.DefineData("Index","Result","AB_Done","Is_An_Ab","Is_An_Out","Is_A_Hit","Is_An_OnBase"
248  ,"Bases","Runners_Advance_Factor","Pitch_Distribution_SK");
249  rc = pitch_dist.DefineDone();
250  lr = 0;
251  do until(lr);
252  set bizarro.pitch_distribution end=lr;
253  do Index = From to To;
254  rc =pitch_dist.add();
255  end;
256  end;
257  lr=0;
258 
259  declare hash hit_distance(dataset:"bizarro.hit_distance"
260  || "(rename=(Pitch_Distribution_FK=Pitch_Distribution_SK))");
261  rc = hit_distance.DefineKey("Pitch_Distribution_SK");
262  rc = hit_distance.DefineData("MinDistance","MaxDistance");
263  rc = hit_distance.DefineDone();
264 
265 
266  declare hash batters(ordered:"A",multidata:"Y");
267  rc = batters.DefineKey("Top_Bot","Batting_Order");
268  rc = batters.DefineData("Team_SK","Top_Bot","Batter_ID","First_Name","Last_Name","Position_Code","Bats","Throws");
269  rc = batters.DefineDone();
270 
271  declare hash pitchers(ordered:"D",multidata:"Y");
272  rc = pitchers.DefineKey("Top_Bot");
273  rc = pitchers.DefineData("Team_SK","Top_Bot","Pitcher_ID","First_Name","Last_Name","Position_Code","Pitcher_Bats","Pitcher_Throws");
274  rc = pitchers.DefineDone();
275 
276  if exist("bizarro.Runs")
277  then data_to_load = "bizarro.Runs";
278  else data_to_load = "template.Runs";
279  declare hash facts_runs(dataset:data_to_load
280  ,ordered:"A"
281  ,multidata:"Y");
282  facts_runs.DefineKey("Date","Game_SK");
283  facts_runs.DefineData("Game_SK","Date","Batter_ID"
284  ,"Inning","Top_Bot"
285  ,"AB_Number","Runner_ID");
286  facts_runs.DefineDone();
287 
288  if exist("bizarro.Pitches") then data_to_load = "bizarro.Pitches";
289  else data_to_load = "template.Pitches";
290  declare hash facts_pitches(dataset:data_to_load,ordered:"A",multidata:"Y");
291  facts_pitches.DefineKey("Date","Game_SK");
292  facts_pitches.DefineData("Game_SK","Date","Team_SK","Pitcher_ID","Pitcher_First_Name","Pitcher_Last_Name"
293  ,"Pitcher_Bats","Pitcher_Throws","Pitcher_Type","Inning","Top_Bot","Result","AB_Number","Outs"
294  ,"Balls","Strikes","Pitch_Number","Is_A_Ball","Is_A_Strike","onBase");
295  facts_pitches.DefineDone();
296 
297  if exist("bizarro.AtBats") then data_to_load = "bizarro.AtBats";
298  else data_to_load = "template.AtBats";
299  declare hash facts_atbats(dataset:data_to_load,ordered:"A",multidata:"Y");
300  facts_atbats.DefineKey("Date","Game_SK");
301  facts_atbats.DefineData("Game_SK","Date","Time","League","Away_SK","Home_SK","Team_SK"
302  ,"Batter_ID","First_Name","Last_Name","Position_Code","Inning"
303  ,"Top_Bot","Bats","Throws","AB_Number","Result","Direction","Distance"
304  ,"Outs","Balls","Strikes","onFirst","onSecond","onThird","onBase"
305  ,"Left_On_Base","Runs","Is_An_AB","Is_An_Out","Is_A_Hit","Is_An_OnBase"
306  ,"Bases","Number_of_Pitches");
307  facts_atbats.DefineDone();
308 
309  end; /* define the needed hash tables */
310 
311  if lr then
312  do; /* output the updated fact tables */
313  facts_runs.output(dataset:"bizarro.Runs");
314  facts_pitches.output(dataset:"bizarro.Pitches");
315  facts_atbats.output(dataset:"bizarro.AtBats");
316  end; /* output the updated fact tables */
317 
318  set bizarro.games end=lr;
319  where date=&date;
320  League = League_SK; /* fix/hack for missed rename */
321 
322  if game_sk ne lag(game_sk) then
323  do; /* delete existing rows for this game */
324  if facts_runs.check() = 0 then facts_runs.remove();
325  if facts_pitches.check() = 0 then facts_pitches.remove();
326  if facts_atbats.check() = 0 then facts_atbats.remove();
327  end; /* delete existing rows for this game */
328 
329  /* load the batter data for this game */
330  rc = batters.clear();
331  rc = pitchers.clear();
332  Top_Bot = "T";
333  do team_sk = away_sk, home_sk;
334  do until(_iorc_ ne 0);
335  set bizarro.LineUps key = LineUp;
336  if _iorc_ = 0 then
337  do; /* row found and read */
338  rc = batters.add(Key:Top_Bot,Key:Batting_Order,Data:Team_SK,Data:Top_Bot,Data:Player_ID,Data:First_Name,Data:Last_Name,Data:Position_Code,Data:Bats,Data:Throws);
339  if Position_Code in ("SP" "RP") then rc = pitchers.add(Key:Top_Bot,Data:Team_SK,Data:Top_Bot,Data:Player_ID,Data:First_Name,Data:Last_Name,Data:Position_Code,Data:Bats,Data:Throws);
340  end; /* row found and read */
341  end;
342  Top_Bot = "B";
343  end;
344 
345  Team_SK = .;
346 
347  _error_ = 0; /* suppress the error message when the indexed read failed as expected */
348 
349  array _halfInning(2) ab_t ab_b;
350  call missing(ab_t,ab_b);
351 
352  do Inning = 1 to 9;
353  ab_index = 0;
354  do Top_Bot = "T", "B";
355  ab_index + 1;
356  rc = pitchers.find(key:Top_Bot);
357 
358  rc = pitchers.has_next(result:not_last);
359  if inning ge 6 and not_last then
360  do;
361  rc=pitchers.removeDup();
362  pitchers.find(key:Top_Bot);
363  end;
364 
365  Pitcher_First_Name = First_Name;
366  Pitcher_Last_Name = Last_Name;
367  Pitcher_Type = Position_Code;
368  outs = 0;
369  call missing(onFirst,onSecond,onThird);
370  do until(outs=3);
371  Is_An_Out = .;
372  Balls = 0;
373  Strikes = 0;
374  Pitch_Number = 0;
375  _halfInning(ab_index) + 1;
376  AB_Number = _halfInning(ab_index);
377  rc = batters.find(key:Top_Bot,Key:mod(AB_Number-1,&nBattersPerGame)+1);
378 
379  rc = batters.has_next(result:not_last);
380  if mod(AB_Number,&nBattersPerGame)= 0 and inning ge 6 and not_last then
381  do;
382  rc=batters.removeDup();
383  batters.find(key:Top_Bot,Key:mod(AB_Number-1,&nBattersPerGame)+1);
384  end;
385 
386  do until(AB_Done);
387  Pitch_Number+1;
388  Index = ceil(100*uniform(&seed6*&date));
389  rc = pitch_dist.find();
390  if not AB_Done then
391  do; /* continue the Plate Appearance */
392  call missing(Is_A_Ball,Is_A_Strike);
393  if Result = "Ball" then Is_A_Ball = 1;
394  else if find(Result,"strike","i") then Is_A_Strike = 1;
395  else if Result = "Foul" and Strikes lt 2 then Is_A_Strike = 1;
396  Balls + Is_A_Ball;
397  Strikes + Is_A_Strike;
398  facts_pitches.add();
399  AB_Done = (Balls = 4 or Strikes = 3);
400  if Balls = 4 then
401  do; /* set needed values for a walk */
402  Result = "Walk";
403  call missing(Is_An_AB,Is_An_Out,Is_A_Hit);
404  Bases = 1;
405  Runners_Advance_Factor = 1;
406  Is_An_OnBase = 1;
407  end; /* set needed values for a walk */
408  else if Strikes = 3 then
409  do; /* set needed values for a strikeout */
410  Result = "Strikeout";
411  Is_An_AB = 1;
412  Is_An_Out = 1;
413  Is_An_OnBase = 0;
414  call missing(Is_A_Hit,Bases);
415  end; /* set needed values for a walk */
416  end; /* continue the Plate Appearance */
417  else facts_pitches.add();
418  if ab_done then
419  do; /* create needed result fields */
420  call missing(MinDistance,MaxDistance,Direction,Distance);
421  if hit_distance.find()=0 then
422  do; /* calculate direction and distance */
423  Direction = ceil(18*uniform(&seed7*&date));
424  Distance = MinDistance + ceil((MaxDistance-MinDistance)*uniform(&seed8*&date));
425  end; /* calculate direction and distance */
426  onBase = 3 - nmiss(of runners(*));
427  Runs = 0;
428  if Bases then
429  do; /* advance runners */
430  Left_On_Base = 0;
431  Advance = Bases + (Runners_Advance_Factor > uniform(&seed9*Date));
432  do i = dim(runners) to 1 by -1;
433  if runners(i) then
434  do; /* advance runner on this base */
435  if i+Advance ge 4 then
436  do; /* runners scored */
437  Runs + 1;
438  Runner_ID = runners(i);
439  facts_runs.add();
440  end; /* runners scored */
441  else runners(i+Advance) = runners(i);
442  runners(i) = .;
443  end; /* advance runner on this base */
444  end;
445  if bases lt 4 then runners(bases) = batter_id;
446  else
447  do; /* runners scored */
448  Runs + 1;
449  Runner_ID = Batter_ID;
450  facts_runs.add();
451  end; /* runners scored */
452  end; /* advance runners */
453  else Left_On_Base = onBase;
454  Number_of_Pitches = Pitch_Number;
455  facts_atbats.add();
456  outs + Is_An_Out;
457  end; /* create needed result fields */
458  end; /* AB loop */
459  end; /* Outs Loop */
460  end; /* Innings Loop */
461  end; /* cheating a bit here - discuss with Paul */
462  run;
463  %end;
464 %mend generatePitchAndPAData;
465 /* "AtBats.sas" from the SAS Press book
466  Data Management Solutions Using SAS Hash Table Operations:
467  A Business Intelligence Case Study
468 */
469 
470 proc sql;
471  create table TEMPLATE.ATBATS
472  (
473  Game_SK char(16) format=$HEX32. label = "Game Surrogate Key",
474  Date num format=YYMMDD10. label = "Game Date",
475  Time num format=TIMEAMPM8. label = "Game Time",
476  League num label = "League",
477  Home_SK num label = "Home Team Surrogate Key",
478  Away_SK num label = "Away Team Surrogate Key",
479  Team_SK num label = "Team Surrogate Key",
480  Batter_ID num label = "Batter ID",
481  First_Name char(12) label = "Batter First Name",
482  Last_Name char(12) label = "Batter Last Name",
483  Position_Code char(3) label "Batter Position",
484  Inning num label = "Inning",
485  Top_Bot char(1) label = "Which Half Inning",
486  Bats char(1) informat=$1. label = "Bats L, R or Switch",
487  Throws char(1) informat=$1. label = "Throws L or R",
488  AB_Number num label = "At Bat Number in Game",
489  Result char(16) label = "Result of the At Bat",
490  Direction num label='Hit Direction',
491  Distance num label = 'Hit Distance',
492  Outs num label = "Number of Outs",
493  Balls num label = "Number of Balls",
494  Strikes num label = "Number of Strikes",
495  onFirst num label = "ID of Runner on First",
496  onSecond num label = "ID of Runner on Second",
497  onThird num label = "ID of Runner on Third",
498  onBase num label = "Number of Men on Base at Beginning of AB",
499  Left_On_Base num label = "Number of Men Left on Base at End of AB",
500  Runs num label = "Runs Scored",
501  Is_An_AB num label = "Counts as an AB",
502  Is_An_Out num label = "Is an Out",
503  Is_A_Hit num label = "Is a Hit",
504  Is_An_OnBase num label = "Counts as an On Base",
505  Bases num label = "Number of Bases for the Hit",
506  Number_of_Pitches num label = "Number of Pitches This AB"
507  );
508 quit;
509 /*******************************************************************
510  Datalines for CHAPTER8PARMFILE dataset
511  Generated by %mp_ds2cards()
512  Licensed under GNU, available on github.com/boemska/macrocore
513 ********************************************************************/
514 data template.Chapter8parmfile ;
515 attrib
516 hashTable length= $32 label="Member Name"
517 Column length= $32 label="Column Name"
518 is_A_Key length= 8
519 ;
520 infile cards dsd delimiter=',';
521 input
522  hashTable :$char.
523  Column :$char.
524  is_A_Key
525 ;
526 datalines4;
527 BYDAYOFWEEK,DayOfWeek,1
528 BYDAYOFWEEK,PAs,0
529 BYDAYOFWEEK,AtBats,0
530 BYDAYOFWEEK,Hits,0
531 BYDAYOFWEEK,BA,0
532 BYDAYOFWEEK,OBP,0
533 BYDAYOFWEEK,SLG,0
534 BYDAYOFWEEK,OPS,0
535 BYDAYOFWEEK,_Bases,0
536 BYDAYOFWEEK,_Reached_Base,0
537 BYMONTH,Month,1
538 BYMONTH,PAs,0
539 BYMONTH,AtBats,0
540 BYMONTH,Hits,0
541 BYMONTH,BA,0
542 BYMONTH,OBP,0
543 BYMONTH,SLG,0
544 BYMONTH,OPS,0
545 BYMONTH,_Bases,0
546 BYMONTH,_Reached_Base,0
547 BYPLAYER,Last_Name,1
548 BYPLAYER,First_Name,1
549 BYPLAYER,Batter_ID,1
550 BYPLAYER,PAs,0
551 BYPLAYER,AtBats,0
552 BYPLAYER,Hits,0
553 BYPLAYER,BA,0
554 BYPLAYER,OBP,0
555 BYPLAYER,SLG,0
556 BYPLAYER,OPS,0
557 BYPLAYER,_Bases,0
558 BYPLAYER,_Reached_Base,0
559 BYPLAYERMONTH,Last_Name,1
560 BYPLAYERMONTH,First_Name,1
561 BYPLAYERMONTH,Batter_ID,1
562 BYPLAYERMONTH,Month,1
563 BYPLAYERMONTH,PAs,0
564 BYPLAYERMONTH,AtBats,0
565 BYPLAYERMONTH,Hits,0
566 BYPLAYERMONTH,BA,0
567 BYPLAYERMONTH,OBP,0
568 BYPLAYERMONTH,SLG,0
569 BYPLAYERMONTH,OPS,0
570 BYPLAYERMONTH,_Bases,0
571 BYPLAYERMONTH,_Reached_Base,0
572 BYTEAM,Team_Name,1
573 BYTEAM,Team_SK,1
574 BYTEAM,PAs,0
575 BYTEAM,AtBats,0
576 BYTEAM,Hits,0
577 BYTEAM,BA,0
578 BYTEAM,OBP,0
579 BYTEAM,SLG,0
580 BYTEAM,OPS,0
581 BYTEAM,_Bases,0
582 BYTEAM,_Reached_Base,0
583 ;;;;
584 run;
585 /*******************************************************************
586  Datalines for CHAPTER9LOOKUPTABLES dataset
587  Generated by %mp_ds2cards()
588  Licensed under GNU, available on github.com/boemska/macrocore
589 ********************************************************************/
590 data template.Chapter9lookuptables ;
591 attrib
592 hashTable length= $32 label="Member Name"
593 Column length= $32 label="Column Name"
594 Is_A_Key length= 8
595 datasetTag length= $200
596 ;
597 infile cards dsd delimiter=',';
598 input
599  hashTable :$char.
600  Column :$char.
601  Is_A_Key
602  datasetTag :$char.
603 ;
604 datalines4;
605 GAMES,Game_SK,1,DW.GAMES
606 GAMES,Date,0,DW.GAMES
607 GAMES,Time,0,DW.GAMES
608 GAMES,Year,0,DW.GAMES
609 GAMES,Month,0,DW.GAMES
610 GAMES,DayOfWeek,0,DW.GAMES
611 GAMES,League,0,DW.GAMES
612 GAMES,Home_SK,0,DW.GAMES
613 GAMES,Away_SK,0,DW.GAMES
614 PLAYERS,Player_ID,1,DW.PLAYERS
615 PLAYERS,Team_SK,0,DW.PLAYERS
616 PLAYERS,First_Name,0,DW.PLAYERS
617 PLAYERS,Last_Name,0,DW.PLAYERS
618 PLAYERS,Bats,0,DW.PLAYERS
619 PLAYERS,Throws,0,DW.PLAYERS
620 PLAYERS,Start_Date,0,DW.PLAYERS
621 PLAYERS,End_Date,0,DW.PLAYERS
622 TEAMS,Team_SK,1,DW.TEAMS
623 TEAMS,Team_Name,0,DW.TEAMS
624 TEAMS,League_SK,0,DW.TEAMS
625 ;;;;
626 run;
627 /*******************************************************************
628  Datalines for CHAPTER9SPLITS dataset
629  Generated by %mp_ds2cards()
630  Licensed under GNU, available on github.com/boemska/macrocore
631 ********************************************************************/
632 data template.Chapter9splits ;
633 attrib
634 hashTable length= $32 label="Member Name"
635 Column length= $32 label="Column Name"
636 is_A_Key length= 8
637 ;
638 infile cards dsd delimiter=',';
639 input
640  hashTable :$char.
641  Column :$char.
642  is_A_Key
643 ;
644 datalines4;
645 BYDAYOFWEEK,DayOfWeek,1
646 BYDAYOFWEEK,PAs,0
647 BYDAYOFWEEK,AtBats,0
648 BYDAYOFWEEK,Hits,0
649 BYDAYOFWEEK,BA,0
650 BYDAYOFWEEK,OBP,0
651 BYDAYOFWEEK,SLG,0
652 BYDAYOFWEEK,OPS,0
653 BYDAYOFWEEK,_Bases,0
654 BYDAYOFWEEK,_Reached_Base,0
655 BYMONTH,Month,1
656 BYMONTH,PAs,0
657 BYMONTH,AtBats,0
658 BYMONTH,Hits,0
659 BYMONTH,BA,0
660 BYMONTH,OBP,0
661 BYMONTH,SLG,0
662 BYMONTH,OPS,0
663 BYMONTH,_Bases,0
664 BYMONTH,_Reached_Base,0
665 BYPLAYER,Last_Name,1
666 BYPLAYER,First_Name,1
667 BYPLAYER,Player_ID,1
668 BYPLAYER,PAs,0
669 BYPLAYER,AtBats,0
670 BYPLAYER,Hits,0
671 BYPLAYER,BA,0
672 BYPLAYER,OBP,0
673 BYPLAYER,SLG,0
674 BYPLAYER,OPS,0
675 BYPLAYER,_Bases,0
676 BYPLAYER,_Reached_Base,0
677 BYPLAYERMONTH,Last_Name,1
678 BYPLAYERMONTH,First_Name,1
679 BYPLAYERMONTH,Player_ID,1
680 BYPLAYERMONTH,Month,1
681 BYPLAYERMONTH,PAs,0
682 BYPLAYERMONTH,AtBats,0
683 BYPLAYERMONTH,Hits,0
684 BYPLAYERMONTH,BA,0
685 BYPLAYERMONTH,OBP,0
686 BYPLAYERMONTH,SLG,0
687 BYPLAYERMONTH,OPS,0
688 BYPLAYERMONTH,_Bases,0
689 BYPLAYERMONTH,_Reached_Base,0
690 BYTEAM,Team_Name,1
691 BYTEAM,Team_SK,1
692 BYTEAM,PAs,0
693 BYTEAM,AtBats,0
694 BYTEAM,Hits,0
695 BYTEAM,BA,0
696 BYTEAM,OBP,0
697 BYTEAM,SLG,0
698 BYTEAM,OPS,0
699 BYTEAM,_Bases,0
700 BYTEAM,_Reached_Base,0
701 ;;;;
702 run;
703 /* "Games.sas" from the SAS Press book
704  Data Management Solutions Using SAS Hash Table Operations:
705  A Business Intelligence Case Study
706 */
707 
708 proc sql;
709  create table TEMPLATE.GAMES
710  (
711  Game_SK char(16) format=$HEX32. label = "Game Surrogate Key"
712  ,Date num format=YYMMDD10. label = "Game Date"
713  ,Time num format=TIMEAMPM8. label = "Game Time"
714  ,Year num label = "Year"
715  ,Month num label = "Month"
716  ,DayOfWeek num Label = "Day of the Week"
717  ,League_SK num label = "League"
718  ,Home_SK num label = "Home Team Surrogate Key"
719  ,Away_SK num label = "Away Team Surrogate Key"
720  );
721 quit;
722 
723 /* "LineUps.sas" from the SAS Press book
724  Data Management Solutions Using SAS Hash Table Operations:
725  A Business Intelligence Case Study
726 */
727 
728 proc sql;
729  create table TEMPLATE.LINEUPS
730  (
731  Game_SK char(16) format=$HEX32. label = "Game Surrogate Key",
732  Date num format=YYMMDD10. label = "Game Date",
733  Team_SK num label = "Team Surrogate Key",
734  Batting_Order num label = "Lineup Position",
735  Player_ID num format=Z5. label = "Player ID",
736  First_Name char(12) informat=$12. label = "First Name",
737  Last_Name char(12) informat=$12. label = "Last Name",
738  Position_Code char(3) informat=$3. label "Position",
739  Bats char(1) informat=$1. label = "Bats L, R or Switch",
740  Throws char(1) informat=$1. label = "Throws L or R"
741  );
742  create index LineUp on template.LINEUPS(Game_SK,Team_SK);
743 quit;
744 /* "Pitches.sas" from the SAS Press book
745  Data Management Solutions Using SAS Hash Table Operations:
746  A Business Intelligence Case Study
747 */
748 
749 proc sql;
750  create table TEMPLATE.PITCHES
751  (
752  Game_SK char(16) format=$HEX32. label = "Game Surrogate Key",
753  Date num format=YYMMDD10. label = "Game Date",
754  Team_SK num label = "Team Surrogate Key",
755  Pitcher_ID num label = "Pitcher_ID",
756  Pitcher_First_Name char(12) label = "Pitcher_First_Name",
757  Pitcher_Last_Name char(12) label = "Pitcher_Last_Name",
758  Pitcher_Bats char(1) informat=$1. label = "Bats L, R or Switch",
759  Pitcher_Throws char(1) informat=$1. label = "Throws L or R",
760  Pitcher_Type char(3) label "Starter or Reliever",
761  Inning num label = "Inning",
762  Top_Bot char(1) label = "Which Half Inning",
763  Result char(16) label = "Result of the At Bat",
764  AB_Number num label = "At Bat Number in Game",
765  Outs num label = "Number of Outs",
766  Balls num label = "Number of Balls",
767  Strikes num label = "Number of Strikes",
768  Pitch_Number num label = "Pitch Number in the AB",
769  Is_A_Ball num label = "Pitch is a Ball",
770  Is_A_Strike num label ="Pitch is Strike",
771  onBase num label ="Number of Men on Base"
772  );
773 quit;
774 /* "Player_Candidates.sas" from the SAS Press book
775  Data Management Solutions Using SAS Hash Table Operations:
776  A Business Intelligence Case Study
777 */
778 
779 proc sql;
780  create table TEMPLATE.PLAYER_CANDIDATES
781  (
782  Player_ID num format=Z5. label = "Player ID",
783  Team_SK num label = "Team Surrogate Key",
784  First_Name char(12) informat=$12. label = "First Name",
785  Last_Name char(12) informat=$12. label = "Last Name",
786  Position_Code char(3) informat=$3. label = "Batter Position",
787  Bats char(1) informat=$1. label = "Bats L, R or Switch",
788  Throws char(1) informat=$1. label = "Throws L or R"
789  );
790 quit;
791 /* "Player_SCD_All.sas" from the SAS Press book
792  Data Management Solutions Using SAS Hash Table Operations:
793  A Business Intelligence Case Study
794 */
795 
796 proc sql;
797  create table TEMPLATE.PLAYERS_SCD0
798  (
799  Player_ID num format=Z5. label = "Player ID",
800  Team_SK num label = "Team Surrogate Key",
801  First_Name char(12) informat=$12. label = "First Name",
802  Last_Name char(12) informat=$12. label = "Last Name",
803  Position_Code char(3) informat=$3. label "Batter Position",
804  Bats char(1) informat=$1. label = "Bats L, R or Switch",
805  Throws char(1) informat=$1. label = "Throws L or R"
806  );
807 quit;
808 
809 data TEMPLATE.PLAYERS_SCD1;
810  set TEMPLATE.PLAYERS_SCD0;
811 run;
812 
813 proc sql;
814  create table TEMPLATE.PLAYERS_SCD2
815  (
816  Player_ID num format=Z5. label = "Player ID",
817  Team_SK num label = "Team Surrogate Key",
818  First_Name char(12) informat=$12. label = "First Name",
819  Last_Name char(12) informat=$12. label = "Last Name",
820  Position_Code char(3) informat=$3. label "Batter Position",
821  Bats char(1) informat=$1. label = "Bats L, R or Switch",
822  Throws char(1) informat=$1. label = "Throws L or R",
823  Start_Date num format=YYMMDD10. label = "First Game Date",
824  End_Date num format=YYMMDD10. label = "Last Game Date"
825  );
826  create table TEMPLATE.PLAYERS LIKE TEMPLATE.PLAYERS_SCD2(drop=Position_Code);
827 quit;
828 
829 proc sql;
830  create table TEMPLATE.PLAYERS_SCD3
831  (
832  Player_ID num format=Z5. label = "Player ID",
833  Debut_Team_SK num label = "Debut Team Surrogate Key",
834  Team_SK num label = "Current Team Surrogate Key",
835  First_Name char(12) informat=$12. label = "First Name",
836  Last_Name char(12) informat=$12. label = "Last Name",
837  Bats char(1) informat=$1. label = "Bats L, R or Switch",
838  Throws char(1) informat=$1. label = "Throws L or R",
839  Position_Code char(3) informat=$3. label "Batter Position"
840  );
841 quit;
842 
843 proc sql;
844  create table TEMPLATE.PLAYERS_SCD3_FACTS
845  (
846  Player_ID num format=Z5. label = "Player ID",
847  Team_SK num label = "Current Team Surrogate Key",
848  First_Name char(12) informat=$12. label = "First Name",
849  Last_Name char(12) informat=$12. label = "Last Name",
850  First num label = "Games at First",
851  Second num label = "Games at Second",
852  Short num label = "Games at ShortStop",
853  Third num label = "Games at Third",
854  Left num label = "Games in Left",
855  Center num label = "Games in Center",
856  Right num label = "Games in Right",
857  Catcher num label = "Games at Catcher",
858  Pitcher num label = "Games at Pitcher",
859  Pinch_Hitter num label = "Games as a Pinch Hitter"
860  );
861  create table template.PLAYERS_POSITIONS_PLAYED LIKE TEMPLATE.PLAYERS_SCD3_FACTS;
862 quit;
863 
864 proc sql;
865  create table TEMPLATE.PLAYERS_SCD6
866  (
867  Player_ID num format=Z5. label = "Player ID",
868  Active num label = "Currently Active?",
869  SubKey num label = "Secondary Key",
870  Team_SK num label = "Team Surrogate Key",
871  First_Name char(12) informat=$12. label = "First Name",
872  Last_Name char(12) informat=$12. label = "Last Name",
873  Position_Code char(3) informat=$3. label "Batter Position",
874  Bats char(1) informat=$1. label = "Bats L, R or Switch",
875  Throws char(1) informat=$1. label = "Throws L or R",
876  Start_Date num format=YYMMDD10. label = "First Game Date",
877  End_Date num format=YYMMDD10. label = "Last Game Date"
878  );
879 quit;
880 /* "Runs.sas" from the SAS Press book
881  Data Management Solutions Using SAS Hash Table Operations:
882  A Business Intelligence Case Study
883 */
884 
885 proc sql;
886  create table TEMPLATE.RUNS
887  (
888  Game_SK char(16) format=$HEX32. label = "Game Surrogate Key",
889  Date num format=YYMMDD10. label = "Game Date",
890  Batter_ID num label = "Batter ID",
891  Inning num label = "Inning",
892  Top_Bot char(1) label = "Which Half Inning",
893  AB_Number num label = "At Bat Number in Game",
894  Runner_ID num label = "ID of Runner Who Scored"
895  );
896 quit;
897 /*******************************************************************
898  Datalines for SCHEMA_METADATA dataset
899  Generated by %mp_ds2cards()
900  Licensed under GNU, available on github.com/boemska/macrocore
901 ********************************************************************/
902 data template.SCHEMA_METADATA ;
903 attrib
904 hashTable length= $32 label="Member Name"
905 Column length= $32 label="Column Name"
906 Is_A_Key length= 8
907 ;
908 infile cards dsd delimiter=',';
909 input
910  hashTable :$char.
911  Column :$char.
912  Is_A_Key
913 ;
914 datalines4;
915 ATBATS,Game_SK,1
916 ATBATS,Batter_ID,.
917 ATBATS,Position_Code,.
918 ATBATS,Inning,.
919 ATBATS,Top_Bot,.
920 ATBATS,AB_Number,.
921 ATBATS,Result,.
922 ATBATS,Direction,.
923 ATBATS,Distance,.
924 ATBATS,Outs,.
925 ATBATS,Balls,.
926 ATBATS,Strikes,.
927 ATBATS,onFirst,.
928 ATBATS,onSecond,.
929 ATBATS,onThird,.
930 ATBATS,onBase,.
931 ATBATS,Left_On_Base,.
932 ATBATS,Runs,.
933 ATBATS,Is_An_AB,.
934 ATBATS,Is_An_Out,.
935 ATBATS,Is_A_Hit,.
936 ATBATS,Is_An_OnBase,.
937 ATBATS,Bases,.
938 ATBATS,Number_of_Pitches,.
939 CHAPTER10LOOKUPTABLES,hashTable,1
940 CHAPTER10LOOKUPTABLES,Column,.
941 CHAPTER10LOOKUPTABLES,Is_A_Key,.
942 CHAPTER10LOOKUPTABLES,datasetTag,.
943 CHAPTER10SPLITS,hashTable,1
944 CHAPTER10SPLITS,Column,.
945 CHAPTER10SPLITS,is_A_Key,.
946 CHAPTER9PARMFILE,hashTable,1
947 CHAPTER9PARMFILE,Column,.
948 CHAPTER9PARMFILE,is_A_Key,.
949 GAMES,Game_SK,1
950 GAMES,Date,.
951 GAMES,Time,.
952 GAMES,Year,.
953 GAMES,Month,.
954 GAMES,DayOfWeek,.
955 GAMES,League,.
956 GAMES,Home_SK,.
957 GAMES,Away_SK,.
958 LINEUPS,Game_SK,1
959 LINEUPS,Team_SK,.
960 LINEUPS,Batting_Order,.
961 LINEUPS,Player_ID,.
962 LINEUPS,Position_Code,.
963 LINEUPS,Bats,.
964 LINEUPS,Throws,.
965 PITCHES,Game_SK,1
966 PITCHES,Pitcher_ID,.
967 PITCHES,Pitcher_First_Name,.
968 PITCHES,Pitcher_Last_Name,.
969 PITCHES,Pitcher_Type,.
970 PITCHES,Inning,.
971 PITCHES,Top_Bot,.
972 PITCHES,Result,.
973 PITCHES,AB_Number,.
974 PITCHES,Outs,.
975 PITCHES,Balls,.
976 PITCHES,Strikes,.
977 PITCHES,Pitch_Number,.
978 PITCHES,Is_A_Ball,.
979 PITCHES,Is_A_Strike,.
980 PITCHES,onBase,.
981 PLAYERS,Player_ID,1
982 PLAYERS,Team_SK,.
983 PLAYERS,First_Name,.
984 PLAYERS,Last_Name,.
985 PLAYERS,Bats,.
986 PLAYERS,Throws,.
987 PLAYERS,Start_Date,.
988 PLAYERS,End_Date,.
989 PLAYERS_POSITIONS_PLAYED,Player_ID,1
990 PLAYERS_POSITIONS_PLAYED,First,.
991 PLAYERS_POSITIONS_PLAYED,Second,.
992 PLAYERS_POSITIONS_PLAYED,Short,.
993 PLAYERS_POSITIONS_PLAYED,Third,.
994 PLAYERS_POSITIONS_PLAYED,Left,.
995 PLAYERS_POSITIONS_PLAYED,Center,.
996 PLAYERS_POSITIONS_PLAYED,Right,.
997 PLAYERS_POSITIONS_PLAYED,Catcher,.
998 PLAYERS_POSITIONS_PLAYED,Pitcher,.
999 PLAYERS_POSITIONS_PLAYED,Pinch_Hitter,.
1000 RUNS,Game_SK,1
1001 RUNS,Inning,.
1002 RUNS,Top_Bot,.
1003 RUNS,AB_Number,.
1004 RUNS,Runner_ID,.
1005 ;;;;
1006 run;
1007 /* "Chapter 5 GenerateTeams.sas" from the SAS Press book
1008  Data Management Solutions Using SAS Hash Table Operations:
1009  A Business Intelligence Case Study
1010 */
1011 
1012 data bizarro.teams;
1013  /* Select team names from 100 most popular team names.
1014  Source: http://mascotdb.com/lists.php?id=5
1015  */
1016  keep League_SK Team_SK Team_Name;
1017  keep League; /* fix for rename issue found post-publication */
1018  label League_SK = "League Surrogate Key"
1019  Team_SK = "Team Surrogate Key"
1020  Team_Name = "Team Name"
1021  ;
1022  retain League_SK . Team_SK 100;
1023  if _n_ = 1 then
1024  do; /* create hash table */
1025  declare hash teams();
1026  rc = teams.defineKey("Team_Name");
1027  rc = teams.defineData("Team_SK","Team_Name");
1028  rc = teams.defineDone();
1029  end; /* create hash table */
1030  infile datalines eof=lr;
1031  input Team_Name $16.;
1032  Team_SK + ceil(uniform(&seed1)*4);
1033  rc = teams.add();
1034  return;
1035  lr:
1036  declare hiter teamIter("teams");
1037  do i = 1 to 2*&nTeamsPerLeague;
1038  rc = teamIter.next();
1039  League_SK = int((i-1)/&nTeamsPerLeague) + 1;
1040  League = League_SK; /* fix for rename issue found post-publication */
1041  output;
1042  end;
1043 datalines;
1044 Eagles
1045 Tigers
1046 Bulldogs
1047 Panthers
1048 Wildcats
1049 Warriors
1050 Lions
1051 Indians
1052 Cougars
1053 Knights
1054 Mustangs
1055 Falcons
1056 Trojans
1057 Cardinals
1058 Vikings
1059 Pirates
1060 Raiders
1061 Rams
1062 Spartans
1063 Bears
1064 Hornets
1065 Patriots
1066 Hawks
1067 Crusaders
1068 Rebels
1069 Bobcats
1070 Saints
1071 Braves
1072 Blue Devils
1073 Titans
1074 Wolverines
1075 Jaguars
1076 Wolves
1077 Dragons
1078 Pioneers
1079 Chargers
1080 Rockets
1081 Huskies
1082 Red Devils
1083 Yellowjackets
1084 Chiefs
1085 Stars
1086 Comets
1087 Colts
1088 Lancers
1089 Rangers
1090 Broncos
1091 Giants
1092 Senators
1093 Bearcats
1094 Thunder
1095 Royals
1096 Storm
1097 Cowboys
1098 Cubs
1099 Cavaliers
1100 Golden Eagles
1101 Generals
1102 Owls
1103 Buccaneers
1104 Hurricanes
1105 Bruins
1106 Grizzlies
1107 Gators
1108 Bombers
1109 Red Raiders
1110 Flyers
1111 Lakers
1112 Miners
1113 Redskins
1114 Coyotes
1115 Longhorns
1116 Greyhounds
1117 Beavers
1118 Yellow Jackets
1119 Outlaws
1120 Reds
1121 Highlanders
1122 Sharks
1123 Oilers
1124 Jets
1125 Dodgers
1126 Mountaineers
1127 Red Sox
1128 Thunderbirds
1129 Blazers
1130 Clippers
1131 Aces
1132 Buffaloes
1133 Lightning
1134 Bluejays
1135 Gladiators
1136 Mavericks
1137 Monarchs
1138 Tornadoes
1139 Blues
1140 Cobras
1141 Bulls
1142 Express
1143 Stallions
1144 ;
1145 data bizarro.leagues;
1146  label League_SK = "League Surrogate Key"
1147  League = "League"
1148  ;
1149  League_SK = 1;
1150  League = 'Eastern';
1151  output;
1152  League_SK = 2;
1153  League = 'Western';
1154  output;
1155 run;
1156 /* "Chapter 5 GeneratePositionsDimensionTable.sas" from the SAS Press book
1157  Data Management Solutions Using SAS Hash Table Operations:
1158  A Business Intelligence Case Study
1159 */
1160 
1161 data _null_;
1162  infile datalines eof=readall;
1163  /* Hash Object as an in memory table */
1164  if _n_ = 1 then
1165  do; /* define just once */
1166  declare hash positions(ordered:"a");
1167  positions.defineKey("Position_Grp_SK");
1168  positions.defineData("Position_Grp_SK","Position_Code","Position","Count","Starters");
1169  positions.defineDone();
1170  end; /* define just once */
1171  informat Position_Code $3. Position $17. Count Starters 8.;
1172  label Position_Grp_SK = "Position Group Surrogate Key"
1173  Position_Code = "Position Code"
1174  Position = "Position Description"
1175  Count = "Number of Players"
1176  Starters = "Number of Starters"
1177  ;
1178  input Position_Code Position & Count Starters;
1179  Position_Grp_SK + 1;
1180  positions.add(); /* could also use positions.add() or positions.ref() */
1181  return;
1182  readall:
1183  /* output a sorted version of our table */
1184  positions.output(dataset:"Bizarro.Positions");
1185  return;
1186  datalines;
1187 SP Starting Pitcher 4 1
1188 RP Relief Pitcher 6 0
1189 C Catcher 2 1
1190 CIF Corner Infielder 3 2
1191 MIF Middle Infielder 3 2
1192 COF Corner Outfielder 3 2
1193 CF Center Fielder 2 1
1194 UT Utility 2 0
1195 ;
1196 data _null_;
1197  infile datalines eof=readall;
1198  /* Hash Object as an in memory table */
1199  if _n_ = 1 then
1200  do; /* define just once */
1201  declare hash positions(ordered:"a");
1202  positions.defineKey("Position_SK");
1203  positions.defineData("Position_SK","Position_Grp_FK","Position_Code","Position");
1204  positions.defineDone();
1205  end; /* define just once */
1206  informat Position_Grp_FK 8. Position_Code $3. Position $17.;
1207  label Position_SK = "Position Surrogate Key"
1208  Position_Grp_FK = "Position Group Surrogate Key"
1209  Position_Code = "Position Code"
1210  Position = "Position Description"
1211 
1212  ;
1213  input Position_Grp_FK Position_Code Position &;
1214  Position_SK + 1;
1215  positions.add(); /* could also use positions.add() or positions.ref() */
1216  return;
1217  readall:
1218  /* output a sorted version of our table */
1219  positions.output(dataset:"Bizarro.Positions_Snowflake");
1220  return;
1221  datalines;
1222 4 1B First Baseman
1223 4 3B Third Baseman
1224 5 2B Second Baseman
1225 5 SS Shortstop
1226 6 LF Left Fielder
1227 6 RF Right Fielder
1228 ;
1229 /* "Chapter 5 GeneratePlayerCandidates.sas" from the SAS Press book
1230  Data Management Solutions Using SAS Hash Table Operations:
1231  A Business Intelligence Case Study
1232 */
1233 
1234 data first_names;
1235  /* SRC: https://www.ssa.gov/oact/babynames/decades/century.html */
1236  infile datalines;
1237  informat First_Name $12.;
1238  input First_Name $;
1239  First_Name = propcase(First_Name);
1240  n + 1;
1241  datalines;
1242 James
1243 John
1244 Robert
1245 Michael
1246 William
1247 David
1248 Richard
1249 Joseph
1250 Thomas
1251 Charles
1252 Christopher
1253 Daniel
1254 Matthew
1255 Anthony
1256 Donald
1257 Mark
1258 Paul
1259 Steven
1260 George
1261 Kenneth
1262 Andrew
1263 Joshua
1264 Edward
1265 Brian
1266 Kevin
1267 Ronald
1268 Timothy
1269 Jason
1270 Jeffrey
1271 Ryan
1272 Gary
1273 Jacoby
1274 Nicholas
1275 Eric
1276 Stephen
1277 Jonathan
1278 Larry
1279 Scott
1280 Frank
1281 Justin
1282 Brandon
1283 Raymond
1284 Gregory
1285 Samuel
1286 Benjamin
1287 Patrick
1288 Jack
1289 Alexander
1290 Dennis
1291 Jerry
1292 Tyler
1293 Aaron
1294 Henry
1295 Douglas
1296 Peter
1297 Jose
1298 Adam
1299 Zachary
1300 Walter
1301 Nathan
1302 Harold
1303 Kyle
1304 Carl
1305 Arthur
1306 Gerald
1307 Roger
1308 Keith
1309 Jeremy
1310 Lawrence
1311 Terry
1312 Sean
1313 Albert
1314 Joe
1315 Christian
1316 Austin
1317 Willie
1318 Jesse
1319 Ethan
1320 Billy
1321 Bruce
1322 Bryan
1323 Ralph
1324 Roy
1325 Jordan
1326 Eugene
1327 Wayne
1328 Louis
1329 Dylan
1330 Alan
1331 Juan
1332 Noah
1333 Russell
1334 Harry
1335 Randy
1336 Philip
1337 Vincent
1338 Gabriel
1339 Bobby
1340 Johnny
1341 Howard
1342 ;
1343 data last_names;
1344  /* SRC: http://names.mongabay.com/most_common_surnames.htm */
1345  infile datalines;
1346  informat Last_Name $12.;
1347  input Last_Name $;
1348  Last_Name = propcase(Last_Name);
1349  n + 1;
1350 datalines;
1351 SMITH
1352 JOHNSON
1353 WILLIAMS
1354 JONES
1355 BROWN
1356 DAVIS
1357 MILLER
1358 WILSON
1359 MOORE
1360 TAYLOR
1361 ANDERSON
1362 THOMAS
1363 JACKSON
1364 WHITE
1365 HARRIS
1366 MARTIN
1367 THOMPSON
1368 GARCIA
1369 MARTINEZ
1370 ROBINSON
1371 CLARK
1372 RODRIGUEZ
1373 LEWIS
1374 LEE
1375 WALKER
1376 HALL
1377 ALLEN
1378 YOUNG
1379 HERNANDEZ
1380 KING
1381 WRIGHT
1382 LOPEZ
1383 HILL
1384 SCOTT
1385 GREEN
1386 ADAMS
1387 BAKER
1388 GONZALEZ
1389 NELSON
1390 CARTER
1391 MITCHELL
1392 PEREZ
1393 ROBERTS
1394 TURNER
1395 PHILLIPS
1396 CAMPBELL
1397 PARKER
1398 EVANS
1399 EDWARDS
1400 COLLINS
1401 STEWART
1402 SANCHEZ
1403 MORRIS
1404 ROGERS
1405 REED
1406 COOK
1407 MORGAN
1408 BELL
1409 MURPHY
1410 BAILEY
1411 RIVERA
1412 COOPER
1413 RICHARDSON
1414 COX
1415 HOWARD
1416 WARD
1417 TORRES
1418 PETERSON
1419 GRAY
1420 RAMIREZ
1421 JAMES
1422 WATSON
1423 BROOKS
1424 KELLY
1425 SANDERS
1426 PRICE
1427 BENNETT
1428 WOOD
1429 BARNES
1430 ROSS
1431 HENDERSON
1432 COLEMAN
1433 JENKINS
1434 PERRY
1435 POWELL
1436 LONG
1437 PATTERSON
1438 HUGHES
1439 FLORES
1440 WASHINGTON
1441 BUTLER
1442 SIMMONS
1443 FOSTER
1444 GONZALES
1445 BRYANT
1446 ALEXANDER
1447 RUSSELL
1448 GRIFFIN
1449 DIAZ
1450 HAYES
1451 ;
1452 data _null_;
1453  if 0 then set template.player_candidates;
1454  retain Player_ID 10000 Team_SK 0;
1455  declare hash positionsDist();
1456  rc = positionsDist.defineKey("Index");
1457  rc = positionsDist.defineData("Index","Position_Code","Count");
1458  rc = positionsDist.defineDone();
1459  lr = 0;
1460  Index = 0;
1461  do until(lr);
1462  set bizarro.positions end=lr;
1463  do i = 1 to Count;
1464  Index + 1;
1465  rc = positionsDist.add();
1466  end;
1467  end;
1468  rc = positionsDist.output(dataset:"positions");
1469 
1470  declare hash fname(dataset: "first_names");
1471  rc = fname.defineKey("First_Name");
1472  rc = fname.defineData("First_Name");
1473  rc = fname.defineDone();
1474  declare hiter first_iter("fname");
1475 
1476  declare hash lname(dataset: "last_names");
1477  rc = lname.defineKey("Last_Name");
1478  rc = lname.defineData("Last_Name");
1479  rc = lname.defineDone();
1480  declare hiter last_iter("lname");
1481 
1482  declare hash players();
1483  rc = players.defineKey("Arbtrary","First_Name","Last_Name");
1484  rc = players.defineData("Player_ID","Team_SK","First_Name","Last_Name"
1485  ,"Position_Code","Bats","Throws");
1486  rc = players.defineDone();
1487 
1488  Arbtrary = 0;
1489  do frc = first_iter.first() by 0 while(frc = 0);
1490  do lrc = last_iter.first() by 0 while(lrc = 0);
1491  Arbitrary + 1;
1492  positionsDist.find(Key:ceil(uniform(&seed2)*&nPlayersPerTeam));
1493  Player_ID + ceil(uniform(&seed3)*9);
1494  random = uniform(&seed10);
1495  if random le .1 then Bats = "S";
1496  else if random le .35 then Bats = "L";
1497  else Bats = "R";
1498  if uniform(&seed11) le .3 then Throws = "L";
1499  else Throws = "R";
1500  players.add();
1501  lrc = last_iter.next();
1502  end;
1503  frc = first_iter.next();
1504  end;
1505  players.output(dataset:"bizarro.player_candidates");
1506 
1507 run;/* "Chapter 5 AssignPlayersToTeams.sas" from the SAS Press book
1508  Data Management Solutions Using SAS Hash Table Operations:
1509  A Business Intelligence Case Study
1510 */
1511 
1512 data positions;
1513  set bizarro.positions;
1514  retain DummyKey 1;
1515  drop position;
1516 run;
1517 
1518 data _null_;
1519 
1520  if 0 then set template.player_candidates;
1521  retain Player_ID 10000 Start_Date "01MAR2017"d End_Date &SCD_End_Date;
1522  format Player_ID z5. Start_Date End_Date mmddyy10.;
1523  informat Start_Date End_Date yymmdd10.;
1524 
1525  /* load the available players */
1526  declare hash available(dataset:"bizarro.player_candidates",multidata:"yes");
1527  rc = available.defineKey("Position_Code");
1528  rc = available.defineData("Player_ID","Team_SK","First_Name","Last_Name"
1529  ,"Position_Code","Bats","Throws");
1530  rc = available.defineDone();
1531 
1532  /* load the hash table of positions */
1533  declare hash positions(dataset:"positions",multidata:"yes");
1534  rc = positions.defineKey("DummyKey");
1535  rc = positions.defineData("Position_Code","Count");
1536  rc = positions.defineDone();
1537 
1538  /* load the list of teams */
1539  declare hash teams(dataset:"Bizarro.teams");
1540  rc = teams.defineKey("Team_SK");
1541  rc = teams.defineDone();
1542  declare hiter teams_iter("teams");
1543 
1544  DummyKey = 1;
1545  pos_rc = positions.find();
1546  avail_rc = available.find();
1547  do until(pos_rc);
1548  teams_rc = teams_iter.first();
1549  do until(teams_rc);
1550  Team = Team_SK;
1551  do i = 1 to Count;
1552  Team_SK = Team;
1553  available.replaceDup();
1554  avail_rc = available.find_next();
1555  end;
1556  teams_rc = teams_iter.next();
1557  end;
1558  pos_rc = positions.find_next();
1559  avail_rc = available.find();
1560  end;
1561  rc = available.output(dataset:"bizarro.player_candidates");
1562 run;
1563 
1564 data bizarro.trades;
1565  format trade_date yymmdd10.;
1566  input trade_date yymmdd10. traded_id _position_code $3. _team_sk traded_to;
1567 datalines;
1568 2017/06/23 10090 SP 269 115
1569 2017/06/23 10753 SP 115 269
1570 2017/07/26 10103 COF 171 228
1571 2017/07/26 10760 COF 228 171
1572 2017/08/30 10145 CF 193 130
1573 2017/08/30 10732 CF 130 193
1574 ;
1575 
1576 /* "Chapter 5 GenerateSchedule.sas" from the SAS Press book
1577  Data Management Solutions Using SAS Hash Table Operations:
1578  A Business Intelligence Case Study
1579 */
1580 
1581 data _null_;
1582  retain Team_SK Team1_SK Team2_SK .; /* define hash data items */
1583  format Date yymmdd10.;
1584 
1585  declare hash team1(dataset:"bizarro.teams(rename=(Team_SK=Team1_SK))",multidata:"y");
1586  rc = team1.defineKey("League_SK");
1587  rc = team1.defineData("League_SK","Team1_SK");
1588  rc = team1.defineDone();
1589 
1590  declare hash team2(dataset:"bizarro.teams(rename=(Team_SK=Team2_SK))",multidata:"y");
1591  rc = team2.defineKey("League_SK");
1592  rc = team2.defineData("League_SK","Team2_SK");
1593  rc = team2.defineDone();
1594 
1595  declare hash matchUps(multidata:"y");
1596  rc = matchUps.defineKey("League_SK");
1597  rc = matchUps.defineData("League_SK","Team1_SK","Team2_SK");
1598  rc = matchUps.defineDone();
1599 
1600  declare hash used();
1601  rc = used.defineKey("League_SK","Team_SK");
1602  rc = used.defineData("League_SK","Team_SK");
1603  rc = used.defineDone();
1604 
1605  declare hash schedule();
1606  rc = schedule.defineKey("League_SK","Team1_SK","Team2_SK");
1607  rc = schedule.defineData("League_SK","Team1_SK","Team2_SK","Date","Home");
1608  rc = schedule.defineDone();
1609 
1610  do League_SK = 1 to 2; /* loop thru Leagues */
1611  Team1_rc = team1.find();
1612  do while(Team1_rc=0); /* loop thru Team1 Teams */
1613  Team2_rc = team2.find();
1614  do while(Team2_rc=0); /* loop thru Team2 Teams for each Team1 team */
1615  if Team2_SK ne Team1_SK then
1616  do;
1617  addrc = matchUps.add();
1618  end;
1619  Team2_rc = team2.find_next();
1620  end; /* loop thru Team2 Teams for each Team1 team */
1621  Team1_rc = team1.find_next();
1622  end; /* loop thru Team1 Teams */
1623  end; /* loop thru Leagues */
1624 
1625  do League_SK = 1 to 2; /* loop thru Leagues */
1626  Date = "&seasonStartDate"d - 3;
1627  do Combo = 1 to &nWeeksSeason; /* create the matchup sets */
1628  if mod(combo,2) = 1 then Date + 3;
1629  else Date + 4;
1630  games = 0;
1631  matchUps_rc = matchUps.find();
1632  do while(matchUps_rc = 0 and games lt &nTeamsPerLeague/2);
1633  if used.check(key:League_SK,key:Team1_SK) ne 0
1634  and used.check(key:League_SK,key:Team2_SK) ne 0
1635  then
1636  do; /* combinations not yet used */
1637  Home = ceil(uniform(&seed4)*2);
1638  if schedule.add() = 0 then
1639  do; /* add to schedule */
1640  games + 1;
1641  rc = used.add(Key:League_SK,Key:Team1_SK,Data:League_SK,Data:Team1_SK);
1642  rc = used.add(Key:League_SK,Key:Team2_SK,Data:League_SK,Data:Team1_SK);
1643  end; /* add to schedule */
1644  end; /* combinations not yet used */
1645  matchUps_rc = matchUps.find_next();
1646  end;
1647  used.clear(); /* loop thru matchups */
1648  end;/* loop thru Leagues */
1649  end;
1650  schedule.output(dataset:"Games");
1651  stop;
1652 run;
1653 
1654 proc sort data = games out = games;
1655  by League_SK Date;
1656 run;
1657 
1658 data bizarro.games;
1659  if 0 then set template.games;
1660  drop Team1_SK Team2_SK Home D;
1661  format Time Timeampm8.;
1662  set games;
1663  League = League_SK; /* fix for rename issue found post-publication */
1664  D = Date;
1665  retain D;
1666  array _homeaway team1_sk team2_sk;
1667  Home_SK = _homeaway(Home);
1668  Away_SK = _homeaway(3-Home);
1669  do Date = D to D+2;
1670  Year = Year(Date);
1671  Month = Month(Date);
1672  DayOfWeek = weekday(Date);
1673  if DayOfWeek = 5 then Time = "16:00"t;
1674  else if DayOfWeek = 1 then Time = "13:00"t;
1675  else time = "19:00"t;
1676  Game_SK = md5(catx(":",League_SK,Away_SK,Home_SK,Date,Time));
1677  output;
1678  end;
1679  /* Reverse home/away for second half of the season */
1680  D + 7*&nWeeksSeason/2;
1681  Home_SK = _homeaway(3-Home);
1682  Away_SK = _homeaway(Home);
1683  do Date = D to D+2;
1684  Year = Year(Date);
1685  Month = Month(Date);
1686  DayOfWeek = weekday(Date);
1687  Game_SK = md5(catx(":",League_SK,Away_SK,Home_SK,Date,Time));
1688  output;
1689  end;
1690 run;
1691 /* "Chapter 5 GeneratePitchDistribution.sas" from the SAS Press book
1692  Data Management Solutions Using SAS Hash Table Operations:
1693  A Business Intelligence Case Study
1694 */
1695 
1696 data bizarro.pitch_distribution;
1697  Pitch_Distribution_SK = _n_;
1698  input Result $16. AB_Done Is_An_AB Is_An_Out Is_A_Hit Is_An_OnBase Bases Runners_Advance_Factor From To;
1699  label Pitch_Distribution_SK = "Pitch Distribution Surrogate Key"
1700  Result = "Result of the Pitch"
1701  AB_Done = "Final Pitch of the AB"
1702  Is_An_AB = "Counts as an AB"
1703  Is_An_Out = "Is an Out"
1704  Is_A_Hit = "Is a Hit"
1705  Is_An_OnBase = "Counts as an On Base"
1706  Bases = "Number of Bases for the Hit"
1707  Runners_Advance_Factor = "Proabability of an Extra Base"
1708  From = "Lower Range of Distribution"
1709  to = "Upper Range of Distribution"
1710  ;
1711  datalines;
1712 Ball 0 . . . . . . 1 33
1713 Called Strike 0 . . . . . . 34 48
1714 Double 1 1 0 1 1 2 .7 49 51
1715 Error 1 1 0 0 0 1 .4 52 52
1716 Foul 0 . . . . . . 53 68
1717 Hit By Pitch 1 0 0 0 1 1 0 69 69
1718 Home Run 1 1 0 1 1 4 0 70 71
1719 Out 1 1 1 0 0 0 0 72 83
1720 Single 1 1 0 1 1 1 .5 84 88
1721 Swinging Strike 0 . . . . . . 89 99
1722 Triple 1 1 0 1 1 3 0 100 100
1723 run;
1724 
1725 data bizarro.hit_distance;
1726  Hit_Distance_SK = _n_;
1727  input Pitch_Distribution_FK MinDistance MaxDistance;
1728  label Hit_Distance_SK = "Hit Distance Surrogate Key"
1729  Pitch_Distribution_FK = "Pitch Distribution Foreign Key"
1730  MinDistance = "Hit Distance Minimum"
1731  MaxDistance = "Hit Distance Maximum"
1732  ;
1733  datalines;
1734  3 200 300
1735  4 50 300
1736  7 390 480
1737  8 3 385
1738  9 10 100
1739 11 310 390
1740 run;%generateLineUps(from=&seasonStartDate,nWeeks=&nWeeksSeason)
1741 %generatePitchAndPAData(from=&seasonStartDate,nWeeks=&nWeeksSeason)
1742 /* "Chapter 7 Create Star Schema DW.sas" from the SAS Press book
1743  Data Management Solutions Using SAS Hash Table Operations:
1744  A Business Intelligence Case Study
1745 */
1746 
1747 proc datasets lib = dw nolist;
1748  options obs = 0;
1749  copy in=bizarro out=dw;
1750  select AtBats
1751  Pitches
1752  Runs
1753  Games;
1754  copy in=template out=dw;
1755  select Players_Positions_Played Players;
1756 run;
1757  options obs = max;
1758  copy in=bizarro out=dw;
1759  select Leagues
1760  Teams;
1761 quit;
1762 
1763 /* The following step added post-publication to address the issue with
1764  an earlier rename of League to League_SK.
1765 */
1766 proc sql;
1767  alter table dw.teams
1768  drop league;
1769 quit;
1770 /* "Chapter 7 SCD 0.sas" from the SAS Press book
1771  Data Management Solutions Using SAS Hash Table Operations:
1772  A Business Intelligence Case Study
1773 */
1774 
1775 data _null_;
1776  if _n_ = 1 then
1777  do; /* define the hash table */
1778  dcl hash scd(dataset:
1779  ifc(exist("bizarro.Players_SCD0")
1780  ,"bizarro.Players_SCD0"
1781  ,"template.Players_SCD0"
1782  )
1783  ,ordered:"A");
1784  scd.defineKey("Player_ID");
1785  scd.defineData("Team_SK","Player_ID","First_Name"
1786  ,"Last_Name","Position_Code");
1787  scd.defineDone();
1788  end; /* define the hash table */
1789  set bizarro.AtBats(rename=(Batter_ID=Player_ID))
1790  end=lr;
1791  RC = scd.add();
1792  if lr;
1793  scd.output(dataset:"Bizarro.Players_SCD0");
1794  stop;
1795  set template.Players_SCD0;
1796 run;
1797 
1798 data tableLookup;
1799  /* sample lookup code */
1800  if 0 then set bizarro.Players_SCD0;
1801  dcl hash scd(dataset:"bizarro.Players_SCD0");
1802  scd.defineKey("Player_ID");
1803  scd.defineData("Team_SK","Player_ID","First_Name"
1804  ,"Last_Name","Position_Code");
1805  scd.defineDone();
1806 
1807  /* first a key not yet in the table */
1808  call missing(Team_SK,First_Name,Last_Name
1809  ,Position_Code);
1810  Player_Id = 00001;
1811  RC = scd.find();
1812  output;
1813 
1814  /* now a key already in the table */
1815  call missing(Team_SK,First_Name,Last_Name
1816  ,Position_Code);
1817  Player_Id = 10103;
1818  RC = scd.find();
1819  output;
1820  stop;
1821 run;/* "Chapter 7 SCD 1.sas" from the SAS Press book
1822  Data Management Solutions Using SAS Hash Table Operations:
1823  A Business Intelligence Case Study
1824 */
1825 
1826 data _null_;
1827  if _n_ = 1 then
1828  do; /* define the hash table */
1829  dcl hash scd(dataset:
1830  ifc(exist("bizarro.Players_SCD1")
1831  ,"bizarro.Players_SCD1"
1832  ,"template.Players_SCD1"
1833  )
1834  ,ordered:"A");
1835  scd.defineKey("Player_ID");
1836  scd.defineData("Team_SK","Player_ID","First_Name"
1837  ,"Last_Name","Position_Code");
1838  scd.defineDone();
1839  end; /* define the hash table */
1840  set bizarro.atbats(rename=(Batter_ID=Player_ID))
1841  end=lr;
1842  rc = scd.replace();
1843  if lr;
1844  scd.output(dataset:"Bizarro.Players_SCD1");
1845  stop;
1846  set template.players_scd1;
1847 run;
1848 
1849 data tableLookUp;
1850  /* sample lookup code */
1851  if 0 then set bizarro.players_SCD1;
1852  dcl hash scd(dataset:"bizarro.players_SCD1");
1853  scd.defineKey("Player_ID");
1854  scd.defineData("Team_SK","Player_ID","First_Name"
1855  ,"Last_Name","Position_Code");
1856  scd.defineDone();
1857 
1858  /* first a key with no data items */
1859  call missing(Team_SK,First_Name,Last_Name
1860  ,Position_Code);
1861  Player_Id = 00001;
1862  RC = scd.find();
1863  output;
1864  /* now a key with a row of data items */
1865  call missing(Team_SK,First_Name,Last_Name
1866  ,Position_Code);
1867  Player_Id = 10103;
1868  RC = scd.find();
1869  output;
1870  stop;
1871 run;/* "Chapter 7 SCD 2.sas" from the SAS Press book
1872  Data Management Solutions Using SAS Hash Table Operations:
1873  A Business Intelligence Case Study
1874 */
1875 
1876 data _null_;
1877  if 0 then set template.Players_SCD2;
1878  if _n_ = 1 then
1879  do; /* define the hash table */
1880  dcl hash scd(dataset:
1881  ifc(exist("bizarro.Players_SCD2")
1882  ,"bizarro.Players_SCD2"
1883  ,"template.Players_SCD2"
1884  )
1885  ,ordered:"A",multidata:"Y");
1886  scd.defineKey("Player_ID");
1887  scd.defineData("Player_ID","Team_SK"
1888  ,"First_Name","Last_Name"
1889  ,"Position_Code","Bats","Throws"
1890  ,"Start_Date","End_Date");
1891  scd.defineDone();
1892  end; /* define the hash table */
1893 
1894  set bizarro.atbats
1895  (rename = (Batter_ID = Player_ID
1896  Team_SK = _Team_SK
1897  First_Name = _First_Name
1898  Last_Name = _Last_Name
1899  Position_Code = _Position_Code
1900  Bats = _Bats
1901  Throws = _Throws)
1902  ) end=lr;
1903 
1904 
1905  if scd.check() ne 0 then
1906  do; /* need to add the player */
1907  scd.add(key: Player_ID
1908  ,data: Player_ID
1909  ,data: _Team_SK
1910  ,data: _First_Name
1911  ,data: _Last_Name
1912  ,data: _Position_Code
1913  ,data: _Bats
1914  ,data: _Throws
1915  ,data: Date
1916  ,data: &SCD_End_Date
1917  );
1918  end; /* need to add the player */
1919  else
1920  do; /* check to see if there are changes */
1921 
1922  RC = scd.find();
1923  do while(RC = 0);
1924  if (Start_Date le Date le End_Date) then leave;
1925  RC = scd.find_next();
1926  end;
1927 
1928  if catx(":", Team_SK, First_Name, Last_Name
1929  , Position_Code, Bats, Throws) ne
1930  catx(":",_Team_SK,_First_Name,_Last_Name
1931  ,_Position_Code,_Bats,_Throws) then
1932  do; /* date out prior record and add new one */;
1933  if RC = 0 then scd.replaceDup(data: Player_ID
1934  ,data: Team_SK
1935  ,data: First_Name
1936  ,data: Last_Name
1937  ,data: Position_Code
1938  ,data: Bats
1939  ,data: Throws
1940  ,data: Start_Date
1941  ,data: Date-1
1942  );
1943  scd.add(key: Player_ID
1944  ,data: Player_ID
1945  ,data: _Team_SK
1946  ,data: _First_Name
1947  ,data: _Last_Name
1948  ,data: _Position_Code
1949  ,data: _Bats
1950  ,data: _Throws
1951  ,data: Date
1952  ,data: &SCD_End_Date
1953  );
1954  end; /* date out prior record and add new one */;
1955  end; /* check to see if there are changes */
1956  if lr;
1957  scd.output(dataset:"bizarro.Players_SCD2");
1958  stop;
1959 run;
1960 
1961 data tableLookup;
1962  /* Sample Lookup */
1963  if 0 then set bizarro.Players_SCD2;
1964  if _n_ = 1 then
1965  do;
1966  dcl hash scd(dataset:"bizarro.Players_SCD2"
1967  ,multidata:"Y");
1968  scd.defineKey("Player_ID");
1969  scd.defineData("Team_SK","Player_ID","First_Name"
1970  ,"Last_Name","Position_Code","Bats"
1971  ,"Throws","Start_Date","End_Date");
1972  scd.defineDone();
1973  end;
1974  infile datalines;
1975  attrib Date format = yymmdd10. informat = yymmdd10.;
1976  input Player_ID Date;
1977  RC = scd.find();
1978  do while(RC = 0);
1979  if (Start_Date le Date le End_Date) then leave;
1980  RC = scd.find_next();
1981  end;
1982  if RC ne 0 then call missing(Team_SK,First_Name
1983  ,Last_Name,Position_Code
1984  ,Bats,Throws
1985  ,Start_Date,End_Date);
1986 datalines;
1987 10103 2017/03/23
1988 10103 2017/07/26
1989 99999 2017/04/15
1990 10782 2017/03/22
1991 10782 2017/03/21
1992 run;/* "Chapter 7 SCD 3 w Facts.sas" from the SAS Press book
1993  Data Management Solutions Using SAS Hash Table Operations:
1994  A Business Intelligence Case Study
1995 */
1996 
1997 data _null_;
1998  if 0 then set template.Players_SCD3_Facts;
1999  if _n_ = 1 then
2000  do; /* define the hash table */
2001  dcl hash scd(dataset:
2002  ifc(exist("bizarro.Players_SCD3_Facts")
2003  ,"bizarro.Players_SCD3_Facts"
2004  ,"template.Players_SCD3_Facts"
2005  )
2006  ,ordered:"A");
2007  scd.defineKey("Player_ID");
2008  scd.defineData("Player_ID","Team_SK"
2009  ,"First_Name","Last_Name"
2010  ,"First","Second","Short","Third"
2011  ,"Left","Center","Right","Catcher"
2012  ,"Pitcher","Pinch_Hitter");
2013  scd.defineDone();
2014  dcl hash uniqueGames();
2015  uniqueGames.defineKey("Game_SK"
2016  ,"Player_ID"
2017  ,"Position_Code");
2018  uniqueGames.defineDone();
2019  end; /* define the hash table */
2020  set bizarro.AtBats(rename = (Batter_ID = Player_ID))
2021  end=lr;
2022  if scd.find() ne 0 then
2023  call missing(First,Second,Short,Third
2024  ,Left,Center,Right,Catcher
2025  ,Pitcher,Pinch_Hitter);
2026  select(Position_Code);
2027  when("1B") First + (uniqueGames.add() = 0);
2028  when("2B") Second + (uniqueGames.add() = 0);
2029  when("SS") Short + (uniqueGames.add() = 0);
2030  when("3B") Third + (uniqueGames.add() = 0);
2031  when("LF") Left + (uniqueGames.add() = 0);
2032  when("CF") Center + (uniqueGames.add() = 0);
2033  when("RF") Right + (uniqueGames.add() = 0);
2034  when("C" ) Catcher + (uniqueGames.add() = 0);
2035  when("SP") Pitcher + (uniqueGames.add() = 0);
2036  when("RP") Pitcher + (uniqueGames.add() = 0);
2037  when("PH") Pinch_Hitter + (uniqueGames.add() = 0);
2038  otherwise;
2039  end;
2040  scd.replace();
2041  if lr;
2042  scd.output(dataset:"Bizarro.Players_SCD3_Facts");
2043 run;/* "Chapter 7 SCD 3.sas" from the SAS Press book
2044  Data Management Solutions Using SAS Hash Table Operations:
2045  A Business Intelligence Case Study
2046 */
2047 
2048 data _null_;
2049  if 0 then set template.Players_SCD3;
2050  if _n_ = 1 then
2051  do; /* define the hash table */
2052  dcl hash scd(dataset:
2053  ifc(exist("bizarro.Players_SCD3")
2054  ,"bizarro.Players_SCD3"
2055  ,"template.Players_SCD3"
2056  )
2057  ,ordered:"A",multidata:"Y");
2058  scd.defineKey("Player_ID");
2059  scd.defineData("Player_ID","Debut_Team_SK","Team_SK"
2060  ,"First_Name","Last_Name"
2061  ,"Position_Code","Bats","Throws");
2062  scd.defineDone();
2063  end; /* define the hash table */
2064  set bizarro.atbats(rename=(Batter_ID = Player_ID))
2065  end=lr;
2066  _Team_SK = Team_SK;
2067  if scd.find() then scd.add(Key:Player_ID
2068  ,Data:Player_ID
2069  ,Data:Team_SK
2070  ,Data:Team_SK
2071  ,Data:First_Name
2072  ,Data:Last_Name
2073  ,Data:Position_Code
2074  ,Data:Bats
2075  ,Data:Throws
2076  );
2077  else scd.replace(Key:Player_ID
2078  ,Data:Player_ID
2079  ,Data:Debut_Team_SK
2080  ,Data:_Team_SK
2081  ,Data:First_Name
2082  ,Data:Last_Name
2083  ,Data:Position_Code
2084  ,Data:Bats
2085  ,Data:Throws
2086  );
2087  if lr;
2088  scd.output(dataset:"bizarro.Players_SCD3");
2089 run;/* "Chapter 7 SCD 6.sas" from the SAS Press book
2090  Data Management Solutions Using SAS Hash Table Operations:
2091  A Business Intelligence Case Study
2092 */
2093 
2094 data _null_;
2095  if 0 then set template.Players_SCD6;
2096  if _n_ = 1 then
2097  do; /* define the hash table */
2098  dcl hash scd(dataset:
2099  ifc(exist("bizarro.Players_SCD6")
2100  ,"bizarro.Players_SCD6"
2101  ,"template.Players_SCD6"
2102  )
2103  ,ordered:"A",multidata:"Y");
2104  scd.defineKey("Player_ID");
2105  scd.defineData("Player_ID","Active","SubKey"
2106  ,"Team_SK","First_Name","Last_Name"
2107  ,"Position_Code","Bats","Throws"
2108  ,"Start_Date","End_Date");
2109  scd.defineDone();
2110  end; /* define the hash table */
2111  set bizarro.atbats
2112  (rename = (Batter_ID = Player_ID
2113  Team_SK = _Team_SK
2114  First_Name = _First_Name
2115  Last_Name = _Last_Name
2116  Position_Code = _Position_Code
2117  Bats = _Bats
2118  Throws = _Throws)
2119  ) end=lr;
2120  if scd.check(Key:Player_ID) ne 0 then
2121  do; /* player is new */
2122  scd.add(key: Player_ID
2123  ,data: Player_ID
2124  ,data: 1
2125  ,data: 1
2126  ,data: _Team_SK
2127  ,data: _First_Name
2128  ,data: _Last_Name
2129  ,data: _Position_Code
2130  ,data: _Bats
2131  ,data: _Throws
2132  ,data: Date
2133  ,data: &SCD_End_Date
2134  );
2135  end; /* player is new */
2136  else
2137  do; /* check to see if there are changes */
2138 
2139  RC = scd.find();
2140  do while(RC = 0);
2141  if (Start_Date le Date le End_Date) then leave;
2142  RC = scd.find_next();
2143  end;
2144  if RC ne 0 then
2145  call missing(Team_SK,First_Name,Last_Name
2146  ,Position_Code,Bats,Throws);
2147 
2148  if catx(":", Team_SK, First_Name, Last_Name
2149  , Position_Code, Bats, Throws) ne
2150  catx(":",_Team_SK,_First_Name,_Last_Name
2151  ,_Position_Code,_Bats,_Throws) then
2152  do; /* date out prior record and add new one */;
2153 
2154  if RC = 0 then /* date out active record */
2155  scd.replaceDup(data: Player_ID
2156  ,data: 0
2157  ,data: SubKey
2158  ,data: Team_SK
2159  ,data: First_Name
2160  ,data: Last_Name
2161  ,data: Position_Code
2162  ,data: Bats
2163  ,data: Throws
2164  ,data: Start_Date
2165  ,data: Date - 1
2166  );
2167 
2168  /* add row with the next autonumber value */
2169  _SubKey = 0;
2170  RC = scd.find();
2171  do while(RC = 0);
2172  RC = scd.find_next();
2173  _SubKey = max(_SubKey,SubKey);
2174  end;
2175  scd.add(key: Player_ID
2176  ,data: Player_ID
2177  ,data: 1
2178  ,data: _SubKey + 1
2179  ,data: _Team_SK
2180  ,data: _First_Name
2181  ,data: _Last_Name
2182  ,data: _Position_Code
2183  ,data: _Bats
2184  ,data: _Throws
2185  ,data: Date
2186  ,data: &SCD_End_Date
2187  );
2188  end; /* date out prior record and add new one */;
2189  end; /* check to see if there are changes */
2190  if lr;
2191  scd.output(dataset:"Bizarro.Players_SCD6"
2192  || "(index=(SCD6=(Player_ID Active SubKey)))");
2193 run;
2194 
2195 data tableLookup;
2196  /* Sample Lookup */
2197  retain Player_ID;
2198  if 0 then set bizarro.Players_SCD6(drop=Subkey);
2199  if _n_ = 1 then
2200  do; /* define the hash table */
2201  dcl hash scd(dataset:"bizarro.Players_SCD6"
2202  ,multidata:"Y",ordered:"D");
2203  scd.defineKey("Player_ID","Active");
2204  scd.defineData("Team_SK","Player_ID","Active"
2205  ,"First_Name","Last_Name"
2206  ,"Position_Code","Bats","Throws"
2207  ,"Start_Date","End_Date");
2208  scd.defineDone();
2209  end; /* define the hash table */
2210  infile datalines;
2211  attrib Date format = yymmdd10. informat = yymmdd10.;
2212  input Player_ID Date;
2213  RC = scd.find(Key:Player_ID,Key:1);
2214  if RC = 0 and (Start_Date le Date le End_Date)
2215  then;
2216  else
2217  do; /* search the inactive rows */
2218  RC = scd.find(Key:Player_ID,Key:0);
2219  do while(RC = 0);
2220  if (Start_Date le Date le End_Date) then leave;
2221  RC = scd.find_next();
2222  end;
2223  end; /* search the inactive rows */
2224  if RC ne 0 then
2225  call missing(Team_SK,Active,First_Name
2226  ,Last_Name,Position_Code,Bats
2227  ,Throws,Start_Date,End_Date);
2228 datalines;
2229 10103 2017/10/15
2230 10103 2017/03/23
2231 99999 2017/03/15
2232 10782 2017/03/22
2233 10782 2017/03/21
2234 run;/* "Chapter 7 Update Star Schema DW.sas" from the SAS Press book
2235  Data Management Solutions Using SAS Hash Table Operations:
2236  A Business Intelligence Case Study
2237 */
2238 
2239 data _null_;
2240  %createHash(hashTable=AtBats)
2241  %createHash(hashTable=Pitches)
2242  %createHash(hashTable=Runs)
2243  %createHash(hashTable=Games)
2244  %createHash(hashTable=Players_Positions_Played)
2245  %createHash(hashTable=Players)
2246 
2247  dcl hash uniqueGames();
2248  uniqueGames.defineKey("Game_SK"
2249  ,"Player_ID"
2250  ,"Position_Code");
2251  uniqueGames.defineDone();
2252 
2253  lr = 0;
2254  do until(lr);
2255  set bizarro.AtBats(rename = (Team_SK = _Team_SK
2256  First_Name = _First_Name
2257  Last_Name = _Last_Name
2258  Bats = _Bats
2259  Throws = _Throws)
2260  ) end=lr;
2261  if game_sk ne lag(game_sk) and _AtBats.check() = 0
2262  then _AtBats.remove();
2263  _AtBats.add();
2264  link Games_SCD;
2265  Player_ID = Batter_ID;
2266  link Positions_Played_SCD;
2267  link Players_SCD;
2268  end;
2269  _AtBats.output(dataset:"dw.AtBats");
2270 
2271  lr = 0;
2272  do until(lr);
2273  set bizarro.Pitches end=lr;
2274  if game_sk ne lag(game_sk) and _Pitches.check() = 0
2275  then _Pitches.remove();
2276  _Pitches.add();
2277  Player_ID = Pitcher_ID;
2278  _Team_SK = Team_SK;
2279  _First_Name = Pitcher_First_Name;
2280  _Last_Name = Pitcher_Last_Name;
2281  _Bats = Pitcher_Bats;
2282  _Throws = Pitcher_Throws;
2283  link Players_SCD;
2284  Position_Code = Pitcher_Type;
2285  link Positions_Played_SCD;
2286  end;
2287  _Pitches.output(dataset:"dw.Pitches");
2288 
2289  lr = 0;
2290  do until(lr);
2291  set bizarro.Runs end=lr;
2292  if game_sk ne lag(game_sk) and _Runs.check() = 0
2293  then _Runs.remove();
2294  _Runs.add();
2295  end;
2296  _Runs.output(dataset:"dw.Runs");
2297 
2298  /* output the updated dimension tables */
2299  _games.output(dataset:"dw.Games");
2300  _Players_Positions_Played.output
2301  (dataset:"dw.Players_Positions_Played");
2302  _Players.output(dataset:"dw.Players");
2303  stop;
2304 
2305  Games_SCD:
2306  Year = Year(Date);
2307  Month = Month(Date);
2308  DayOfWeek = weekday(Date);
2309  _games.replace();
2310  return;
2311 
2312  Positions_Played_SCD:
2313  if _Players_Positions_Played.find() ne 0
2314  then call missing(First,Second,Short,Third,Left
2315  ,Center,Right,Catcher,Pitcher);
2316  select(Position_Code);
2317  when("1B") First + (uniqueGames.add() = 0);
2318  when("2B") Second + (uniqueGames.add() = 0);
2319  when("SS") Short + (uniqueGames.add() = 0);
2320  when("3B") Third + (uniqueGames.add() = 0);
2321  when("LF") Left + (uniqueGames.add() = 0);
2322  when("CF") Center + (uniqueGames.add() = 0);
2323  when("RF") Right + (uniqueGames.add() = 0);
2324  when("C" ) Catcher + (uniqueGames.add() = 0);
2325  when("SP") Pitcher + (uniqueGames.add() = 0);
2326  when("RP") Pitcher + (uniqueGames.add() = 0);
2327  when("PH") Pinch_Hitter + (uniqueGames.add() = 0);
2328  otherwise;
2329  end;
2330  _Players_Positions_Played.replace();
2331  return;
2332 
2333  Players_SCD:
2334  if _Players.check() ne 0 then
2335  do; /* need to add the player */
2336  _Players.add(key: Player_ID
2337  ,data: Player_ID
2338  ,data: _Team_SK
2339  ,data: _First_Name
2340  ,data: _Last_Name
2341  ,data: _Bats
2342  ,data: _Throws
2343  ,data: Date
2344  ,data: &SCD_End_Date
2345  );
2346  end; /* need to add the player */
2347  else
2348  do; /* check to see if there are changes */
2349 
2350  RC = _Players.find();
2351  do while(RC = 0);
2352  if (Start_Date le Date le End_Date) then leave;
2353  RC = _Players.find_next();
2354  end;
2355 
2356  if catx(":", Team_SK, First_Name, Last_Name
2357  , Bats, Throws) ne
2358  catx(":",_Team_SK,_First_Name,_Last_Name
2359  ,_Bats,_Throws) then
2360  do; /* date out prior record and add new one */
2361  if RC = 0 then
2362  _Players.replaceDup(data: Player_ID
2363  ,data: Team_SK
2364  ,data: First_Name
2365  ,data: Last_Name
2366  ,data: Bats
2367  ,data: Throws
2368  ,data: Start_Date
2369  ,data: Date-1
2370  );
2371  _Players.add(key: Player_ID
2372  ,data: Player_ID
2373  ,data: _Team_SK
2374  ,data: _First_Name
2375  ,data: _Last_Name
2376  ,data: _Bats
2377  ,data: _Throws
2378  ,data: Date
2379  ,data: &SCD_End_Date
2380  );
2381  end; /* date out prior record and add new one */;
2382  end; /* check to see if there are changes */
2383  return;
2384 run;
2385 /* ///@endcond */