forked from Azure-Samples/Azure-MachineLearning-DataScience
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPlotInOutputFiles.sql
More file actions
59 lines (53 loc) · 2.67 KB
/
PlotInOutputFiles.sql
File metadata and controls
59 lines (53 loc) · 2.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
USE [TaxiNYC_Sample]
GO
/****** Object: StoredProcedure [dbo].[PlotInOutputFiles] Script Date: 10/30/2015 6:17:41 PM ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- Drop any existing copy of the procedure so this script can be re-run.
-- Both the existence check and the DROP are schema-qualified to match the
-- CREATE PROCEDURE [dbo].[PlotInOutputFiles] that follows: a name-only lookup
-- would also match a same-named procedure in another schema, and an
-- unqualified DROP would then resolve through the caller's default schema.
IF OBJECT_ID(N'[dbo].[PlotInOutputFiles]', N'P') IS NOT NULL
    DROP PROCEDURE [dbo].[PlotInOutputFiles]
GO
-- Renders three plots of the NYC taxi sample with an embedded R script and
-- writes them as files under C:\temp\plots (created if missing):
--   1. rHistogram_Tipped_*.jpg               - histogram of tipped
--   2. rHistograms_Tip_and_Fare_Amount_*.pdf - side-by-side histograms of
--                                              tip_amount and fare_amount
--   3. rXYPlots_Tip_vs_Fare_Amount_*.pdf     - scatter plot of tip_amount
--                                              against fare_amount
-- Input rows come from [dbo].[nyctaxi_joined_1_percent].
-- The procedure takes no parameters. The R script does not populate an output
-- data set; it only prints the path of each file it writes.
-- NOTE(review): sp_execute_external_script needs R external scripts enabled on
-- the instance, and the path is resolved where the R runtime executes - confirm
-- for the target environment.
CREATE PROCEDURE [dbo].[PlotInOutputFiles]
AS
BEGIN
    SET NOCOUNT ON;

    -- Only the three columns that are plotted are sent to R.
    DECLARE @query nvarchar(max) =
        N'SELECT cast(tipped as int) as tipped, tip_amount, fare_amount FROM [dbo].[nyctaxi_joined_1_percent]';

    -- Single quotes inside the R script are doubled because the script is a
    -- T-SQL string literal.
    EXECUTE sp_execute_external_script @language = N'R',
    @script = N'
# Set output directory for files.
# File names get a unique suffix from tempfile(), so files from earlier runs are kept.
mainDir <- ''C:\\temp\\plots''
dir.create(mainDir, recursive = TRUE, showWarnings = FALSE)
setwd(mainDir);
print("Creating output plot files:", quote=FALSE)

# Open a jpeg file and output histogram of tipped variable in that file.
dest_filename <- tempfile(pattern = ''rHistogram_Tipped_'', tmpdir = mainDir)
dest_filename <- paste(dest_filename, ''.jpg'',sep="")
print(dest_filename, quote=FALSE);
jpeg(filename=dest_filename);
hist(InputDataSet$tipped, col = ''lightgreen'', xlab=''Tipped'', ylab = ''Counts'', main = ''Histogram, Tipped'');
dev.off();

# Open a pdf file and output histograms of tip amount and fare amount.
# Outputs two plots in one row; each histogram asks for about 100 bins.
dest_filename <- tempfile(pattern = ''rHistograms_Tip_and_Fare_Amount_'', tmpdir = mainDir)
dest_filename <- paste(dest_filename, ''.pdf'',sep="")
print(dest_filename, quote=FALSE);
pdf(file=dest_filename, height=4, width=7);
par(mfrow=c(1,2));
hist(InputDataSet$tip_amount, breaks = 100, col = ''lightgreen'', xlab=''Tip amount ($)'', ylab = ''Counts'', main = ''Histogram, Tip amount'', xlim = c(0,40));
hist(InputDataSet$fare_amount, breaks = 100, col = ''lightgreen'', xlab=''Fare amount ($)'', ylab = ''Counts'', main = ''Histogram, Fare amount'', xlim = c(0,100));
dev.off();

# Open a pdf file and output a scatter plot of tip amount vs. fare amount (base graphics plot).
# At most 10,000 sampled observations are plotted here, otherwise file is large.
# sample() without replacement fails when asked for more rows than exist,
# so the sample size is capped at the number of input rows.
dest_filename <- tempfile(pattern = ''rXYPlots_Tip_vs_Fare_Amount_'', tmpdir = mainDir)
dest_filename <- paste(dest_filename, ''.pdf'',sep="")
print(dest_filename, quote=FALSE);
pdf(file=dest_filename, height=4, width=4);
plot_rows <- sample(nrow(InputDataSet), min(10000, nrow(InputDataSet)))
plot(tip_amount ~ fare_amount, data = InputDataSet[plot_rows, ], ylim = c(0,50), xlim = c(0,150), cex=.5, pch=19, col=''darkgreen'', main = ''Tip amount by Fare amount'', xlab=''Fare Amount ($)'', ylab = ''Tip Amount ($)'');
dev.off();
',
    @input_data_1 = @query;
END
GO