-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathTableScan
150 lines (127 loc) · 7.67 KB
/
TableScan
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
scala> val m = hiveContext.read.format("orc").load("orcFile")
15/08/11 20:58:51 INFO OrcRelation: Listing file:/Users/zzhang/code/UNET/orcFile on driver
m: org.apache.spark.sql.DataFrame = [key: string, inners: struct<a:string,b:string>]
scala> m.schema
res0: org.apache.spark.sql.types.StructType = StructType(StructField(key,StringType,true), StructField(inners,StructType(StructField(a,StringType,true), StructField(b,StringType,true)),true))
scala> m.registerTempTable("orcTable")
scala> val t = hiveContext.sql("select inners.a from orcTable")
15/08/11 20:58:52 INFO ParseDriver: Parsing command: select inners.a from orcTable
15/08/11 20:58:52 INFO ParseDriver: Parse Completed
t: org.apache.spark.sql.DataFrame = [a: string]
scala> t.queryExecution
15/08/11 20:59:02 INFO MemoryStore: ensureFreeSpace(88456) called with curMem=0, maxMem=556038881
15/08/11 20:59:02 INFO MemoryStore: Block broadcast_0 stored as values in memory (estimated size 86.4 KB, free 530.2 MB)
15/08/11 20:59:02 INFO MemoryStore: ensureFreeSpace(19788) called with curMem=88456, maxMem=556038881
15/08/11 20:59:02 INFO MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 19.3 KB, free 530.2 MB)
15/08/11 20:59:02 INFO BlockManagerInfo: Added broadcast_0_piece0 in memory on localhost:62430 (size: 19.3 KB, free: 530.3 MB)
15/08/11 20:59:02 INFO SparkContext: Created broadcast 0 from replStringOf at <console>:10
pruneFilterRwa: inners#1.a AS a#2 <<< original projection
column: inners <<< passed to scan
15/08/11 20:59:03 INFO MemoryStore: ensureFreeSpace(231688) called with curMem=108244, maxMem=556038881
15/08/11 20:59:03 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 226.3 KB, free 530.0 MB)
15/08/11 20:59:03 INFO MemoryStore: ensureFreeSpace(20036) called with curMem=339932, maxMem=556038881
15/08/11 20:59:03 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 19.6 KB, free 529.9 MB)
15/08/11 20:59:03 INFO BlockManagerInfo: Added broadcast_1_piece0 in memory on localhost:62430 (size: 19.6 KB, free: 530.2 MB)
15/08/11 20:59:03 INFO SparkContext: Created broadcast 1 from replStringOf at <console>:10
res2: org.apache.spark.sql.SQLContext#QueryExecution =
== Parsed Logical Plan ==
'Project [unresolvedalias('inners.a)]
'UnresolvedRelation [orcTable], None
== Analyzed Logical Plan ==
a: string
Project [inners#1.a AS a#2]
Subquery orctable
Relation[key#0,inners#1] OrcRelation[file:/Users/zzhang/code/UNET/orcFile]
== Optimized Logical Plan ==
Project [inners#1.a AS a#2]
Relation[key#0,inners#1] OrcRelation[file:/Users/zzhang/code/UNET/orcFile]
== Physical Plan ==
Project [inners#1.a AS a#2]
Scan OrcRelation[file:/Users/zzhang/code/UNET/orcFile][inners#1]
Code Generation: true
scala>
scala> val t = hiveContext.sql("select inners.a from orcTable where inners.b='a'")
15/08/11 20:59:55 INFO ParseDriver: Parsing command: select inners.a from orcTable where inners.b='a'
15/08/11 20:59:55 INFO ParseDriver: Parse Completed
t: org.apache.spark.sql.DataFrame = [a: string]
scala> t.queryExecution
15/08/11 20:59:58 INFO MemoryStore: ensureFreeSpace(230672) called with curMem=359968, maxMem=556038881
15/08/11 20:59:58 INFO MemoryStore: Block broadcast_2 stored as values in memory (estimated size 225.3 KB, free 529.7 MB)
15/08/11 20:59:58 INFO MemoryStore: ensureFreeSpace(19788) called with curMem=590640, maxMem=556038881
15/08/11 20:59:58 INFO MemoryStore: Block broadcast_2_piece0 stored as bytes in memory (estimated size 19.3 KB, free 529.7 MB)
15/08/11 20:59:58 INFO BlockManagerInfo: Added broadcast_2_piece0 in memory on localhost:62430 (size: 19.3 KB, free: 530.2 MB)
15/08/11 20:59:58 INFO SparkContext: Created broadcast 2 from replStringOf at <console>:10
pruneFilterRwa: inners#1.a AS a#4 <<< original projection
filterPredicates: (inners#1.b = a) <<< original predicate
predicates: (inners#1.b = a) <<< original predicate
column: inners <<< passed to scan
15/08/11 20:59:58 INFO MemoryStore: ensureFreeSpace(231688) called with curMem=610428, maxMem=556038881
15/08/11 20:59:58 INFO MemoryStore: Block broadcast_3 stored as values in memory (estimated size 226.3 KB, free 529.5 MB)
15/08/11 20:59:58 INFO MemoryStore: ensureFreeSpace(20036) called with curMem=842116, maxMem=556038881
15/08/11 20:59:58 INFO MemoryStore: Block broadcast_3_piece0 stored as bytes in memory (estimated size 19.6 KB, free 529.5 MB)
15/08/11 20:59:58 INFO BlockManagerInfo: Added broadcast_3_piece0 in memory on localhost:62430 (size: 19.6 KB, free: 530.2 MB)
15/08/11 20:59:58 INFO SparkContext: Created broadcast 3 from replStringOf at <console>:10
res3: org.apache.spark.sql.SQLContext#QueryExecution =
== Parsed Logical Plan ==
'Project [unresolvedalias('inners.a)]
'Filter ('inners.b = a)
'UnresolvedRelation [orcTable], None
== Analyzed Logical Plan ==
a: string
Project [inners#1.a AS a#4]
Filter (inners#1.b = a)
Subquery orctable
Relation[key#0,inners#1] OrcRelation[file:/Users/zzhang/code/UNET/orcFile]
== Optimized Logical Plan ==
Project [inners#1.a AS a#4]
Filter (inners#1.b = a)
Relation[key#0,inners#1] OrcRelation[file:/Users/zzhang/code/UNET/orcFile]
== Physical Plan ==
Project [inners#1.a AS a#4]
Filter (inners#1.b = a)
Scan OrcRelation[file:/Users/zzhang/code/UNET/orcFile][inners#1]
Code Generation: true
scala> val t = hiveContext.sql("select inners.a from orcTable where key='a'")
15/08/11 21:00:17 INFO ParseDriver: Parsing command: select inners.a from orcTable where key='a'
15/08/11 21:00:17 INFO ParseDriver: Parse Completed
t: org.apache.spark.sql.DataFrame = [a: string]
scala> t.queryExecution
15/08/11 21:00:21 INFO MemoryStore: ensureFreeSpace(230672) called with curMem=862152, maxMem=556038881
15/08/11 21:00:21 INFO MemoryStore: Block broadcast_4 stored as values in memory (estimated size 225.3 KB, free 529.2 MB)
15/08/11 21:00:21 INFO MemoryStore: ensureFreeSpace(19788) called with curMem=1092824, maxMem=556038881
15/08/11 21:00:21 INFO MemoryStore: Block broadcast_4_piece0 stored as bytes in memory (estimated size 19.3 KB, free 529.2 MB)
15/08/11 21:00:21 INFO BlockManagerInfo: Added broadcast_4_piece0 in memory on localhost:62430 (size: 19.3 KB, free: 530.2 MB)
15/08/11 21:00:21 INFO SparkContext: Created broadcast 4 from replStringOf at <console>:10
pruneFilterRwa: inners#1.a AS a#6
filterPredicates: (key#0 = a)
predicates: (key#0 = a)
column: inners
column: key
filter: EqualTo(key,a) <<< passed to scan
15/08/11 21:00:21 INFO MemoryStore: ensureFreeSpace(231696) called with curMem=1112612, maxMem=556038881
15/08/11 21:00:21 INFO MemoryStore: Block broadcast_5 stored as values in memory (estimated size 226.3 KB, free 529.0 MB)
15/08/11 21:00:21 INFO MemoryStore: ensureFreeSpace(20057) called with curMem=1344308, maxMem=556038881
15/08/11 21:00:21 INFO MemoryStore: Block broadcast_5_piece0 stored as bytes in memory (estimated size 19.6 KB, free 529.0 MB)
15/08/11 21:00:21 INFO BlockManagerInfo: Added broadcast_5_piece0 in memory on localhost:62430 (size: 19.6 KB, free: 530.2 MB)
15/08/11 21:00:21 INFO SparkContext: Created broadcast 5 from replStringOf at <console>:10
res4: org.apache.spark.sql.SQLContext#QueryExecution =
== Parsed Logical Plan ==
'Project [unresolvedalias('inners.a)]
'Filter ('key = a)
'UnresolvedRelation [orcTable], None
== Analyzed Logical Plan ==
a: string
Project [inners#1.a AS a#6]
Filter (key#0 = a)
Subquery orctable
Relation[key#0,inners#1] OrcRelation[file:/Users/zzhang/code/UNET/orcFile]
== Optimized Logical Plan ==
Project [inners#1.a AS a#6]
Filter (key#0 = a)
Relation[key#0,inners#1] OrcRelation[file:/Users/zzhang/code/UNET/orcFile]
== Physical Plan ==
Project [inners#1.a AS a#6]
Filter (key#0 = a)
Scan OrcRelation[file:/Users/zzhang/code/UNET/orcFile][inners#1,key#0]
Code Generation: true
scala>