Skip to content

Commit

Permalink
Improve the lookup of .join files
Browse files Browse the repository at this point in the history
Modify the lookup algorithm to do the following:

	given "... FROM A, B ..."

	#1 first look for A:B join files in A's directory
	#2 if that fails, look for A:B join files in B's directory
	#3 when a potential match is found, prefer resolving the other
	   table to the same dbdir where the .join file resides
	#4 if you can't, look for it in the other dbdirs, in the order
	   they show up in LSD_DB

This matters when there are tables with the same name in two different LSD_DB directories.
  • Loading branch information
Mario Juric committed Jun 16, 2011
1 parent 17a8c0a commit ce6cad4
Showing 1 changed file with 40 additions and 6 deletions.
46 changes: 40 additions & 6 deletions src/lsd/join_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1994,28 +1994,62 @@ def construct_join_tree(self, from_clause):
print "Matched to", entry.table.name

for tabname, (e, jargs) in tablist:
# Check for tables that can be joined onto this one (where this one is on the right hand side of the relation)
# Find tables that can be joined onto this one (where this one is on the left hand side of the relation)
# We look for this first, so that if there are 'FROM a, b', with both having .join files, a x b join will be
# the one to prefer

# Look for default .join files named ".<tabname>:*.join"
dbpath = os.path.dirname(e.table.path) # Look only in the table's dbdir
pathlist = [dbpath] + self.path # List of paths to check to find the join destination table. Our path is the first one.

pattern = "%s/.%s:*.join" % (dbpath, e.table.name)
for fn in glob.iglob(pattern):
jtabname = fn[fn.rfind(':')+1:fn.rfind('.join')]
jpath = "%s/%s" % (dbpath, jtabname)

try:
for dbpath in pathlist:
jpath = "%s/%s" % (dbpath, jtabname)
if jpath not in tables_by_path:
continue

je, jargs = tables_by_path[jpath]
except KeyError:
break
else:
continue

if 'matchedto' in jargs: # Explicitly joined
continue
if je.relation is not None: # Already joined
continue

je.relation = create_join(self, fn, jargs, e.table, je.table)
e.joins.append(je)

# Find tables onto which the still unjoined tables can be joined
# via a .join file
for tabname, (e, jargs) in tablist:
# Ignore if already joined
if e.relation is not None:
continue

# Look for default .join files named ".*:<tabname>.join"
dbpath = os.path.dirname(e.table.path) # Look only in the table's dbdir for .join
pathlist = [dbpath] + self.path # List of paths to check to find the join destination table. Our path is the first one.

pattern = "%s/.*:%s.join" % (dbpath, e.table.name)
for fn in glob.iglob(pattern):
jtabname = fn[fn.rfind('/')+2:fn.rfind(':')]

for dbpath in pathlist:
jpath = "%s/%s" % (dbpath, jtabname)
if jpath not in tables_by_path:
continue

je, jargs = tables_by_path[jpath]
break
else:
continue

e.relation = create_join(self, fn, jargs, je.table, e.table)
je.joins.append(e)

# Discover the root (the one and only one table that has no links pointing to it)
root = None
for _, (e, jargs) in tables.iteritems():
Expand Down

0 comments on commit ce6cad4

Please sign in to comment.