"""Simple relational algebra interpreter. usage: To make the grammar python relalg.py make To run some relatoinal algebra expressions python relalg.py < expressions_file """ # EDIT INSTALLDIR TO BE ABLE TO LOAD UNDER ANY CWD INSTALLDIR = "." ## simple relational algebra using only the equality predicate ## note: string values cannot contain ; ## statement sequencing using ; handled at higher level relalg_rules = """ statement :: @R statementassn :: statement >> assignment @R statementexpr :: statement >> rexpr @R assignment1 :: assignment >> name = rexpr @R assignmentn :: assignment >> name = assignment @R union :: rexpr >> rexpr U rterm @R rterm :: rexpr >> rterm @R minus :: rexpr >> rexpr - rterm @R intersect :: rterm >> rterm intersect rfactor @R join :: rterm >> rterm join rfactor @R rfactor :: rterm >> rfactor @R projection :: rfactor >> projection [ names ] rfactor @R names0 :: names >> @R namesn :: names >> names1 @R names11 :: names1 >> name @R names1n :: names1 >> names1 name @R selection :: rfactor >> selection ( condition ) rfactor @R conditionor :: condition >> condition | boolfactor @R condfactor :: condition >> boolfactor @R factorand :: boolfactor >> boolfactor & boolprimary @R factorprime :: boolfactor >> boolprimary @R notprimary :: boolprimary >> ~ boolprimary @R primarycondition :: boolprimary >> ( condition ) @R primaryeq :: boolprimary >> expression = expression @R expname :: expression >> name @R expvalue :: expression >> value @R rename :: rfactor >> rename [ names ] to [ names ] rfactor @R named :: rfactor >> name @R factorexpr :: rfactor >> ( rexpr ) @R relationval :: rfactor >> [ names ] ( rows ) @R rows0 :: rows >> @R rowsn :: rows >> somerows @R somerows1 :: somerows >> row @R somerowsn :: somerows >> somerows , row @R emptyrow :: row >> NIL @R row1 :: row >> value @R rown :: row >> row value @R valuenum :: value >> number @R valuestr :: value >> string """ keywords = """ selection intersect rename projection to NIL U join """ puncts = """=^~|,-[]()&""" nonterms = """ statement assignment rexpr rterm value rfactor names names1 condition boolfactor boolprimary expression rows somerows row """ try: from kjbuckets import * except ImportError: from kjbuckets0 import * class relation: def __init__(self, names, rows): #print "relation init", names, rows names = self.names = tuple(names) nameset = self.nameset = kjSet(names) for r in rows: if nameset != kjSet(r.keys()): raise ValueError, \ "bad names: "+`(names, r.items())` self.rows = kjSet(rows) def __repr__(self): from string import join names = self.names rows = self.rows.items() if not rows: nns = join(names) replist = [nns, "="*len(nns), " ----"] return join(replist, "\n") #print names, rows nnames = len(names) if nnames==1: replist = [names[0]] else: replist = [names] for r in rows: elt = r.dump(names) replist.append(r.dump(names)) #print replist if nnames==1: replist = maxrep(replist) else: transpose = apply(map, tuple([None] + replist)) adjusted = map(maxrep, transpose) replist = apply(map, tuple([None] + adjusted)) replist = map(join, replist) replist.insert(1, "=" * len(replist[0])) #print replist return join(replist, "\n") def maxrep(list): list = map(str, list) maxlen = max( map(len, list) ) for i in range(len(list)): item = list[i] litem = len(item) list[i] = item + (" " * (maxlen-litem)) return list # context is a simple dictionary of named relations def elt0(l, c): return l[0] statementassn = elt0 def statementexpr(l, c): from string import split, join print print " --- expression result ---" print data = str(l[0]) print " "+ join(split(data, "\n"), "\n ") def assignment1(l, c): [name, eq, val] = l c[name] = val return val assignmentn = assignment1 def check_compat(v1, v2): names1, names2 = v1.names, v2.names if names1 != names2: raise ValueError, \ "operands not union compatible "+`(names1, names2)` return names1, v1.rows, v2.rows def union(l, c): [v1, U, v2] = l names1, r1, r2 = check_compat(v1, v2) return relation(names1, (r1+r2).items()) rterm = elt0 def minus(l, c): [v1, m, v2] = l names1, r1, r2 = check_compat(v1, v2) return relation(names1, (r1-r2).items()) def intersect(l, c): [v1, i, v2] = l names1, r1, r2 = check_compat(v1, v2) return relation(names1, (r1&r2).items()) def join(l, c): [v1, j, v2] = l n1, n2 = v1.names, v2.names r1, r2 = v1.rows.items(), v2.rows.items() n1s, n2s = kjSet(n1), kjSet(n2) common = tuple((n1s&n2s).items()) result = kjSet() if common: # simple hashjoin G = kjGraph() for a in r1: G[a.dump(common)] = a for b in r2: for a in G.neighbors(b.dump(common)): result[a+b] = 1 else: for a in r1: for b in r2: result[a+b] = 1 return relation( (n1s+n2s).items(), result.items() ) rfactor = elt0 def projection(l, c): [p, b1, names, b2, val] = l proj = kjSet(names) result = kjSet() for row in val.rows.items(): result[ proj * row ] = 1 return relation( names, result.items()) def emptylist(l, c): return [] names0 = emptylist namesn = elt0 def names11(l, c): return l def names1n(l, c): [ns, n] = l ns.append(n) return ns def selection(l, c): [sel, p1, cond, p2, val] = l return cond.filter(val) ## conditions are not optimized at all! class conditionor: def __init__(self, l, c): [self.c1, op, self.c2] = l def filter(self, val): v1 = self.c1.filter(val) v2 = self.c2.filter(val) return relation(v1.names, (v1.rows+v2.rows).items()) condfactor = elt0 class factorand(conditionor): def filter(self, val): v1 = self.c1.filter(val) v2 = self.c2.filter(val) return relation(v1.names, (v1.rows&v2.rows).items()) factorprime = elt0 class notprimary: def __init__(self, l, c): [n, self.c1] = l def filter(self, val): v1 = self.c1.filter(val) return relation(v1.names, (val.rows-v1.rows).items()) def elt1(l, c): return l[1] primarycondition = elt1 class primaryeq: def __init__(self, l, c): [self.e1, eq, self.e2] = l def filter(self, val): rows = val.rows.items() e1v = self.e1.value(rows) e2v = self.e2.value(rows) result = kjSet() for (r, v1, v2) in map(None, rows, e1v, e2v): if v1==v2: result[r] = 1 return relation(val.names, result.items()) class expname: def __init__(self, l, c): self.name = l[0] def value(self, rows): name = self.name r = list(rows) for i in xrange(len(r)): r[i] = r[i][name] return r class expvalue(expname): def value(self, rows): return [self.name] * len(rows) def rename(l, c): [ren, b1, names, b2, to, b3, names2, b4, val] = l if len(names)!=len(names2): raise ValueError, "names lengths must match"+`(names1, names2)` remap = kjDict(map(None, names2, names)) oldnames = kjSet(val.names) addnames = kjSet(names2) remnames = kjSet(names) keepnames = oldnames - remnames remap = remap + keepnames if not remnames.subset(oldnames): #print remnames, oldnames raise ValueError, "old names not present"+`(names, val.names)` newnames = keepnames+addnames rows = val.rows.items() for i in range(len(rows)): rows[i] = remap*rows[i] return relation(newnames.items(), rows) def named(l, c): [name] = l return c[name] def relationval(l, c): [b1, names, b2, p1, rows, p2] = l names = tuple(names) ln = len(names) for i in xrange(len(rows)): this = rows[i] lt = len(this) if lt!=ln: raise ValueError, "names, vals don't match"+`(names,this)` if len(this)==1: this = this[0] else: this = tuple(this) rows[i] = kjUndump(names, this) return relation(names, rows) rows0 = emptylist rowsn = elt0 def somerows1(l, c): #print "somerows1", l return l def somerowsn(l, c): #print "somerowsn", l [sr, c, r] = l sr.append(r) return sr emptyrow = emptylist row1 = somerows1 def factorexpr(l, c): return l[1] def rown(l, c): #print "rows", l [r, v] = l r.append(v) return r valuenum = valuestr = elt0 ## snarfed from sqlbind # note: all reduction function defs must precede this assign VARS = vars() class punter: def __init__(self, name): self.name = name def __call__(self, list, context): print "punt:", self.name, list return list class tracer: def __init__(self, name, fn): self.name = name self.fn = fn def __call__(self, list, context): print "tracing", self.name, list test = self.fn(list, context) print self.name, "returns", test return test def BindRules(sqlg): for name in sqlg.RuleNameToIndex.keys(): if VARS.has_key(name): #print "binding", name sqlg.Bind(name, VARS[name]) # nondebug #sqlg.Bind(name, tracer(name, VARS[name]) ) # debug else: print "unbound", name sqlg.Bind(name, punter(name)) return sqlg ## snarfed from sqlgen MARSHALFILE = "relalg.mar" import string alphanum = string.letters+string.digits + "_" userdefre = "[%s][%s]*" % (string.letters +"_", alphanum) RACOMMENTREGEX = "COMMENT .*" def userdeffn(str): return str charstre = "'[^\n']*'" def charstfn(str): return str[1:-1] numlitre = "[%s][%s\.]*" % (string.digits, alphanum) # not really... def numlitfn(str): """Note: this is "safe" because regex filters out dangerous things.""" return eval(str) def DeclareTerminals(Grammar): Grammar.Addterm("name", userdefre, userdeffn) Grammar.Addterm("string", charstre, charstfn) Grammar.Addterm("number", numlitre, numlitfn) def Buildrelalg(filename=MARSHALFILE): import kjParseBuild SQLG = kjParseBuild.NullCGrammar() #SQLG.SetCaseSensitivity(0) DeclareTerminals(SQLG) SQLG.Keywords(keywords) SQLG.punct(puncts) SQLG.Nonterms(nonterms) # should add comments SQLG.comments([RACOMMENTREGEX]) SQLG.Declarerules(relalg_rules) print "working..." SQLG.Compile() filename = INSTALLDIR+"/"+filename print "dumping to", filename outfile = open(filename, "wb") SQLG.MarshalDump(outfile) outfile.close() return SQLG def reloadrelalg(filename=MARSHALFILE): import kjParser filename = INSTALLDIR+"/"+filename infile = open(filename, "rb") SQLG = kjParser.UnMarshalGram(infile) infile.close() DeclareTerminals(SQLG) BindRules(SQLG) return SQLG def runfile(f): from string import split, join ragram = reloadrelalg() context = {} #f = open(filename, "r") data = f.read() #f.close() from string import split, strip commands = split(data, ";") for c in commands: if not strip(c): continue print " COMMAND:" data = str(c) pdata = " "+join(split(c, "\n"), "\n ") print pdata test = ragram.DoParse1(c, context) print # c:\python\python relalg.py ratest.txt if __name__=="__main__": try: done = 0 import sys argv = sys.argv if len(argv)>1: command = argv[1] if command=="make": print "building relational algebra grammar" Buildrelalg() done = 1 else: runfile(sys.stdin) done = 1 finally: if not done: print __doc__