"""
Node classes and factories used in ADQL tree processing.
"""

#c Copyright 2008-2020, the GAVO project
#c
#c This program is free software, covered by the GNU GPL.  See the
#c COPYING file in the source distribution.


import fnmatch
import re
import weakref
from functools import reduce

from gavo import stc
from gavo import utils
from gavo.adql import common
from gavo.adql import fieldinfo
from gavo.adql import fieldinfos
from gavo.stc import tapstc
from gavo.utils import parsetricks


################ Various helpers

class ReplaceNode(utils.ExecutiveAction):
	"""can be raised by code in the constructor of an ADQLNode to replace
	itself.

	It is constructed with the (single) ADQLNode that should stand in its
	stead; that node is available in the replacingNode attribute.

	This is intended as a special service for ufuncs that want to insert
	complex, annotatable expressions.  I doubt this is something
	we should do under other circumstances.
	"""
	def __init__(self, replacingNode):
		# whoever catches this just picks up replacingNode
		self.replacingNode = replacingNode


def symbolAction(*symbols):
	"""returns a decorator marking a function as parse action for symbols.

	The decorated function has every name in symbols added to its
	parseActionFor list (which is created if it is missing) and is made
	available as its own fromParseResult attribute.  Other than that,
	the function is returned as it came in.

	This is evaluated by getADQLGrammar below.  Be careful not to alter
	global state in such a handler.
	"""
	def deco(handler):
		for name in symbols:
			if not hasattr(handler, "parseActionFor"):
				handler.parseActionFor = [name]
				continue
			# the handler already carries a list (e.g., from an earlier
			# decoration), so just extend that
			handler.parseActionFor.append(name)
		handler.fromParseResult = handler
		return handler
	return deco


def getType(arg):
	"""returns str (the type object) if arg is a string, and arg's type
	attribute for anything else.
	"""
	return str if isinstance(arg, str) else arg.type


def flatten(arg):
	"""returns arg serialised to SQL source text.

	Strings are passed through, ints and floats are repr-ed, the items
	of parsetricks.ParseResults are flattened and joined with single
	blanks, and everything else is asked to serialise itself through its
	flatten() method.
	"""
	if isinstance(arg, str):
		return arg
	if isinstance(arg, (int, float)):
		return repr(arg)
	if isinstance(arg, parsetricks.ParseResults):
		return " ".join(map(flatten, arg))
	return arg.flatten()


def autocollapse(nodeBuilder, children):
	"""inhibits the construction via nodeBuilder if children consists of
	a single ADQLNode.

	In that case, that lone node is returned as-is; in all other cases,
	the result of nodeBuilder.fromParseResult(children) is returned.

	This function will automatically be inserted into the constructor
	chain if the node defines an attribute collapsible=True.
	"""
	isLoneNode = len(children)==1 and isinstance(children[0], ADQLNode)
	if isLoneNode:
		return children[0]
	return nodeBuilder.fromParseResult(children)


def collectUserData(infoChildren):
	"""returns a pair (userData, tainted) merged from the fieldInfo-s of
	infoChildren.

	userData is the concatenation of the userData tuples of all the
	children's fieldInfo-s, in order.  tainted is the or-combination of
	their tainted attributes (False if there are no children).
	"""
	mergedData, anyTainted = (), False
	for child in infoChildren:
		info = child.fieldInfo
		mergedData += info.userData
		anyTainted = anyTainted or info.tainted
	return mergedData, anyTainted


def flattenKWs(obj, *fmtTuples):
	"""returns a string built from obj's attributes as directed by
	fmtTuples.

	Each format tuple is a pair of a literal string and an attribute name.
	When obj's attribute of that name is not None, the literal (unless it
	is empty) and the flattened attribute value are added to the result.
	Attributes that are None or do not exist at all contribute nothing.

	An attribute name of None makes for an unconditional literal: it is
	always added.

	All contributions are joined with single blanks.

	This is a helper for the flatten methods of parsed-out elements.
	"""
	parts = []
	for literal, attName in fmtTuples:
		if attName is None:
			parts.append(literal)
			continue

		value = getattr(obj, attName, None)
		if value is None:
			continue

		if literal:
			parts.append(literal)
		parts.append(flatten(value))
	return " ".join(parts)


def cleanNamespace(ns):
	"""returns a copy of the dict ns without private names and without cls.

	Private names are keys starting with an underscore.  The key "cls" is
	dropped as well; it turns up when classmethods return locals().

	This is intended for _getInitKWs methods.  ns itself is *not* changed;
	callers have to use the dictionary returned.
	"""
	return {key: value for key, value in ns.items()
		if not key.startswith("_") and key!="cls"}


def getChildrenOfType(nodeSeq, type):
	"""returns a list of those items in the sequence nodeSeq for which
	getType yields type, in their original order.
	"""
	return [node for node in nodeSeq if getType(node)==type]


def getChildrenOfClass(nodeSeq, cls):
	"""returns a list of those items in the sequence nodeSeq that are
	instances of cls, in their original order.
	"""
	return [node for node in nodeSeq if isinstance(node, cls)]


class BOMB_OUT:
	"""a sentinel for "no default given" in the getChildOfX functions.
	"""

def _uniquify(matches, default, exArgs):
	"""returns the only element of matches.

	This is the helper behind the getChildOfX functions (see there).  For
	empty matches, default is returned unless it is BOMB_OUT, in which
	case a common.NoChild is raised.  For more than one match, a
	common.MoreThanOneChild is raised.  Both exceptions are constructed
	with exArgs as their arguments.
	"""
	nFound = len(matches)
	if nFound==1:
		return matches[0]
	if nFound>1:
		raise common.MoreThanOneChild(*exArgs)

	# nothing was found
	if default is BOMB_OUT:
		raise common.NoChild(*exArgs)
	return default


def getChildOfType(nodeSeq, type, default=BOMB_OUT):
	"""returns the unique node of type in nodeSeq.

	A MoreThanOneChild exception is raised if there are several such
	nodes.  If there is none, default is returned if it was passed in;
	without a default, a NoChild exception is raised.
	"""
	candidates = getChildrenOfType(nodeSeq, type)
	return _uniquify(candidates, default, (type, nodeSeq))


def getChildOfClass(nodeSeq, cls, default=BOMB_OUT):
	"""returns the unique instance of cls in nodeSeq.

	Missing and multiple matches are treated as in getChildOfType.
	"""
	candidates = getChildrenOfClass(nodeSeq, cls)
	return _uniquify(candidates, default, (cls, nodeSeq))


def parseArgs(parseResult):
	"""returns a tuple of ADQL nodes suitable as function arguments from
	parseResult.

	This is for cleaning up _parseResults["args"], i.e. stuff from the
	Args symbol decorator in grammar.

	Items that already are ADQLNodes, strings, or quoted names are taken
	over unchanged.  Everything else (i.e., nested parse results) is
	turned into a GenericValueExpression, unless it collapses to
	a single node.
	"""
	return tuple(
		item
			if isinstance(item, (ADQLNode, str, utils.QuotedName))
			else autocollapse(GenericValueExpression, item)
		for item in parseResult)


######################### Misc helpers related to simple query planning

def _getDescendants(args):
	"""returns a list of the nodes in the sequence args followed by all
	their descendants.

	Descendants are taken from the second items of whatever the iterTree
	methods of the members of args yield; members without an iterTree
	method only contribute themselves.

	This is a helper function for when you have to analyse what's
	contributing to complex terms.
	"""
	found = list(args)
	for node in args:
		if not hasattr(node, "iterTree"):
			continue
		for item in node.iterTree():
			found.append(item[1])
	return found


def iterFieldInfos(args):
	"""iterates over the fieldInfo objects of the nodes in the node list
	args and of all their descendants.

	Nodes without a fieldInfo (or with a None fieldInfo) are skipped.
	"""
	for node in _getDescendants(args):
		info = getattr(node, "fieldInfo", None)
		if info is None:
			continue
		yield info


def _isConstant(args):
	"""returns True if neither the nodes in the node list args nor any of
	their descendants have the type columnReference.
	"""
	return not any(
		getattr(node, "type", None)=="columnReference"
		for node in _getDescendants(args))


def _estimateTableSize(args):
	"""returns an estimate for the size of a table mentioned in the node list
	args.

	The estimate is the nrows of the first fieldInfo userData item found
	below args whose parent has a non-None nrows attribute.  If there is
	no such item, None is returned.
	"""
	estimates = (
		getattr(userItem.parent, "nrows", None)
		for info in iterFieldInfos(args)
		for userItem in info.userData)
	return next((est for est in estimates if est is not None), None)


def _sortLargeFirst(arg1, arg2):
	"""returns arg1 and arg2 ordered such that the first item returned
	deals with the larger table if we can figure that out.

	This is for distance; postgres in general only uses an index for them if the
	point stands alone (rather than in the circle).  So, it normally pays to have
	the larger table first in our expressions (which are point op geom where
	applicable).

	This will also swap constant arguments second (so, into the circle).
	"""
	if _isConstant([arg1]):
		return arg2, arg1
	if _isConstant([arg2]):
		return arg1, arg2

	size1 = _estimateTableSize([arg1])
	size2 = _estimateTableSize([arg2])

	if size2 is None:
		# either we know nothing at all (then we don't change anything to
		# keep the user in control), or only arg1 is nrows-annotated; we
		# assume all large tables are annotated, so arg1 presumably is the
		# larger one and is already in front.
		return arg1, arg2

	if size1 is None or not size1>size2:
		# arg2 is the only one annotated or is known to be at least as
		# large as arg1: swap.
		return arg2, arg1

	return arg1, arg2



######################### Generic Node definitions


class ADQLNode(utils.AutoNode):
	"""A node within an ADQL parse tree.

	There are two flavours of ADQL nodes.  Parsed-out nodes have
	individual attributes and are serialised by hand-crafted flatten
	methods; that is what we do for nodes that are morphed.

	All other nodes basically just have a children attribute, and
	flattening them just means concatenating their flattened children.
	That is convenient as long as they are not morphed.

	To derive actual classes, define

		- the _a_<name> class attributes you need,
		- the type (a nonterminal from the ADQL grammar)
		- plus bindings if the class handles more than one symbol,
		- a class method _getInitKWs(cls, parseResult); see below.
		- a method flatten() -> string if you define a parsed ADQLNode.
		- a method _polish() that is called just before the constructor is
			done and can be used to create more attributes.  There is no need
			to call _polish of superclasses.

	The _getInitKWs methods must return a dictionary mapping constructor
	argument names to values.  There is no need to call the superclasses'
	_getInitKWs manually: the fromParseResult classmethod walks the
	inheritance tree from the most basic class to the most derived one
	and updates the argument dictionary with what each _getInitKWs
	returns, so derived classes win.

	The fromParseResult class method additionally filters out all names
	starting with an underscore (and cls); this is to allow easy returning
	of locals().
	"""
	type = None

	@classmethod
	def fromParseResult(cls, parseResult):
		"""returns a node for parseResult.

		That usually is an instance of cls.  However, when the constructor
		raises a ReplaceNode, the node passed in that exception is
		returned instead.  A TypeError from the constructor is turned
		into a common.BadKeywords.
		"""
		collected = {}
		# most basic classes come first so derived classes can override
		for klass in reversed(cls.mro()):
			if hasattr(klass, "_getInitKWs"):
				collected.update(klass._getInitKWs(parseResult))

		try:
			return cls(**cleanNamespace(collected))
		except TypeError:
			raise common.BadKeywords("%s, %s"%(cls, cleanNamespace(collected)))
		except ReplaceNode as replacement:
			return replacement.replacingNode

	def _setupNode(self):
		# run every _polish visible in the inheritance tree, most basic
		# classes first
		for klass in reversed(self.__class__.mro()):
			if hasattr(klass, "_polish"):
				klass._polish(self)
		self._setupNodeNext(ADQLNode)

	def __repr__(self):
		return "<ADQL Node %s>"%(self.type)

	def flatten(self):
		"""returns a string representation of the text content of the tree.

		This default implementation just joins the flattened children with
		blanks.  It will hence only work if you returned all parsed
		elements as children, which is something you only want to do if
		you are sure that the node in question will not be morphed.

		Otherwise, override it to create an SQL fragment out of the parsed
		attributes.
		"""
		return " ".join(map(flatten, self.children))

	def asTree(self):
		"""returns a nested tuple structure of _treeRepr-s of self and all
		ADQLNodes below it.
		"""
		subtrees = tuple(child.asTree()
			for _, child in self.iterChildren()
			if isinstance(child, ADQLNode))
		return self._treeRepr()+subtrees

	def _treeRepr(self):
		return (self.type,)

	def iterTree(self):
		"""iterates over (name, value) pairs for everything below self.

		For children that are ADQLNodes, their own descendants come before
		the pair for the child itself.
		"""
		for name, child in self.iterChildren():
			if isinstance(child, ADQLNode):
				yield from child.iterTree()
			yield name, child
			

class TransparentMixin:
	"""a mixin for nodes that just keep what was parsed as their children.

	The items of the parse result end up, as a list, in the children
	attribute.
	"""
	_a_children = ()

	@classmethod
	def _getInitKWs(cls, _parseResult):
		return {"children": [*_parseResult]}


class FieldInfoedNode(ADQLNode):
	"""An ADQL node that carries a FieldInfo.

	This is true for basically everything in the tree below a derived
	column.  This class is the basis for column annotation.

	You'll usually have to override addFieldInfo.  The default
	implementation just looks at the immediate node children having
	a fieldInfo attribute; if there is exactly one of those, its fieldInfo
	is adopted unchanged.  Otherwise, a common.Error is raised.

	FieldInfoedNodes, when change()d, keep their field info.  This is
	usually what you want when morphing, but sometimes you might need
	adjustments.
	"""
	fieldInfo = None

	def _getInfoChildren(self):
		return [child
			for child in self.iterNodeChildren()
			if hasattr(child, "fieldInfo")]

	def addFieldInfo(self, context):
		candidates = self._getInfoChildren()
		if len(candidates)!=1:
			msg = "More than one" if len(candidates) else "No"
			raise common.Error("%s child with fieldInfo with"
				" no behaviour defined in %s, children %s"%(
					msg,
					self.__class__.__name__,
					list(self.iterChildren())))

		self.fieldInfo = candidates[0].fieldInfo

	def change(self, **kwargs):
		changed = ADQLNode.change(self, **kwargs)
		# the field info is not a constructor argument, so carry it over
		changed.fieldInfo = self.fieldInfo
		return changed


class FunctionNode(FieldInfoedNode):
	"""An ADQLNode having a function name and arguments.

	The rules having this as action must use the Arg "decorator" in
	grammar.py around their arguments and must have a string-valued
	result "fName".

	FunctionNodes have attributes args (unflattened arguments),
	and funName (a string containing the function name, all upper
	case).
	"""
	_a_args = ()
	_a_funName = None

	@classmethod
	def _getInitKWs(cls, _parseResult):
		# NOTE: locals() is returned, so the names of the local variables
		# here are the constructor keywords.  cls and _parseResult are
		# filtered out later by cleanNamespace (called in
		# ADQLNode.fromParseResult).
		try:
			args = parseArgs(_parseResult["args"]) #noflake: locals returned
		except KeyError: # Zero-Arg function
			# args remains undefined and thus is missing from locals()
			pass
		funName = _parseResult["fName"].upper() #noflake: locals returned
		return locals()

	def flatten(self):
		"""returns funName(arg, arg...) with the flattened arguments
		separated by commas.
		"""
		return "%s(%s)"%(self.funName, ", ".join(flatten(a) for a in self.args))


class ColumnBearingNode(ADQLNode):
	"""A node type defining selectable columns.

	These are tables, subqueries, etc.  This class is the basis for the
	annotation of tables and subqueries.

	Their getFieldInfo(name)->fi method gives annotation.FieldInfos
	objects for their columns, None for unknown columns.

	These keep their fieldInfos on a change().
	"""
	fieldInfos = None
	originalTable = None

	def getFieldInfo(self, name):
		"""returns what self.fieldInfos has for name, or None if there are
		no fieldInfos (yet).
		"""
		if not self.fieldInfos:
			return None
		return self.fieldInfos.getFieldInfo(name)

	def getAllNames(self): # pragma: no cover
		"""yields all relation names mentioned in this node.
		"""
		raise TypeError("Override getAllNames for ColumnBearingNodes.")

	def change(self, **kwargs):
		changed = ADQLNode.change(self, **kwargs)
		# fieldInfos are no constructor argument, so carry them over
		changed.fieldInfos = self.fieldInfos
		return changed


############# Toplevel query language node types (for query analysis)

class TableName(ADQLNode):
	"""A table name, possibly qualified with schema and catalog.

	The parts are in the attributes cat, schema, and name; the dotted
	combination of the non-empty parts is in qName.

	TableNames compare case-insensitively on qName, both against objects
	having a qName themselves and against anything having a lower()
	method (e.g., strings).  They are false if their name is empty or None.
	"""
	type = "tableName"
	_a_cat = None
	_a_schema = None
	_a_name = None

	def __eq__(self, other):
		# first choice: compare qualified names
		if hasattr(other, "qName"):
			return self.qName.lower()==other.qName.lower()
		try:
			return self.qName.lower()==other.lower()
		except AttributeError:
			# other has no lower, so it's neither a string nor a table name;
			# thus, fall through to non-equal case
			pass
		return False

	def __ne__(self, other):
		return not self==other

	def __bool__(self):
		return bool(self.name)

	def __str__(self):
		return "TableName(%s)"%self.qName

	def _polish(self):
		"""computes qName from cat, schema, and name.
		"""
		# Implementation detail: We map tap_upload to temporary tables
		# here; therefore, we can just nil out anything called tap_upload.
		# If we need more flexibility, this probably is the place to implement
		# the mapping.
		if self.schema and self.schema.lower()=="tap_upload":
			self.schema = None

		# falsy parts (None, empty strings) do not contribute to qName
		self.qName = ".".join(flatten(n) 
			for n in (self.cat, self.schema, self.name) if n) 

	@classmethod
	def _getInitKWs(cls, _parseResult):
		# locals() is returned here, so cat, schema, and name are the
		# constructor keywords; underscore names and cls are filtered out
		# by cleanNamespace later.
		# Every other token is taken -- presumably this skips the dots
		# between the name parts; confirm against the grammar.
		_parts = _parseResult[::2]
		# pad with None on the left so the last part always is the name
		cat, schema, name = [None]*(3-len(_parts))+_parts
		return locals()

	def flatten(self):
		return self.qName

	def lower(self):
		"""returns self's qualified name in lower case.
		"""
		return self.qName.lower()

	@staticmethod
	def _normalizePart(part):
		# quoted names keep their capitalisation, everything else is
		# lowercased
		if isinstance(part, utils.QuotedName):
			return part.name
		else:
			return part.lower()

	def getNormalized(self):
		"""returns self's qualified name lowercased for regular identifiers,
		in original capitalisation otherwise.
		"""
		return ".".join(self._normalizePart(p) 
			for p in [self.cat, self.schema, self.name]
			if p is not None)


class PlainTableRef(ColumnBearingNode):
	"""A reference to a simple table.
	
	The tableName is the name this table can be referenced as from within
	SQL, originalTable is the name within the database; they are equal unless
	a correlationSpecification has been given.

	sampling, if not None, is the float taken from a tablesample
	specification.
	"""
	type = "possiblyAliasedTable"
	_a_tableName = None      # a TableName instance
	_a_originalTable = None  # a TableName instance
	_a_sampling = None

	@classmethod
	def _getInitKWs(cls, _parseResult):
		# locals() is returned, so tableName, originalTable, and (if set)
		# sampling are the constructor keywords.
		if _parseResult.get("alias"):
			tableName = TableName(name=_parseResult.get("alias"))
			originalTable = _parseResult.get("tableName")
		else:
			tableName = getChildOfType(_parseResult, "tableName")
			originalTable = tableName  #noflake: locals returned

		if _parseResult.get("tablesample"):
			# presumably the third token is the number within the
			# parentheses; confirm against the grammar.  Without a
			# tablesample, sampling is not in locals() at all.
			sampling = float(_parseResult.get("tablesample")[2])

		return locals()

	def addFieldInfos(self, context):
		self.fieldInfos = fieldinfos.TableFieldInfos.makeForNode(self, context)

	def _polish(self):
		self.qName = flatten(self.tableName)

	def flatten(self):
		"""returns the table reference as SQL, including an AS clause
		if the table is aliased and a TABLESAMPLE clause if there is
		a (non-zero) sampling.
		"""
		ot = flatten(self.originalTable)
		if ot!=self.qName:
			literal = "%s AS %s"%(ot, flatten(self.tableName))
		else:
			literal = self.qName

		if self.sampling:
			# TODO: Postgres dependency; this should be in morphpg
			literal = "%s TABLESAMPLE SYSTEM (%s)"%(literal, self.sampling)

		return literal

	def getAllNames(self):
		yield self.tableName.qName

	def getAllTables(self):
		yield self

	def makeUpId(self):
		"""returns the table's name for use in made-up identifiers; for quoted
		names, that is an underscore plus the name's ASCII letters, digits
		and underscores.
		"""
		# for suggestAName
		n = self.tableName.name
		if isinstance(n, utils.QuotedName):
			return "_"+re.sub("[^A-Za-z0-9_]", "", n.name)
		else:
			return n


class DerivedTable(ColumnBearingNode):
	"""A query standing in for a table; it is flattened as
	"(query) AS tableName".

	Field infos are not kept here but on the embedded query; fieldInfos
	and getFieldInfo just hand through to it.
	"""
	type = "derivedTable"
	_a_query = None
	_a_tableName = None

	def getFieldInfo(self, name):
		return self.query.getFieldInfo(name)

	def _get_fieldInfos(self):
		return self.query.fieldInfos

	def _set_fieldInfos(self, val):
		self.query.fieldInfos = val

	fieldInfos = property(_get_fieldInfos, _set_fieldInfos)

	@classmethod
	def _getInitKWs(cls, _parseResult):
		alias = str(_parseResult.get("alias"))
		return {
			'tableName': TableName(name=alias),
			'query': getChildOfClass(_parseResult, SelectQuery)}

	def flatten(self):
		return "(%s) AS %s"%(flatten(self.query), flatten(self.tableName))

	def getAllNames(self):
		yield self.tableName.qName

	def getAllTables(self):
		yield self

	def makeUpId(self):
		"""returns the table's name for use in made-up identifiers (this is
		for suggestAName).

		For quoted names, that is an underscore plus the name's ASCII
		letters, digits and underscores.
		"""
		tableId = self.tableName.name
		if not isinstance(tableId, utils.QuotedName):
			return tableId
		return "_"+re.sub("[^A-Za-z0-9_]", "", tableId.name)


class SetGeneratingFunction(ColumnBearingNode, TransparentMixin):
	"""a function that can stand instead of a table.

	For starters, we only do generate_series here.  Let's see where this
	leads.
	"""
	type = "setGeneratingFunction"
	_a_functionName = None
	_a_args = None
	_a_name = None # name is both the name of the column and the "table"
	               # here.  This will come from a correlationSpec where
	               # available.  It's generate_series otherwise.
	
	@classmethod
	def _getInitKWs(cls, _parseResult):
		# locals() is returned, so functionName, args, and name are the
		# constructor keywords.
		functionName = _parseResult[0]
		# TODO: We really should allow more than two arguments here
		# presumably tokens 1, 3 are the opening paren and the comma;
		# confirm against the grammar.
		args = [_parseResult[2], _parseResult[4]]
		name = _parseResult.get("alias")
		if name is None:
			# no alias given: fall back to the function's name
			name = functionName
		return locals()
	
	def _polish(self):
		self.tableName = self.name

	def getFieldInfo(self, name):
		return self.fieldInfos.getFieldInfo(name)

	def getAllTables(self):
		yield self

	def addFieldInfos(self, context):
		"""declares the single integer column (called like self.name) this
		produces.
		"""
		# TODO: Infer types from argument types
		# NOTE(review): the result is discarded, so this presumably relies
		# on the FieldInfos constructor assigning itself to self.fieldInfos;
		# confirm in fieldinfos.
		fieldinfos.FieldInfos(self, context)
		self.fieldInfos.addColumn(self.name,
			fieldinfo.FieldInfo("integer", None, None, sqlName=self.name))
	
	def getAllNames(self):
		yield self.name

	def makeUpId(self):
		return self.name


class JoinSpecification(ADQLNode, TransparentMixin):
	"""A join specification ("ON" or "USING").

	predicate is the first parsed token in upper case.  For USING,
	usingColumns has the column names (without the commas); otherwise,
	it is empty.  All parsed items are kept in children, too.
	"""
	type = "joinSpecification"
	
	_a_children = ()
	_a_predicate = None
	_a_usingColumns = ()

	@classmethod
	def _getInitKWs(cls, _parseResult):
		# locals() is returned, so predicate, children, and (for USING)
		# usingColumns are the constructor keywords.
		predicate = _parseResult[0].upper()
		if predicate=="USING":
			usingColumns = [ #noflake: locals returned
				n for n in _parseResult["columnNames"] if n!=',']
		children = list(_parseResult) #noflake: locals returned
		return locals()


class JoinOperator(ADQLNode, TransparentMixin):
	"""the complete join operator (including all LEFT, RIGHT, ",", and whatever).
	"""
	type = "joinOperator"

	def isCrossJoin(self):
		"""returns true if the operator starts with a comma or CROSS.
		"""
		firstToken = self.children[0]
		return firstToken in (',', 'CROSS')


class JoinedTable(ColumnBearingNode):
	"""A joined table.

	These aren't made directly by the parser since parsing a join into
	a binary structure is very hard using pyparsing.  Instead, there's
	the helper function makeBinaryJoinTree handling the joinedTable
	symbol that manually creates a binary tree.
	"""
	type = None
	originalTable = None
	# joins have no name of their own; this (shared) TableName has
	# no parts set
	tableName = TableName()
	qName = None

	_a_leftOperand = None
	_a_operator = None
	_a_rightOperand = None
	_a_joinSpecification = None

	@classmethod
	def _getInitKWs(cls, _parseResult):
		# locals() is returned, so the local names are the constructor
		# keywords; joinSpecification is only present when there is
		# a fourth item.
		leftOperand = _parseResult[0] #noflake: locals returned
		operator = _parseResult[1] #noflake: locals returned
		rightOperand = _parseResult[2] #noflake: locals returned
		if len(_parseResult)>3:
			joinSpecification = _parseResult[3] #noflake: locals returned
		return locals()

	def flatten(self):
		"""returns left operand, operator, right operand and join
		specification (empty if there is none) joined by blanks.
		"""
		js = ""
		if self.joinSpecification is not None:
			js = flatten(self.joinSpecification)
		return "%s %s %s %s"%(
			self.leftOperand.flatten(),
			self.operator.flatten(),
			self.rightOperand.flatten(),
			js)

	def addFieldInfos(self, context):
		self.fieldInfos = fieldinfos.TableFieldInfos.makeForNode(self, context)

	def _polish(self):
		self.joinedTables = [self.leftOperand, self.rightOperand]

	def getAllNames(self):
		"""iterates over all fully qualified table names mentioned in this
		(possibly joined) table reference.
		"""
		for t in self.joinedTables:
			yield t.tableName.qName

	def getTableForName(self, name):
		return self.fieldInfos.locateTable(name)

	def makeUpId(self):
		"""returns the made-up ids of both operands joined with an underscore.
		"""
		# for suggestAName
		return "_".join(t.makeUpId() for t in self.joinedTables)

	def getJoinType(self):
		"""returns a keyword indicating how result rows are formed in this
		join.

		This can be NATURAL (all common columns are folded into one),
		USING (check the joinSpecification what columns are folded),
		CROSS (no columns are folded).

		A common.Error is raised for cross joins having a join specification.
		"""
		if self.operator.isCrossJoin():
			if self.joinSpecification is not None:
				raise common.Error("Cannot use cross join with a join predicate.")
			return "CROSS"
		if self.joinSpecification is not None:
			if self.joinSpecification.predicate=="USING":
				return "USING"
			if self.joinSpecification.predicate=="ON":
				# ON does not fold any columns, so rows are formed as in
				# a cross join
				return "CROSS"
		return "NATURAL"

	def getAllTables(self):
		"""returns all actual tables and subqueries (not sub-joins) 
		within this join.
		"""
		res = []
		def collect(node):
			# anything having a leftOperand is taken to be a join that
			# needs to be descended into
			if hasattr(node.leftOperand, "leftOperand"):
				collect(node.leftOperand)
			else:
				res.append(node.leftOperand)
			if hasattr(node.rightOperand, "leftOperand"):
				collect(node.rightOperand)
			else:
				res.append(node.rightOperand)
		collect(self)
		return res


class SubJoin(ADQLNode):
	"""A sub join (JoinedTable surrounded by parens).

	The parse result is just the parens and a joinedTable; we need to
	camouflage as that joinedTable.  Hence, all attributes not found on
	the SubJoin itself are looked up on the joinedTable.
	"""
	type = "subJoin"
	_a_joinedTable = None

	@classmethod
	def _getInitKWs(cls, _parseResult):
		# _parseResult[0] and [2] are the parentheses
		return {"joinedTable": _parseResult[1]}

	def flatten(self):
		"""returns the flattened joinedTable in parentheses.
		"""
		return "("+self.joinedTable.flatten()+")"

	def __getattr__(self, attName):
		# __getattr__ is only consulted when normal lookup has failed.  If
		# that happens for joinedTable itself (e.g., on an instance that
		# has not been initialised, as during copying or unpickling),
		# delegating would call this method again and again until the
		# recursion limit is hit.  Fail cleanly instead.
		if attName=="joinedTable":
			raise AttributeError(attName)
		return getattr(self.joinedTable, attName)


@symbolAction("joinedTable")
def makeBinaryJoinTree(children):
	"""takes the parse result for a join and generates a binary tree of
	JoinedTable nodes from it.

	The flat parse result is reduced from the left: the leading
	operand-operator-operand group (with the following JoinSpecification,
	if there is one) is replaced by a JoinedTable until only one node is
	left.

	It's much easier to do this in a separate step than to force a
	non-left-recursive grammar to spit out the right parse tree in the
	first place.
	"""
	pending = list(children)
	while len(pending)>1:
		hasJoinSpec = (len(pending)>3
			and isinstance(pending[3], JoinSpecification))
		groupLen = 4 if hasJoinSpec else 3
		pending[:groupLen] = [
			JoinedTable.fromParseResult(pending[:groupLen])]
	return pending[0]


class TransparentNode(ADQLNode, TransparentMixin):
	"""An abstract base for Nodes that don't parse out anything.

	Concrete classes just need to set type; what was parsed ends up in
	children through TransparentMixin and is serialised by
	ADQLNode.flatten.
	"""
	type = None


class WhereClause(TransparentNode):
	"""The node for the whereClause symbol; nothing is parsed out.
	"""
	type = "whereClause"

class Grouping(TransparentNode):
	"""The node for the groupByClause symbol; nothing is parsed out.
	"""
	type = "groupByClause"

class Having(TransparentNode):
	"""The node for the havingClause symbol; nothing is parsed out.
	"""
	type = "havingClause"

class OrderBy(TransparentNode):
	"""The node for the sortSpecification symbol; nothing is parsed out.
	"""
	type = "sortSpecification"

class OffsetSpec(ADQLNode):
	"""An offset specification.

	The offset attribute has the second parsed token as an integer.
	"""
	type = "offsetSpec"

	_a_offset = None

	@classmethod
	def _getInitKWs(cls, _parseResult):
		return {"offset": int(_parseResult[1])}

	def flatten(self):
		"""returns an OFFSET clause, or an empty string if offset is None.
		"""
		if self.offset is None:
			return ""
		# for morphpg, we never get here because _PGQS deals with the
		# offset (and sets self.offset to None).
		return "OFFSET %d"%self.offset


class SelectNoParens(ColumnBearingNode): 
	"""A plain SELECT statement with its clauses parsed out.

	The clauses are available in the attributes setQuantifier, setLimit,
	selectList, fromClause, whereClause, groupby, having, and orderBy;
	clauses not given in the query are None.
	"""
	type = "selectNoParens"

	_a_setQuantifier = None
	_a_setLimit = None
	_a_selectList = None
	_a_fromClause = None
	_a_whereClause = None
	_a_groupby = None
	_a_having = None
	_a_orderBy = None

	def _polish(self):
		# a weak proxy is used so the node does not keep itself alive
		# through a reference cycle
		self.query = weakref.proxy(self)

	@classmethod
	def _getInitKWs(cls, _parseResult):
		res = {}
		# these are named results; missing ones come out as None
		for name in ["setQuantifier", "setLimit", "fromClause",
				"whereClause", "groupby", "having", "orderBy"]:
			res[name] = _parseResult.get(name)
		# the select list is located by its node type
		res["selectList"] = getChildOfType(_parseResult, "selectList")
		return res

	def _iterSelectList(self):
		"""iterates over the fields selected, expanding qualified stars
		into the fields of the table they refer to.

		A common.Error is raised for anything that is neither a DerivedColumn
		nor a QualifiedStar.
		"""
		for f in self.selectList.selectFields:
			if isinstance(f, DerivedColumn):
				yield f
			elif isinstance(f, QualifiedStar):
				yield from self.fromClause.getFieldsForTable(f.sourceTable)
			else:
				raise common.Error("Unexpected %s in select list"%getType(f))

	def getSelectFields(self):
		"""returns the fields selected: all fields of the from clause if
		the select list's allFieldsQuery is set, and what _iterSelectList
		yields otherwise.
		"""
		if self.selectList.allFieldsQuery:
			return self.fromClause.getAllFields()
		else:
			return self._iterSelectList()

	def addFieldInfos(self, context):
		self.fieldInfos = fieldinfos.QueryFieldInfos.makeForNode(self, context)

	def resolveField(self, fieldName):
		return self.fromClause.resolveField(fieldName)

	def getAllNames(self):
		return self.fromClause.getAllNames()

	def flatten(self):
		"""returns the statement as SQL; clauses that are None are left out.
		"""
		return flattenKWs(self, ("SELECT", None),
			("", "setQuantifier"),
			("TOP", "setLimit"),
			("", "selectList"),
			("", "fromClause"),
			("", "whereClause"),
			("", "groupby"),
			("", "having"),
			("", "orderBy"))

	def suggestAName(self):
		"""returns a string that may or may not be a nice name for a table
		resulting from this query.

		Whatever is being returned here, it's a regular SQL identifier.

		This never raises ordinary exceptions; if anything goes wrong,
		a traceback is printed and a fixed fallback name is returned.
		"""
		try:
			sources = [tableRef.makeUpId()
				for tableRef in self.fromClause.getAllTables()]
			if sources:
				return "_".join(sources)
			else:
				return "query_result"
		except Exception:
			# should not happen, but we don't want to bomb from here.
			# We do not use a bare except, though, so KeyboardInterrupt
			# and SystemExit still get through.
			import traceback;traceback.print_exc()
			return "weird_table_report_this"

	def getContributingNames(self):
		"""returns a set of table names mentioned below this node.
		"""
		names = set()
		for name, val in self.iterTree():
			if isinstance(val, TableName):
				names.add(val.flatten())
		return names


class SetOperationNode(ColumnBearingNode, TransparentMixin):
	"""A node containing a set expression.

	This is UNION, INTERSECT, or EXCEPT.  In all cases, we need to check
	all contributing sub-expressions have compatible degree.  For now,
	in violation of SQL1992, we require identical names on all operands --
	sql92 in 7.10 says 

	  [if column names are unequal], the <column name> of the i-th column of TR
	  is implementation-dependent and different from the <column name> of any
	  column, other than itself, of any table referenced by any <table reference>
	  contained in the SQL-statement.

	Yikes.

	These collapse to keep things simple in the typical case.
	"""
	def _assertFieldInfosCompatible(self):
		"""errors out if operands have incompatible signatures.

		For convenience, if all are compatible, the common signature (ie,
		fieldInfos) is returned.  That is None if no child has fieldInfos.
		"""
		signature = None
		for child in self.children:
			# WithQueries are skipped -- they're not part of set operations.
			if not hasattr(child, "fieldInfos") or isinstance(child, WithQuery):
				continue

			if signature is None:
				signature = child.fieldInfos
			else:
				signature.assertIsCompatible(child.fieldInfos)
		return signature

	def addFieldInfos(self, context):
		self.fieldInfos = self._assertFieldInfosCompatible()

	def getAllNames(self):
		"""yields the names all children contribute.

		Children having getAllNames have that used, others are asked to
		suggestAName.
		"""
		for child in self.children:
			if hasattr(child, "getAllNames"):
				yield from child.getAllNames()
			elif hasattr(child, "suggestAName"):
				yield child.suggestAName()
			else:  # pragma: no cover
				assert False, "no name"

	def getSelectClauses(self):
		"""yields everything having a setLimit attribute among the children
		and below those children that have a getSelectClauses method
		themselves.
		"""
		for child in self.children:
			yield from getattr(child, "getSelectClauses", lambda: [])()
			if hasattr(child, "setLimit"):
				yield child


class SetTerm(SetOperationNode):
	"""The node for the setTerm symbol.

	This is marked collapsible, i.e., it is not constructed at all when
	there is just a single node child (see autocollapse).
	"""
	type = "setTerm"
	collapsible = True


class WithQuery(SetOperationNode):
	"""A query from a with clause.

	This essentially does everything a table does.

	The first child is available as name, the first child having
	a setLimit attribute as select.
	"""
	type = "withQuery"

	def _polish(self):
		self.name = self.children[0]

		# select should be a selectQuery, but we want to be sure
		# we don't fail when morphers replace the main query node
		# (as the pg morpher does); hence, we accept anything having
		# a setLimit.
		selectLike = next(
			(child for child in self.children if hasattr(child, "setLimit")),
			None)
		if selectLike is None:  # pragma: no cover
			raise NotImplementedError("WithQuery without select?")
		self.select = selectLike


class SelectQuery(SetOperationNode):
	"""A complete query excluding CTEs.

	The main ugly thing here is the set limit; unless it is passed in
	explicitly, it is set to the maximum of the limits of the select
	clauses below this node; if none of them has a limit, it remains None.

	An offset given in an OffsetSpec child is moved to the offset
	attribute here (the OffsetSpec's offset is set to None).

	Other than that, we hand through attribute access to our first child.

	If there is a set expression on the top level, this will have a complex
	structure; the first-child thing still ought to work since after
	annotation we'll have errored out if set operator arguments aren't
	reasonably congruent.
	"""
	type = "selectQuery"

	_a_setLimit = None
	_a_offset= None

	def getSelectClauses(self):
		"""yields everything having a setLimit attribute among the children
		and below those children that have a getSelectClauses method
		themselves.
		"""
		# NOTE: this does the same as SetOperationNode.getSelectClauses
		for child in self.children:
			for sc in getattr(child, "getSelectClauses", lambda: [])():
				yield sc
			if hasattr(child, "setLimit"):
				yield child

	def _polish(self):
		if self.setLimit is None:
			limits = [selectClause.setLimit
				for selectClause in self.getSelectClauses()]

			# drop missing limits and make sure we compare numbers
			limits = [int(s) for s in limits if s]
			if limits:
				self.setLimit = max(limits)
	
		for child in self.children:
			if isinstance(child, OffsetSpec) and child.offset is not None:
				# pull the offset up to here; with offset None, the
				# OffsetSpec flattens to an empty string
				self.offset = child.offset
				child.offset = None

	def __getattr__(self, attrName):
		# __getattr__ is only consulted when normal lookup has failed.  If
		# that happens for children itself (e.g., on an instance that has
		# not been initialised, as during copying or unpickling),
		# delegating would call this method again and again until the
		# recursion limit is hit.  Fail cleanly instead.
		if attrName=="children":
			raise AttributeError(attrName)
		return getattr(self.children[0], attrName)


class QuerySpecification(TransparentNode):
	"""The toplevel query objects including CTEs.

	Apart from any CTEs, that's just a SelectQuery (which is always the last
	child), and we hand through essentially all attribute access to it.

	The CTEs (i.e., the WithQuery children) are available in withTables.
	"""
	type = "querySpecification"

	def _polish(self):
		self.withTables = []
		for child in self.children:
			if isinstance(child, WithQuery):
				self.withTables.append(child)

	# setLimit needs a property since __getattr__ only covers reading;
	# writes have to end up on the last child, too.
	def _setSetLimit(self, val):
		self.children[-1].setLimit = val
	def _getSetLimit(self):
		return self.children[-1].setLimit
	setLimit = property(_getSetLimit, _setSetLimit)


	def __getattr__(self, attrName):
		# __getattr__ is only consulted when normal lookup has failed.  If
		# that happens for children itself (e.g., on an instance that has
		# not been initialised, as during copying or unpickling),
		# delegating would call this method again and again until the
		# recursion limit is hit.  Fail cleanly instead.
		if attrName=="children":
			raise AttributeError(attrName)
		return getattr(self.children[-1], attrName)


class ColumnReference(FieldInfoedNode):
	"""A reference to a column, possibly qualified by a table name.

	name is the column name proper, refName the qualifying TableName (or
	None for unqualified references); colName has the dotted combination
	of the two.
	"""
# The binding for normal column references is missing here because
# these are handled by the dispatchColumnReference function below.
	type = "columnReference"
	bindings = ["geometryValue"]
	_a_refName = None  # if given, a TableName instance
	_a_name = None

	def _polish(self):
		# normalise empty qualifiers to None
		if not self.refName:
			self.refName = None
		nameParts = (self.refName, self.name)
		self.colName = ".".join(flatten(p) for p in nameParts if p)

	@classmethod
	def _getInitKWs(cls, _parseResult):
		parts = [tok for tok in _parseResult if tok!="."]
		# pad with None on the left such that the column name is last and
		# catalog, schema, and table are in the first three slots
		padded = [None]*(4-len(parts))+parts
		qualifier = TableName(
			cat=padded[0],
			schema=padded[1],
			name=padded[2])
		return {
			"name": padded[-1],
			"refName": qualifier if qualifier else None}

	def addFieldInfo(self, context):
		self.fieldInfo = context.getFieldInfo(self.name, self.refName)

		userData = self.fieldInfo.userData
		srcColumn = userData[0] if userData else None

		# A source column with an originalName is a column from a VOTable
		# upload we have renamed to avoid clashes with postgres-reserved
		# column names.  Update the name so the "bad" name doesn't appear
		# in the serialised query.
		if (hasattr(srcColumn, "originalName")
				and not isinstance(self.name, utils.QuotedName)):
			self.name = srcColumn.name
			self._polish()

	def flatten(self):
		"""returns the (possibly qualified) column name; the fieldInfo's
		sqlName is preferred over the parsed name if it is there.
		"""
		if not (self.fieldInfo and self.fieldInfo.sqlName):
			return self.colName
		sqlParts = (self.refName, self.fieldInfo.sqlName)
		return ".".join(flatten(p) for p in sqlParts if p)

	def _treeRepr(self):
		return (self.type, self.name)


class ColumnReferenceByUCD(ColumnReference):
	"""A column reference given through a UCD pattern.

	These are tricky: when parsing, we don't know where the columns
	might come from.  Hence, name and refName start out as utils.Undefined
	and are only filled in by addFieldInfo, which takes the first field
	of the enclosing from clause with a UCD matching ucdWanted
	(shell-style patterns as understood by fnmatch).
	"""
	type = "columnReferenceByUCD"
	bindings = ["columnReferenceByUCD"]
	_a_ucdWanted = None

	@classmethod
	def _getInitKWs(cls, _parseResult):
		return {
			"ucdWanted": _parseResult[2].value,
			"name": utils.Undefined,
			"refName": utils.Undefined}

	def addFieldInfo(self, context):
		"""resolves the UCD pattern against the fields of the closest
		ancestor having a fromClause.

		This raises a common.Error if there is no such ancestor and a
		utils.NotFoundError if no field matches.
		"""
		# I've not really thought about where these might turn up.
		# Hence, I just heuristically walk up the ancestor stack
		# until I find a from clause.  TODO: think about if that's valid.
		for ancestor in reversed(context.ancestors):
			if hasattr(ancestor, "fromClause"):
				break
		else:
			raise common.Error("UCDCOL outside of query specification with FROM")

		for field in ancestor.fromClause.getAllFields():
			# field infos may come without a UCD (None); fnmatch would
			# raise a TypeError for these, so match them as empty strings.
			if fnmatch.fnmatch(field.fieldInfo.ucd or "", self.ucdWanted):
				self.fieldInfo = field.fieldInfo
				self.name = self.colName = field.name
				self.refName = None
				break
		else:
			raise utils.NotFoundError(self.ucdWanted, "column matching ucd",
				"from clause")


@symbolAction("columnReference")
def dispatchColumnReference(parseResult):
	"""returns a node for a columnReference parse result.

	A single, ready-made ColumnReferenceByUCD is passed through unchanged;
	everything else is turned into a ColumnReference.

	This dispatch is there so ColumnReference is not bothered
	by the by-UCD hack in the normal case.  It should go if we
	punt UCDCOL, and the columnReference binding should then go
	back to ColumnReference.
	"""
	isByUCD = (len(parseResult)==1
		and isinstance(parseResult[0], ColumnReferenceByUCD))
	if isByUCD:
		return parseResult[0]
	return ColumnReference.fromParseResult(parseResult)
	

class FromClause(ADQLNode):
	"""The FROM clause of a query.

	tables is the list of table references as parsed; tableReference is
	a single node standing in for all of them (several comma-separated
	tables are folded into nested JoinedTable nodes).
	"""
	type = "fromClause"
	_a_tableReference = ()
	_a_tables = ()

	@classmethod
	def _getInitKWs(cls, parseResult):
		tables = list(parseResult)

		def crossJoin(left, right):
			# comma-separated tables are cross joins; to save repeating
			# the logic, we build an artificial join from them.
			return JoinedTable(
				leftOperand=left,
				operator=JoinOperator(children=[","]),
				rightOperand=right)

		# for a single table, reduce simply returns that table
		return {
			"tableReference": reduce(crossJoin, tables),
			"tables": tables}

	def flatten(self):
		flatTables = [table.flatten() for table in self.tables]
		return "FROM %s"%(' , '.join(flatTables))

	def getAllNames(self):
		"""returns the names of all tables taking part in this from clause.
		"""
		return self.tableReference.getAllNames()

	def resolveField(self, name):
		"""returns what tableReference's getFieldInfo returns for name.
		"""
		return self.tableReference.getFieldInfo(name)

	def _makeColumnReference(self, sourceTableName, colPair):
		"""returns a ColumnReference object for a name, colInfo pair from a
		table's fieldInfos.

		sourceTableName becomes the refName of the new node.
		"""
		colName, colInfo = colPair[0], colPair[1]
		reference = ColumnReference(name=colName, refName=sourceTableName)
		reference.fieldInfo = colInfo
		return reference

	def getAllFields(self):
		"""returns all fields from all tables in this FROM.

		These will be qualified names, except for the columns reported by
		common.computeCommonColumns: these are returned only once and
		without a table name.

		This will only work for annotated tables.
		"""
		fields = []
		shared = common.computeCommonColumns(self.tableReference)
		sharedSeen = set()

		for table in self.getAllTables():
			for label, fi in table.fieldInfos.seq:
				if label not in shared:
					fields.append(self._makeColumnReference(
						table.tableName, (label, fi)))
				elif label not in sharedSeen:
					fields.append(self._makeColumnReference(
						None, (label, fi)))
					sharedSeen.add(label)

		return fields

	def getFieldsForTable(self, srcTableName):
		"""returns the fields in srcTable.

		srcTableName is a TableName.
		"""
		table = self.tableReference
		if not fieldinfos.tableNamesMatch(table, srcTableName):
			table = table.fieldInfos.locateTable(srcTableName)

		return [self._makeColumnReference(table.tableName, colPair)
			for colPair in table.fieldInfos.seq]

	def getAllTables(self):
		"""returns what tableReference's getAllTables returns.
		"""
		return self.tableReference.getAllTables()


class DerivedColumn(FieldInfoedNode):
	"""A column within a select list.

	expr is the expression selected, alias the name given with AS (or
	None).  Derived columns start out tainted; the taint is only removed
	when expr is a plain column reference.
	"""
	type = "derivedColumn"
	_a_expr = None
	_a_alias = None
	_a_tainted = True

	def _polish(self):
		isPlainColumn = getType(self.expr)=="columnReference"
		if isPlainColumn:
			self.tainted = False

	@property
	def name(self):
		"""the alias if there is one, else the name of expr if it has one,
		else a funny word made from our id.
		"""
		# todo: be a bit more careful here to come up with meaningful
		# names (users don't like the funny names).  Also: do
		# we make sure somewhere we're getting unique names?
		if self.alias is not None:
			return self.alias
		if hasattr(self.expr, "name"):
			return self.expr.name
		return utils.intToFunnyWord(id(self))

	@classmethod
	def _getInitKWs(cls, _parseResult):
		# the local names are what ends up in the returned mapping
		expr = _parseResult["expr"] #noflake: locals returned
		alias = _parseResult.get("alias") #noflake: locals returned
		return locals()

	def flatten(self):
		return flattenKWs(self,
			("", "expr"),
			("AS", "alias"))

	def _treeRepr(self):
		return (self.type, self.name)


class QualifiedStar(ADQLNode):
	"""A table-qualified star in a select list.
	"""
	type = "qualifiedStar"
	_a_sourceTable = None  # A TableName for the column source

	@classmethod
	def _getInitKWs(cls, _parseResult):
		# every second token, without the last two: kills dots and star
		qualifiers = _parseResult[:-2:2]
		# left-pad to catalog, schema, and table name
		padding = [None]*(3-len(qualifiers))
		cat, schema, name = padding+qualifiers
		return {"sourceTable": TableName(cat=cat, schema=schema, name=name)}

	def flatten(self):
		tableName = flatten(self.sourceTable)
		return "%s.*"%tableName


class SelectList(ADQLNode):
	"""The select list of a query.

	For star selections, allFieldsQuery holds what the grammar matched as
	starSel and selectFields starts out as None.  Otherwise, selectFields
	is the list of the items selected.
	"""
	type = "selectList"
	_a_selectFields = ()
	_a_allFieldsQuery = False

	@classmethod
	def _getInitKWs(cls, _parseResult):
		allFieldsQuery = _parseResult.get("starSel", False)
		# For star selections, this will be filled in by query, as
		# we don't have the from clause here.
		selectFields = None  #noflake: locals returned
		if not allFieldsQuery:
			selectFields = list(_parseResult.get("fieldSel")) #noflake: locals returned
		return locals()

	def flatten(self):
		if not self.allFieldsQuery:
			return ", ".join(flatten(sf) for sf in self.selectFields)
		return self.allFieldsQuery


######## all expression parts we need to consider when inferring units and such

class Comparison(ADQLNode):
	"""A comparison of two operands, op1 and op2, with the operator opr.

	This is required when we want to morph the braindead contains(...)=1
	into a true boolean function call.
	"""
	type = "comparisonPredicate"
	_a_op1 = None
	_a_opr = None
	_a_op2 = None

	@classmethod
	def _getInitKWs(cls, _parseResult):
		# the parse result must consist of exactly three items
		op1, opr, op2 = _parseResult #noflake: locals returned
		return locals()

	def flatten(self):
		left, right = flatten(self.op1), flatten(self.op2)
		return "%s %s %s"%(left, self.opr, right)


def _guessNumericType(literal):
	"""returns a guess for a type suitable to hold a numeric value given in
	literal.

	I don't want to pull through the literal symbol that matched
	from grammar in all cases.  Thus, at times I simply guess the type 
	(and yes, I'm aware that -32768 still is a smallint).
	"""
	try:
		val = int(literal)
		if abs(val)<32767:
			type = "smallint"
		elif abs(val)<2147483648:
			type = "integer"
		else:
			type = "bigint"
	except ValueError:
		type = "double precision"
	return type


class Factor(FieldInfoedNode, TransparentMixin):
	"""is a factor within an SQL expression.

	factors may have only one (direct) child with a field info and copy
	this.  They can have no child with a field info, in which case they're
	simply numeric, with a type guessed from the joined children.
	"""
	type = "factor"
	collapsible = True

	def addFieldInfo(self, context):
		infoChildren = self._getInfoChildren()
		if not infoChildren:
			# nothing annotated below us: this must be a literal
			literal = "".join(self.children)
			self.fieldInfo = fieldinfo.FieldInfo(
				_guessNumericType(literal), "", "")
			return

		assert len(infoChildren)==1
		self.fieldInfo = infoChildren[0].fieldInfo


class ArrayReference(FieldInfoedNode, TransparentMixin):
	"""A subscripted array.

	The field info is derived from the one of the first child: the
	last bracketed part is removed from the type, unit, ucd, and
	userData are copied, and the result is always tainted.
	"""
	type = "arrayReference"
	collapsible = False

	def addFieldInfo(self, context):
		"""computes our field info from the first child's.

		This raises a common.Error if the first child has no type or
		if that type does not end with an array specification.
		"""
		infoChild = self.children[0]
		childInfo = infoChild.fieldInfo

		if childInfo.type is None:
			raise common.Error("Cannot subscript a typeless thing in %s"%(
				self.flatten()))

		# this needs to be a raw string, as \[ is not a valid escape
		# sequence in normal string literals.
		lastSubscript = re.search(r"\[[0-9]*\]$", childInfo.type)
		if lastSubscript is None:
			raise common.Error("Cannot subscript a non-array in %s"%(
				self.flatten()))

		self.fieldInfo = fieldinfo.FieldInfo(
			childInfo.type[:lastSubscript.start()],
			childInfo.unit,
			childInfo.ucd,
			childInfo.userData,
			tainted=True # array might actually have semantics
			)


class CombiningFINode(FieldInfoedNode):
	"""A base for nodes computing their field info from several children.

	With exactly one child having a field info, that info is taken over.
	With more than one, the _combineFieldInfos method (to be provided by
	deriving classes) is asked.  Without any, a single child is taken
	as a numeric literal.
	"""
	def addFieldInfo(self, context):
		infoChildren = self._getInfoChildren()

		if infoChildren:
			if len(infoChildren)==1:
				self.fieldInfo = infoChildren[0].fieldInfo
			else:
				self.fieldInfo = self._combineFieldInfos()
			return

		if len(self.children)!=1:
			raise common.Error("Oops -- did not expect '%s' when annotating %s"%(
				"".join(self.children), self))

		# probably a naked numeric literal in the grammar, e.g.,
		# in mathFunction
		self.fieldInfo = fieldinfo.FieldInfo(
			_guessNumericType(self.children[0]), "", "")


class Term(CombiningFINode, TransparentMixin):
	"""A sequence of operands combined by multiplication or division.
	"""
	type = "term"
	collapsible = True

	def _combineFieldInfos(self):
		# children alternate between operands and operators; we fold
		# the field infos of the operands from left to right.
		parts = list(self.children)
		combined = parts[0].fieldInfo
		for oprIndex in range(1, len(parts), 2):
			combined = fieldinfo.FieldInfo.fromMulExpression(
				parts[oprIndex], combined, parts[oprIndex+1].fieldInfo)
		return combined


class NumericValueExpression(CombiningFINode, TransparentMixin):
	"""A sequence of operands combined by addition or subtraction.
	"""
	type = "numericValueExpression"
	collapsible = True

	def _combineFieldInfos(self):
		# children alternate between operands and operators; we fold
		# the field infos of the operands from left to right.
		parts = list(self.children)
		combined = parts[0].fieldInfo
		for oprIndex in range(1, len(parts), 2):
			combined = fieldinfo.FieldInfo.fromAddExpression(
				parts[oprIndex], combined, parts[oprIndex+1].fieldInfo)
		return combined


class StringValueExpression(FieldInfoedNode, TransparentMixin):
	"""A string concatenation.

	The resulting type is text unless one of the children with a
	field info is of type unicode; from the first such child on, the
	type of the result is forced to unicode.
	"""
	type = "stringValueExpression"
	collapsible = True

	def addFieldInfo(self, context):
		infoChildren = self._getInfoChildren()
		if not infoChildren:
			self.fieldInfo = fieldinfo.FieldInfo(
				"text", "", "")
			return

		# This is concatenation; we treat it as if we were adding numbers
		combined = infoChildren[0].fieldInfo
		baseType = "unicode" if combined.type=="unicode" else "text"
		for child in infoChildren[1:]:
			if child.fieldInfo.type=="unicode":
				baseType = "unicode"
			combined = fieldinfo.FieldInfo.fromAddExpression(
				"+", combined, child.fieldInfo, forceType=baseType)
		self.fieldInfo = combined


class GenericValueExpression(CombiningFINode, TransparentMixin):
	"""A container for value expressions that we don't want to look at
	closer.

	It is returned by the makeValueExpression factory below to collect
	stray children.
	"""
	type = "valueExpression"
	collapsible = True

	def _combineFieldInfos(self):
		# we don't really know what these children are.  Let's just give up
		# unless all child fieldInfos are more or less equal (which of course
		# is a wild guess).
		infoChildren = self._getInfoChildren()
		childUnits = {child.fieldInfo.unit for child in infoChildren}
		childUCDs = {child.fieldInfo.ucd for child in infoChildren}

		if len(childUnits)!=1 or len(childUCDs)!=1:
			# if all else fails: let's hope someone can make a string from it
			return fieldinfo.FieldInfo("text", "", "")

		# units and UCDs agree: let's taint the first info and be done with it
		return infoChildren[0].fieldInfo.change(tainted=True)


@symbolAction("valueExpression")
def makeValueExpression(children):
	"""returns a node for a valueExpression.

	A single child is handed through as-is; anything else is wrapped
	into a GenericValueExpression.
	"""
	if len(children)==1:
		return children[0]

	res = GenericValueExpression.fromParseResult(children)
	res.type = "valueExpression"
	return res


class SetFunction(TransparentMixin, FieldInfoedNode):
	"""An aggregate function.

	These typically amend the ucd by a word from the stat family and copy
	over the unit.  There are exceptions, however, see funcDefs.

	funcDefs maps upper-cased function names to triples of a UCD
	template, a unit, and a type.  A None template means that the
	UCD of the argument is used unchanged; a None unit or type means
	that unit or type of the argument are kept.
	"""
	type = "setFunctionSpecification"

	funcDefs = {
		'AVG': ('%s;stat.mean', None, "double precision"),
		'MAX': ('%s;stat.max', None, None),
		'MIN': ('%s;stat.min', None, None),
		'SUM': (None, None, None),
		'COUNT': ('meta.number;%s', '', "integer"),}

	def addFieldInfo(self, context):
		funcName = self.children[0].upper()
		ucdPref, newUnit, newType = self.funcDefs[funcName]

		# try to find out about our child; without an annotated one,
		# we pretend it is an unadorned double.
		infoChildren = self._getInfoChildren()
		if infoChildren:
			assert len(infoChildren)==1
			fi = infoChildren[0].fieldInfo
		else:
			fi = fieldinfo.FieldInfo("double precision", "", "")

		if ucdPref is None:
			# ucd of a sum is the ucd of the summands?
			ucd = fi.ucd
		elif fi.ucd:
			ucd = ucdPref%(fi.ucd)
		elif funcName=="COUNT":
			# no UCD given, but a count still is a meta.number
			ucd = "meta.number"
		else:
			# no UCD given, and we don't know any better
			ucd = None

		# most of these keep the unit and the type of what they're
		# working on
		unit = fi.unit if newUnit is None else newUnit
		resultType = fi.type if newType is None else newType

		self.fieldInfo = fieldinfo.FieldInfo(
			resultType, unit=unit, ucd=ucd,
			userData=fi.userData, tainted=fi.tainted)


class NumericValueFunction(FunctionNode):
	"""A numeric function.

	This is really a mixed bag.  We work through handlers here.  See
	funcDefs, which maps function names to triples of an overriding unit,
	an overriding UCD (both are ignored when empty or None), and the
	name of a handler (or None).  A handler "foo" is a method _handle_foo
	taking the list of children with field infos and returning a pair of
	unit and UCD.

	Unknown functions result in dimlesses.  The resulting field infos
	are always double precision and tainted.
	"""
	type = "numericValueFunction"
	collapsible = True  # if it's a real function call, it has at least
		# a name, parens and an argument and thus won't be collapsed.

	funcDefs = {
		"ACOS": ('rad', '', None),
		"ASIN": ('rad', '', None),
		"ATAN": ('rad', '', None),
		"ATAN2": ('rad', '', None),
		"PI": ('', '', None),
		"RAND": ('', '', None),
		"EXP": ('', '', None),
		"LOG": ('', '', None),
		"LOG10": ('', '', None),
		"SQRT": ('', '', None),
		"SQUARE": ('', '', None),
		"POWER": ('', '', None),
		"ABS": (None, None, "keepMeta"),
		"CEILING": (None, None, "keepMeta"),
		"FLOOR": (None, None, "keepMeta"),
		"ROUND": (None, None, "keepMeta"),
		"TRUNCATE": (None, None, "keepMeta"),
		"DEGREES": ('deg', None, "keepMeta"),
		"RADIANS": ('rad', None, "keepMeta"),
# bitwise operators: hopeless
	}

	def _handle_keepMeta(self, infoChildren):
		"""returns unit and UCD of the first child with a field info.

		If no child has a field info (e.g., when the argument is a naked
		literal), there is no metadata to keep, and unit and UCD are empty.
		"""
		if not infoChildren:
			return '', ''
		fi = infoChildren[0].fieldInfo
		return fi.unit, fi.ucd

	def addFieldInfo(self, context):
		infoChildren = self._getInfoChildren()
		unit, ucd = '', ''
		overrideUnit, overrideUCD, handlerName = self.funcDefs.get(
			self.funName, ('', '', None))
		if handlerName:
			unit, ucd = getattr(self, "_handle_"+handlerName)(infoChildren)
		# overrides from funcDefs win over what the handler came up with
		if overrideUnit:
			unit = overrideUnit
		if overrideUCD:
			ucd = overrideUCD
		self.fieldInfo = fieldinfo.FieldInfo("double precision",
			unit, ucd, *collectUserData(infoChildren))
		self.fieldInfo.tainted = True


class StringValueFunction(FunctionNode):
	"""A function returning a string.

	The field info is a text without unit or UCD; it carries the user
	data collected from the children that have field infos.
	"""
	type = "stringValueFunction"

	def addFieldInfo(self, context):
		collected = collectUserData(self._getInfoChildren())
		self.fieldInfo = fieldinfo.FieldInfo("text", "", "",
			userData=collected[0])


class TimestampFunction(FunctionNode):
	"""A function returning a timestamp.

	UCD and STC are taken from the first child with a field info (they
	are None when there is no such child).  The user data is collected
	from all children with field infos.
	"""
	type = "timestampFunction"

	def addFieldInfo(self, context):
		subordinates = self._getInfoChildren()
		if subordinates:
			firstInfo = subordinates[0].fieldInfo
			ucd, stcSystem = firstInfo.ucd, firstInfo.stc
		else:
			ucd, stcSystem = None, None
		# as for the other function nodes, userData is what the children
		# have in their field infos rather than the child nodes themselves.
		self.fieldInfo = fieldinfo.FieldInfo("timestamp", "",
			ucd=ucd, stc=stcSystem,
			userData=collectUserData(subordinates)[0])


class InUnitFunction(FieldInfoedNode):
	"""A unit conversion of an expression (in_unit).

	expr is the expression to convert, unit the target unit.  The
	conversionFactor only becomes available in addFieldInfo; hence,
	these nodes can only be flattened after annotation.
	"""
	type = "inUnitFunction"
	_a_expr = None
	_a_unit = None

	conversionFactor = None

	@classmethod
	def _getInitKWs(cls, _parseResult):
		return {
			'expr': _parseResult[2],
			'unit': _parseResult[4].value,
		}

	def addFieldInfo(self, context):
		"""computes the conversion factor and takes over the field info
		of expr with the unit changed.

		Incompatible or bad units are reported as common.Errors.
		"""
		try:
			from gavo.base import computeConversionFactor, IncompatibleUnits, BadUnit
		except ImportError:  # pragma: no cover
			raise utils.ReportableError("in_unit only available with gavo.base"
				" installed")

		sourceInfo = self.expr.fieldInfo
		try:
			self.conversionFactor = computeConversionFactor(
				sourceInfo.unit, self.unit)
			self.fieldInfo = sourceInfo.change(unit=self.unit)
		except IncompatibleUnits as exc:
			raise common.Error("in_unit error: %s"%exc)
		except BadUnit as exc:
			raise common.Error("Bad unit passed to in_unit: %s"%exc)

	def flatten(self):
		if self.conversionFactor is None: # pragma: no cover
			raise common.Error("in_unit can only be flattened in annotated"
				" trees")

		# anything but plain column references is parenthesised before
		# the factor is applied
		exprPat = "%s" if isinstance(self.expr, ColumnReference) else "(%s)"
		flatExpr = exprPat%flatten(self.expr)
		return "(%s * %.16g)"%(flatExpr, self.conversionFactor)

	def change(self, **kwargs):
		# conversionFactor is a plain attribute, so we carry it over
		# to the changed copy manually.
		changed = FieldInfoedNode.change(self, **kwargs)
		changed.conversionFactor = self.conversionFactor
		return changed


class CharacterStringLiteral(FieldInfoedNode):
	"""according to the current grammar, these are always sequences of
	quoted strings.

	value is the concatenation of the parsed pieces, each with its first
	and last character (the quotes) removed.
	"""
	type = "characterStringLiteral"
	bindings = ["characterStringLiteral", "generalLiteral"]

	_a_value = None

	@classmethod
	def _getInitKWs(cls, _parseResult):
		# strip the first and last character from each piece and join
		# what is left
		value = "".join(_c[1:-1] for _c in _parseResult) #noflake: locals returned
		return locals()

	def flatten(self):
		# put back a single pair of quotes around the joined value
		return "'%s'"%self.value

	def addFieldInfo(self, context):
		# literals are text without unit or UCD
		self.fieldInfo = fieldinfo.FieldInfo("text", "", "")


class CastSpecification(FieldInfoedNode, TransparentMixin):
	"""A type cast of value to newType.

	newType is lowercased; types starting with "char (" become text,
	types starting with "national char" become unicode.
	"""
	type = "castSpecification"
	_a_value = None
	_a_newType = None

	@classmethod
	def _getInitKWs(cls, _parseResult):
		# careful with local names here: they are what is returned
		value = _parseResult["value"]
		newType = _parseResult["newType"].lower()
		if newType.startswith("char ("):
			newType = "text"
		elif newType.startswith("national char"):
			newType = "unicode"
		return locals()

	def addFieldInfo(self, context):
		if not hasattr(self.value, "fieldInfo"):
			# NULLs and such have nothing to copy from
			self.fieldInfo = fieldinfo.FieldInfo(self.newType, "", "")
			return

		# We copy units and UCDs from the subordinate value. That has
		# the somewhat unfortunate effect that we may be declaring units
		# on strings.  Ah well.
		self.fieldInfo = self.value.fieldInfo.change(
			type=self.newType, tainted=True)


###################### Geometry and stuff that needs morphing into real SQL

class CoosysMixin(object):
	"""is a mixin that works cooSys into FieldInfos for ADQL geometries.

	cooSys is what the grammar matched as coordSys, or an empty
	string if there was no such match.
	"""
	_a_cooSys = None

	@classmethod
	def _getInitKWs(cls, _parseResult):
		cooSys = _parseResult.get("coordSys", "")
		if not isinstance(cooSys, ColumnReference):
			return {"cooSys":  cooSys}

		raise NotImplementedError( # pragma: no cover
			"References frames must not be column"
			" references.")


class GeometryNode(CoosysMixin, FieldInfoedNode):
	"""Nodes for geometry constructors.

	In ADQL 2.1, most of these became polymorphous.  For instance, circles
	can be constructed with a point as the first (or second, if a coosys
	is present) argument; that point can also be a column reference.

	Also, these will always get morphed in some way (as the database
	certainly doesn't understand ADQL geometries).  So, we're
	trying to give the morphers a fair chance of not getting confused
	despite the wild variety of argument forms and types.

	stcArgs is a list of symbolic names that *might* contain stc (or similar)
	information.  Some of the actual attributes will be None.

	Deriving classes also have to provide sqlType and xtype, which
	go into the field info.

	Flatten is only there for debugging; it'll return invalid SQL.
	OrigArgs is not for client consumption; clients must go through the
	symbolic names.
	"""
	_a_origArgs = None

	def flatten(self):
		flatArgs = "".join(flatten(arg) for arg in self.origArgs)
		return "%s%s"%(self.type.upper(), flatArgs)

	@classmethod
	def _getInitKWs(cls, _parseResult):
		return {"origArgs": list(_parseResult[1:])}

	def addFieldInfo(self, context):
		"""builds our field info from those of the arguments named
		in stcArgs.

		Arguments with STC information incompatible with our system
		(according to context.policy) add a message to context.errors.
		"""
		# only look at arguments that are present and annotated
		fis = []
		for argName in self.stcArgs:
			arg = getattr(self, argName)
			if arg and arg.fieldInfo:
				fis.append(arg.fieldInfo)

		thisSystem = tapstc.getSTCForTAP(self.cooSys)

		# get reference frame from first child if not given in node and
		# one is defined there.
		if thisSystem.astroSystem.spaceFrame.refFrame is None:
			if fis and fis[0].stc:
				thisSystem = fis[0].stc

		childUserData, childUnits = [], []
		for argNo, fi in enumerate(fis, 1):
			childUserData.extend(fi.userData)
			childUnits.append(fi.unit)
			if not context.policy.match(fi.stc, thisSystem):
				context.errors.append("When constructing %s: Argument %d has"
					" incompatible STC"%(self.type, argNo))

		self.fieldInfo = fieldinfo.FieldInfo(
			type=self.sqlType,
			unit=",".join(childUnits),
			ucd="",
			userData=tuple(childUserData),
			stc=thisSystem)
		self.fieldInfo.properties["xtype"] = self.xtype


class Point(GeometryNode):
	"""A point constructed from two coordinates, x and y.
	"""
	type = "point"
	_a_x = _a_y = None
	xtype = "point"
	sqlType = "spoint"

	stcArgs = ("x", "y")

	def flatten(self):
		flatCoos = [flatten(coo) for coo in (self.x, self.y)]
		return "%s(%s)"%(self.type.upper(), ", ".join(flatCoos))

	@classmethod
	def _getInitKWs(cls, _parseResult):
		# there must be exactly two arguments
		x, y = parseArgs(_parseResult["args"]) #noflake: locals returned
		return locals()


class Circle(GeometryNode):
	"""A circle parsed from ADQL.

	There are two ways a circle is specified: either with (x, y, radius)
	or as (center, radius).  In the second case, center is an spoint-valued
	column reference.  Cases with a point-valued literal are turned into
	the first variant during parsing.

	In the tree, there always is a center and a radius; for three
	arguments, the center is a Point built from the first two.
	"""
	type = "circle"
	_a_radius = None
	_a_center = None
	stcArgs = ("center", "radius")
	xtype = "circle"
	sqlType = "scircle"

	@classmethod
	def _getInitKWs(cls, _parseResult):
		args = parseArgs(_parseResult["args"])
		res = dict.fromkeys(cls.stcArgs)

		if len(args)==3:
			res["center"] = Point(cooSys=_parseResult.get("coordSys", ""),
				x=args[0], y=args[1])
			res["radius"] = args[2]
		elif len(args)==2:
			res["center"], res["radius"] = args[0], args[1]
		else: # pragma: no cover
			assert False, "Grammar let through invalid args to Circle"

		return res


class MOC(GeometryNode):
	"""a MOC in an ADQL syntax tree.

	This can be constructed from an ASCII-MOC string or from an order
	and a geometry value expression.  In the first case, only literal
	is set, in the second, order and geometry are.
	"""
	type = "moc"
	_a_literal = None
	_a_order = None
	_a_geometry = None
	stcArgs = ()
	xtype = "moc"
	sqlType = "smoc"

	@classmethod
	def _getInitKWs(cls, _parseResult):
		# careful with local names here: whatever is bound below is
		# what is returned.
		_args = parseArgs(_parseResult["args"])
		if len(_args)==2:
			order, geometry = _args[0], _args[1] #noflake: locals returned

		elif len(_args)==1:
			literal = _args[0] #noflake: locals returned

		else:
			raise common.Error("MOC() takes either one literal or order, geo")

		return locals()

	def flatten(self):
		# there's no point morphing this; when people put this into db
		# engines, they can just as well use the ADQL signature.
		if self.literal is not None:
			return "smoc(%s)"%flatten(self.literal)
		return "smoc(%s, %s)"%(flatten(self.order), flatten(self.geometry))


class Box(GeometryNode):
	"""A box constructed from the four arguments x, y, width, and height.
	"""
	type = "box"
	_a_x = _a_y = _a_width = _a_height = None
	stcArgs = ("x", "y", "width", "height")
	xtype = "polygon"
	sqlType = "sbox"

	@classmethod
	def _getInitKWs(cls, _parseResult):
		# there must be exactly four arguments; the local names are
		# what is returned.
		x, y, width, height = parseArgs( #noflake: locals returned
			_parseResult["args"])
		return locals()


class PolygonCoos(FieldInfoedNode):
	"""a base class for the various argument forms of polygons.

	We want to tell them apart to let the grammar tell the tree builder
	what it thinks the arguments were.  Polygon may have to reconsider
	this when it learns the types of its arguments, but we don't want
	to discard the information coming from the grammar.
	"""
	_a_args = None

	@classmethod
	def _getInitKWs(cls, _parseResult):
		parsedArgs = parseArgs(_parseResult["args"])
		return {"args": parsedArgs}

	def addFieldInfo(self, context):
		# these fieldInfos are never used because Polygon doesn't ask us.
		pass

	def flatten(self):
		flatArgs = [flatten(arg) for arg in self.args]
		return ", ".join(flatArgs)


class PolygonSplitCooArgs(PolygonCoos):
	"""Polygon arguments the grammar took for a flat sequence of
	coordinates; Polygon turns these into pairs.
	"""
	type = "polygonSplitCooArgs"
	

class PolygonPointCooArgs(PolygonCoos):
	"""Polygon arguments the grammar took for a sequence of
	geometry-typed values.
	"""
	type = "polygonPointCooArgs"


class Polygon(GeometryNode):
	"""A polygon parsed from ADQL.

	Exactly one of coos and points is set after construction: coos is a
	list of coordinate tuples (normally pairs), points is a tuple of
	geometry-valued expressions.  Argument lists consisting exclusively
	of literal points are converted to coos.
	"""
	type = "polygon"
	_a_coos = None
	_a_points = None
	stcArgs = ("coos", "points")
	xtype = "polygon"
	sqlType = "spoly"

	@classmethod
	def _getInitKWs(cls, _parseResult):
		# XXX TODO: The grammar will parse even-numbered arguments >=6 into
		# splitCooArgs.  We can't fix that here as we don't have reliable
		# type information at this point.  Fix coos/points confusion
		# in addFieldInfo, I'd say
		arg = parseArgs(_parseResult["args"])[0]

		if arg.type=="polygonPointCooArgs":
			# geometry-typed arguments
			res = {"points": tuple(parseArgs(arg.args))}

			# See if they're all literal points, in which case we fall
			# back to the split args
			for item in res["points"]:
				if item.type!="point":
					return res
			# all points: mutate args to let us fall through to the split coo
			# case
			arg.type = "polygonSplitCooArgs"
			newArgs = []
			for item in res["points"]:
				newArgs.extend([item.x, item.y])
			arg.args = newArgs

		if arg.type=="polygonSplitCooArgs":
			# turn numeric expressions into pairs
			flatCoos = list(arg.args)
			coos = [tuple(flatCoos[index:index+2])
				for index in range(0, len(flatCoos), 2)]
			res = {"coos": coos}

		else: # pragma: no cover
			assert False, "Invalid arguments to polygon"

		return res

	def addFieldInfo(self, name):
		"""builds the field info for the polygon.

		The coordinate system is taken from the first argument (for coos:
		the first coordinate of each pair) having STC information with a
		reference frame.  If there is no such argument, an explicitly
		given coordinate system is used, and UNKNOWN if there is none.
		"""
		if self.points is not None:
			systemSource = self.points
		elif self.coos is not None:
			systemSource = (c[0] for c in self.coos)
		else: # pragma: no cover
			assert False

		# what to use when no argument tells us about its frame
		fallbackSystem = None
		if self.cooSys and self.cooSys!="UNKNOWN":
			fallbackSystem = tapstc.getSTCForTAP(self.cooSys)

		for geo in systemSource:
			if geo.fieldInfo.stc and geo.fieldInfo.stc.astroSystem.spaceFrame.refFrame:
				thisSystem = geo.fieldInfo.stc
				break
		else:
			# don't discard an explicitly specified system here
			if fallbackSystem is None:
				thisSystem = tapstc.getSTCForTAP("UNKNOWN")
			else:
				thisSystem = fallbackSystem

		userData, tainted = collectUserData(
			self.points or [c[0] for c in self.coos]+[c[1] for c in self.coos])
		self.fieldInfo = fieldinfo.FieldInfo(
			type=self.sqlType, unit="deg", ucd="phys.angArea",
			userData=userData, tainted=tainted,
			stc=thisSystem)


# the region makers registered so far, in the sequence of registration
_regionMakers = []

def registerRegionMaker(fun):
	"""appends fun to the region resolution chain.

	A region maker is a function that receives the argument of REGION
	and tries to make sense of it.  It returns either some kind of
	FieldInfoedNode that then replaces the REGION, or None to let
	the next region maker have a go.

	By convention, region specifiers start with an identifier (like
	simbad, siapBbox, etc, basically [A-Za-z]+).  What follows is up to
	the region maker, but it should be separated from the identifier by
	whitespace.

	The entire region functionality will probably disappear with TAP 1.1.
	Don't do anything with it any more.  Use ufuncs instead.
	"""
	_regionMakers.append(fun)


@symbolAction("region")
def makeRegion(children):
	"""returns a node for a REGION construct.

	The argument must be a single string literal.  It is passed to the
	registered region makers in turn, and the first result that is not
	None is returned.  A common.RegionError is raised for other kinds
	of arguments and when no region maker returns anything.
	"""
	isSimpleLiteral = (len(children)==4
		and isinstance(children[2], CharacterStringLiteral))
	if not isSimpleLiteral:
		raise common.RegionError("Invalid argument to REGION: '%s'."%
			"".join(flatten(c) for c in children[2:-1]),
			hint="Here, regions must be simple strings; concatenations or"
			" non-constant parts are forbidden.  Use ADQL geometry expressions"
			" instead.")

	arg = children[2].value
	for regionMaker in _regionMakers:
		made = regionMaker(arg)
		if made is not None:
			return made

	raise common.RegionError("Invalid argument to REGION: '%s'."%
		arg, hint="None of the region parsers known to this service could"
		" make anything of your string.  While STC-S should in general"
		" be comprehendable to TAP services, it's probably better to"
		" use ADQL geometry functions.")


class STCSRegion(FieldInfoedNode):
	"""A region given in STC-S.

	These cannot be flattened; they must be morphed.
	"""
	bindings = []     # we're constructed by makeSTCSRegion, not by the parser
	type = "stcsRegion"
	xtype = "adql:REGION"

	_a_tapstcObj = None # from tapstc -- STCSRegion or a utils.pgshere object

	def _polish(self):
		# our coordinate system is the one of the parsed object
		self.cooSys = self.tapstcObj.cooSys

	def addFieldInfo(self, context):
		# XXX TODO: take type and unit from tapstcObj
		system = tapstc.getSTCForTAP(self.cooSys)
		self.fieldInfo = fieldinfo.FieldInfo("spoly", unit="deg", ucd=None,
			stc=system)

	def flatten(self): # pragma: no cover
		raise common.FlattenError("STCRegion objectcs cannot be flattened, they"
			" must be morphed.")


def makeSTCSRegion(spec):
	"""returns an STCSRegion for the STC-S string spec, or None if
	spec cannot be parsed as STC-S.
	"""
	try:
		tapstcObj = stc.parseSimpleSTCS(spec)
		return STCSRegion(tapstcObj)
	except stc.STCSParseError:
		# Not a valid STC spec, try next region parser
		return None

registerRegionMaker(makeSTCSRegion)


class Centroid(FunctionNode):
	"""The centroid function.

	The result is an spoint without unit or UCD, carrying the user data
	collected from the children that have field infos.
	"""
	type = "centroid"

	def addFieldInfo(self, context):
		collected = collectUserData(self._getInfoChildren())
		self.fieldInfo = fieldinfo.FieldInfo(type="spoint",
			unit="", ucd="",
			userData=collected[0])


class Distance(FunctionNode):
	"""The distance function.

	The four-argument form is normalised to two points when the
	node is constructed.
	"""
	type = "distanceFunction"

	def addFieldInfo(self, context):
		collected = collectUserData(self._getInfoChildren())
		self.fieldInfo = fieldinfo.FieldInfo(type="double precision",
			unit="deg", ucd="pos.angDistance",
			userData=collected[0])

	def optimize(self, stack):
		assert len(self.args)==2, "unexpected arguments in distance"
		# let _sortLargeFirst decide on the sequence of the two arguments
		ordered = list(self.args)
		ordered[0], ordered[1] = _sortLargeFirst(ordered[0], ordered[1])
		self.args = ordered

	@classmethod
	def _getInitKWs(cls, _parseResult):
		# careful with local names here: they are what is returned
		args = parseArgs(_parseResult["args"])
		if len(args)==4:
			# always normalise to (point, point)
			args = [
				Point(cooSys="", x=args[0], y=args[1]),
				Point(cooSys="", x=args[2], y=args[3])]
		return locals()


class PredicateGeometryFunction(FunctionNode):
	"""A geometry predicate function taking two arguments.

	All instances share a single integer-typed field info.
	"""
	type = "predicateGeometryFunction"

	# one field info instance for all nodes of this class
	_pgFieldInfo = fieldinfo.FieldInfo("integer", "", "")

	def optimize(self, stack):
		# This rewrites the arguments (and perhaps funName) in place.
		if len(self.args)!=2:
			assert False, "Grammar let through bad arguments to pgf"

		# make sure we can assign to the individual arguments
		self.args = list(self.args)

		# by ADQL, an INTERSECTS with a point has to become a CONTAINS
		if self.funName=="INTERSECTS":
			ltype = getattr(self.args[0].fieldInfo, "type", None)
			rtype = getattr(self.args[1].fieldInfo, "type", None)
			if ltype=='spoint':
				self.funName = "CONTAINS"
			elif rtype=='spoint':
				# the point has to come first
				self.funName = "CONTAINS"
				self.args[0], self.args[1] = self.args[1], self.args[0]

		leftInd, rightInd = 0, 1
		# optimise the common case of contains(point, circle); both
		# q3c and pgsphere won't use an index (properly) if the sequence
		# of the arguments is "wrong".
		if (self.args[leftInd].type=="point" 
				and self.args[rightInd].type=="circle"):

			if _isConstant([self.args[leftInd]]):
				# a constant point always trades places with the
				# circle's center
				self.args[leftInd], self.args[rightInd].center = \
				 self.args[rightInd].center, self.args[leftInd]

			else:
				# otherwise, _sortLargeFirst decides which one goes where
				self.args[leftInd], self.args[rightInd].center = _sortLargeFirst(
					self.args[leftInd], self.args[rightInd].center)
			# in case we swapped, coosys meta might be out of whack, so
			# fix that:
			self.args[rightInd].cooSys = self.args[rightInd].center.cooSys
				

	def addFieldInfo(self, context):
		# swallow all upstream info, it really doesn't help here
		self.fieldInfo = self._pgFieldInfo

	def flatten(self):
		# function name followed by the comma-separated flattened arguments
		return "%s(%s)"%(self.funName, ", ".join(flatten(a) for a in self.args))


class PointFunction(FunctionNode):
	"""A function pulling information out of a point: COORDSYS or one
	of the coordN functions.
	"""
	type = "pointFunction"

	def _makeCoordsysFieldInfo(self):
		return fieldinfo.FieldInfo("text", unit="", ucd="meta.ref;pos.frame")

	def _makeCoordFieldInfo(self):
		# this should pull in the metadata from the 1st or 2nd component
		# of the argument.  However, given the way geometries are constructed
		# in ADQL, what comes back here is in degrees in the frame of the
		# child always.  We're a bit pickier with the user data -- if there's
		# exactly two user data fields in the child, we assume the child
		# has been built from individual columns, and we try to retrieve the
		# one pulled out.
		childUserData = self.args[0].fieldInfo.userData
		if len(childUserData)==2:
			# the last character of the function name is the (1-based)
			# number of the coordinate
			coordIndex = int(self.funName[-1])-1
			userData = (childUserData[coordIndex],)
		else:
			userData = childUserData
		return fieldinfo.FieldInfo("double precision",
			ucd=None, unit="deg", userData=userData)

	def addFieldInfo(self, context):
		if self.funName=="COORDSYS":
			self.fieldInfo = self._makeCoordsysFieldInfo()
		else:
			# it's coordN
			self.fieldInfo = self._makeCoordFieldInfo()


class Area(FunctionNode):
	"""The area function.

	The result is a double precision in deg**2, carrying the user data
	collected from the children that have field infos.
	"""
	type = "area"

	def addFieldInfo(self, context):
		collected = collectUserData(self._getInfoChildren())
		self.fieldInfo = fieldinfo.FieldInfo(type="double precision",
			unit="deg**2", ucd="phys.angSize",
			userData=collected[0])
