From: Jakub Skoczen Date: Fri, 24 Jul 2009 16:02:26 +0000 (+0200) Subject: Mavenized. X-Git-Tag: v1.5~26 X-Git-Url: http://jira.indexdata.com/?a=commitdiff_plain;h=d58739419882639439b40b18fcefeb9e51488fb9;p=cql-java-moved-to-github.git Mavenized. I assume none of the makefiles work. So if somebody wants to keep them for nostalgic reasons, he should fixed them. Or removed them for good :) --- diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d2a2c7a --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +target/ +*.swp +*~ diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..fd08049 --- /dev/null +++ b/pom.xml @@ -0,0 +1,32 @@ + + 4.0.0 + org.z3950.zing + cql-java + jar + 1.3 + cql-java + http://maven.apache.org + + + + org.apache.maven.plugins + maven-compiler-plugin + 2.0.2 + + 1.6 + 1.6 + + + + + + + junit + junit + 3.8.1 + test + + + + diff --git a/src/main/java/org/z3950/zing/cql/.cvsignore b/src/main/java/org/z3950/zing/cql/.cvsignore new file mode 100644 index 0000000..6b468b6 --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/.cvsignore @@ -0,0 +1 @@ +*.class diff --git a/src/main/java/org/z3950/zing/cql/CQLAndNode.java b/src/main/java/org/z3950/zing/cql/CQLAndNode.java new file mode 100644 index 0000000..dc452db --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/CQLAndNode.java @@ -0,0 +1,29 @@ +// $Id: CQLAndNode.java,v 1.9 2007-06-29 12:48:21 mike Exp $ + +package org.z3950.zing.cql; + + +/** + * Represents an AND node in a CQL parse-tree. + * + * @version $Id: CQLAndNode.java,v 1.9 2007-06-29 12:48:21 mike Exp $ + */ +public class CQLAndNode extends CQLBooleanNode { + /** + * Creates a new AND node with the specified left- and right-hand + * sides and modifiers. 
+ */ + public CQLAndNode(CQLNode left, CQLNode right, ModifierSet ms) { + super(left, right, ms); + } + + // ### Too much code duplication here with OR and NOT + byte[] opType1() { + byte[] op = new byte[5]; + putTag(CONTEXT, 46, CONSTRUCTED, op, 0); // Operator + putLen(2, op, 2); + putTag(CONTEXT, 0, PRIMITIVE, op, 3); // and + putLen(0, op, 4); + return op; + } +} diff --git a/src/main/java/org/z3950/zing/cql/CQLBooleanNode.java b/src/main/java/org/z3950/zing/cql/CQLBooleanNode.java new file mode 100644 index 0000000..ec0608c --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/CQLBooleanNode.java @@ -0,0 +1,90 @@ +// $Id: CQLBooleanNode.java,v 1.18 2007-07-03 16:03:00 mike Exp $ + +package org.z3950.zing.cql; +import java.util.Properties; +import java.util.Vector; + + +/** + * Represents a boolean node in a CQL parse-tree. + * + * @version $Id: CQLBooleanNode.java,v 1.18 2007-07-03 16:03:00 mike Exp $ + */ +public abstract class CQLBooleanNode extends CQLNode { + /** + * The root of a parse-tree representing the left-hand side. + */ + public CQLNode left; + + /** + * The root of a parse-tree representing the right-hand side. + */ + public CQLNode right; + + /** + * The set of modifiers that are applied to this boolean. 
+ */ + public ModifierSet ms; + + protected CQLBooleanNode(CQLNode left, CQLNode right, ModifierSet ms) { + this.left = left; + this.right = right; + this.ms = ms; + } + + public String toXCQL(int level, Vector prefixes, + Vector sortkeys) { + return (indent(level) + "\n" + + renderPrefixes(level+1, prefixes) + + ms.toXCQL(level+1, "boolean") + + indent(level+1) + "\n" + + left.toXCQL(level+2) + + indent(level+1) + "\n" + + indent(level+1) + "\n" + + right.toXCQL(level+2) + + indent(level+1) + "\n" + + renderSortKeys(level+1, sortkeys) + + indent(level) + "\n"); + } + + public String toCQL() { + // ### We don't always need parens around the operands + return ("(" + left.toCQL() + ")" + + " " + ms.toCQL() + " " + + "(" + right.toCQL() + ")"); + } + + public String toPQF(Properties config) throws PQFTranslationException { + return ("@" + opPQF() + + " " + left.toPQF(config) + + " " + right.toPQF(config)); + } + + // represents the operation for PQF: overridden for CQLProxNode + String opPQF() { return ms.getBase(); } + + public byte[] toType1BER(Properties config) throws PQFTranslationException { + System.out.println("in CQLBooleanNode.toType1BER(): PQF=" + + toPQF(config)); + byte[] rpn1 = left.toType1BER(config); + byte[] rpn2 = right.toType1BER(config); + byte[] op = opType1(); + byte[] rpnStructure = new byte[rpn1.length+rpn2.length+op.length+4]; + + // rpnRpnOp + int offset = putTag(CONTEXT, 1, CONSTRUCTED, rpnStructure, 0); + + rpnStructure[offset++] = (byte)(0x80&0xff); // indefinite length + System.arraycopy(rpn1, 0, rpnStructure, offset, rpn1.length); + offset += rpn1.length; + System.arraycopy(rpn2, 0, rpnStructure, offset, rpn2.length); + offset += rpn2.length; + System.arraycopy(op, 0, rpnStructure, offset, op.length); + offset += op.length; + rpnStructure[offset++] = 0x00; // end rpnRpnOp + rpnStructure[offset++] = 0x00; + return rpnStructure; + } + + abstract byte[] opType1(); +} diff --git a/src/main/java/org/z3950/zing/cql/CQLGenerator.java 
b/src/main/java/org/z3950/zing/cql/CQLGenerator.java new file mode 100644 index 0000000..416b771 --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/CQLGenerator.java @@ -0,0 +1,317 @@ +// $Id: CQLGenerator.java,v 1.9 2007-07-03 15:41:35 mike Exp $ + +package org.z3950.zing.cql; +import java.util.Properties; +import java.util.Random; +import java.io.InputStream; +import java.io.FileInputStream; +import java.io.FileNotFoundException; + + +/** + * A generator that produces random CQL queries. + *

+ * Why is that useful? Mainly to produce test-cases for CQL parsers + * (including the CQLParser class in this package): you can + * generate a random search tree, render it to XCQL and remember the + * result. Then decompile the tree to CQL, feed the generated CQL to + * the parser of your choice, and check that the XCQL it comes up with + * is the same as what you got from your initial rendering. + *

+ * This code is based on the same grammar as the CQLParser class in + * this distribution - there is a generate_x() method + * for each grammar element X. + * + * @version $Id: CQLGenerator.java,v 1.9 2007-07-03 15:41:35 mike Exp $ + * @see http://zing.z3950.org/cql/index.html + */ +public class CQLGenerator { + Properties params; + Random rnd; + static private boolean DEBUG = false; + + /** + * Creates a new CQL generator with the specified parameters. + *

+ * @param params + * A Properties table containing configuration + * parameters for the queries to be generated by this generator. + * Recognised parameters are: + *

+ *

+ *
seed
+ *
+ * If specified, this is a long used to seed the + * random number generator, so that the CQL generator can be + * run repeatably, giving the same results each time. If it's + * omitted, then no seed is explicitly specified, and the + * results of each run will be different (so long as you don't + * run it more than 2^32 times :-) + *

+ *

+ *
complexQuery
+ *
+ * [mandatory] A floating-point number between 0.0 and 1.0, + * indicating the probability for each cql-query node + * that it will be expanded into a ``complex query'' + * (cql-query boolean search-clause) rather + * than a search-clause. + *

+ *

+ *
complexClause
+ *
+ * [mandatory] A floating-point number between 0.0 and 1.0, + * indicating the probability for each search-clause + * node that it will be expanded into a full sub-query rather + * than an [ index relation ] term triplet. + *

+ *

+ *
proxOp
+ *
+ * [mandatory] A floating-point number between 0.0 and 1.0, + * indicating the probability that each boolean operator will + * be chosen to be a proximity operation; otherwise, the three + * simpler boolean operations (and, or and + * not) are chosen with equal probability. + *

+ *

+ *
equalsRelation
+ *
+ * [mandatory] A floating-point number between 0.0 and 1.0, + * indicating the probability that each relation will be chosen + * to be = - this is treated as a special case, since + * it's likely to be by far the most common relation in + * ``real life'' searches. + *

+ *

+ *
numericRelation
+ *
+ * [mandatory] A floating-point number between 0.0 and 1.0, + * indicating the probability that a relation, having chosen + * not to be =, is instead chosen to be one of the six + * numeric relations (<, >, + * <=, >=, <> and + * =). + *

+ *

+ *
+ */ + public CQLGenerator(Properties params) { + this.params = params; + String seed = params.getProperty("seed"); + if (seed != null) + rnd = new Random(new Long(seed).longValue()); + else + rnd = new Random(); + } + + private static void debug(String str) { + if (DEBUG) + System.err.println("DEBUG: " + str); + } + + /** + * Generates a single random CQL query. + *

+ * Uses the parameters that were associated with the generator + * when it was created. You are free to create as many random + * queries as you wish from a single generator; each of them will + * use the same parameters. + *

+ * @return + * A CQLNode that is the root of the generated tree. + * That tree may be rendered in XCQL using its toXCQL() + * method, or decompiled into CQL using its toCQL + * method. + */ + public CQLNode generate() throws MissingParameterException { + return generate_cql_query(); + } + + private CQLNode generate_cql_query() throws MissingParameterException { + if (!maybe("complexQuery")) { + return generate_search_clause(); + } + + CQLNode node1 = generate_cql_query(); + CQLNode node2 = generate_search_clause(); + // ### should generate prefix-mapping nodes + if (maybe("proxOp")) { + // ### generate proximity nodes + } else { + switch (rnd.nextInt(3)) { + case 0: return new CQLAndNode(node1, node2, new ModifierSet("and")); + case 1: return new CQLOrNode (node1, node2, new ModifierSet("or")); + case 2: return new CQLNotNode(node1, node2, new ModifierSet("not")); + } + } + + return generate_search_clause(); + } + + private CQLNode generate_search_clause() throws MissingParameterException { + if (maybe("complexClause")) { + return generate_cql_query(); + } + + // ### Should sometimes generate index/relation-free terms + String index = generate_index(); + CQLRelation relation = generate_relation(); + String term = generate_term(); + + return new CQLTermNode(index, relation, term); + } + + // ### Should probably be more configurable + private String generate_index() { + String index = ""; // shut up compiler warning + if (rnd.nextInt(2) == 0) { + switch (rnd.nextInt(3)) { + case 0: index = "dc.author"; break; + case 1: index = "dc.title"; break; + case 2: index = "dc.subject"; break; + } + } else { + switch (rnd.nextInt(4)) { + case 0: index = "bath.author"; break; + case 1: index = "bath.title"; break; + case 2: index = "bath.subject"; break; + case 3: index = "foo>bar"; break; + } + } + + return index; + } + + private CQLRelation generate_relation() throws MissingParameterException { + String base = generate_base_relation(); + CQLRelation rel = new 
CQLRelation(base); + // ### should generate modifiers too + return rel; + } + + private String generate_base_relation() throws MissingParameterException { + if (maybe("equalsRelation")) { + return "="; + } else if (maybe("numericRelation")) { + return generate_numeric_relation(); + } else { + switch (rnd.nextInt(3)) { + case 0: return "exact"; + case 1: return "all"; + case 2: return "any"; + } + } + + // NOTREACHED + return ""; // shut up compiler warning + } + + // ### could read candidate terms from /usr/dict/words + // ### should introduce wildcard characters + // ### should generate multi-word terms + private String generate_term() { + switch (rnd.nextInt(10)) { + case 0: return "cat"; + case 1: return "\"cat\""; + case 2: return "comp.os.linux"; + case 3: return "xml:element"; + case 4: return ""; + case 5: return "prox/word/>=/5"; + case 6: return ""; + case 7: return "frog fish"; + case 8: return "the complete dinosaur"; + case 9: return "foo*bar"; + } + + // NOTREACHED + return ""; // shut up compiler warning + } + + private String generate_numeric_relation() { + switch (rnd.nextInt(6)) { + case 0: return "<"; + case 1: return ">"; + case 2: return "<="; + case 3: return ">="; + case 4: return "<>"; + case 5: return "="; + } + + // NOTREACHED + return ""; // shut up compiler warning + } + + boolean maybe(String param) throws MissingParameterException { + String probability = params.getProperty(param); + if (probability == null) + throw new MissingParameterException(param); + + double dice = rnd.nextDouble(); + double threshhold = new Double(probability).doubleValue(); + boolean res = dice < threshhold; + debug("dice=" + String.valueOf(dice).substring(0, 8) + + " vs. " + threshhold + "='" + param + "': " + res); + return res; + } + + + /** + * A simple test-harness for the generator. + *

+ * It generates a single random query using the parameters + * specified in a nominated properties file, plus any additional + * name value pairs provided on the command-line, and + * decompiles it into CQL which is written to standard output. + *

+ * For example, + * java org.z3950.zing.cql.CQLGenerator + * etc/generate.properties seed 18398, + * where the file generate.properties contains:

+     *	complexQuery=0.4
+     *	complexClause=0.4
+     *	equalsRelation=0.5
+     *	numericRelation=0.7
+     *	proxOp=0.0
+     * 
+ * yields:
+     *	((dc.author = "<xml.element>") or (bath.title = cat)) and
+     *		(dc.subject >= "the complete dinosaur")
+     * 
+ *

+ * @param configFile + * The name of a properties file from which to read the + * configuration parameters (see above). + * @param name + * The name of a configuration parameter. + * @param value + * The value to assign to the configuration parameter named in + * the immediately preceding command-line argument. + * @return + * A CQL query expressed in a form that should be comprehensible + * to all conformant CQL compilers. + */ + public static void main (String[] args) throws Exception { + if (args.length % 2 != 1) { + System.err.println("Usage: CQLGenerator "+ + "[ ]..."); + System.exit(1); + } + + String configFile = args[0]; + InputStream f = new FileInputStream(configFile); + if (f == null) + throw new FileNotFoundException(configFile); + + Properties params = new Properties(); + params.load(f); + f.close(); + for (int i = 1; i < args.length; i += 2) + params.setProperty(args[i], args[i+1]); + + CQLGenerator generator = new CQLGenerator(params); + CQLNode tree = generator.generate(); + System.out.println(tree.toCQL()); + } +} diff --git a/src/main/java/org/z3950/zing/cql/CQLLexer.java b/src/main/java/org/z3950/zing/cql/CQLLexer.java new file mode 100644 index 0000000..5df3822 --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/CQLLexer.java @@ -0,0 +1,236 @@ +// $Id: CQLLexer.java,v 1.14 2007-07-03 13:30:42 mike Exp $ + +package org.z3950.zing.cql; +import java.io.StreamTokenizer; +import java.io.StringReader; +import java.util.Hashtable; + + +// This is a semi-trivial subclass for java.io.StreamTokenizer that: +// * Has a halfDecentPushBack() method that actually works +// * Includes a render() method +// * Knows about the multi-character tokens "<=", ">=" and "<>" +// * Recognises a set of keywords as tokens in their own right +// * Includes some primitive debugging-output facilities +// It's used only by CQLParser. 
+// +class CQLLexer extends StreamTokenizer { + // New publicly visible token-types + static int TT_LE = 1000; // The "<=" relation + static int TT_GE = 1001; // The ">=" relation + static int TT_NE = 1002; // The "<>" relation + static int TT_EQEQ = 1003; // The "==" relation + static int TT_AND = 1004; // The "and" boolean + static int TT_OR = 1005; // The "or" boolean + static int TT_NOT = 1006; // The "not" boolean + static int TT_PROX = 1007; // The "prox" boolean + static int TT_SORTBY = 1008; // The "sortby" operator + + // Support for keywords. It would be nice to compile this linear + // list into a Hashtable, but it's hard to store ints as hash + // values, and next to impossible to use them as hash keys. So + // we'll just scan the (very short) list every time we need to do + // a lookup. + private class Keyword { + int token; + String keyword; + Keyword(int token, String keyword) { + this.token = token; + this.keyword = keyword; + } + } + // This should logically be static, but Java won't allow it :-P + private Keyword[] keywords = { + new Keyword(TT_AND, "and"), + new Keyword(TT_OR, "or"), + new Keyword(TT_NOT, "not"), + new Keyword(TT_PROX, "prox"), + new Keyword(TT_SORTBY, "sortby"), + }; + + // For halfDecentPushBack() and the code at the top of nextToken() + private static int TT_UNDEFINED = -1000; + private int saved_ttype = TT_UNDEFINED; + private double saved_nval; + private String saved_sval; + + // Controls debugging output + private static boolean DEBUG; + + CQLLexer(String cql, boolean lexdebug) { + super(new StringReader(cql)); + wordChars('!', '?'); // ASCII-dependency! + wordChars('[', '`'); // ASCII-dependency! 
+ quoteChar('"'); + ordinaryChar('='); + ordinaryChar('<'); + ordinaryChar('>'); + ordinaryChar('/'); + ordinaryChar('('); + ordinaryChar(')'); + wordChars('\'', '\''); // prevent this from introducing strings + parseNumbers(); + DEBUG = lexdebug; + } + + private static void debug(String str) { + if (DEBUG) + System.err.println("LEXDEBUG: " + str); + } + + // I don't honestly understand why we need this, but the + // documentation for java.io.StreamTokenizer.pushBack() is pretty + // vague about its semantics, and it seems to me that they could + // be summed up as "it doesn't work". This version has the very + // clear semantics "pretend I didn't call nextToken() just then". + // + private void halfDecentPushBack() { + saved_ttype = ttype; + saved_nval = nval; + saved_sval = sval; + } + + public int nextToken() throws java.io.IOException { + if (saved_ttype != TT_UNDEFINED) { + ttype = saved_ttype; + nval = saved_nval; + sval = saved_sval; + saved_ttype = TT_UNDEFINED; + debug("using saved ttype=" + ttype + ", " + + "nval=" + nval + ", sval='" + sval + "'"); + return ttype; + } + + underlyingNextToken(); + if (ttype == '<') { + debug("token starts with '<' ..."); + underlyingNextToken(); + if (ttype == '=') { + debug("token continues with '=' - it's '<='"); + ttype = TT_LE; + } else if (ttype == '>') { + debug("token continues with '>' - it's '<>'"); + ttype = TT_NE; + } else { + debug("next token is " + render() + " (pushed back)"); + halfDecentPushBack(); + ttype = '<'; + debug("AFTER: ttype is now " + ttype + " - " + render()); + } + } else if (ttype == '>') { + debug("token starts with '>' ..."); + underlyingNextToken(); + if (ttype == '=') { + debug("token continues with '=' - it's '>='"); + ttype = TT_GE; + } else { + debug("next token is " + render() + " (pushed back)"); + halfDecentPushBack(); + ttype = '>'; + debug("AFTER: ttype is now " + ttype + " - " + render()); + } + } else if (ttype == '=') { + debug("token starts with '=' ..."); + 
underlyingNextToken(); + if (ttype == '=') { + debug("token continues with '=' - it's '=='"); + ttype = TT_EQEQ; + } else { + debug("next token is " + render() + " (pushed back)"); + halfDecentPushBack(); + ttype = '='; + debug("AFTER: ttype is now " + ttype + " - " + render()); + } + } + + debug("done nextToken(): ttype=" + ttype + ", " + + "nval=" + nval + ", " + "sval='" + sval + "'" + + " (" + render() + ")"); + + return ttype; + } + + // It's important to do keyword recognition here at the lowest + // level, otherwise when one of these words follows "<" or ">" + // (which can be the beginning of multi-character tokens) it gets + // pushed back as a string, and its keywordiness is not + // recognised. + // + public int underlyingNextToken() throws java.io.IOException { + super.nextToken(); + if (ttype == TT_WORD) + for (int i = 0; i < keywords.length; i++) + if (sval.equalsIgnoreCase(keywords[i].keyword)) + ttype = keywords[i].token; + + return ttype; + } + + // Simpler interface for the usual case: current token with quoting + String render() { + return render(ttype, true); + } + + String render(int token, boolean quoteChars) { + if (token == TT_EOF) { + return "EOF"; + } else if (token == TT_NUMBER) { + if ((double) nval == (int) nval) { + return new Integer((int) nval).toString(); + } else { + return new Double((double) nval).toString(); + } + } else if (token == TT_WORD) { + return "word: " + sval; + } else if (token == '"') { + return "string: \"" + sval + "\""; + } else if (token == TT_LE) { + return "<="; + } else if (token == TT_GE) { + return ">="; + } else if (token == TT_NE) { + return "<>"; + } else if (token == TT_EQEQ) { + return "=="; + } + + // Check whether its associated with one of the keywords + for (int i = 0; i < keywords.length; i++) + if (token == keywords[i].token) + return keywords[i].keyword; + + // Otherwise it must be a single character, such as '(' or '/'. 
+ String res = String.valueOf((char) token); + if (quoteChars) res = "'" + res + "'"; + return res; + } + + public static void main(String[] args) throws Exception { + if (args.length > 1) { + System.err.println("Usage: CQLLexer []"); + System.err.println("If unspecified, query is read from stdin"); + System.exit(1); + } + + String cql; + if (args.length == 1) { + cql = args[0]; + } else { + byte[] bytes = new byte[10000]; + try { + // Read in the whole of standard input in one go + int nbytes = System.in.read(bytes); + } catch (java.io.IOException ex) { + System.err.println("Can't read query: " + ex.getMessage()); + System.exit(2); + } + cql = new String(bytes); + } + + CQLLexer lexer = new CQLLexer(cql, true); + int token; + while ((token = lexer.nextToken()) != TT_EOF) { + // Nothing to do: debug() statements render tokens for us + } + } +} diff --git a/src/main/java/org/z3950/zing/cql/CQLNode.java b/src/main/java/org/z3950/zing/cql/CQLNode.java new file mode 100644 index 0000000..3e42e1e --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/CQLNode.java @@ -0,0 +1,354 @@ +// $Id: CQLNode.java,v 1.26 2007-07-03 13:36:03 mike Exp $ + +package org.z3950.zing.cql; +import java.util.Properties; +import java.util.Vector; + + +/** + * Represents a node in a CQL parse-tree. + * + * @version $Id: CQLNode.java,v 1.26 2007-07-03 13:36:03 mike Exp $ + */ +public abstract class CQLNode { + CQLNode() {} // prevent javadoc from documenting this + + /** + * Returns the name of the result-set to which this query is a + * reference, if and only if the entire query consists only of a + * result-set reference. If it's anything else, including a + * boolean combination of a result-set reference with something + * else, then null is returned instead. + * @return the name of the referenced result-set + */ + public String getResultSetName() { + return null; + } + + /** + * Translates a parse-tree into an XCQL document. + *

+ * @param level + * The number of levels to indent the top element of the XCQL + * document. This will typically be 0 when invoked by an + * application; it takes higher values when this method is + * invoked recursively for nodes further down the tree. + * @return + * A String containing an XCQL document equivalent to the + * parse-tree whose root is this node. + */ + public String toXCQL(int level) { + return toXCQL(level, null); + } + + public String toXCQL(int level, Vector prefixes) { + return toXCQL(level, prefixes, null); + } + + abstract public String toXCQL(int level, Vector prefixes, + Vector sortkeys); + + protected static String renderPrefixes(int level, Vector prefixes) { + if (prefixes == null || prefixes.size() == 0) + return ""; + String res = indent(level) + "\n"; + for (int i = 0; i < prefixes.size(); i++) { + CQLPrefix p = (CQLPrefix) prefixes.get(i); + res += indent(level+1) + "\n"; + if (p.name != null) + res += indent(level+2) + "" + p.name + "\n"; + res += indent(level+2) + + "" + p.identifier + "\n"; + res += indent(level+1) + "\n"; + } + return res + indent(level) + "\n"; + } + + protected static String renderSortKeys(int level, + Vector sortkeys) { + if (sortkeys == null || sortkeys.size() == 0) + return ""; + String res = indent(level) + "\n"; + for (int i = 0; i < sortkeys.size(); i++) { + ModifierSet key = sortkeys.get(i); + res += key.sortKeyToXCQL(level+1); + } + return res + indent(level) + "\n"; + } + + /** + * Decompiles a parse-tree into a CQL query. + *

+ * @return + * A String containing a CQL query equivalent to the parse-tree + * whose root is this node, so that compiling that query will + * yield an identical tree. + */ + abstract public String toCQL(); + + /** + * Renders a parse-tree into a Yaz-style PQF string. + * PQF, or Prefix Query Format, is a cryptic but powerful notation + * that can be trivially mapped, one-to-one, into Z39.50 Type-1 and + * Type-101 queries. A specification for the format can be found + * in + * Chapter 7 (Supporting Tools) of the + * YAZ manual. + *

+ * @param config + * A Properties object containing configuration + * information that specifies the mapping from CQL indexes, + * relations, etc. to Type-1 attributes. The mapping + * specification is described in the CQL-Java distribution's + * sample PQF-mapping configuration file, + * etc/pqf.properties, which see. + * @return + * A String containing a PQF query equivalent to the parse-tree + * whose root is this node. + */ + abstract public String toPQF(Properties config) + throws PQFTranslationException; + + /** + * Returns a String of spaces for indenting to the specified level. + */ + protected static String indent(int level) { return Utils.indent(level); } + + /** + * Returns the argument String quoted for XML. + * For example, each occurrence of < is translated to + * &lt;. + */ + protected static String xq(String str) { return Utils.xq(str); } + + /** + * Renders a parse-tree into a BER-encoded packet representing an + * equivalent Z39.50 Type-1 query. If you don't know what that + * means, then you don't need this method :-) This is useful + * primarily for SRW-to-Z39.50 gateways. + * + * @param config + * A Properties object containing configuration + * information that specifies the mapping from CQL indexes, + * relations, etc. to Type-1 attributes. The mapping + * specification is described in the CQL-Java distribution's + * sample PQF-mapping configuration file, + * etc/pqf.properties, which see. + * @return + * A byte array containing the BER packet.
+ * @see + * ftp://ftp.rsasecurity.com/pub/pkcs/ascii/layman.asc + */ + abstract public byte[] toType1BER(Properties config) + throws PQFTranslationException; + + // ANS.1 classes + protected static final int UNIVERSAL = 0; + protected static final int APPLICATION = 1; + protected static final int CONTEXT = 2; + protected static final int PRIVATE = 3; + + // ASN.1 tag forms + protected static final int PRIMITIVE = 0; + protected static final int CONSTRUCTED = 1; + + // ASN.1 UNIVERSAL data types + public static final byte BOOLEAN = 1; + public static final byte INTEGER = 2; + public static final byte BITSTRING = 3; + public static final byte OCTETSTRING = 4; + public static final byte NULL = 5; + public static final byte OBJECTIDENTIFIER = 6; + public static final byte OBJECTDESCRIPTOR = 7; + public static final byte EXTERNAL = 8; + public static final byte ENUMERATED = 10; + public static final byte SEQUENCE = 16; + public static final byte SET = 17; + public static final byte VISIBLESTRING = 26; + public static final byte GENERALSTRING = 27; + + protected static final int putTag(int asn1class, int fldid, int form, + byte[] record, int offset) { + if (fldid < 31) + record[offset++] = (byte)(fldid + asn1class*64 + form*32); + else { + record[offset++] = (byte)(31 + asn1class*64 + form*32); + if (fldid < 128) + record[offset++] = (byte)(fldid); + else { + record[offset++] = (byte)(128 + fldid/128); + record[offset++] = (byte)(fldid % 128); + } + } + return offset; + } + + /** + * Put a length directly into a BER record. + * + * @param len length to put into record + * @return the new, incremented value of the offset parameter. + */ + public // ### shouldn't this be protected? 
+ static final int putLen(int len, byte[] record, int offset) { + + if (len < 128) + record[offset++] = (byte)len; + else { + int t; + record[offset] = (byte)(lenLen(len) - 1); + for (t = record[offset]; t > 0; t--) { + record[offset+t] = (byte)(len & 0xff); + len >>= 8; + } + t = offset; + offset += (record[offset]&0xff) + 1; + record[t] += 128; // turn on bit 8 in length byte. + } + return offset; + } + + /** + * Get the length needed to represent the given length. + * + * @param length determine length needed to encode this + * @return length needed to encode given length + */ + protected // ### shouldn't this be private? + static final int lenLen(int length) { + + return ((length < 128) ? 1 : + (length < 256) ? 2 : + (length < 65536L) ? 3 : 4); + } + + /** + * Get the length needed to represent the given number. + * + * @param num determine length needed to encode this + * @return length needed to encode given number + */ + protected static final int numLen(long num) { + num = num < 0 ? -num : num; + // ### Wouldn't this be better done algorithmically? + // Or at least with the constants expressed in hex? + return ((num < 128) ? 1 : + (num < 32768) ? 2 : + (num < 8388608) ? 3 : + (num < 2147483648L) ? 4 : + (num < 549755813888L) ? 5 : + (num < 140737488355328L) ? 6 : + (num < 36028797018963968L) ? 7 : 8); + } + + /** + * Put a number into a given buffer + * + * @param num number to put into buffer + * @param record buffer to use + * @param offset offset into buffer + * @return the new, incremented value of the offset parameter. 
+ */ + protected static final int putNum(long num, byte record[], int offset) { + int cnt=numLen(num); + + for (int count = cnt - 1; count >= 0; count--) { + record[offset+count] = (byte)(num & 0xff); + num >>= 8; + } + return offset+cnt; + } + + // Used only by the makeOID() method + private static final java.util.Hashtable madeOIDs = + new java.util.Hashtable(10); + + protected static final byte[] makeOID(String oid) { + byte[] o; + int dot, offset = 0, oidOffset = 0, value; + + if ((o = (byte[])madeOIDs.get(oid)) == null) { + o = new byte[100]; + + // Isn't this kind of thing excruciating in Java? + while (oidOffset < oid.length() && + Character.isDigit(oid.charAt(oidOffset)) == true) { + if (offset > 90) // too large + return null; + + dot = oid.indexOf('.', oidOffset); + if (dot == -1) + dot = oid.length(); + + value = Integer.parseInt(oid.substring(oidOffset, dot)); + + if (offset == 0) { // 1st two are special + if (dot == -1) // ### can't happen: -1 is reassigned above + return null; // can't be this short + oidOffset = dot+1; // skip past '.' + + dot = oid.indexOf('.', oidOffset); + if (dot == -1) + dot = oid.length(); + + // ### Eh?! 
+ value = value * 40 + + Integer.parseInt(oid.substring(oidOffset,dot)); + } + + if (value < 0x80) { + o[offset++] = (byte)value; + } else { + int count = 0; + byte bits[] = new byte[12]; // save a 84 (12*7) bit number + + while (value != 0) { + bits[count++] = (byte)(value & 0x7f); + value >>= 7; + } + + // Now place in the correct order + while (--count > 0) + o[offset++] = (byte)(bits[count] | 0x80); + + o[offset++] = bits[count]; + } + + dot = oid.indexOf('.', oidOffset); + if (dot == -1) + break; + + oidOffset = dot+1; + } + + byte[] ptr = new byte[offset]; + System.arraycopy(o, 0, ptr, 0, offset); + madeOIDs.put(oid, ptr); + return ptr; + } + return o; + } + + public static final byte[] makeQuery(CQLNode root, Properties properties) + throws PQFTranslationException { + byte[] rpnStructure = root.toType1BER(properties); + byte[] qry = new byte[rpnStructure.length+100]; + int offset = 0; + offset = putTag(CONTEXT, 1, CONSTRUCTED, qry, offset); + qry[offset++] = (byte)(0x80&0xff); // indefinite length + offset = putTag(UNIVERSAL, OBJECTIDENTIFIER, PRIMITIVE, qry, offset); + byte[] oid = makeOID("1.2.840.10003.3.1"); // bib-1 + offset = putLen(oid.length, qry, offset); + System.arraycopy(oid, 0, qry, offset, oid.length); + offset += oid.length; + System.arraycopy(rpnStructure, 0, qry, offset, rpnStructure.length); + offset += rpnStructure.length; + qry[offset++] = 0x00; // end of query + qry[offset++] = 0x00; + byte[] q = new byte[offset]; + System.arraycopy(qry, 0, q, 0, offset); + return q; + } +} diff --git a/src/main/java/org/z3950/zing/cql/CQLNotNode.java b/src/main/java/org/z3950/zing/cql/CQLNotNode.java new file mode 100644 index 0000000..7b56de3 --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/CQLNotNode.java @@ -0,0 +1,28 @@ +// $Id: CQLNotNode.java,v 1.9 2007-06-29 12:48:21 mike Exp $ + +package org.z3950.zing.cql; + + +/** + * Represents a NOT node in a CQL parse-tree. 
+ * + * @version $Id: CQLNotNode.java,v 1.9 2007-06-29 12:48:21 mike Exp $ + */ +public class CQLNotNode extends CQLBooleanNode { + /** + * Creates a new NOT node with the specified left- and right-hand + * sides and modifiers. + */ + public CQLNotNode(CQLNode left, CQLNode right, ModifierSet ms) { + super(left, right, ms); + } + + byte[] opType1() { + byte[] op = new byte[5]; + putTag(CONTEXT, 46, CONSTRUCTED, op, 0); // Operator + putLen(2, op, 2); + putTag(CONTEXT, 2, PRIMITIVE, op, 3); // and-not + putLen(0, op, 4); + return op; + } +} diff --git a/src/main/java/org/z3950/zing/cql/CQLOrNode.java b/src/main/java/org/z3950/zing/cql/CQLOrNode.java new file mode 100644 index 0000000..9e7d66e --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/CQLOrNode.java @@ -0,0 +1,28 @@ +// $Id: CQLOrNode.java,v 1.9 2007-06-29 12:48:21 mike Exp $ + +package org.z3950.zing.cql; + + +/** + * Represents an OR node in a CQL parse-tree. + * + * @version $Id: CQLOrNode.java,v 1.9 2007-06-29 12:48:21 mike Exp $ + */ +public class CQLOrNode extends CQLBooleanNode { + /** + * Creates a new OR node with the specified left- and right-hand + * sides and modifiers. + */ + public CQLOrNode(CQLNode left, CQLNode right, ModifierSet ms) { + super(left, right, ms); + } + + byte[] opType1() { + byte[] op = new byte[5]; + putTag(CONTEXT, 46, CONSTRUCTED, op, 0); // Operator + putLen(2, op, 2); + putTag(CONTEXT, 1, PRIMITIVE, op, 3); // or + putLen(0, op, 4); + return op; + } +} diff --git a/src/main/java/org/z3950/zing/cql/CQLParseException.java b/src/main/java/org/z3950/zing/cql/CQLParseException.java new file mode 100644 index 0000000..845dfff --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/CQLParseException.java @@ -0,0 +1,23 @@ +// $Id: CQLParseException.java,v 1.2 2002-11-06 20:13:45 mike Exp $ + +package org.z3950.zing.cql; +import java.lang.Exception; + + +/** + * Exception indicating that an error ocurred parsing CQL. 
+ * + * @version $Id: CQLParseException.java,v 1.2 2002-11-06 20:13:45 mike Exp $ + */ +public class CQLParseException extends Exception { + /** + * Creates a new CQLParseException. + * @param s + * An error message describing the problem with the query, + * usually a syntax error of some kind. + */ + public CQLParseException(String s) { + super(s); + } +} + diff --git a/src/main/java/org/z3950/zing/cql/CQLParser.java b/src/main/java/org/z3950/zing/cql/CQLParser.java new file mode 100644 index 0000000..58e0326 --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/CQLParser.java @@ -0,0 +1,447 @@ +// $Id: CQLParser.java,v 1.39 2007-08-06 15:54:48 mike Exp $ + +package org.z3950.zing.cql; +import java.io.IOException; +import java.util.Vector; +import java.util.Properties; +import java.io.InputStream; +import java.io.FileInputStream; +import java.io.FileNotFoundException; + + +/** + * Compiles CQL strings into parse trees of CQLNode subtypes. + * + * @version $Id: CQLParser.java,v 1.39 2007-08-06 15:54:48 mike Exp $ + * @see http://zing.z3950.org/cql/index.html + */ +public class CQLParser { + private CQLLexer lexer; + private int compat; // When false, implement CQL 1.2 + public static int V1POINT1 = 12368; + public static int V1POINT2 = 12369; + public static int V1POINT1SORT = 12370; + + static private boolean DEBUG = false; + static private boolean LEXDEBUG = false; + + /** + * The new parser implements a dialect of CQL specified by the + * compat argument: + *

    + *
  • V1POINT1 - CQL version 1.1 + *
  • + *
  • V1POINT2 - CQL version 1.2 + *
  • + *
  • V1POINT1SORT - CQL version 1.1 but including + * sortby as specified for CQL 1.2. + *
  • + *
+ */ + public CQLParser(int compat) { + this.compat = compat; + } + + /** + * The new parser implements CQL 1.2 + */ + public CQLParser() { + this.compat = V1POINT2; + } + + private static void debug(String str) { + if (DEBUG) + System.err.println("PARSEDEBUG: " + str); + } + + /** + * Compiles a CQL query. + *

+ * The resulting parse tree may be further processed by hand (see + * the individual node-types' documentation for details on the + * data structure) or, more often, simply rendered out in the + * desired form using one of the back-ends. toCQL() + * returns a decompiled CQL query equivalent to the one that was + * compiled in the first place; toXCQL() returns an + * XML snippet representing the query; and toPQF() + * returns the query rendered in Index Data's Prefix Query + * Format. + * + * @param cql The query + * @return A CQLNode object which is the root of a parse + * tree representing the query. */ + public CQLNode parse(String cql) + throws CQLParseException, IOException { + lexer = new CQLLexer(cql, LEXDEBUG); + + lexer.nextToken(); + debug("about to parseQuery()"); + CQLNode root = parseTopLevelPrefixes("cql.serverChoice", + new CQLRelation(compat == V1POINT2 ? "=" : "scr")); + if (lexer.ttype != lexer.TT_EOF) + throw new CQLParseException("junk after end: " + lexer.render()); + + return root; + } + + private CQLNode parseTopLevelPrefixes(String index, CQLRelation relation) + throws CQLParseException, IOException { + debug("top-level prefix mapping"); + + if (lexer.ttype == '>') { + return parsePrefix(index, relation, true); + } + + CQLNode node = parseQuery(index, relation); + if ((compat == V1POINT2 || compat == V1POINT1SORT) && + lexer.ttype == lexer.TT_SORTBY) { + match(lexer.ttype); + debug("sortspec"); + + CQLSortNode sortnode = new CQLSortNode(node); + while (lexer.ttype != lexer.TT_EOF) { + String sortindex = matchSymbol("sort index"); + ModifierSet ms = gatherModifiers(sortindex); + sortnode.addSortIndex(ms); + } + + if (sortnode.keys.size() == 0) { + throw new CQLParseException("no sort keys"); + } + + node = sortnode; + } + + return node; + } + + private CQLNode parseQuery(String index, CQLRelation relation) + throws CQLParseException, IOException { + debug("in parseQuery()"); + + CQLNode term = parseTerm(index, relation); + while (lexer.ttype 
!= lexer.TT_EOF && + lexer.ttype != ')' && + lexer.ttype != lexer.TT_SORTBY) { + if (lexer.ttype == lexer.TT_AND || + lexer.ttype == lexer.TT_OR || + lexer.ttype == lexer.TT_NOT || + lexer.ttype == lexer.TT_PROX) { + int type = lexer.ttype; + String val = lexer.sval; + match(type); + ModifierSet ms = gatherModifiers(val); + CQLNode term2 = parseTerm(index, relation); + term = ((type == lexer.TT_AND) ? new CQLAndNode(term, term2, ms) : + (type == lexer.TT_OR) ? new CQLOrNode (term, term2, ms) : + (type == lexer.TT_NOT) ? new CQLNotNode(term, term2, ms) : + new CQLProxNode(term, term2, ms)); + } else { + throw new CQLParseException("expected boolean, got " + + lexer.render()); + } + } + + debug("no more ops"); + return term; + } + + private ModifierSet gatherModifiers(String base) + throws CQLParseException, IOException { + debug("in gatherModifiers()"); + + ModifierSet ms = new ModifierSet(base); + while (lexer.ttype == '/') { + match('/'); + if (lexer.ttype != lexer.TT_WORD) + throw new CQLParseException("expected modifier, " + + "got " + lexer.render()); + String type = lexer.sval.toLowerCase(); + match(lexer.ttype); + if (!isRelation()) { + // It's a simple modifier consisting of type only + ms.addModifier(type); + } else { + // It's a complex modifier of the form type=value + String comparision = lexer.render(lexer.ttype, false); + match(lexer.ttype); + String value = matchSymbol("modifier value"); + ms.addModifier(type, comparision, value); + } + } + + return ms; + } + + private CQLNode parseTerm(String index, CQLRelation relation) + throws CQLParseException, IOException { + debug("in parseTerm()"); + + String word; + while (true) { + if (lexer.ttype == '(') { + debug("parenthesised term"); + match('('); + CQLNode expr = parseQuery(index, relation); + match(')'); + return expr; + } else if (lexer.ttype == '>') { + return parsePrefix(index, relation, false); + } + + debug("non-parenthesised term"); + word = matchSymbol("index or term"); + if (!isRelation() && 
lexer.ttype != lexer.TT_WORD) + break; + + index = word; + String relstr = (lexer.ttype == lexer.TT_WORD ? + lexer.sval : lexer.render(lexer.ttype, false)); + relation = new CQLRelation(relstr); + match(lexer.ttype); + ModifierSet ms = gatherModifiers(relstr); + relation.setModifiers(ms); + debug("index='" + index + ", " + + "relation='" + relation.toCQL() + "'"); + } + + CQLTermNode node = new CQLTermNode(index, relation, word); + debug("made term node " + node.toCQL()); + return node; + } + + private CQLNode parsePrefix(String index, CQLRelation relation, + boolean topLevel) + throws CQLParseException, IOException { + debug("prefix mapping"); + + match('>'); + String name = null; + String identifier = matchSymbol("prefix-name"); + if (lexer.ttype == '=') { + match('='); + name = identifier; + identifier = matchSymbol("prefix-identifer"); + } + CQLNode node = topLevel ? + parseTopLevelPrefixes(index, relation) : + parseQuery(index, relation); + + return new CQLPrefixNode(name, identifier, node); + } + + // Checks for a relation + private boolean isRelation() { + debug("isRelation: checking ttype=" + lexer.ttype + + " (" + lexer.render() + ")"); + return (lexer.ttype == '<' || + lexer.ttype == '>' || + lexer.ttype == '=' || + lexer.ttype == lexer.TT_LE || + lexer.ttype == lexer.TT_GE || + lexer.ttype == lexer.TT_NE || + lexer.ttype == lexer.TT_EQEQ); + } + + private void match(int token) + throws CQLParseException, IOException { + debug("in match(" + lexer.render(token, true) + ")"); + if (lexer.ttype != token) + throw new CQLParseException("expected " + + lexer.render(token, true) + + ", " + "got " + lexer.render()); + int tmp = lexer.nextToken(); + debug("match() got token=" + lexer.ttype + ", " + + "nval=" + lexer.nval + ", sval='" + lexer.sval + "'" + + " (tmp=" + tmp + ")"); + } + + private String matchSymbol(String expected) + throws CQLParseException, IOException { + + debug("in matchSymbol()"); + if (lexer.ttype == lexer.TT_WORD || + lexer.ttype == 
lexer.TT_NUMBER || + lexer.ttype == '"' || + // The following is a complete list of keywords. Because + // they're listed here, they can be used unquoted as + // indexes, terms, prefix names and prefix identifiers. + // ### Instead, we should ask the lexer whether what we + // have is a keyword, and let the knowledge reside there. + lexer.ttype == lexer.TT_AND || + lexer.ttype == lexer.TT_OR || + lexer.ttype == lexer.TT_NOT || + lexer.ttype == lexer.TT_PROX || + lexer.ttype == lexer.TT_SORTBY) { + String symbol = (lexer.ttype == lexer.TT_NUMBER) ? + lexer.render() : lexer.sval; + match(lexer.ttype); + return symbol; + } + + throw new CQLParseException("expected " + expected + ", " + + "got " + lexer.render()); + } + + + /** + * Simple test-harness for the CQLParser class. + *

+ * Reads a CQL query either from its command-line argument, if + * there is one, or standard input otherwise. So these two + * invocations are equivalent: + *

+     *  CQLParser 'au=(Kerninghan or Ritchie) and ti=Unix'
+     *  echo au=(Kerninghan or Ritchie) and ti=Unix | CQLParser
+     * 
+ * The test-harness parses the supplied query and renders it as + * XCQL, so that both of the invocations above produce the + * following output: + *
+     *	<triple>
+     *	  <boolean>
+     *	    <value>and</value>
+     *	  </boolean>
+     *	  <triple>
+     *	    <boolean>
+     *	      <value>or</value>
+     *	    </boolean>
+     *	    <searchClause>
+     *	      <index>au</index>
+     *	      <relation>
+     *	        <value>=</value>
+     *	      </relation>
+     *	      <term>Kerninghan</term>
+     *	    </searchClause>
+     *	    <searchClause>
+     *	      <index>au</index>
+     *	      <relation>
+     *	        <value>=</value>
+     *	      </relation>
+     *	      <term>Ritchie</term>
+     *	    </searchClause>
+     *	  </triple>
+     *	  <searchClause>
+     *	    <index>ti</index>
+     *	    <relation>
+     *	      <value>=</value>
+     *	    </relation>
+     *	    <term>Unix</term>
+     *	  </searchClause>
+     *	</triple>
+     * 
+ *

+ * @param -1 + * CQL version 1.1 (default version 1.2) + * @param -d + * Debug mode: extra output written to stderr. + * @param -c + * Causes the output to be written in CQL rather than XCQL - that + * is, a query equivalent to that which was input, is output. In + * effect, the test harness acts as a query canonicaliser. + * @return + * The input query, either as XCQL [default] or CQL [if the + * -c option is supplied]. + */ + public static void main (String[] args) { + char mode = 'x'; // x=XCQL, c=CQL, p=PQF + String pfile = null; + + Vector argv = new Vector(); + for (int i = 0; i < args.length; i++) { + argv.add(args[i]); + } + + int compat = V1POINT2; + if (argv.size() > 0 && argv.get(0).equals("-1")) { + compat = V1POINT1; + argv.remove(0); + } + + if (argv.size() > 0 && argv.get(0).equals("-d")) { + DEBUG = true; + argv.remove(0); + } + + if (argv.size() > 0 && argv.get(0).equals("-c")) { + mode = 'c'; + argv.remove(0); + } else if (argv.size() > 1 && argv.get(0).equals("-p")) { + mode = 'p'; + argv.remove(0); + pfile = (String) argv.get(0); + argv.remove(0); + } + + if (argv.size() > 1) { + System.err.println("Usage: CQLParser [-1] [-d] [-c] " + + "[-p []"); + System.err.println("If unspecified, query is read from stdin"); + System.exit(1); + } + + String cql; + if (argv.size() == 1) { + cql = (String) argv.get(0); + } else { + byte[] bytes = new byte[10000]; + try { + // Read in the whole of standard input in one go + int nbytes = System.in.read(bytes); + } catch (IOException ex) { + System.err.println("Can't read query: " + ex.getMessage()); + System.exit(2); + } + cql = new String(bytes); + } + + CQLParser parser = new CQLParser(compat); + CQLNode root = null; + try { + root = parser.parse(cql); + } catch (CQLParseException ex) { + System.err.println("Syntax error: " + ex.getMessage()); + System.exit(3); + } catch (IOException ex) { + System.err.println("Can't compile query: " + ex.getMessage()); + System.exit(4); + } + + try { + if (mode == 'c') { + 
System.out.println(root.toCQL()); + } else if (mode == 'p') { + InputStream f = new FileInputStream(pfile); + if (f == null) + throw new FileNotFoundException(pfile); + + Properties config = new Properties(); + config.load(f); + f.close(); + System.out.println(root.toPQF(config)); + } else { + System.out.print(root.toXCQL(0)); + } + } catch (IOException ex) { + System.err.println("Can't render query: " + ex.getMessage()); + System.exit(5); + } catch (UnknownIndexException ex) { + System.err.println("Unknown index: " + ex.getMessage()); + System.exit(6); + } catch (UnknownRelationException ex) { + System.err.println("Unknown relation: " + ex.getMessage()); + System.exit(7); + } catch (UnknownRelationModifierException ex) { + System.err.println("Unknown relation modifier: " + + ex.getMessage()); + System.exit(8); + } catch (UnknownPositionException ex) { + System.err.println("Unknown position: " + ex.getMessage()); + System.exit(9); + } catch (PQFTranslationException ex) { + // We catch all of this class's subclasses, so -- + throw new Error("can't get a PQFTranslationException"); + } + } +} diff --git a/src/main/java/org/z3950/zing/cql/CQLPrefix.java b/src/main/java/org/z3950/zing/cql/CQLPrefix.java new file mode 100644 index 0000000..af7c906 --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/CQLPrefix.java @@ -0,0 +1,34 @@ +// $Id: CQLPrefix.java,v 1.5 2007-06-27 22:39:55 mike Exp $ + +package org.z3950.zing.cql; +import java.lang.String; + +/** + * Represents a CQL prefix mapping from short name to long identifier. + * + * @version $Id: CQLPrefix.java,v 1.5 2007-06-27 22:39:55 mike Exp $ + */ +public class CQLPrefix { + /** + * The short name of the prefix mapping. That is, the prefix + * itself, such as dc, as it might be used in an index + * like dc.title. + */ + public String name; + + /** + * The full identifier name of the prefix mapping. That is, + * typically, a URI permanently allocated to a specific index + * set, such as http://zthes.z3950.org/cql/1.0. 
+ */ + public String identifier; + + /** + * Creates a new CQLPrefix mapping, which maps the specified name + * to the specified identifier. + */ + CQLPrefix(String name, String identifier) { + this.name = name; + this.identifier = identifier; + } +} diff --git a/src/main/java/org/z3950/zing/cql/CQLPrefixNode.java b/src/main/java/org/z3950/zing/cql/CQLPrefixNode.java new file mode 100644 index 0000000..dd01d85 --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/CQLPrefixNode.java @@ -0,0 +1,68 @@ +// $Id: CQLPrefixNode.java,v 1.10 2007-07-03 16:40:11 mike Exp $ + +package org.z3950.zing.cql; +import java.lang.String; +import java.util.Properties; +import java.util.Vector; + + +/** + * Represents a prefix node in a CQL parse-tree. + * + * @version $Id: CQLPrefixNode.java,v 1.10 2007-07-03 16:40:11 mike Exp $ + */ +public class CQLPrefixNode extends CQLNode { + /** + * The prefix definition that governs the subtree. + */ + public CQLPrefix prefix; + + /** + * The root of a parse-tree representing the part of the query + * that is governed by this prefix definition. + */ + public CQLNode subtree; + + /** + * Creates a new CQLPrefixNode inducing a mapping from the + * specified index-set name to the specified identifier across + * the specified subtree. + */ + public CQLPrefixNode(String name, String identifier, CQLNode subtree) { + this.prefix = new CQLPrefix(name, identifier); + this.subtree = subtree; + } + + public String toXCQL(int level, Vector prefixes, + Vector sortkeys) { + Vector tmp = (prefixes == null ? 
+ new Vector() : + new Vector(prefixes)); + tmp.add(prefix); + return subtree.toXCQL(level, tmp, sortkeys); + } + + public String toCQL() { + // ### We don't always need parens around the subtree + if (prefix.name == null) { + return ">\"" + prefix.identifier + "\" " + + "(" + subtree.toCQL() + ")"; + } else { + return ">" + prefix.name + "=\"" + prefix.identifier + "\" " + + "(" + subtree.toCQL() + ")"; + } + } + + public String toPQF(Properties config) throws PQFTranslationException { + // Prefixes and their identifiers don't actually play any role + // in PQF translation, since the meanings of the indexes, + // including their prefixes if any, are instead wired into + // `config'. + return subtree.toPQF(config); + } + + public byte[] toType1BER(Properties config) throws PQFTranslationException { + // See comment on toPQF() + return subtree.toType1BER(config); + } +} diff --git a/src/main/java/org/z3950/zing/cql/CQLProxNode.java b/src/main/java/org/z3950/zing/cql/CQLProxNode.java new file mode 100644 index 0000000..2ce355f --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/CQLProxNode.java @@ -0,0 +1,127 @@ +// $Id: CQLProxNode.java,v 1.14 2007-06-29 12:53:03 mike Exp $ + +package org.z3950.zing.cql; + + +/** + * Represents a proximity node in a CQL parse-tree. + * The left- and right-hand-sides must be satisfied by parts of the + * candidate records which are sufficiently close to each other, as + * specified by a set of proximity parameters. + * + * @version $Id: CQLProxNode.java,v 1.14 2007-06-29 12:53:03 mike Exp $ + */ +public class CQLProxNode extends CQLBooleanNode { + /** + * Creates a new PROX node with the specified left- and right-hand + * sides and modifiers. + */ + public CQLProxNode(CQLNode left, CQLNode right, ModifierSet ms) { + super(left, right, ms); + } + + /* + * proximity ::= exclusion distance ordered relation which-code unit-code. + * exclusion ::= '1' | '0' | 'void'. + * distance ::= integer. + * ordered ::= '1' | '0'. 
+ * relation ::= integer. + * which-code ::= 'known' | 'private' | integer. + * unit-code ::= integer. + */ + String opPQF() { + int relCode = getRelCode(); + int unitCode = getProxUnitCode(); + + String res = "prox " + + "0 " + + ms.modifier("distance") + " " + + (ms.modifier("ordering").equals("ordered") ? 1 : 0) + " " + + relCode + " " + + "1 " + + unitCode; + + return res; + } + + private int getRelCode() { + String rel = ms.modifier("relation"); + if (rel.equals("<")) { + return 1; + } else if (rel.equals("<=")) { + return 2; + } else if (rel.equals("=")) { + return 3; + } else if (rel.equals(">=")) { + return 4; + } else if (rel.equals(">")) { + return 5; + } else if (rel.equals("<>")) { + return 6; + } + return 0; + } + + private int getProxUnitCode() { + String unit = ms.modifier("unit"); + if (unit.equals("word")) { + return 2; + } else if (unit.equals("sentence")) { + return 3; + } else if (unit.equals("paragraph")) { + return 4; + } else if (unit.equals("element")) { + return 8; + } + return 0; + } + + + byte[] opType1() { + byte[] op = new byte[100]; + int offset, value; + offset = putTag(CONTEXT, 46, CONSTRUCTED, op, 0); // Operator + op[offset++] = (byte)(0x80&0xff); // indefinite length + + offset = putTag(CONTEXT, 3, CONSTRUCTED, op, offset); // prox + op[offset++] = (byte)(0x80&0xff); // indefinite length + + offset = putTag(CONTEXT, 1, PRIMITIVE, op, offset); // exclusion + value = 0; // false + offset = putLen(numLen(value), op, offset); + offset = putNum(value, op, offset); + + offset = putTag(CONTEXT, 2, PRIMITIVE, op, offset); // distance + value = Integer.parseInt(ms.modifier("distance")); + offset = putLen(numLen(value), op, offset); + offset = putNum(value, op, offset); + + offset = putTag(CONTEXT, 3, PRIMITIVE, op, offset); // ordered + value = ms.modifier("ordering").equals("ordered") ? 
1 : 0; + offset = putLen(numLen(value), op, offset); + offset = putNum(value, op, offset); + + offset = putTag(CONTEXT, 4, PRIMITIVE, op, offset); // relationType + value = getRelCode(); + offset = putLen(numLen(value), op, offset); + offset = putNum(value, op, offset); + + offset = putTag(CONTEXT, 5, CONSTRUCTED, op, offset); // proximityUnitCode + op[offset++] = (byte)(0x80&0xff); // indefinite length + offset = putTag(CONTEXT, 1, PRIMITIVE, op, offset); // known + value = getProxUnitCode(); + offset = putLen(numLen(value), op, offset); + offset = putNum(value, op, offset); + op[offset++] = 0x00; // end of proximityUnitCode + op[offset++] = 0x00; + + op[offset++] = 0x00; // end of prox + op[offset++] = 0x00; + op[offset++] = 0x00; // end of Operator + op[offset++] = 0x00; + + byte[] o = new byte[offset]; + System.arraycopy(op, 0, o, 0, offset); + return o; + } +} diff --git a/src/main/java/org/z3950/zing/cql/CQLRelation.java b/src/main/java/org/z3950/zing/cql/CQLRelation.java new file mode 100644 index 0000000..570afff --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/CQLRelation.java @@ -0,0 +1,76 @@ +// $Id: CQLRelation.java,v 1.19 2007-07-03 13:40:58 mike Exp $ + +package org.z3950.zing.cql; +import java.util.Vector; +import java.util.Properties; +import java.lang.StringBuffer; + +/** + * Represents a relation between a CQL index and term. + * + * @version $Id: CQLRelation.java,v 1.19 2007-07-03 13:40:58 mike Exp $ + */ +public class CQLRelation extends CQLNode { + ModifierSet ms; + + /** + * Creates a new CQLRelation with the specified base relation. + * Typical base relations include the usual six ordering relations + * (<=, >, etc.), the text + * relations any, all and exact, the + * old server-choice relation scr and profiled relations of + * the form prefix.name. 
+ */ + // ### Seems wrong: a modifier set should not have a base, a + // relation should + public CQLRelation(String base) { + ms = new ModifierSet(base); + } + + /** + * Returns the base relation with which the CQLRelation was + * originally created. + */ + public String getBase() { + return ms.getBase(); + } + + /** + * Sets the modifiers of the specified CQLRelation. + * Typical relation modifiers include relevant, + * fuzzy, stem and phonetic. On the + * whole, these modifiers have a meaningful interpretation only + * for the text relations. + */ + public void setModifiers(ModifierSet ms) { + this.ms = ms; + } + + /** + * Returns an array of the modifiers associated with a CQLRelation. + * @return + * An array of Modifier objects. + */ + public Vector getModifiers() { + return ms.getModifiers(); + } + + public String toXCQL(int level, Vector prefixes, Vector sortkeys) { + if (sortkeys != null) + throw new Error("CQLRelation.toXCQL() called with sortkeys"); + + return ms.toXCQL(level, "relation"); + } + + public String toCQL() { + return ms.toCQL(); + } + + public String toPQF(Properties config) throws PQFTranslationException { + throw new Error("CQLRelation.toPQF() can never be called"); + } + + public byte[] toType1BER(Properties config) { + throw new Error("CQLRelation.toType1BER() can never be called"); + } +} diff --git a/src/main/java/org/z3950/zing/cql/CQLSortNode.java b/src/main/java/org/z3950/zing/cql/CQLSortNode.java new file mode 100644 index 0000000..6e7f21d --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/CQLSortNode.java @@ -0,0 +1,72 @@ +// $Id: CQLSortNode.java,v 1.2 2008-04-11 12:05:15 mike Exp $ + +package org.z3950.zing.cql; +import java.util.Properties; +import java.util.Vector; + + +/** + * Represents a sort node in a CQL parse-tree. 
+ * + * @version $Id: CQLSortNode.java,v 1.2 2008-04-11 12:05:15 mike Exp $ + */ +public class CQLSortNode extends CQLNode { + /** + * The root of a subtree representing the query whose result is to + * be sorted. + */ + public CQLNode subtree; + + /** + * The set of sort keys by which results are to be sorted, + * each expressed as an index together with zero or more + * modifiers. + */ + Vector keys; + + public CQLSortNode(CQLNode subtree) { + this.subtree = subtree; + keys = new Vector(); + } + + public void addSortIndex(ModifierSet key) { + keys.add(key); + } + + public Vector getSortIndexes() { + return keys; + } + + public String toXCQL(int level, Vector prefixes, + Vector sortkeys) { + if (sortkeys != null) + throw new Error("CQLSortNode.toXCQL() called with sortkeys"); + return subtree.toXCQL(level, prefixes, keys); + } + + public String toCQL() { + StringBuffer buf = new StringBuffer(subtree.toCQL()); + + if (keys != null) { + buf.append(" sortby"); + for (int i = 0; i < keys.size(); i++) { + ModifierSet key = keys.get(i); + buf.append(" " + key.toCQL()); + } + } + + return buf.toString(); + } + + public String toPQF(Properties config) throws PQFTranslationException { + return "@attr 1=oops \"###\""; + } + + public byte[] toType1BER(Properties config) + throws PQFTranslationException { + // There is no way to represent sorting in a standard Z39.50 + // Type-1 query, so the best we can do is return the + // underlying query and ignore the sort-specification. + return subtree.toType1BER(config); + } +} diff --git a/src/main/java/org/z3950/zing/cql/CQLTermNode.java b/src/main/java/org/z3950/zing/cql/CQLTermNode.java new file mode 100644 index 0000000..f9b17ac --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/CQLTermNode.java @@ -0,0 +1,280 @@ +// $Id: CQLTermNode.java,v 1.28 2007-07-03 13:41:24 mike Exp $ + +package org.z3950.zing.cql; +import java.util.Properties; +import java.util.Vector; + + +/** + * Represents a terminal node in a CQL parse-tree. 
+ * A term node consists of the term String itself, together with, + * optionally, an index string and a relation. Neither or both of + * these must be provided - you can't have an index without a + * relation or vice versa. + * + * @version $Id: CQLTermNode.java,v 1.28 2007-07-03 13:41:24 mike Exp $ + */ +public class CQLTermNode extends CQLNode { + private String index; + private CQLRelation relation; + private String term; + + /** + * Creates a new term node with the specified index, + * relation and term. The first two may be + * null, but the term may not. + */ + public CQLTermNode(String index, CQLRelation relation, String term) { + this.index = index; + this.relation = relation; + this.term = term; + } + + public String getIndex() { return index; } + public CQLRelation getRelation() { return relation; } + public String getTerm() { return term; } + + private static boolean isResultSetIndex(String qual) { + return (qual.equals("srw.resultSet") || + qual.equals("srw.resultSetId") || + qual.equals("srw.resultSetName") || + qual.equals("cql.resultSet") || + qual.equals("cql.resultSetId") || + qual.equals("cql.resultSetName")); + } + + public String getResultSetName() { + if (isResultSetIndex(index)) + return term; + else + return null; + } + + public String toXCQL(int level, Vector prefixes, + Vector sortkeys) { + return (indent(level) + "\n" + + renderPrefixes(level+1, prefixes) + + indent(level+1) + "" + xq(index) + "\n" + + relation.toXCQL(level+1) + + indent(level+1) + "" + xq(term) + "\n" + + renderSortKeys(level+1, sortkeys) + + indent(level) + "\n"); + } + + public String toCQL() { + String quotedIndex = maybeQuote(index); + String quotedTerm = maybeQuote(term); + String res = quotedTerm; + + if (index != null && + !index.equalsIgnoreCase("srw.serverChoice") && + !index.equalsIgnoreCase("cql.serverChoice")) { + // ### We don't always need spaces around `relation'. 
+ res = quotedIndex + " " + relation.toCQL() + " " + quotedTerm; + } + + return res; + } + + // ### Interaction between this and its callers is not good as + // regards truncation of the term and generation of truncation + // attributes. Change the interface to fix this. + private Vector getAttrs(Properties config) throws PQFTranslationException { + Vector attrs = new Vector(); + + // Do this first so that if any other truncation or + // completeness attributes are generated, they "overwrite" + // those specified here. + // + // ### This approach relies on an unpleasant detail of Index + // Data's (admittedly definitive) implementation of PQF, + // and should not relied upon. + // + String attr = config.getProperty("always"); + if (attr != null) + attrs.add(attr); + + attr = config.getProperty("index." + index); + if (attr == null) + throw new UnknownIndexException(index); + attrs.add(attr); + + String rel = relation.getBase(); + if (rel.equals("=")) { + rel = "eq"; + } else if (rel.equals("<=")) { + rel = "le"; + } else if (rel.equals(">=")) { + rel = "ge"; + } + // ### Handling "any" and "all" properly would involve breaking + // the string down into a bunch of individual words and ORring + // or ANDing them together. Another day. + attr = config.getProperty("relation." + rel); + if (attr == null) + throw new UnknownRelationException(rel); + attrs.add(attr); + + Vector mods = relation.getModifiers(); + for (int i = 0; i < mods.size(); i++) { + String type = mods.get(i).type; + attr = config.getProperty("relationModifier." 
+ type); + if (attr == null) + throw new UnknownRelationModifierException(type); + attrs.add(attr); + } + + String pos = "any"; + String text = term; + if (text.length() > 0 && text.substring(0, 1).equals("^")) { + text = text.substring(1); // ### change not seen by caller + pos = "first"; + } + int len = text.length(); + if (len > 0 && text.substring(len-1, len).equals("^")) { + text = text.substring(0, len-1); // ### change not seen by caller + pos = pos.equals("first") ? "firstAndLast" : "last"; + // ### in the firstAndLast case, the standard + // pqf.properties file specifies that we generate a + // completeness=whole-field attributem, which means that + // we don't generate a position attribute at all. Do we + // care? Does it matter? + } + + attr = config.getProperty("position." + pos); + if (attr == null) + throw new UnknownPositionException(pos); + attrs.add(attr); + + attr = config.getProperty("structure." + rel); + if (attr == null) + attr = config.getProperty("structure.*"); + attrs.add(attr); + + return attrs; + } + + public String toPQF(Properties config) throws PQFTranslationException { + if (isResultSetIndex(index)) { + // Special case: ignore relation, modifiers, wildcards, etc. + // There's parallel code in toType1BER() + return "@set " + maybeQuote(term); + } + + Vector attrs = getAttrs(config); + + String attr, s = ""; + for (int i = 0; i < attrs.size(); i++) { + attr = (String) attrs.get(i); + s += "@attr " + Utils.replaceString(attr, " ", " @attr ") + " "; + } + + String text = term; + if (text.length() > 0 && text.substring(0, 1).equals("^")) + text = text.substring(1); + int len = text.length(); + if (len > 0 && text.substring(len-1, len).equals("^")) + text = text.substring(0, len-1); + + return s + maybeQuote(text); + } + + static String maybeQuote(String str) { + if (str == null) + return null; + + // There _must_ be a better way to make this test ... 
+ if (str.length() == 0 || + str.indexOf('"') != -1 || + str.indexOf(' ') != -1 || + str.indexOf('\t') != -1 || + str.indexOf('=') != -1 || + str.indexOf('<') != -1 || + str.indexOf('>') != -1 || + str.indexOf('/') != -1 || + str.indexOf('(') != -1 || + str.indexOf(')') != -1) { + str = '"' + Utils.replaceString(str, "\"", "\\\"") + '"'; + } + + return str; + } + + public byte[] toType1BER(Properties config) throws PQFTranslationException { + if (isResultSetIndex(index)) { + // Special case: ignore relation, modifiers, wildcards, etc. + // There's parallel code in toPQF() + byte[] operand = new byte[term.length()+100]; + int offset; + offset = putTag(CONTEXT, 0, CONSTRUCTED, operand, 0); // op + operand[offset++] = (byte)(0x80&0xff); // indefinite length + offset = putTag(CONTEXT, 31, PRIMITIVE, operand, offset); // ResultSetId + byte[] t = term.getBytes(); + offset = putLen(t.length, operand, offset); + System.arraycopy(t, 0, operand, offset, t.length); + offset += t.length; + operand[offset++] = 0x00; // end of Operand + operand[offset++] = 0x00; + byte[] o = new byte[offset]; + System.arraycopy(operand, 0, o, 0, offset); + return o; + } + + String text = term; + if (text.length() > 0 && text.substring(0, 1).equals("^")) + text = text.substring(1); + int len = text.length(); + if (len > 0 && text.substring(len-1, len).equals("^")) + text = text.substring(0, len-1); + + String attr, attrList, term = text; + byte[] operand = new byte[text.length()+100]; + int i, j, offset, type, value; + offset = putTag(CONTEXT, 0, CONSTRUCTED, operand, 0); // op + operand[offset++]=(byte)(0x80&0xff); // indefinite length + offset = putTag(CONTEXT, 102, CONSTRUCTED, operand, offset); // AttributesPlusTerm + operand[offset++] = (byte)(0x80&0xff); // indefinite length + offset = putTag(CONTEXT, 44, CONSTRUCTED, operand, offset); // AttributeList + operand[offset++] = (byte)(0x80&0xff); // indefinite length + + Vector attrs = getAttrs(config); + for(i = 0; i < attrs.size(); i++) { + 
attrList = (String) attrs.get(i); + java.util.StringTokenizer st = + new java.util.StringTokenizer(attrList); + while (st.hasMoreTokens()) { + attr = st.nextToken(); + j = attr.indexOf('='); + offset = putTag(UNIVERSAL, SEQUENCE, CONSTRUCTED, operand, offset); + operand[offset++] = (byte)(0x80&0xff); + offset = putTag(CONTEXT, 120, PRIMITIVE, operand, offset); + type = Integer.parseInt(attr.substring(0, j)); + offset = putLen(numLen(type), operand, offset); + offset = putNum(type, operand, offset); + + offset = putTag(CONTEXT, 121, PRIMITIVE, operand, offset); + value = Integer.parseInt(attr.substring(j+1)); + offset = putLen(numLen(value), operand, offset); + offset = putNum(value, operand, offset); + operand[offset++] = 0x00; // end of SEQUENCE + operand[offset++] = 0x00; + } + } + operand[offset++] = 0x00; // end of AttributeList + operand[offset++] = 0x00; + + offset = putTag(CONTEXT, 45, PRIMITIVE, operand, offset); // general Term + byte[] t = term.getBytes(); + offset = putLen(t.length, operand, offset); + System.arraycopy(t, 0, operand, offset, t.length); + offset += t.length; + + operand[offset++] = 0x00; // end of AttributesPlusTerm + operand[offset++] = 0x00; + operand[offset++] = 0x00; // end of Operand + operand[offset++] = 0x00; + byte[] o = new byte[offset]; + System.arraycopy(operand, 0, o, 0, offset); + return o; + } +} diff --git a/src/main/java/org/z3950/zing/cql/Makefile b/src/main/java/org/z3950/zing/cql/Makefile new file mode 100644 index 0000000..39c4f21 --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/Makefile @@ -0,0 +1,34 @@ +# $Id: Makefile,v 1.18 2007-07-03 12:56:29 mike Exp $ +# +# Your Java compiler will require that this source directory is on the +# classpath. 
The best way to do that is just to add the CQL-Java +# distribution's "src" subdirectory to your CLASSPATH environment +# variable, like this: +# CLASSPATH=$CLASSPATH:/where/ever/you/unpacked/it/cql-java-VERSION/src + +OBJ = Utils.class \ + CQLNode.class CQLTermNode.class CQLBooleanNode.class \ + CQLAndNode.class CQLOrNode.class CQLNotNode.class \ + CQLProxNode.class CQLPrefixNode.class CQLSortNode.class \ + CQLPrefix.class \ + CQLRelation.class Modifier.class ModifierSet.class \ + CQLParser.class CQLLexer.class CQLGenerator.class \ + CQLParseException.class MissingParameterException.class \ + PQFTranslationException.class \ + UnknownIndexException.class UnknownRelationException.class \ + UnknownRelationModifierException.class UnknownPositionException.class + +JARPATH = ../lib/cql-java.jar +JAR = ../../../../$(JARPATH) +$(JAR): $(OBJ) + cd ../../../..; jar cf $(JARPATH) org/z3950/zing/cql/*.class + +%.class: %.java + javac -Xlint:unchecked *.java + +test: $(JAR) + cd ../../../../../test/regression && make + +clean: + rm -f $(OBJ) 'CQLLexer$$Keyword.class' + diff --git a/src/main/java/org/z3950/zing/cql/MissingParameterException.java b/src/main/java/org/z3950/zing/cql/MissingParameterException.java new file mode 100644 index 0000000..3b54668 --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/MissingParameterException.java @@ -0,0 +1,21 @@ +// $Id: MissingParameterException.java,v 1.2 2002-11-06 20:13:45 mike Exp $ + +package org.z3950.zing.cql; +import java.lang.Exception; + + +/** + * Exception indicating that a required property was not specified. + * + * @version $Id: MissingParameterException.java,v 1.2 2002-11-06 20:13:45 mike Exp $ + */ +public class MissingParameterException extends Exception { + /** + * Creates a new MissingParameterException. + * @param s + * The name of the property whose value was required but not supplied. 
+ */ + public MissingParameterException(String s) { + super(s); + } +} diff --git a/src/main/java/org/z3950/zing/cql/Modifier.java b/src/main/java/org/z3950/zing/cql/Modifier.java new file mode 100644 index 0000000..4992b25 --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/Modifier.java @@ -0,0 +1,87 @@ +// $Id: Modifier.java,v 1.4 2007-07-03 13:29:34 mike Exp $ + +package org.z3950.zing.cql; +import java.util.Vector; +import java.lang.StringBuffer; + +/** + * Represents a single modifier, consisting of three elements: a type, + * a comparision and a value. For example, "distance", "<", "3". The + * type is mandatory; either the comparison and value must both occur, + * or neither must. + *

+ * This class is used only by ModifierSet. + * + * @version $Id: Modifier.java,v 1.4 2007-07-03 13:29:34 mike Exp $ + */ +public class Modifier { + String type; + String comparison; + String value; + + /** + * Creates a new Modifier with the specified type, comparison + * and value. + */ + public Modifier(String type, String comparison, String value) { + this.type = type; + this.comparison = comparison; + this.value = value; + //System.err.println("Made new modifier with " + "type='" + type + "', " + "comparison='" + comparison + "', " + "value='" + value + "',\n"); + } + + /** + * Creates a new Modifier with the specified type but no + * comparison or value. + */ + public Modifier(String type) { + this.type = type; + //System.err.println("Made new modifier of type '" + type + "'\n"); + } + + /** + * Returns the type with which the Modifier was created. + */ + public String getType() { + return type; + } + + /** + * Returns the comparison with which the Modifier was created. + */ + public String getComparison() { + return comparison; + } + + /** + * Returns the value with which the Modifier was created. 
+ */ + public String getValue() { + return value; + } + + public String toXCQL(int level, String relationElement) { + StringBuffer buf = new StringBuffer(); + + buf.append(Utils.indent(level) + "\n"); + buf.append(Utils.indent(level+1) + + "" + Utils.xq(type) + "\n"); + if (value != null) { + buf.append(Utils.indent(level+1) + "<" + relationElement + ">" + + Utils.xq(comparison) + "\n"); + buf.append(Utils.indent(level+1) + + "" + Utils.xq(value) + "\n"); + } + + buf.append(Utils.indent(level) + "\n"); + return buf.toString(); + } + + public String toCQL() { + StringBuffer buf = new StringBuffer(type); + if (value != null) + buf.append(" " + comparison + " " + value); + + return buf.toString(); + } +} diff --git a/src/main/java/org/z3950/zing/cql/ModifierSet.java b/src/main/java/org/z3950/zing/cql/ModifierSet.java new file mode 100644 index 0000000..7b6991a --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/ModifierSet.java @@ -0,0 +1,128 @@ +// $Id: ModifierSet.java,v 1.13 2007-07-03 13:30:18 mike Exp $ + +package org.z3950.zing.cql; +import java.util.Vector; +import java.lang.StringBuffer; + +/** + * Represents a base String and a set of Modifiers. + *

+ * This class is used as a workhorse delegate by both CQLRelation and + * CQLProxNode - two functionally very separate classes that happen to + * require similar data structures and functionality. + *

+ * A ModifierSet consists of a ``base'' string together with a set of + * zero or more type comparison value pairs, + * where type, comparison and value are all strings. + * + * @version $Id: ModifierSet.java,v 1.13 2007-07-03 13:30:18 mike Exp $ + */ +public class ModifierSet { + String base; + Vector modifiers; + + /** + * Creates a new ModifierSet with the specified base. + */ + public ModifierSet(String base) { + this.base = base; + modifiers = new Vector(); + } + + /** + * Returns the base string with which the ModifierSet was created. + */ + public String getBase() { + return base; + } + + /** + * Adds a modifier of the specified type, + * comparison and value to a ModifierSet. + */ + public void addModifier(String type, String comparison, String value) { + Modifier modifier = new Modifier(type, comparison, value); + modifiers.add(modifier); + } + + /** + * Adds a modifier of the specified type, but with no + * comparison and value, to a ModifierSet. + */ + public void addModifier(String type) { + Modifier modifier = new Modifier(type); + modifiers.add(modifier); + } + + /** + * Returns the value of the modifier in the specified ModifierSet + * that corresponds to the specified type. + */ + public String modifier(String type) { + int n = modifiers.size(); + for (int i = 0; i < n; i++) { + Modifier mod = modifiers.get(i); + if (mod.type.equals(type)) + return mod.value; + } + return null; + } + + /** + * Returns an array of the modifiers in a ModifierSet. + * @return + * An array of Modifiers. 
+ */ + public Vector getModifiers() { + return modifiers; + } + + public String toXCQL(int level, String topLevelElement) { + return underlyingToXCQL(level, topLevelElement, "value"); + } + + public String sortKeyToXCQL(int level) { + return underlyingToXCQL(level, "key", "index"); + } + + private String underlyingToXCQL(int level, String topLevelElement, + String valueElement) { + StringBuffer buf = new StringBuffer(); + buf.append(Utils.indent(level) + "<" + topLevelElement + ">\n"); + buf.append(Utils.indent(level+1) + + "<" + valueElement + ">" + Utils.xq(base) + + "\n"); + if (modifiers.size() > 0) { + buf.append(Utils.indent(level+1) + "\n"); + for (int i = 0; i < modifiers.size(); i++) { + buf.append(modifiers.get(i).toXCQL(level+2, "comparison")); + } + buf.append(Utils.indent(level+1) + "\n"); + } + buf.append(Utils.indent(level) + "\n"); + return buf.toString(); + } + + public String toCQL() { + StringBuffer buf = new StringBuffer(base); + for (int i = 0; i < modifiers.size(); i++) { + buf.append("/" + modifiers.get(i).toCQL()); + } + + return buf.toString(); + } + + public static void main(String[] args) { + if (args.length < 1) { + System.err.println("Usage: ModifierSet [ ]..."); + System.exit(1); + } + + ModifierSet res = new ModifierSet(args[0]); + for (int i = 1; i < args.length; i += 3) { + res.addModifier(args[i], args[i+1], args[i+2]); + } + + System.out.println(res.toCQL()); + } +} diff --git a/src/main/java/org/z3950/zing/cql/PQFTranslationException.java b/src/main/java/org/z3950/zing/cql/PQFTranslationException.java new file mode 100644 index 0000000..85ec184 --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/PQFTranslationException.java @@ -0,0 +1,16 @@ +// $Id: PQFTranslationException.java,v 1.1 2002-11-06 20:13:45 mike Exp $ + +package org.z3950.zing.cql; +import java.lang.Exception; + + +/** + * Base class for exceptions occurring when translating parse trees to PQF. 
+ * + * @version $Id: PQFTranslationException.java,v 1.1 2002-11-06 20:13:45 mike Exp $ + */ +public class PQFTranslationException extends Exception { + PQFTranslationException(String s) { + super(s); + } +} diff --git a/src/main/java/org/z3950/zing/cql/UnknownIndexException.java b/src/main/java/org/z3950/zing/cql/UnknownIndexException.java new file mode 100644 index 0000000..b4201d3 --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/UnknownIndexException.java @@ -0,0 +1,26 @@ +// $Id: UnknownIndexException.java,v 1.2 2007-06-27 22:44:40 mike Exp $ + +package org.z3950.zing.cql; +import java.lang.Exception; + + +/** + * Exception indicating that an index was not recognised. + * At compilation time, we accept any syntactically valid index; + * but when rendering a tree out as PQF, we need to translate the + * indexes into sets of Type-1 query attributes. If we can't do + * that, because the PQF configuration doesn't know about a relation, + * we throw one of these babies. + * + * @version $Id: UnknownIndexException.java,v 1.2 2007-06-27 22:44:40 mike Exp $ + */ +public class UnknownIndexException extends PQFTranslationException { + /** + * Creates a new UnknownIndexException. + * @param s + * The index for which there was no PQF configuration. + */ + public UnknownIndexException(String s) { + super(s); + } +} diff --git a/src/main/java/org/z3950/zing/cql/UnknownPositionException.java b/src/main/java/org/z3950/zing/cql/UnknownPositionException.java new file mode 100644 index 0000000..5f38ab5 --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/UnknownPositionException.java @@ -0,0 +1,28 @@ +// $Id: UnknownPositionException.java,v 1.2 2002-11-29 16:42:54 mike Exp $ + +package org.z3950.zing.cql; +import java.lang.Exception; + + +/** + * Exception indicating that a position was not recognised. 
+ * When rendering a tree out as PQF, each term is classified either as + * any, first, last or + * firstAndLast, depending on whether it begins and/or ends + * with the word-anchoring meta-character ^. Its + * classification is looked up as a position in the PQF + * configuration. If the position is not configured, we throw one of + * these babies. + * + * @version $Id: UnknownPositionException.java,v 1.2 2002-11-29 16:42:54 mike Exp $ + */ +public class UnknownPositionException extends PQFTranslationException { + /** + * Creates a new UnknownPositionException. + * @param s + * The position for which there was no PQF configuration. + */ + public UnknownPositionException(String s) { + super(s); + } +} diff --git a/src/main/java/org/z3950/zing/cql/UnknownRelationException.java b/src/main/java/org/z3950/zing/cql/UnknownRelationException.java new file mode 100644 index 0000000..4d65e72 --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/UnknownRelationException.java @@ -0,0 +1,26 @@ +// $Id: UnknownRelationException.java,v 1.2 2002-11-06 20:13:45 mike Exp $ + +package org.z3950.zing.cql; +import java.lang.Exception; + + +/** + * Exception indicating that a relation was not recognised. + * At compilation time, we accept any syntactically valid relation; + * but when rendering a tree out as PQF, we need to translate the + * relations into sets of Type-1 query attributes. If we can't do + * that, because the PQF configuration doesn't know about a relation, + * we throw one of these babies. + * + * @version $Id: UnknownRelationException.java,v 1.2 2002-11-06 20:13:45 mike Exp $ + */ +public class UnknownRelationException extends PQFTranslationException { + /** + * Creates a new UnknownRelationException. + * @param s + * The relation for which there was no PQF configuration. 
+ */ + public UnknownRelationException(String s) { + super(s); + } +} diff --git a/src/main/java/org/z3950/zing/cql/UnknownRelationModifierException.java b/src/main/java/org/z3950/zing/cql/UnknownRelationModifierException.java new file mode 100644 index 0000000..9d449b5 --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/UnknownRelationModifierException.java @@ -0,0 +1,26 @@ +// $Id: UnknownRelationModifierException.java,v 1.1 2002-11-06 20:13:45 mike Exp $ + +package org.z3950.zing.cql; +import java.lang.Exception; + + +/** + * Exception indicating that a relation modifier was not recognised. + * At compilation time, we accept any syntactically valid relation modifier; + * but when rendering a tree out as PQF, we need to translate the + * relation modifiers into sets of Type-1 query attributes. If we can't do + * that, because the PQF configuration doesn't know about a relation modifier, + * we throw one of these babies. + * + * @version $Id: UnknownRelationModifierException.java,v 1.1 2002-11-06 20:13:45 mike Exp $ + */ +public class UnknownRelationModifierException extends PQFTranslationException { + /** + * Creates a new UnknownRelationModifierException. + * @param s + * The relation modifier for which there was no PQF configuration. + */ + public UnknownRelationModifierException(String s) { + super(s); + } +} diff --git a/src/main/java/org/z3950/zing/cql/Utils.java b/src/main/java/org/z3950/zing/cql/Utils.java new file mode 100644 index 0000000..6777e46 --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/Utils.java @@ -0,0 +1,51 @@ +// $Id: Utils.java,v 1.2 2002-11-06 00:05:58 mike Exp $ + +package org.z3950.zing.cql; + + +/** + * Utility functions for the org.z3950.zing.cql package. + * Not intended for use outside this package. 
+ * + * @version $Id: Utils.java,v 1.2 2002-11-06 00:05:58 mike Exp $ + */ +class Utils { + static String indent(int level) { + String x = ""; + while (level-- > 0) { + x += " "; + } + return x; + } + + // XML Quote -- + // s/&/&/g; + // s//>/g; + // This is hideously inefficient, but I just don't see a better + // way using the standard JAVA library. + // + static String xq(String str) { + str = replaceString(str, "&", "&"); + str = replaceString(str, "<", "<"); + str = replaceString(str, ">", ">"); + return str; + } + + // I can't _believe_ I have to write this by hand in 2002 ... + static String replaceString(String str, String from, String to) { + StringBuffer sb = new StringBuffer(); + int ix; // index of next `from' + int offset = 0; // index of previous `from' + length(from) + + while ((ix = str.indexOf(from, offset)) != -1) { + sb.append(str.substring(offset, ix)); + sb.append(to); + offset = ix + from.length(); + } + + // End of string: append last bit and we're done + sb.append(str.substring(offset)); + return sb.toString(); + } +} diff --git a/src/org/z3950/zing/cql/.cvsignore b/src/org/z3950/zing/cql/.cvsignore deleted file mode 100644 index 6b468b6..0000000 --- a/src/org/z3950/zing/cql/.cvsignore +++ /dev/null @@ -1 +0,0 @@ -*.class diff --git a/src/org/z3950/zing/cql/CQLAndNode.java b/src/org/z3950/zing/cql/CQLAndNode.java deleted file mode 100644 index dc452db..0000000 --- a/src/org/z3950/zing/cql/CQLAndNode.java +++ /dev/null @@ -1,29 +0,0 @@ -// $Id: CQLAndNode.java,v 1.9 2007-06-29 12:48:21 mike Exp $ - -package org.z3950.zing.cql; - - -/** - * Represents an AND node in a CQL parse-tree. - * - * @version $Id: CQLAndNode.java,v 1.9 2007-06-29 12:48:21 mike Exp $ - */ -public class CQLAndNode extends CQLBooleanNode { - /** - * Creates a new AND node with the specified left- and right-hand - * sides and modifiers. 
- */ - public CQLAndNode(CQLNode left, CQLNode right, ModifierSet ms) { - super(left, right, ms); - } - - // ### Too much code duplication here with OR and NOT - byte[] opType1() { - byte[] op = new byte[5]; - putTag(CONTEXT, 46, CONSTRUCTED, op, 0); // Operator - putLen(2, op, 2); - putTag(CONTEXT, 0, PRIMITIVE, op, 3); // and - putLen(0, op, 4); - return op; - } -} diff --git a/src/org/z3950/zing/cql/CQLBooleanNode.java b/src/org/z3950/zing/cql/CQLBooleanNode.java deleted file mode 100644 index ec0608c..0000000 --- a/src/org/z3950/zing/cql/CQLBooleanNode.java +++ /dev/null @@ -1,90 +0,0 @@ -// $Id: CQLBooleanNode.java,v 1.18 2007-07-03 16:03:00 mike Exp $ - -package org.z3950.zing.cql; -import java.util.Properties; -import java.util.Vector; - - -/** - * Represents a boolean node in a CQL parse-tree. - * - * @version $Id: CQLBooleanNode.java,v 1.18 2007-07-03 16:03:00 mike Exp $ - */ -public abstract class CQLBooleanNode extends CQLNode { - /** - * The root of a parse-tree representing the left-hand side. - */ - public CQLNode left; - - /** - * The root of a parse-tree representing the right-hand side. - */ - public CQLNode right; - - /** - * The set of modifiers that are applied to this boolean. 
- */ - public ModifierSet ms; - - protected CQLBooleanNode(CQLNode left, CQLNode right, ModifierSet ms) { - this.left = left; - this.right = right; - this.ms = ms; - } - - public String toXCQL(int level, Vector prefixes, - Vector sortkeys) { - return (indent(level) + "\n" + - renderPrefixes(level+1, prefixes) + - ms.toXCQL(level+1, "boolean") + - indent(level+1) + "\n" + - left.toXCQL(level+2) + - indent(level+1) + "\n" + - indent(level+1) + "\n" + - right.toXCQL(level+2) + - indent(level+1) + "\n" + - renderSortKeys(level+1, sortkeys) + - indent(level) + "\n"); - } - - public String toCQL() { - // ### We don't always need parens around the operands - return ("(" + left.toCQL() + ")" + - " " + ms.toCQL() + " " + - "(" + right.toCQL() + ")"); - } - - public String toPQF(Properties config) throws PQFTranslationException { - return ("@" + opPQF() + - " " + left.toPQF(config) + - " " + right.toPQF(config)); - } - - // represents the operation for PQF: overridden for CQLProxNode - String opPQF() { return ms.getBase(); } - - public byte[] toType1BER(Properties config) throws PQFTranslationException { - System.out.println("in CQLBooleanNode.toType1BER(): PQF=" + - toPQF(config)); - byte[] rpn1 = left.toType1BER(config); - byte[] rpn2 = right.toType1BER(config); - byte[] op = opType1(); - byte[] rpnStructure = new byte[rpn1.length+rpn2.length+op.length+4]; - - // rpnRpnOp - int offset = putTag(CONTEXT, 1, CONSTRUCTED, rpnStructure, 0); - - rpnStructure[offset++] = (byte)(0x80&0xff); // indefinite length - System.arraycopy(rpn1, 0, rpnStructure, offset, rpn1.length); - offset += rpn1.length; - System.arraycopy(rpn2, 0, rpnStructure, offset, rpn2.length); - offset += rpn2.length; - System.arraycopy(op, 0, rpnStructure, offset, op.length); - offset += op.length; - rpnStructure[offset++] = 0x00; // end rpnRpnOp - rpnStructure[offset++] = 0x00; - return rpnStructure; - } - - abstract byte[] opType1(); -} diff --git a/src/org/z3950/zing/cql/CQLGenerator.java 
b/src/org/z3950/zing/cql/CQLGenerator.java deleted file mode 100644 index 416b771..0000000 --- a/src/org/z3950/zing/cql/CQLGenerator.java +++ /dev/null @@ -1,317 +0,0 @@ -// $Id: CQLGenerator.java,v 1.9 2007-07-03 15:41:35 mike Exp $ - -package org.z3950.zing.cql; -import java.util.Properties; -import java.util.Random; -import java.io.InputStream; -import java.io.FileInputStream; -import java.io.FileNotFoundException; - - -/** - * A generator that produces random CQL queries. - *

- * Why is that useful? Mainly to produce test-cases for CQL parsers - * (including the CQLParser class in this package): you can - * generate a random search tree, render it to XCQL and remember the - * result. Then decompile the tree to CQL, feed the generated CQL to - * the parser of your choice, and check that the XCQL it comes up with - * is the same what you got from your initial rendering. - *

- * This code is based on the same grammar as the CQLParser class in - * this distribution - there is a generate_x() method - * for each grammar element X. - * - * @version $Id: CQLGenerator.java,v 1.9 2007-07-03 15:41:35 mike Exp $ - * @see http://zing.z3950.org/cql/index.html - */ -public class CQLGenerator { - Properties params; - Random rnd; - static private boolean DEBUG = false; - - /** - * Creates a new CQL generator with the specified parameters. - *

- * @param params - * A Properties table containing configuration - * parameters for the queries to be generated by this generator. - * Recognised parameters are: - *

- *

- *
seed
- *
- * If specified, this is a long used to seed the - * random number generator, so that the CQL generator can be - * run repeatably, giving the same results each time. If it's - * omitted, then no seed is explicitly specified, and the - * results of each run will be different (so long as you don't - * run it more that 2^32 times :-) - *

- *

- *
complexQuery
- *
- * [mandatory] A floating-point number between 0.0 and 1.0, - * indicating the probability for each cql-query node - * that it will be expanded into a ``complex query'' - * (cql-query boolean search-clause) rather - * than a search-clause. - *

- *

- *
complexClause
- *
- * [mandatory] A floating-point number between 0.0 and 1.0, - * indicating the probability for each search-clause - * node that it will be expanded into a full sub-query rather - * than an [ index relation ] term triplet. - *

- *

- *
proxOp
- *
- * [mandatory] A floating-point number between 0.0 and 1.0, - * indicating the probability that each boolean operator will - * be chosen to be proximity operation; otherwise, the three - * simpler boolean operations (and, or and - * not) are chosen with equal probability. - *

- *

- *
equalsRelation
- *
- * [mandatory] A floating-point number between 0.0 and 1.0, - * indicating the probability that each relation will be chosen - * to be = - this is treated as a special case, since - * it's likely to be by far the most common relation in - * ``real life'' searches. - *

- *

- *
numericRelation
- *
- * [mandatory] A floating-point number between 0.0 and 1.0, - * indicating the probability that a relation, having chosen - * not to be =, is instead chosen to be one of the six - * numeric relations (<, >, - * <=, >=, <> and - * =). - *

- *

- *
- */ - public CQLGenerator(Properties params) { - this.params = params; - String seed = params.getProperty("seed"); - if (seed != null) - rnd = new Random(new Long(seed).longValue()); - else - rnd = new Random(); - } - - private static void debug(String str) { - if (DEBUG) - System.err.println("DEBUG: " + str); - } - - /** - * Generates a single random CQL query. - *

- * Uses the parameters that were associated with the generator - * when it was created. You are free to create as many random - * queries as you wish from a single generator; each of them will - * use the same parameters. - *

- * @return - * A CQLNode that is the root of the generated tree. - * That tree may be rendered in XCQL using its toXCQL() - * method, or decompiled into CQL using its toCQL - * method. - */ - public CQLNode generate() throws MissingParameterException { - return generate_cql_query(); - } - - private CQLNode generate_cql_query() throws MissingParameterException { - if (!maybe("complexQuery")) { - return generate_search_clause(); - } - - CQLNode node1 = generate_cql_query(); - CQLNode node2 = generate_search_clause(); - // ### should generate prefix-mapping nodes - if (maybe("proxOp")) { - // ### generate proximity nodes - } else { - switch (rnd.nextInt(3)) { - case 0: return new CQLAndNode(node1, node2, new ModifierSet("and")); - case 1: return new CQLOrNode (node1, node2, new ModifierSet("or")); - case 2: return new CQLNotNode(node1, node2, new ModifierSet("not")); - } - } - - return generate_search_clause(); - } - - private CQLNode generate_search_clause() throws MissingParameterException { - if (maybe("complexClause")) { - return generate_cql_query(); - } - - // ### Should sometimes generate index/relation-free terms - String index = generate_index(); - CQLRelation relation = generate_relation(); - String term = generate_term(); - - return new CQLTermNode(index, relation, term); - } - - // ### Should probably be more configurable - private String generate_index() { - String index = ""; // shut up compiler warning - if (rnd.nextInt(2) == 0) { - switch (rnd.nextInt(3)) { - case 0: index = "dc.author"; break; - case 1: index = "dc.title"; break; - case 2: index = "dc.subject"; break; - } - } else { - switch (rnd.nextInt(4)) { - case 0: index = "bath.author"; break; - case 1: index = "bath.title"; break; - case 2: index = "bath.subject"; break; - case 3: index = "foo>bar"; break; - } - } - - return index; - } - - private CQLRelation generate_relation() throws MissingParameterException { - String base = generate_base_relation(); - CQLRelation rel = new 
CQLRelation(base); - // ### should generate modifiers too - return rel; - } - - private String generate_base_relation() throws MissingParameterException { - if (maybe("equalsRelation")) { - return "="; - } else if (maybe("numericRelation")) { - return generate_numeric_relation(); - } else { - switch (rnd.nextInt(3)) { - case 0: return "exact"; - case 1: return "all"; - case 2: return "any"; - } - } - - // NOTREACHED - return ""; // shut up compiler warning - } - - // ### could read candidate terms from /usr/dict/words - // ### should introduce wildcard characters - // ### should generate multi-word terms - private String generate_term() { - switch (rnd.nextInt(10)) { - case 0: return "cat"; - case 1: return "\"cat\""; - case 2: return "comp.os.linux"; - case 3: return "xml:element"; - case 4: return ""; - case 5: return "prox/word/>=/5"; - case 6: return ""; - case 7: return "frog fish"; - case 8: return "the complete dinosaur"; - case 9: return "foo*bar"; - } - - // NOTREACHED - return ""; // shut up compiler warning - } - - private String generate_numeric_relation() { - switch (rnd.nextInt(6)) { - case 0: return "<"; - case 1: return ">"; - case 2: return "<="; - case 3: return ">="; - case 4: return "<>"; - case 5: return "="; - } - - // NOTREACHED - return ""; // shut up compiler warning - } - - boolean maybe(String param) throws MissingParameterException { - String probability = params.getProperty(param); - if (probability == null) - throw new MissingParameterException(param); - - double dice = rnd.nextDouble(); - double threshhold = new Double(probability).doubleValue(); - boolean res = dice < threshhold; - debug("dice=" + String.valueOf(dice).substring(0, 8) + - " vs. " + threshhold + "='" + param + "': " + res); - return res; - } - - - /** - * A simple test-harness for the generator. - *

- * It generates a single random query using the parameters - * specified in a nominated properties file, plus any additional - * name value pairs provided on the command-line, and - * decompiles it into CQL which is written to standard output. - *

- * For example, - * java org.z3950.zing.cql.CQLGenerator - * etc/generate.properties seed 18398, - * where the file generate.properties contains:

-     *	complexQuery=0.4
-     *	complexClause=0.4
-     *	equalsRelation=0.5
-     *	numericRelation=0.7
-     *	proxOp=0.0
-     * 
- * yields:
-     *	((dc.author = "<xml.element>") or (bath.title = cat)) and
-     *		(dc.subject >= "the complete dinosaur")
-     * 
- *

- * @param configFile - * The name of a properties file from which to read the - * configuration parameters (see above). - * @param name - * The name of a configuration parameter. - * @param value - * The value to assign to the configuration parameter named in - * the immediately preceding command-line argument. - * @return - * A CQL query expressed in a form that should be comprehensible - * to all conformant CQL compilers. - */ - public static void main (String[] args) throws Exception { - if (args.length % 2 != 1) { - System.err.println("Usage: CQLGenerator "+ - "[ ]..."); - System.exit(1); - } - - String configFile = args[0]; - InputStream f = new FileInputStream(configFile); - if (f == null) - throw new FileNotFoundException(configFile); - - Properties params = new Properties(); - params.load(f); - f.close(); - for (int i = 1; i < args.length; i += 2) - params.setProperty(args[i], args[i+1]); - - CQLGenerator generator = new CQLGenerator(params); - CQLNode tree = generator.generate(); - System.out.println(tree.toCQL()); - } -} diff --git a/src/org/z3950/zing/cql/CQLLexer.java b/src/org/z3950/zing/cql/CQLLexer.java deleted file mode 100644 index 5df3822..0000000 --- a/src/org/z3950/zing/cql/CQLLexer.java +++ /dev/null @@ -1,236 +0,0 @@ -// $Id: CQLLexer.java,v 1.14 2007-07-03 13:30:42 mike Exp $ - -package org.z3950.zing.cql; -import java.io.StreamTokenizer; -import java.io.StringReader; -import java.util.Hashtable; - - -// This is a semi-trivial subclass for java.io.StreamTokenizer that: -// * Has a halfDecentPushBack() method that actually works -// * Includes a render() method -// * Knows about the multi-character tokens "<=", ">=" and "<>" -// * Recognises a set of keywords as tokens in their own right -// * Includes some primitive debugging-output facilities -// It's used only by CQLParser. 
-// -class CQLLexer extends StreamTokenizer { - // New publicly visible token-types - static int TT_LE = 1000; // The "<=" relation - static int TT_GE = 1001; // The ">=" relation - static int TT_NE = 1002; // The "<>" relation - static int TT_EQEQ = 1003; // The "==" relation - static int TT_AND = 1004; // The "and" boolean - static int TT_OR = 1005; // The "or" boolean - static int TT_NOT = 1006; // The "not" boolean - static int TT_PROX = 1007; // The "prox" boolean - static int TT_SORTBY = 1008; // The "sortby" operator - - // Support for keywords. It would be nice to compile this linear - // list into a Hashtable, but it's hard to store ints as hash - // values, and next to impossible to use them as hash keys. So - // we'll just scan the (very short) list every time we need to do - // a lookup. - private class Keyword { - int token; - String keyword; - Keyword(int token, String keyword) { - this.token = token; - this.keyword = keyword; - } - } - // This should logically be static, but Java won't allow it :-P - private Keyword[] keywords = { - new Keyword(TT_AND, "and"), - new Keyword(TT_OR, "or"), - new Keyword(TT_NOT, "not"), - new Keyword(TT_PROX, "prox"), - new Keyword(TT_SORTBY, "sortby"), - }; - - // For halfDecentPushBack() and the code at the top of nextToken() - private static int TT_UNDEFINED = -1000; - private int saved_ttype = TT_UNDEFINED; - private double saved_nval; - private String saved_sval; - - // Controls debugging output - private static boolean DEBUG; - - CQLLexer(String cql, boolean lexdebug) { - super(new StringReader(cql)); - wordChars('!', '?'); // ASCII-dependency! - wordChars('[', '`'); // ASCII-dependency! 
- quoteChar('"'); - ordinaryChar('='); - ordinaryChar('<'); - ordinaryChar('>'); - ordinaryChar('/'); - ordinaryChar('('); - ordinaryChar(')'); - wordChars('\'', '\''); // prevent this from introducing strings - parseNumbers(); - DEBUG = lexdebug; - } - - private static void debug(String str) { - if (DEBUG) - System.err.println("LEXDEBUG: " + str); - } - - // I don't honestly understand why we need this, but the - // documentation for java.io.StreamTokenizer.pushBack() is pretty - // vague about its semantics, and it seems to me that they could - // be summed up as "it doesn't work". This version has the very - // clear semantics "pretend I didn't call nextToken() just then". - // - private void halfDecentPushBack() { - saved_ttype = ttype; - saved_nval = nval; - saved_sval = sval; - } - - public int nextToken() throws java.io.IOException { - if (saved_ttype != TT_UNDEFINED) { - ttype = saved_ttype; - nval = saved_nval; - sval = saved_sval; - saved_ttype = TT_UNDEFINED; - debug("using saved ttype=" + ttype + ", " + - "nval=" + nval + ", sval='" + sval + "'"); - return ttype; - } - - underlyingNextToken(); - if (ttype == '<') { - debug("token starts with '<' ..."); - underlyingNextToken(); - if (ttype == '=') { - debug("token continues with '=' - it's '<='"); - ttype = TT_LE; - } else if (ttype == '>') { - debug("token continues with '>' - it's '<>'"); - ttype = TT_NE; - } else { - debug("next token is " + render() + " (pushed back)"); - halfDecentPushBack(); - ttype = '<'; - debug("AFTER: ttype is now " + ttype + " - " + render()); - } - } else if (ttype == '>') { - debug("token starts with '>' ..."); - underlyingNextToken(); - if (ttype == '=') { - debug("token continues with '=' - it's '>='"); - ttype = TT_GE; - } else { - debug("next token is " + render() + " (pushed back)"); - halfDecentPushBack(); - ttype = '>'; - debug("AFTER: ttype is now " + ttype + " - " + render()); - } - } else if (ttype == '=') { - debug("token starts with '=' ..."); - 
underlyingNextToken(); - if (ttype == '=') { - debug("token continues with '=' - it's '=='"); - ttype = TT_EQEQ; - } else { - debug("next token is " + render() + " (pushed back)"); - halfDecentPushBack(); - ttype = '='; - debug("AFTER: ttype is now " + ttype + " - " + render()); - } - } - - debug("done nextToken(): ttype=" + ttype + ", " + - "nval=" + nval + ", " + "sval='" + sval + "'" + - " (" + render() + ")"); - - return ttype; - } - - // It's important to do keyword recognition here at the lowest - // level, otherwise when one of these words follows "<" or ">" - // (which can be the beginning of multi-character tokens) it gets - // pushed back as a string, and its keywordiness is not - // recognised. - // - public int underlyingNextToken() throws java.io.IOException { - super.nextToken(); - if (ttype == TT_WORD) - for (int i = 0; i < keywords.length; i++) - if (sval.equalsIgnoreCase(keywords[i].keyword)) - ttype = keywords[i].token; - - return ttype; - } - - // Simpler interface for the usual case: current token with quoting - String render() { - return render(ttype, true); - } - - String render(int token, boolean quoteChars) { - if (token == TT_EOF) { - return "EOF"; - } else if (token == TT_NUMBER) { - if ((double) nval == (int) nval) { - return new Integer((int) nval).toString(); - } else { - return new Double((double) nval).toString(); - } - } else if (token == TT_WORD) { - return "word: " + sval; - } else if (token == '"') { - return "string: \"" + sval + "\""; - } else if (token == TT_LE) { - return "<="; - } else if (token == TT_GE) { - return ">="; - } else if (token == TT_NE) { - return "<>"; - } else if (token == TT_EQEQ) { - return "=="; - } - - // Check whether its associated with one of the keywords - for (int i = 0; i < keywords.length; i++) - if (token == keywords[i].token) - return keywords[i].keyword; - - // Otherwise it must be a single character, such as '(' or '/'. 
- String res = String.valueOf((char) token); - if (quoteChars) res = "'" + res + "'"; - return res; - } - - public static void main(String[] args) throws Exception { - if (args.length > 1) { - System.err.println("Usage: CQLLexer []"); - System.err.println("If unspecified, query is read from stdin"); - System.exit(1); - } - - String cql; - if (args.length == 1) { - cql = args[0]; - } else { - byte[] bytes = new byte[10000]; - try { - // Read in the whole of standard input in one go - int nbytes = System.in.read(bytes); - } catch (java.io.IOException ex) { - System.err.println("Can't read query: " + ex.getMessage()); - System.exit(2); - } - cql = new String(bytes); - } - - CQLLexer lexer = new CQLLexer(cql, true); - int token; - while ((token = lexer.nextToken()) != TT_EOF) { - // Nothing to do: debug() statements render tokens for us - } - } -} diff --git a/src/org/z3950/zing/cql/CQLNode.java b/src/org/z3950/zing/cql/CQLNode.java deleted file mode 100644 index 3e42e1e..0000000 --- a/src/org/z3950/zing/cql/CQLNode.java +++ /dev/null @@ -1,354 +0,0 @@ -// $Id: CQLNode.java,v 1.26 2007-07-03 13:36:03 mike Exp $ - -package org.z3950.zing.cql; -import java.util.Properties; -import java.util.Vector; - - -/** - * Represents a node in a CQL parse-tree. - * - * @version $Id: CQLNode.java,v 1.26 2007-07-03 13:36:03 mike Exp $ - */ -public abstract class CQLNode { - CQLNode() {} // prevent javadoc from documenting this - - /** - * Returns the name of the result-set to which this query is a - * reference, if and only if the entire query consists only of a - * result-set reference. If it's anything else, including a - * boolean combination of a result-set reference with something - * else, then null is returned instead. - * @return the name of the referenced result-set - */ - public String getResultSetName() { - return null; - } - - /** - * Translates a parse-tree into an XCQL document. - *

- * @param level - * The number of levels to indent the top element of the XCQL - * document. This will typically be 0 when invoked by an - * application; it takes higher values when this method is - * invoked recursively for nodes further down the tree. - * @return - * A String containing an XCQL document equivalent to the - * parse-tree whose root is this node. - */ - public String toXCQL(int level) { - return toXCQL(level, null); - } - - public String toXCQL(int level, Vector prefixes) { - return toXCQL(level, prefixes, null); - } - - abstract public String toXCQL(int level, Vector prefixes, - Vector sortkeys); - - protected static String renderPrefixes(int level, Vector prefixes) { - if (prefixes == null || prefixes.size() == 0) - return ""; - String res = indent(level) + "\n"; - for (int i = 0; i < prefixes.size(); i++) { - CQLPrefix p = (CQLPrefix) prefixes.get(i); - res += indent(level+1) + "\n"; - if (p.name != null) - res += indent(level+2) + "" + p.name + "\n"; - res += indent(level+2) + - "" + p.identifier + "\n"; - res += indent(level+1) + "\n"; - } - return res + indent(level) + "\n"; - } - - protected static String renderSortKeys(int level, - Vector sortkeys) { - if (sortkeys == null || sortkeys.size() == 0) - return ""; - String res = indent(level) + "\n"; - for (int i = 0; i < sortkeys.size(); i++) { - ModifierSet key = sortkeys.get(i); - res += key.sortKeyToXCQL(level+1); - } - return res + indent(level) + "\n"; - } - - /** - * Decompiles a parse-tree into a CQL query. - *

- * @return - * A String containing a CQL query equivalent to the parse-tree - * whose root is this node, so that compiling that query will - * yield an identical tree. - */ - abstract public String toCQL(); - - /** - * Renders a parse-tree into a Yaz-style PQF string. - * PQF, or Prefix Query Format, is a cryptic but powerful notation - * that can be trivially mapped, one-to-one, int Z39.50 Type-1 and - * Type-101 queries. A specification for the format can be found - * in - * Chapter 7 (Supporting Tools) of the - * YAZ manual. - *

- * @param config - * A Properties object containing configuration - * information that specifies the mapping from CQL indexes, - * relations, etc. to Type-1 attributes. The mapping - * specification is described in the CQL-Java distribution's - * sample PQF-mapping configuration file, - * etc/pqf.properties, which see. - * @return - * A String containing a PQF query equivalent to the parse-tree - * whose root is this node. - */ - abstract public String toPQF(Properties config) - throws PQFTranslationException; - - /** - * Returns a String of spaces for indenting to the specified level. - */ - protected static String indent(int level) { return Utils.indent(level); } - - /** - * Returns the argument String quoted for XML. - * For example, each occurrence of < is translated to - * &lt;. - */ - protected static String xq(String str) { return Utils.xq(str); } - - /** - * Renders a parser-tree into a BER-endoded packet representing an - * equivalent Z39.50 Type-1 query. If you don't know what that - * means, then you don't need this method :-) This is useful - * primarily for SRW-to-Z39.50 gateways. - * - * @param config - * A Properties object containing configuration - * information that specifies the mapping from CQL indexes, - * relations, etc. to Type-1 attributes. The mapping - * specification is described in the CQL-Java distribution's - * sample PQF-mapping configuration file, - * etc/pqf.properties, which see. - * @return - * A byte array containing the BER packet. 
- * @see - * ftp://ftp.rsasecurity.com/pub/pkcs/ascii/layman.asc - */ - abstract public byte[] toType1BER(Properties config) - throws PQFTranslationException; - - // ANS.1 classes - protected static final int UNIVERSAL = 0; - protected static final int APPLICATION = 1; - protected static final int CONTEXT = 2; - protected static final int PRIVATE = 3; - - // ASN.1 tag forms - protected static final int PRIMITIVE = 0; - protected static final int CONSTRUCTED = 1; - - // ASN.1 UNIVERSAL data types - public static final byte BOOLEAN = 1; - public static final byte INTEGER = 2; - public static final byte BITSTRING = 3; - public static final byte OCTETSTRING = 4; - public static final byte NULL = 5; - public static final byte OBJECTIDENTIFIER = 6; - public static final byte OBJECTDESCRIPTOR = 7; - public static final byte EXTERNAL = 8; - public static final byte ENUMERATED = 10; - public static final byte SEQUENCE = 16; - public static final byte SET = 17; - public static final byte VISIBLESTRING = 26; - public static final byte GENERALSTRING = 27; - - protected static final int putTag(int asn1class, int fldid, int form, - byte[] record, int offset) { - if (fldid < 31) - record[offset++] = (byte)(fldid + asn1class*64 + form*32); - else { - record[offset++] = (byte)(31 + asn1class*64 + form*32); - if (fldid < 128) - record[offset++] = (byte)(fldid); - else { - record[offset++] = (byte)(128 + fldid/128); - record[offset++] = (byte)(fldid % 128); - } - } - return offset; - } - - /** - * Put a length directly into a BER record. - * - * @param len length to put into record - * @return the new, incremented value of the offset parameter. - */ - public // ### shouldn't this be protected? 
- static final int putLen(int len, byte[] record, int offset) { - - if (len < 128) - record[offset++] = (byte)len; - else { - int t; - record[offset] = (byte)(lenLen(len) - 1); - for (t = record[offset]; t > 0; t--) { - record[offset+t] = (byte)(len & 0xff); - len >>= 8; - } - t = offset; - offset += (record[offset]&0xff) + 1; - record[t] += 128; // turn on bit 8 in length byte. - } - return offset; - } - - /** - * Get the length needed to represent the given length. - * - * @param length determine length needed to encode this - * @return length needed to encode given length - */ - protected // ### shouldn't this be private? - static final int lenLen(int length) { - - return ((length < 128) ? 1 : - (length < 256) ? 2 : - (length < 65536L) ? 3 : 4); - } - - /** - * Get the length needed to represent the given number. - * - * @param num determine length needed to encode this - * @return length needed to encode given number - */ - protected static final int numLen(long num) { - num = num < 0 ? -num : num; - // ### Wouldn't this be better done algorithmically? - // Or at least with the constants expressed in hex? - return ((num < 128) ? 1 : - (num < 32768) ? 2 : - (num < 8388608) ? 3 : - (num < 2147483648L) ? 4 : - (num < 549755813888L) ? 5 : - (num < 140737488355328L) ? 6 : - (num < 36028797018963968L) ? 7 : 8); - } - - /** - * Put a number into a given buffer - * - * @param num number to put into buffer - * @param record buffer to use - * @param offset offset into buffer - * @return the new, incremented value of the offset parameter. 
- */ - protected static final int putNum(long num, byte record[], int offset) { - int cnt=numLen(num); - - for (int count = cnt - 1; count >= 0; count--) { - record[offset+count] = (byte)(num & 0xff); - num >>= 8; - } - return offset+cnt; - } - - // Used only by the makeOID() method - private static final java.util.Hashtable madeOIDs = - new java.util.Hashtable(10); - - protected static final byte[] makeOID(String oid) { - byte[] o; - int dot, offset = 0, oidOffset = 0, value; - - if ((o = (byte[])madeOIDs.get(oid)) == null) { - o = new byte[100]; - - // Isn't this kind of thing excruciating in Java? - while (oidOffset < oid.length() && - Character.isDigit(oid.charAt(oidOffset)) == true) { - if (offset > 90) // too large - return null; - - dot = oid.indexOf('.', oidOffset); - if (dot == -1) - dot = oid.length(); - - value = Integer.parseInt(oid.substring(oidOffset, dot)); - - if (offset == 0) { // 1st two are special - if (dot == -1) // ### can't happen: -1 is reassigned above - return null; // can't be this short - oidOffset = dot+1; // skip past '.' - - dot = oid.indexOf('.', oidOffset); - if (dot == -1) - dot = oid.length(); - - // ### Eh?! 
- value = value * 40 + - Integer.parseInt(oid.substring(oidOffset,dot)); - } - - if (value < 0x80) { - o[offset++] = (byte)value; - } else { - int count = 0; - byte bits[] = new byte[12]; // save a 84 (12*7) bit number - - while (value != 0) { - bits[count++] = (byte)(value & 0x7f); - value >>= 7; - } - - // Now place in the correct order - while (--count > 0) - o[offset++] = (byte)(bits[count] | 0x80); - - o[offset++] = bits[count]; - } - - dot = oid.indexOf('.', oidOffset); - if (dot == -1) - break; - - oidOffset = dot+1; - } - - byte[] ptr = new byte[offset]; - System.arraycopy(o, 0, ptr, 0, offset); - madeOIDs.put(oid, ptr); - return ptr; - } - return o; - } - - public static final byte[] makeQuery(CQLNode root, Properties properties) - throws PQFTranslationException { - byte[] rpnStructure = root.toType1BER(properties); - byte[] qry = new byte[rpnStructure.length+100]; - int offset = 0; - offset = putTag(CONTEXT, 1, CONSTRUCTED, qry, offset); - qry[offset++] = (byte)(0x80&0xff); // indefinite length - offset = putTag(UNIVERSAL, OBJECTIDENTIFIER, PRIMITIVE, qry, offset); - byte[] oid = makeOID("1.2.840.10003.3.1"); // bib-1 - offset = putLen(oid.length, qry, offset); - System.arraycopy(oid, 0, qry, offset, oid.length); - offset += oid.length; - System.arraycopy(rpnStructure, 0, qry, offset, rpnStructure.length); - offset += rpnStructure.length; - qry[offset++] = 0x00; // end of query - qry[offset++] = 0x00; - byte[] q = new byte[offset]; - System.arraycopy(qry, 0, q, 0, offset); - return q; - } -} diff --git a/src/org/z3950/zing/cql/CQLNotNode.java b/src/org/z3950/zing/cql/CQLNotNode.java deleted file mode 100644 index 7b56de3..0000000 --- a/src/org/z3950/zing/cql/CQLNotNode.java +++ /dev/null @@ -1,28 +0,0 @@ -// $Id: CQLNotNode.java,v 1.9 2007-06-29 12:48:21 mike Exp $ - -package org.z3950.zing.cql; - - -/** - * Represents a NOT node in a CQL parse-tree. 
- * - * @version $Id: CQLNotNode.java,v 1.9 2007-06-29 12:48:21 mike Exp $ - */ -public class CQLNotNode extends CQLBooleanNode { - /** - * Creates a new NOT node with the specified left- and right-hand - * sides and modifiers. - */ - public CQLNotNode(CQLNode left, CQLNode right, ModifierSet ms) { - super(left, right, ms); - } - - byte[] opType1() { - byte[] op = new byte[5]; - putTag(CONTEXT, 46, CONSTRUCTED, op, 0); // Operator - putLen(2, op, 2); - putTag(CONTEXT, 2, PRIMITIVE, op, 3); // and-not - putLen(0, op, 4); - return op; - } -} diff --git a/src/org/z3950/zing/cql/CQLOrNode.java b/src/org/z3950/zing/cql/CQLOrNode.java deleted file mode 100644 index 9e7d66e..0000000 --- a/src/org/z3950/zing/cql/CQLOrNode.java +++ /dev/null @@ -1,28 +0,0 @@ -// $Id: CQLOrNode.java,v 1.9 2007-06-29 12:48:21 mike Exp $ - -package org.z3950.zing.cql; - - -/** - * Represents an OR node in a CQL parse-tree. - * - * @version $Id: CQLOrNode.java,v 1.9 2007-06-29 12:48:21 mike Exp $ - */ -public class CQLOrNode extends CQLBooleanNode { - /** - * Creates a new OR node with the specified left- and right-hand - * sides and modifiers. - */ - public CQLOrNode(CQLNode left, CQLNode right, ModifierSet ms) { - super(left, right, ms); - } - - byte[] opType1() { - byte[] op = new byte[5]; - putTag(CONTEXT, 46, CONSTRUCTED, op, 0); // Operator - putLen(2, op, 2); - putTag(CONTEXT, 1, PRIMITIVE, op, 3); // or - putLen(0, op, 4); - return op; - } -} diff --git a/src/org/z3950/zing/cql/CQLParseException.java b/src/org/z3950/zing/cql/CQLParseException.java deleted file mode 100644 index 845dfff..0000000 --- a/src/org/z3950/zing/cql/CQLParseException.java +++ /dev/null @@ -1,23 +0,0 @@ -// $Id: CQLParseException.java,v 1.2 2002-11-06 20:13:45 mike Exp $ - -package org.z3950.zing.cql; -import java.lang.Exception; - - -/** - * Exception indicating that an error ocurred parsing CQL. 
- * - * @version $Id: CQLParseException.java,v 1.2 2002-11-06 20:13:45 mike Exp $ - */ -public class CQLParseException extends Exception { - /** - * Creates a new CQLParseException. - * @param s - * An error message describing the problem with the query, - * usually a syntax error of some kind. - */ - public CQLParseException(String s) { - super(s); - } -} - diff --git a/src/org/z3950/zing/cql/CQLParser.java b/src/org/z3950/zing/cql/CQLParser.java deleted file mode 100644 index 58e0326..0000000 --- a/src/org/z3950/zing/cql/CQLParser.java +++ /dev/null @@ -1,447 +0,0 @@ -// $Id: CQLParser.java,v 1.39 2007-08-06 15:54:48 mike Exp $ - -package org.z3950.zing.cql; -import java.io.IOException; -import java.util.Vector; -import java.util.Properties; -import java.io.InputStream; -import java.io.FileInputStream; -import java.io.FileNotFoundException; - - -/** - * Compiles CQL strings into parse trees of CQLNode subtypes. - * - * @version $Id: CQLParser.java,v 1.39 2007-08-06 15:54:48 mike Exp $ - * @see http://zing.z3950.org/cql/index.html - */ -public class CQLParser { - private CQLLexer lexer; - private int compat; // When false, implement CQL 1.2 - public static int V1POINT1 = 12368; - public static int V1POINT2 = 12369; - public static int V1POINT1SORT = 12370; - - static private boolean DEBUG = false; - static private boolean LEXDEBUG = false; - - /** - * The new parser implements a dialect of CQL specified by the - * compat argument: - *

    - *
  • V1POINT1 - CQL version 1.1 - *
  • - *
  • V1POINT2 - CQL version 1.2 - *
  • - *
  • V1POINT1SORT - CQL version 1.1 but including - * sortby as specified for CQL 1.2. - *
  • - *
- */ - public CQLParser(int compat) { - this.compat = compat; - } - - /** - * The new parser implements CQL 1.2 - */ - public CQLParser() { - this.compat = V1POINT2; - } - - private static void debug(String str) { - if (DEBUG) - System.err.println("PARSEDEBUG: " + str); - } - - /** - * Compiles a CQL query. - *

- * The resulting parse tree may be further processed by hand (see - * the individual node-types' documentation for details on the - * data structure) or, more often, simply rendered out in the - * desired form using one of the back-ends. toCQL() - * returns a decompiled CQL query equivalent to the one that was - * compiled in the first place; toXCQL() returns an - * XML snippet representing the query; and toPQF() - * returns the query rendered in Index Data's Prefix Query - * Format. - * - * @param cql The query - * @return A CQLNode object which is the root of a parse - * tree representing the query. */ - public CQLNode parse(String cql) - throws CQLParseException, IOException { - lexer = new CQLLexer(cql, LEXDEBUG); - - lexer.nextToken(); - debug("about to parseQuery()"); - CQLNode root = parseTopLevelPrefixes("cql.serverChoice", - new CQLRelation(compat == V1POINT2 ? "=" : "scr")); - if (lexer.ttype != lexer.TT_EOF) - throw new CQLParseException("junk after end: " + lexer.render()); - - return root; - } - - private CQLNode parseTopLevelPrefixes(String index, CQLRelation relation) - throws CQLParseException, IOException { - debug("top-level prefix mapping"); - - if (lexer.ttype == '>') { - return parsePrefix(index, relation, true); - } - - CQLNode node = parseQuery(index, relation); - if ((compat == V1POINT2 || compat == V1POINT1SORT) && - lexer.ttype == lexer.TT_SORTBY) { - match(lexer.ttype); - debug("sortspec"); - - CQLSortNode sortnode = new CQLSortNode(node); - while (lexer.ttype != lexer.TT_EOF) { - String sortindex = matchSymbol("sort index"); - ModifierSet ms = gatherModifiers(sortindex); - sortnode.addSortIndex(ms); - } - - if (sortnode.keys.size() == 0) { - throw new CQLParseException("no sort keys"); - } - - node = sortnode; - } - - return node; - } - - private CQLNode parseQuery(String index, CQLRelation relation) - throws CQLParseException, IOException { - debug("in parseQuery()"); - - CQLNode term = parseTerm(index, relation); - while (lexer.ttype 
!= lexer.TT_EOF && - lexer.ttype != ')' && - lexer.ttype != lexer.TT_SORTBY) { - if (lexer.ttype == lexer.TT_AND || - lexer.ttype == lexer.TT_OR || - lexer.ttype == lexer.TT_NOT || - lexer.ttype == lexer.TT_PROX) { - int type = lexer.ttype; - String val = lexer.sval; - match(type); - ModifierSet ms = gatherModifiers(val); - CQLNode term2 = parseTerm(index, relation); - term = ((type == lexer.TT_AND) ? new CQLAndNode(term, term2, ms) : - (type == lexer.TT_OR) ? new CQLOrNode (term, term2, ms) : - (type == lexer.TT_NOT) ? new CQLNotNode(term, term2, ms) : - new CQLProxNode(term, term2, ms)); - } else { - throw new CQLParseException("expected boolean, got " + - lexer.render()); - } - } - - debug("no more ops"); - return term; - } - - private ModifierSet gatherModifiers(String base) - throws CQLParseException, IOException { - debug("in gatherModifiers()"); - - ModifierSet ms = new ModifierSet(base); - while (lexer.ttype == '/') { - match('/'); - if (lexer.ttype != lexer.TT_WORD) - throw new CQLParseException("expected modifier, " - + "got " + lexer.render()); - String type = lexer.sval.toLowerCase(); - match(lexer.ttype); - if (!isRelation()) { - // It's a simple modifier consisting of type only - ms.addModifier(type); - } else { - // It's a complex modifier of the form type=value - String comparision = lexer.render(lexer.ttype, false); - match(lexer.ttype); - String value = matchSymbol("modifier value"); - ms.addModifier(type, comparision, value); - } - } - - return ms; - } - - private CQLNode parseTerm(String index, CQLRelation relation) - throws CQLParseException, IOException { - debug("in parseTerm()"); - - String word; - while (true) { - if (lexer.ttype == '(') { - debug("parenthesised term"); - match('('); - CQLNode expr = parseQuery(index, relation); - match(')'); - return expr; - } else if (lexer.ttype == '>') { - return parsePrefix(index, relation, false); - } - - debug("non-parenthesised term"); - word = matchSymbol("index or term"); - if (!isRelation() && 
lexer.ttype != lexer.TT_WORD) - break; - - index = word; - String relstr = (lexer.ttype == lexer.TT_WORD ? - lexer.sval : lexer.render(lexer.ttype, false)); - relation = new CQLRelation(relstr); - match(lexer.ttype); - ModifierSet ms = gatherModifiers(relstr); - relation.setModifiers(ms); - debug("index='" + index + ", " + - "relation='" + relation.toCQL() + "'"); - } - - CQLTermNode node = new CQLTermNode(index, relation, word); - debug("made term node " + node.toCQL()); - return node; - } - - private CQLNode parsePrefix(String index, CQLRelation relation, - boolean topLevel) - throws CQLParseException, IOException { - debug("prefix mapping"); - - match('>'); - String name = null; - String identifier = matchSymbol("prefix-name"); - if (lexer.ttype == '=') { - match('='); - name = identifier; - identifier = matchSymbol("prefix-identifer"); - } - CQLNode node = topLevel ? - parseTopLevelPrefixes(index, relation) : - parseQuery(index, relation); - - return new CQLPrefixNode(name, identifier, node); - } - - // Checks for a relation - private boolean isRelation() { - debug("isRelation: checking ttype=" + lexer.ttype + - " (" + lexer.render() + ")"); - return (lexer.ttype == '<' || - lexer.ttype == '>' || - lexer.ttype == '=' || - lexer.ttype == lexer.TT_LE || - lexer.ttype == lexer.TT_GE || - lexer.ttype == lexer.TT_NE || - lexer.ttype == lexer.TT_EQEQ); - } - - private void match(int token) - throws CQLParseException, IOException { - debug("in match(" + lexer.render(token, true) + ")"); - if (lexer.ttype != token) - throw new CQLParseException("expected " + - lexer.render(token, true) + - ", " + "got " + lexer.render()); - int tmp = lexer.nextToken(); - debug("match() got token=" + lexer.ttype + ", " + - "nval=" + lexer.nval + ", sval='" + lexer.sval + "'" + - " (tmp=" + tmp + ")"); - } - - private String matchSymbol(String expected) - throws CQLParseException, IOException { - - debug("in matchSymbol()"); - if (lexer.ttype == lexer.TT_WORD || - lexer.ttype == 
lexer.TT_NUMBER || - lexer.ttype == '"' || - // The following is a complete list of keywords. Because - // they're listed here, they can be used unquoted as - // indexes, terms, prefix names and prefix identifiers. - // ### Instead, we should ask the lexer whether what we - // have is a keyword, and let the knowledge reside there. - lexer.ttype == lexer.TT_AND || - lexer.ttype == lexer.TT_OR || - lexer.ttype == lexer.TT_NOT || - lexer.ttype == lexer.TT_PROX || - lexer.ttype == lexer.TT_SORTBY) { - String symbol = (lexer.ttype == lexer.TT_NUMBER) ? - lexer.render() : lexer.sval; - match(lexer.ttype); - return symbol; - } - - throw new CQLParseException("expected " + expected + ", " + - "got " + lexer.render()); - } - - - /** - * Simple test-harness for the CQLParser class. - *

- * Reads a CQL query either from its command-line argument, if - * there is one, or standard input otherwise. So these two - * invocations are equivalent: - *

-     *  CQLParser 'au=(Kerninghan or Ritchie) and ti=Unix'
-     *  echo au=(Kerninghan or Ritchie) and ti=Unix | CQLParser
-     * 
- * The test-harness parses the supplied query and renders is as - * XCQL, so that both of the invocations above produce the - * following output: - *
-     *	<triple>
-     *	  <boolean>
-     *	    <value>and</value>
-     *	  </boolean>
-     *	  <triple>
-     *	    <boolean>
-     *	      <value>or</value>
-     *	    </boolean>
-     *	    <searchClause>
-     *	      <index>au</index>
-     *	      <relation>
-     *	        <value>=</value>
-     *	      </relation>
-     *	      <term>Kerninghan</term>
-     *	    </searchClause>
-     *	    <searchClause>
-     *	      <index>au</index>
-     *	      <relation>
-     *	        <value>=</value>
-     *	      </relation>
-     *	      <term>Ritchie</term>
-     *	    </searchClause>
-     *	  </triple>
-     *	  <searchClause>
-     *	    <index>ti</index>
-     *	    <relation>
-     *	      <value>=</value>
-     *	    </relation>
-     *	    <term>Unix</term>
-     *	  </searchClause>
-     *	</triple>
-     * 
- *

- * @param -1 - * CQL version 1.1 (default version 1.2) - * @param -d - * Debug mode: extra output written to stderr. - * @param -c - * Causes the output to be written in CQL rather than XCQL - that - * is, a query equivalent to that which was input, is output. In - * effect, the test harness acts as a query canonicaliser. - * @return - * The input query, either as XCQL [default] or CQL [if the - * -c option is supplied]. - */ - public static void main (String[] args) { - char mode = 'x'; // x=XCQL, c=CQL, p=PQF - String pfile = null; - - Vector argv = new Vector(); - for (int i = 0; i < args.length; i++) { - argv.add(args[i]); - } - - int compat = V1POINT2; - if (argv.size() > 0 && argv.get(0).equals("-1")) { - compat = V1POINT1; - argv.remove(0); - } - - if (argv.size() > 0 && argv.get(0).equals("-d")) { - DEBUG = true; - argv.remove(0); - } - - if (argv.size() > 0 && argv.get(0).equals("-c")) { - mode = 'c'; - argv.remove(0); - } else if (argv.size() > 1 && argv.get(0).equals("-p")) { - mode = 'p'; - argv.remove(0); - pfile = (String) argv.get(0); - argv.remove(0); - } - - if (argv.size() > 1) { - System.err.println("Usage: CQLParser [-1] [-d] [-c] " + - "[-p []"); - System.err.println("If unspecified, query is read from stdin"); - System.exit(1); - } - - String cql; - if (argv.size() == 1) { - cql = (String) argv.get(0); - } else { - byte[] bytes = new byte[10000]; - try { - // Read in the whole of standard input in one go - int nbytes = System.in.read(bytes); - } catch (IOException ex) { - System.err.println("Can't read query: " + ex.getMessage()); - System.exit(2); - } - cql = new String(bytes); - } - - CQLParser parser = new CQLParser(compat); - CQLNode root = null; - try { - root = parser.parse(cql); - } catch (CQLParseException ex) { - System.err.println("Syntax error: " + ex.getMessage()); - System.exit(3); - } catch (IOException ex) { - System.err.println("Can't compile query: " + ex.getMessage()); - System.exit(4); - } - - try { - if (mode == 'c') { - 
System.out.println(root.toCQL()); - } else if (mode == 'p') { - InputStream f = new FileInputStream(pfile); - if (f == null) - throw new FileNotFoundException(pfile); - - Properties config = new Properties(); - config.load(f); - f.close(); - System.out.println(root.toPQF(config)); - } else { - System.out.print(root.toXCQL(0)); - } - } catch (IOException ex) { - System.err.println("Can't render query: " + ex.getMessage()); - System.exit(5); - } catch (UnknownIndexException ex) { - System.err.println("Unknown index: " + ex.getMessage()); - System.exit(6); - } catch (UnknownRelationException ex) { - System.err.println("Unknown relation: " + ex.getMessage()); - System.exit(7); - } catch (UnknownRelationModifierException ex) { - System.err.println("Unknown relation modifier: " + - ex.getMessage()); - System.exit(8); - } catch (UnknownPositionException ex) { - System.err.println("Unknown position: " + ex.getMessage()); - System.exit(9); - } catch (PQFTranslationException ex) { - // We catch all of this class's subclasses, so -- - throw new Error("can't get a PQFTranslationException"); - } - } -} diff --git a/src/org/z3950/zing/cql/CQLPrefix.java b/src/org/z3950/zing/cql/CQLPrefix.java deleted file mode 100644 index af7c906..0000000 --- a/src/org/z3950/zing/cql/CQLPrefix.java +++ /dev/null @@ -1,34 +0,0 @@ -// $Id: CQLPrefix.java,v 1.5 2007-06-27 22:39:55 mike Exp $ - -package org.z3950.zing.cql; -import java.lang.String; - -/** - * Represents a CQL prefix mapping from short name to long identifier. - * - * @version $Id: CQLPrefix.java,v 1.5 2007-06-27 22:39:55 mike Exp $ - */ -public class CQLPrefix { - /** - * The short name of the prefix mapping. That is, the prefix - * itself, such as dc, as it might be used in an index - * like dc.title. - */ - public String name; - - /** - * The full identifier name of the prefix mapping. That is, - * typically, a URI permanently allocated to a specific index - * set, such as http://zthes.z3950.org/cql/1.0. 
- */ - public String identifier; - - /** - * Creates a new CQLPrefix mapping, which maps the specified name - * to the specified identifier. - */ - CQLPrefix(String name, String identifier) { - this.name = name; - this.identifier = identifier; - } -} diff --git a/src/org/z3950/zing/cql/CQLPrefixNode.java b/src/org/z3950/zing/cql/CQLPrefixNode.java deleted file mode 100644 index dd01d85..0000000 --- a/src/org/z3950/zing/cql/CQLPrefixNode.java +++ /dev/null @@ -1,68 +0,0 @@ -// $Id: CQLPrefixNode.java,v 1.10 2007-07-03 16:40:11 mike Exp $ - -package org.z3950.zing.cql; -import java.lang.String; -import java.util.Properties; -import java.util.Vector; - - -/** - * Represents a prefix node in a CQL parse-tree. - * - * @version $Id: CQLPrefixNode.java,v 1.10 2007-07-03 16:40:11 mike Exp $ - */ -public class CQLPrefixNode extends CQLNode { - /** - * The prefix definition that governs the subtree. - */ - public CQLPrefix prefix; - - /** - * The root of a parse-tree representing the part of the query - * that is governed by this prefix definition. - */ - public CQLNode subtree; - - /** - * Creates a new CQLPrefixNode inducing a mapping from the - * specified index-set name to the specified identifier across - * the specified subtree. - */ - public CQLPrefixNode(String name, String identifier, CQLNode subtree) { - this.prefix = new CQLPrefix(name, identifier); - this.subtree = subtree; - } - - public String toXCQL(int level, Vector prefixes, - Vector sortkeys) { - Vector tmp = (prefixes == null ? 
- new Vector() : - new Vector(prefixes)); - tmp.add(prefix); - return subtree.toXCQL(level, tmp, sortkeys); - } - - public String toCQL() { - // ### We don't always need parens around the subtree - if (prefix.name == null) { - return ">\"" + prefix.identifier + "\" " + - "(" + subtree.toCQL() + ")"; - } else { - return ">" + prefix.name + "=\"" + prefix.identifier + "\" " + - "(" + subtree.toCQL() + ")"; - } - } - - public String toPQF(Properties config) throws PQFTranslationException { - // Prefixes and their identifiers don't actually play any role - // in PQF translation, since the meanings of the indexes, - // including their prefixes if any, are instead wired into - // `config'. - return subtree.toPQF(config); - } - - public byte[] toType1BER(Properties config) throws PQFTranslationException { - // See comment on toPQF() - return subtree.toType1BER(config); - } -} diff --git a/src/org/z3950/zing/cql/CQLProxNode.java b/src/org/z3950/zing/cql/CQLProxNode.java deleted file mode 100644 index 2ce355f..0000000 --- a/src/org/z3950/zing/cql/CQLProxNode.java +++ /dev/null @@ -1,127 +0,0 @@ -// $Id: CQLProxNode.java,v 1.14 2007-06-29 12:53:03 mike Exp $ - -package org.z3950.zing.cql; - - -/** - * Represents a proximity node in a CQL parse-tree. - * The left- and right-hand-sides must be satisfied by parts of the - * candidate records which are sufficiently close to each other, as - * specified by a set of proximity parameters. - * - * @version $Id: CQLProxNode.java,v 1.14 2007-06-29 12:53:03 mike Exp $ - */ -public class CQLProxNode extends CQLBooleanNode { - /** - * Creates a new PROX node with the specified left- and right-hand - * sides and modifiers. - */ - public CQLProxNode(CQLNode left, CQLNode right, ModifierSet ms) { - super(left, right, ms); - } - - /* - * proximity ::= exclusion distance ordered relation which-code unit-code. - * exclusion ::= '1' | '0' | 'void'. - * distance ::= integer. - * ordered ::= '1' | '0'. - * relation ::= integer. 
- * which-code ::= 'known' | 'private' | integer. - * unit-code ::= integer. - */ - String opPQF() { - int relCode = getRelCode(); - int unitCode = getProxUnitCode(); - - String res = "prox " + - "0 " + - ms.modifier("distance") + " " + - (ms.modifier("ordering").equals("ordered") ? 1 : 0) + " " + - relCode + " " + - "1 " + - unitCode; - - return res; - } - - private int getRelCode() { - String rel = ms.modifier("relation"); - if (rel.equals("<")) { - return 1; - } else if (rel.equals("<=")) { - return 2; - } else if (rel.equals("=")) { - return 3; - } else if (rel.equals(">=")) { - return 4; - } else if (rel.equals(">")) { - return 5; - } else if (rel.equals("<>")) { - return 6; - } - return 0; - } - - private int getProxUnitCode() { - String unit = ms.modifier("unit"); - if (unit.equals("word")) { - return 2; - } else if (unit.equals("sentence")) { - return 3; - } else if (unit.equals("paragraph")) { - return 4; - } else if (unit.equals("element")) { - return 8; - } - return 0; - } - - - byte[] opType1() { - byte[] op = new byte[100]; - int offset, value; - offset = putTag(CONTEXT, 46, CONSTRUCTED, op, 0); // Operator - op[offset++] = (byte)(0x80&0xff); // indefinite length - - offset = putTag(CONTEXT, 3, CONSTRUCTED, op, offset); // prox - op[offset++] = (byte)(0x80&0xff); // indefinite length - - offset = putTag(CONTEXT, 1, PRIMITIVE, op, offset); // exclusion - value = 0; // false - offset = putLen(numLen(value), op, offset); - offset = putNum(value, op, offset); - - offset = putTag(CONTEXT, 2, PRIMITIVE, op, offset); // distance - value = Integer.parseInt(ms.modifier("distance")); - offset = putLen(numLen(value), op, offset); - offset = putNum(value, op, offset); - - offset = putTag(CONTEXT, 3, PRIMITIVE, op, offset); // ordered - value = ms.modifier("ordering").equals("ordered") ? 
1 : 0; - offset = putLen(numLen(value), op, offset); - offset = putNum(value, op, offset); - - offset = putTag(CONTEXT, 4, PRIMITIVE, op, offset); // relationType - value = getRelCode(); - offset = putLen(numLen(value), op, offset); - offset = putNum(value, op, offset); - - offset = putTag(CONTEXT, 5, CONSTRUCTED, op, offset); // proximityUnitCode - op[offset++] = (byte)(0x80&0xff); // indefinite length - offset = putTag(CONTEXT, 1, PRIMITIVE, op, offset); // known - value = getProxUnitCode(); - offset = putLen(numLen(value), op, offset); - offset = putNum(value, op, offset); - op[offset++] = 0x00; // end of proximityUnitCode - op[offset++] = 0x00; - - op[offset++] = 0x00; // end of prox - op[offset++] = 0x00; - op[offset++] = 0x00; // end of Operator - op[offset++] = 0x00; - - byte[] o = new byte[offset]; - System.arraycopy(op, 0, o, 0, offset); - return o; - } -} diff --git a/src/org/z3950/zing/cql/CQLRelation.java b/src/org/z3950/zing/cql/CQLRelation.java deleted file mode 100644 index 570afff..0000000 --- a/src/org/z3950/zing/cql/CQLRelation.java +++ /dev/null @@ -1,76 +0,0 @@ -// $Id: CQLRelation.java,v 1.19 2007-07-03 13:40:58 mike Exp $ - -package org.z3950.zing.cql; -import java.util.Vector; -import java.util.Properties; -import java.lang.StringBuffer; - -/** - * Represents a relation between a CQL index and term. - * - * @version $Id: CQLRelation.java,v 1.19 2007-07-03 13:40:58 mike Exp $ - */ -public class CQLRelation extends CQLNode { - ModifierSet ms; - - /** - * Creates a new CQLRelation with the specified base relation. - * Typical base relations include the usual six ordering relations - * (<=, >, etc.), the text - * relations any, all and exact, the - * old server-choice relation scr and profiled relations of - * the form prefix.name. 
- */ - // ### Seems wrong: a modifier set should not have a base, a - // relation should - public CQLRelation(String base) { - ms = new ModifierSet(base); - } - - /** - * Returns the base relation with which the CQLRelation was - * originally created. - */ - public String getBase() { - return ms.getBase(); - } - - /** - * Sets the modifiers of the specified CQLRelation. - * Typical relation modifiers include relevant, - * fuzzy, stem and phonetic. On the - * whole, these modifiers have a meaningful interpretation only - * for the text relations. - */ - public void setModifiers(ModifierSet ms) { - this.ms = ms; - } - - /** - * Returns an array of the modifiers associated with a CQLRelation. - * @return - * An array of Modifier objects. - */ - public Vector getModifiers() { - return ms.getModifiers(); - } - - public String toXCQL(int level, Vector prefixes, Vector sortkeys) { - if (sortkeys != null) - throw new Error("CQLRelation.toXCQL() called with sortkeys"); - - return ms.toXCQL(level, "relation"); - } - - public String toCQL() { - return ms.toCQL(); - } - - public String toPQF(Properties config) throws PQFTranslationException { - throw new Error("CQLRelation.toPQF() can never be called"); - } - - public byte[] toType1BER(Properties config) { - throw new Error("CQLRelation.toType1BER() can never be called"); - } -} diff --git a/src/org/z3950/zing/cql/CQLSortNode.java b/src/org/z3950/zing/cql/CQLSortNode.java deleted file mode 100644 index 6e7f21d..0000000 --- a/src/org/z3950/zing/cql/CQLSortNode.java +++ /dev/null @@ -1,72 +0,0 @@ -// $Id: CQLSortNode.java,v 1.2 2008-04-11 12:05:15 mike Exp $ - -package org.z3950.zing.cql; -import java.util.Properties; -import java.util.Vector; - - -/** - * Represents a sort node in a CQL parse-tree. - * - * @version $Id: CQLSortNode.java,v 1.2 2008-04-11 12:05:15 mike Exp $ - */ -public class CQLSortNode extends CQLNode { - /** - * The root of a subtree representing the query whose result is to - * be sorted. 
- */ - public CQLNode subtree; - - /** - * The set of sort keys by which results are to be sorted, - * each expressed as an index together with zero or more - * modifiers. - */ - Vector keys; - - public CQLSortNode(CQLNode subtree) { - this.subtree = subtree; - keys = new Vector(); - } - - public void addSortIndex(ModifierSet key) { - keys.add(key); - } - - public Vector getSortIndexes() { - return keys; - } - - public String toXCQL(int level, Vector prefixes, - Vector sortkeys) { - if (sortkeys != null) - throw new Error("CQLSortNode.toXCQL() called with sortkeys"); - return subtree.toXCQL(level, prefixes, keys); - } - - public String toCQL() { - StringBuffer buf = new StringBuffer(subtree.toCQL()); - - if (keys != null) { - buf.append(" sortby"); - for (int i = 0; i < keys.size(); i++) { - ModifierSet key = keys.get(i); - buf.append(" " + key.toCQL()); - } - } - - return buf.toString(); - } - - public String toPQF(Properties config) throws PQFTranslationException { - return "@attr 1=oops \"###\""; - } - - public byte[] toType1BER(Properties config) - throws PQFTranslationException { - // There is no way to represent sorting in a standard Z39.50 - // Type-1 query, so the best we can do is return the - // underlying query and ignore the sort-specification. - return subtree.toType1BER(config); - } -} diff --git a/src/org/z3950/zing/cql/CQLTermNode.java b/src/org/z3950/zing/cql/CQLTermNode.java deleted file mode 100644 index f9b17ac..0000000 --- a/src/org/z3950/zing/cql/CQLTermNode.java +++ /dev/null @@ -1,280 +0,0 @@ -// $Id: CQLTermNode.java,v 1.28 2007-07-03 13:41:24 mike Exp $ - -package org.z3950.zing.cql; -import java.util.Properties; -import java.util.Vector; - - -/** - * Represents a terminal node in a CQL parse-tree. - * A term node consists of the term String itself, together with, - * optionally, an index string and a relation. Neither or both of - * these must be provided - you can't have an index without a - * relation or vice versa. 
- * - * @version $Id: CQLTermNode.java,v 1.28 2007-07-03 13:41:24 mike Exp $ - */ -public class CQLTermNode extends CQLNode { - private String index; - private CQLRelation relation; - private String term; - - /** - * Creates a new term node with the specified index, - * relation and term. The first two may be - * null, but the term may not. - */ - public CQLTermNode(String index, CQLRelation relation, String term) { - this.index = index; - this.relation = relation; - this.term = term; - } - - public String getIndex() { return index; } - public CQLRelation getRelation() { return relation; } - public String getTerm() { return term; } - - private static boolean isResultSetIndex(String qual) { - return (qual.equals("srw.resultSet") || - qual.equals("srw.resultSetId") || - qual.equals("srw.resultSetName") || - qual.equals("cql.resultSet") || - qual.equals("cql.resultSetId") || - qual.equals("cql.resultSetName")); - } - - public String getResultSetName() { - if (isResultSetIndex(index)) - return term; - else - return null; - } - - public String toXCQL(int level, Vector prefixes, - Vector sortkeys) { - return (indent(level) + "\n" + - renderPrefixes(level+1, prefixes) + - indent(level+1) + "" + xq(index) + "\n" + - relation.toXCQL(level+1) + - indent(level+1) + "" + xq(term) + "\n" + - renderSortKeys(level+1, sortkeys) + - indent(level) + "\n"); - } - - public String toCQL() { - String quotedIndex = maybeQuote(index); - String quotedTerm = maybeQuote(term); - String res = quotedTerm; - - if (index != null && - !index.equalsIgnoreCase("srw.serverChoice") && - !index.equalsIgnoreCase("cql.serverChoice")) { - // ### We don't always need spaces around `relation'. - res = quotedIndex + " " + relation.toCQL() + " " + quotedTerm; - } - - return res; - } - - // ### Interaction between this and its callers is not good as - // regards truncation of the term and generation of truncation - // attributes. Change the interface to fix this. 
- private Vector getAttrs(Properties config) throws PQFTranslationException { - Vector attrs = new Vector(); - - // Do this first so that if any other truncation or - // completeness attributes are generated, they "overwrite" - // those specified here. - // - // ### This approach relies on an unpleasant detail of Index - // Data's (admittedly definitive) implementation of PQF, - // and should not relied upon. - // - String attr = config.getProperty("always"); - if (attr != null) - attrs.add(attr); - - attr = config.getProperty("index." + index); - if (attr == null) - throw new UnknownIndexException(index); - attrs.add(attr); - - String rel = relation.getBase(); - if (rel.equals("=")) { - rel = "eq"; - } else if (rel.equals("<=")) { - rel = "le"; - } else if (rel.equals(">=")) { - rel = "ge"; - } - // ### Handling "any" and "all" properly would involve breaking - // the string down into a bunch of individual words and ORring - // or ANDing them together. Another day. - attr = config.getProperty("relation." + rel); - if (attr == null) - throw new UnknownRelationException(rel); - attrs.add(attr); - - Vector mods = relation.getModifiers(); - for (int i = 0; i < mods.size(); i++) { - String type = mods.get(i).type; - attr = config.getProperty("relationModifier." + type); - if (attr == null) - throw new UnknownRelationModifierException(type); - attrs.add(attr); - } - - String pos = "any"; - String text = term; - if (text.length() > 0 && text.substring(0, 1).equals("^")) { - text = text.substring(1); // ### change not seen by caller - pos = "first"; - } - int len = text.length(); - if (len > 0 && text.substring(len-1, len).equals("^")) { - text = text.substring(0, len-1); // ### change not seen by caller - pos = pos.equals("first") ? "firstAndLast" : "last"; - // ### in the firstAndLast case, the standard - // pqf.properties file specifies that we generate a - // completeness=whole-field attributem, which means that - // we don't generate a position attribute at all. 
Do we - // care? Does it matter? - } - - attr = config.getProperty("position." + pos); - if (attr == null) - throw new UnknownPositionException(pos); - attrs.add(attr); - - attr = config.getProperty("structure." + rel); - if (attr == null) - attr = config.getProperty("structure.*"); - attrs.add(attr); - - return attrs; - } - - public String toPQF(Properties config) throws PQFTranslationException { - if (isResultSetIndex(index)) { - // Special case: ignore relation, modifiers, wildcards, etc. - // There's parallel code in toType1BER() - return "@set " + maybeQuote(term); - } - - Vector attrs = getAttrs(config); - - String attr, s = ""; - for (int i = 0; i < attrs.size(); i++) { - attr = (String) attrs.get(i); - s += "@attr " + Utils.replaceString(attr, " ", " @attr ") + " "; - } - - String text = term; - if (text.length() > 0 && text.substring(0, 1).equals("^")) - text = text.substring(1); - int len = text.length(); - if (len > 0 && text.substring(len-1, len).equals("^")) - text = text.substring(0, len-1); - - return s + maybeQuote(text); - } - - static String maybeQuote(String str) { - if (str == null) - return null; - - // There _must_ be a better way to make this test ... - if (str.length() == 0 || - str.indexOf('"') != -1 || - str.indexOf(' ') != -1 || - str.indexOf('\t') != -1 || - str.indexOf('=') != -1 || - str.indexOf('<') != -1 || - str.indexOf('>') != -1 || - str.indexOf('/') != -1 || - str.indexOf('(') != -1 || - str.indexOf(')') != -1) { - str = '"' + Utils.replaceString(str, "\"", "\\\"") + '"'; - } - - return str; - } - - public byte[] toType1BER(Properties config) throws PQFTranslationException { - if (isResultSetIndex(index)) { - // Special case: ignore relation, modifiers, wildcards, etc. 
- // There's parallel code in toPQF() - byte[] operand = new byte[term.length()+100]; - int offset; - offset = putTag(CONTEXT, 0, CONSTRUCTED, operand, 0); // op - operand[offset++] = (byte)(0x80&0xff); // indefinite length - offset = putTag(CONTEXT, 31, PRIMITIVE, operand, offset); // ResultSetId - byte[] t = term.getBytes(); - offset = putLen(t.length, operand, offset); - System.arraycopy(t, 0, operand, offset, t.length); - offset += t.length; - operand[offset++] = 0x00; // end of Operand - operand[offset++] = 0x00; - byte[] o = new byte[offset]; - System.arraycopy(operand, 0, o, 0, offset); - return o; - } - - String text = term; - if (text.length() > 0 && text.substring(0, 1).equals("^")) - text = text.substring(1); - int len = text.length(); - if (len > 0 && text.substring(len-1, len).equals("^")) - text = text.substring(0, len-1); - - String attr, attrList, term = text; - byte[] operand = new byte[text.length()+100]; - int i, j, offset, type, value; - offset = putTag(CONTEXT, 0, CONSTRUCTED, operand, 0); // op - operand[offset++]=(byte)(0x80&0xff); // indefinite length - offset = putTag(CONTEXT, 102, CONSTRUCTED, operand, offset); // AttributesPlusTerm - operand[offset++] = (byte)(0x80&0xff); // indefinite length - offset = putTag(CONTEXT, 44, CONSTRUCTED, operand, offset); // AttributeList - operand[offset++] = (byte)(0x80&0xff); // indefinite length - - Vector attrs = getAttrs(config); - for(i = 0; i < attrs.size(); i++) { - attrList = (String) attrs.get(i); - java.util.StringTokenizer st = - new java.util.StringTokenizer(attrList); - while (st.hasMoreTokens()) { - attr = st.nextToken(); - j = attr.indexOf('='); - offset = putTag(UNIVERSAL, SEQUENCE, CONSTRUCTED, operand, offset); - operand[offset++] = (byte)(0x80&0xff); - offset = putTag(CONTEXT, 120, PRIMITIVE, operand, offset); - type = Integer.parseInt(attr.substring(0, j)); - offset = putLen(numLen(type), operand, offset); - offset = putNum(type, operand, offset); - - offset = putTag(CONTEXT, 121, 
PRIMITIVE, operand, offset); - value = Integer.parseInt(attr.substring(j+1)); - offset = putLen(numLen(value), operand, offset); - offset = putNum(value, operand, offset); - operand[offset++] = 0x00; // end of SEQUENCE - operand[offset++] = 0x00; - } - } - operand[offset++] = 0x00; // end of AttributeList - operand[offset++] = 0x00; - - offset = putTag(CONTEXT, 45, PRIMITIVE, operand, offset); // general Term - byte[] t = term.getBytes(); - offset = putLen(t.length, operand, offset); - System.arraycopy(t, 0, operand, offset, t.length); - offset += t.length; - - operand[offset++] = 0x00; // end of AttributesPlusTerm - operand[offset++] = 0x00; - operand[offset++] = 0x00; // end of Operand - operand[offset++] = 0x00; - byte[] o = new byte[offset]; - System.arraycopy(operand, 0, o, 0, offset); - return o; - } -} diff --git a/src/org/z3950/zing/cql/Makefile b/src/org/z3950/zing/cql/Makefile deleted file mode 100644 index 39c4f21..0000000 --- a/src/org/z3950/zing/cql/Makefile +++ /dev/null @@ -1,34 +0,0 @@ -# $Id: Makefile,v 1.18 2007-07-03 12:56:29 mike Exp $ -# -# Your Java compiler will require that this source directory is on the -# classpath. 
The best way to do that is just to add the CQL-Java -# distribution's "src" subdirectory to your CLASSPATH environment -# variable, like this: -# CLASSPATH=$CLASSPATH:/where/ever/you/unpacked/it/cql-java-VERSION/src - -OBJ = Utils.class \ - CQLNode.class CQLTermNode.class CQLBooleanNode.class \ - CQLAndNode.class CQLOrNode.class CQLNotNode.class \ - CQLProxNode.class CQLPrefixNode.class CQLSortNode.class \ - CQLPrefix.class \ - CQLRelation.class Modifier.class ModifierSet.class \ - CQLParser.class CQLLexer.class CQLGenerator.class \ - CQLParseException.class MissingParameterException.class \ - PQFTranslationException.class \ - UnknownIndexException.class UnknownRelationException.class \ - UnknownRelationModifierException.class UnknownPositionException.class - -JARPATH = ../lib/cql-java.jar -JAR = ../../../../$(JARPATH) -$(JAR): $(OBJ) - cd ../../../..; jar cf $(JARPATH) org/z3950/zing/cql/*.class - -%.class: %.java - javac -Xlint:unchecked *.java - -test: $(JAR) - cd ../../../../../test/regression && make - -clean: - rm -f $(OBJ) 'CQLLexer$$Keyword.class' - diff --git a/src/org/z3950/zing/cql/MissingParameterException.java b/src/org/z3950/zing/cql/MissingParameterException.java deleted file mode 100644 index 3b54668..0000000 --- a/src/org/z3950/zing/cql/MissingParameterException.java +++ /dev/null @@ -1,21 +0,0 @@ -// $Id: MissingParameterException.java,v 1.2 2002-11-06 20:13:45 mike Exp $ - -package org.z3950.zing.cql; -import java.lang.Exception; - - -/** - * Exception indicating that a required property was not specified. - * - * @version $Id: MissingParameterException.java,v 1.2 2002-11-06 20:13:45 mike Exp $ - */ -public class MissingParameterException extends Exception { - /** - * Creates a new MissingParameterException. - * @param s - * The name of the property whose value was required but not supplied. 
- */ - public MissingParameterException(String s) { - super(s); - } -} diff --git a/src/org/z3950/zing/cql/Modifier.java b/src/org/z3950/zing/cql/Modifier.java deleted file mode 100644 index 4992b25..0000000 --- a/src/org/z3950/zing/cql/Modifier.java +++ /dev/null @@ -1,87 +0,0 @@ -// $Id: Modifier.java,v 1.4 2007-07-03 13:29:34 mike Exp $ - -package org.z3950.zing.cql; -import java.util.Vector; -import java.lang.StringBuffer; - -/** - * Represents a single modifier, consisting of three elements: a type, - * a comparision and a value. For example, "distance", "<", "3". The - * type is mandatory; either the comparison and value must both occur, - * or neither must. - *

- * This class is used only by ModifierSet. - * - * @version $Id: Modifier.java,v 1.4 2007-07-03 13:29:34 mike Exp $ - */ -public class Modifier { - String type; - String comparison; - String value; - - /** - * Creates a new Modifier with the specified type, comparison - * and value. - */ - public Modifier(String type, String comparison, String value) { - this.type = type; - this.comparison = comparison; - this.value = value; - //System.err.println("Made new modifier with " + "type='" + type + "', " + "comparison='" + comparison + "', " + "value='" + value + "',\n"); - } - - /** - * Creates a new Modifier with the specified type but no - * comparison or value. - */ - public Modifier(String type) { - this.type = type; - //System.err.println("Made new modifier of type '" + type + "'\n"); - } - - /** - * Returns the type with which the Modifier was created. - */ - public String getType() { - return type; - } - - /** - * Returns the comparison with which the Modifier was created. - */ - public String getComparison() { - return comparison; - } - - /** - * Returns the value with which the Modifier was created. 
- */ - public String getValue() { - return value; - } - - public String toXCQL(int level, String relationElement) { - StringBuffer buf = new StringBuffer(); - - buf.append(Utils.indent(level) + "\n"); - buf.append(Utils.indent(level+1) + - "" + Utils.xq(type) + "\n"); - if (value != null) { - buf.append(Utils.indent(level+1) + "<" + relationElement + ">" + - Utils.xq(comparison) + "\n"); - buf.append(Utils.indent(level+1) + - "" + Utils.xq(value) + "\n"); - } - - buf.append(Utils.indent(level) + "\n"); - return buf.toString(); - } - - public String toCQL() { - StringBuffer buf = new StringBuffer(type); - if (value != null) - buf.append(" " + comparison + " " + value); - - return buf.toString(); - } -} diff --git a/src/org/z3950/zing/cql/ModifierSet.java b/src/org/z3950/zing/cql/ModifierSet.java deleted file mode 100644 index 7b6991a..0000000 --- a/src/org/z3950/zing/cql/ModifierSet.java +++ /dev/null @@ -1,128 +0,0 @@ -// $Id: ModifierSet.java,v 1.13 2007-07-03 13:30:18 mike Exp $ - -package org.z3950.zing.cql; -import java.util.Vector; -import java.lang.StringBuffer; - -/** - * Represents a base String and a set of Modifiers. - *

- * This class is used as a workhorse delegate by both CQLRelation and - * CQLProxNode - two functionally very separate classes that happen to - * require similar data structures and functionality. - *

- * A ModifierSet consists of a ``base'' string together with a set of - * zero or more type comparison value pairs, - * where type, comparison and value are all strings. - * - * @version $Id: ModifierSet.java,v 1.13 2007-07-03 13:30:18 mike Exp $ - */ -public class ModifierSet { - String base; - Vector modifiers; - - /** - * Creates a new ModifierSet with the specified base. - */ - public ModifierSet(String base) { - this.base = base; - modifiers = new Vector(); - } - - /** - * Returns the base string with which the ModifierSet was created. - */ - public String getBase() { - return base; - } - - /** - * Adds a modifier of the specified type, - * comparison and value to a ModifierSet. - */ - public void addModifier(String type, String comparison, String value) { - Modifier modifier = new Modifier(type, comparison, value); - modifiers.add(modifier); - } - - /** - * Adds a modifier of the specified type, but with no - * comparison and value, to a ModifierSet. - */ - public void addModifier(String type) { - Modifier modifier = new Modifier(type); - modifiers.add(modifier); - } - - /** - * Returns the value of the modifier in the specified ModifierSet - * that corresponds to the specified type. - */ - public String modifier(String type) { - int n = modifiers.size(); - for (int i = 0; i < n; i++) { - Modifier mod = modifiers.get(i); - if (mod.type.equals(type)) - return mod.value; - } - return null; - } - - /** - * Returns an array of the modifiers in a ModifierSet. - * @return - * An array of Modifiers. 
- */ - public Vector getModifiers() { - return modifiers; - } - - public String toXCQL(int level, String topLevelElement) { - return underlyingToXCQL(level, topLevelElement, "value"); - } - - public String sortKeyToXCQL(int level) { - return underlyingToXCQL(level, "key", "index"); - } - - private String underlyingToXCQL(int level, String topLevelElement, - String valueElement) { - StringBuffer buf = new StringBuffer(); - buf.append(Utils.indent(level) + "<" + topLevelElement + ">\n"); - buf.append(Utils.indent(level+1) + - "<" + valueElement + ">" + Utils.xq(base) + - "\n"); - if (modifiers.size() > 0) { - buf.append(Utils.indent(level+1) + "\n"); - for (int i = 0; i < modifiers.size(); i++) { - buf.append(modifiers.get(i).toXCQL(level+2, "comparison")); - } - buf.append(Utils.indent(level+1) + "\n"); - } - buf.append(Utils.indent(level) + "\n"); - return buf.toString(); - } - - public String toCQL() { - StringBuffer buf = new StringBuffer(base); - for (int i = 0; i < modifiers.size(); i++) { - buf.append("/" + modifiers.get(i).toCQL()); - } - - return buf.toString(); - } - - public static void main(String[] args) { - if (args.length < 1) { - System.err.println("Usage: ModifierSet [ ]..."); - System.exit(1); - } - - ModifierSet res = new ModifierSet(args[0]); - for (int i = 1; i < args.length; i += 3) { - res.addModifier(args[i], args[i+1], args[i+2]); - } - - System.out.println(res.toCQL()); - } -} diff --git a/src/org/z3950/zing/cql/PQFTranslationException.java b/src/org/z3950/zing/cql/PQFTranslationException.java deleted file mode 100644 index 85ec184..0000000 --- a/src/org/z3950/zing/cql/PQFTranslationException.java +++ /dev/null @@ -1,16 +0,0 @@ -// $Id: PQFTranslationException.java,v 1.1 2002-11-06 20:13:45 mike Exp $ - -package org.z3950.zing.cql; -import java.lang.Exception; - - -/** - * Base class for exceptions occurring when translating parse trees to PQF. 
- * - * @version $Id: PQFTranslationException.java,v 1.1 2002-11-06 20:13:45 mike Exp $ - */ -public class PQFTranslationException extends Exception { - PQFTranslationException(String s) { - super(s); - } -} diff --git a/src/org/z3950/zing/cql/UnknownIndexException.java b/src/org/z3950/zing/cql/UnknownIndexException.java deleted file mode 100644 index b4201d3..0000000 --- a/src/org/z3950/zing/cql/UnknownIndexException.java +++ /dev/null @@ -1,26 +0,0 @@ -// $Id: UnknownIndexException.java,v 1.2 2007-06-27 22:44:40 mike Exp $ - -package org.z3950.zing.cql; -import java.lang.Exception; - - -/** - * Exception indicating that an index was not recognised. - * At compilation time, we accept any syntactically valid index; - * but when rendering a tree out as PQF, we need to translate the - * indexes into sets of Type-1 query attributes. If we can't do - * that, because the PQF configuration doesn't know about a relation, - * we throw one of these babies. - * - * @version $Id: UnknownIndexException.java,v 1.2 2007-06-27 22:44:40 mike Exp $ - */ -public class UnknownIndexException extends PQFTranslationException { - /** - * Creates a new UnknownIndexException. - * @param s - * The index for which there was no PQF configuration. - */ - public UnknownIndexException(String s) { - super(s); - } -} diff --git a/src/org/z3950/zing/cql/UnknownPositionException.java b/src/org/z3950/zing/cql/UnknownPositionException.java deleted file mode 100644 index 5f38ab5..0000000 --- a/src/org/z3950/zing/cql/UnknownPositionException.java +++ /dev/null @@ -1,28 +0,0 @@ -// $Id: UnknownPositionException.java,v 1.2 2002-11-29 16:42:54 mike Exp $ - -package org.z3950.zing.cql; -import java.lang.Exception; - - -/** - * Exception indicating that a position was not recognised. - * When rendering a tree out as PQF, each term is classified either as - * any, first, last or - * firstAndLast, depending on whether it begins and/or ends - * with the word-anchoring meta-character ^. 
Its - * classification is looked up as a position in the PQF - * configuration. If the position is not configured, we throw one of - * these babies. - * - * @version $Id: UnknownPositionException.java,v 1.2 2002-11-29 16:42:54 mike Exp $ - */ -public class UnknownPositionException extends PQFTranslationException { - /** - * Creates a new UnknownPositionException. - * @param s - * The position for which there was no PQF configuration. - */ - public UnknownPositionException(String s) { - super(s); - } -} diff --git a/src/org/z3950/zing/cql/UnknownRelationException.java b/src/org/z3950/zing/cql/UnknownRelationException.java deleted file mode 100644 index 4d65e72..0000000 --- a/src/org/z3950/zing/cql/UnknownRelationException.java +++ /dev/null @@ -1,26 +0,0 @@ -// $Id: UnknownRelationException.java,v 1.2 2002-11-06 20:13:45 mike Exp $ - -package org.z3950.zing.cql; -import java.lang.Exception; - - -/** - * Exception indicating that a relation was not recognised. - * At compilation time, we accept any syntactically valid relation; - * but when rendering a tree out as PQF, we need to translate the - * relations into sets of Type-1 query attributes. If we can't do - * that, because the PQF configuration doesn't know about a relation, - * we throw one of these babies. - * - * @version $Id: UnknownRelationException.java,v 1.2 2002-11-06 20:13:45 mike Exp $ - */ -public class UnknownRelationException extends PQFTranslationException { - /** - * Creates a new UnknownRelationException. - * @param s - * The relation for which there was no PQF configuration. 
- */ - public UnknownRelationException(String s) { - super(s); - } -} diff --git a/src/org/z3950/zing/cql/UnknownRelationModifierException.java b/src/org/z3950/zing/cql/UnknownRelationModifierException.java deleted file mode 100644 index 9d449b5..0000000 --- a/src/org/z3950/zing/cql/UnknownRelationModifierException.java +++ /dev/null @@ -1,26 +0,0 @@ -// $Id: UnknownRelationModifierException.java,v 1.1 2002-11-06 20:13:45 mike Exp $ - -package org.z3950.zing.cql; -import java.lang.Exception; - - -/** - * Exception indicating that a relation modifier was not recognised. - * At compilation time, we accept any syntactically valid relation modifier; - * but when rendering a tree out as PQF, we need to translate the - * relation modifiers into sets of Type-1 query attributes. If we can't do - * that, because the PQF configuration doesn't know about a relation modifier, - * we throw one of these babies. - * - * @version $Id: UnknownRelationModifierException.java,v 1.1 2002-11-06 20:13:45 mike Exp $ - */ -public class UnknownRelationModifierException extends PQFTranslationException { - /** - * Creates a new UnknownRelationModifierException. - * @param s - * The relation modifier for which there was no PQF configuration. - */ - public UnknownRelationModifierException(String s) { - super(s); - } -} diff --git a/src/org/z3950/zing/cql/Utils.java b/src/org/z3950/zing/cql/Utils.java deleted file mode 100644 index 6777e46..0000000 --- a/src/org/z3950/zing/cql/Utils.java +++ /dev/null @@ -1,51 +0,0 @@ -// $Id: Utils.java,v 1.2 2002-11-06 00:05:58 mike Exp $ - -package org.z3950.zing.cql; - - -/** - * Utility functions for the org.z3950.zing.cql package. - * Not intended for use outside this package. 
- * - * @version $Id: Utils.java,v 1.2 2002-11-06 00:05:58 mike Exp $ - */ -class Utils { - static String indent(int level) { - String x = ""; - while (level-- > 0) { - x += " "; - } - return x; - } - - // XML Quote -- - // s/&/&/g; - // s//>/g; - // This is hideously inefficient, but I just don't see a better - // way using the standard JAVA library. - // - static String xq(String str) { - str = replaceString(str, "&", "&"); - str = replaceString(str, "<", "<"); - str = replaceString(str, ">", ">"); - return str; - } - - // I can't _believe_ I have to write this by hand in 2002 ... - static String replaceString(String str, String from, String to) { - StringBuffer sb = new StringBuffer(); - int ix; // index of next `from' - int offset = 0; // index of previous `from' + length(from) - - while ((ix = str.indexOf(from, offset)) != -1) { - sb.append(str.substring(offset, ix)); - sb.append(to); - offset = ix + from.length(); - } - - // End of string: append last bit and we're done - sb.append(str.substring(offset)); - return sb.toString(); - } -}