*
* @author jakub
*/
-public interface CQLLexer {
-
- public static final int TT_EOF = -1;
- public static final int TT_WORD = -3;
- public static final int TT_NOTHING = -4;
-
- public final static int TT_LE = 1000; // The "<=" relation
- public final static int TT_GE = 1001; // The ">=" relation
- public final static int TT_NE = 1002; // The "<>" relation
- public final static int TT_EQEQ = 1003; // The "==" relation
- public final static int TT_AND = 1004; // The "and" boolean
- public final static int TT_OR = 1005; // The "or" boolean
- public final static int TT_NOT = 1006; // The "not" boolean
- public final static int TT_PROX = 1007; // The "prox" boolean
- public final static int TT_SORTBY = 1008; // The "sortby" operator
-
- public void move();
-
- public String value();
-
- public int what();
-
- public String render();
-
- public String render(int what, boolean quote);
-
- public int pos();
+public class CQLLexer implements CQLTokenizer {
+    // The complete query text being tokenized.
+    private final String qs;
+    // Length of qs, captured once at construction.
+    private final int ql;
+    // Index into qs of the next character to examine.
+    private int qi;
+    // Type of the current token; stays TT_NOTHING until move() is first called.
+    private int what = TT_NOTHING;
+    // Text of the most recent word or quoted-string token (null before the first one).
+    private String val;
+    // Scratch buffer reused while accumulating the text of a token.
+    private final StringBuilder buf = new StringBuilder();
+
+    /**
+     * Creates a lexer positioned before the first token of the query.
+     * No token is read until {@link #move()} is called.
+     *
+     * @param cql   the query text to tokenize; must not be null
+     * @param debug accepted for the benefit of existing callers; not used by this class
+     */
+    public CQLLexer(String cql, boolean debug) {
+        qs = cql;
+        ql = cql.length();
+    }
+
+    /**
+     * Advances to the next token, skipping any leading whitespace.
+     * Afterwards {@link #what()} reports the token type and, for words and
+     * quoted strings, {@link #value()} reports its text.
+     */
+    @Override
+    public void move() {
+        //eat whitespace
+        while (qi < ql && strchr(" \t\r\n", qs.charAt(qi))) {
+            qi++;
+        }
+        //eof
+        if (qi == ql) {
+            what = TT_EOF;
+            return;
+        }
+        char c = qs.charAt(qi);
+        if (strchr("()/", c)) {
+            //separators: the character itself is the token type
+            what = c;
+            qi++;
+        } else if (strchr("<>=", c)) {
+            moveComparator(c);
+        } else if (c == '"') { //no single-quotes
+            moveQuotedString();
+        } else {
+            moveWord();
+        }
+    }
+
+    /**
+     * Consumes a one- or two-character comparator starting at qi.
+     *
+     * @param first the character at qi; the caller guarantees it is one of {@code < > =}
+     */
+    private void moveComparator(char first) {
+        what = first;
+        qi++;
+        if (qi >= ql) {
+            return;
+        }
+        char second = qs.charAt(qi);
+        int combined;
+        if (second == '=') {
+            // first can only be '=', '<' or '>' here
+            combined = (first == '=') ? TT_EQEQ : (first == '<') ? TT_LE : TT_GE;
+        } else if (first == '<' && second == '>') {
+            combined = TT_NE;
+        } else {
+            // not a two-character comparator: keep the single-character token
+            return;
+        }
+        what = combined;
+        qi++;
+    }
+
+    /**
+     * Consumes a double-quoted string whose opening quote is at qi.
+     * A backslash escapes the following character. The backslash itself is
+     * kept in the value only in front of {@code *}, {@code ?}, {@code ^} and
+     * {@code \}; in front of anything else (including a double quote) it is
+     * dropped. An unterminated string is reported as TT_EOF.
+     */
+    private void moveQuotedString() {
+        what = '"';
+        qi++; //skip the opening quote
+        boolean escaped = false;
+        buf.setLength(0); //reset buffer
+        while (qi < ql) {
+            char ch = qs.charAt(qi);
+            if (!escaped) {
+                if (ch == '"') { //terminator
+                    break;
+                }
+                if (ch == '\\') { //escape-char
+                    escaped = true;
+                    qi++;
+                    continue;
+                }
+            } else if (strchr("*?^\\", ch)) { //no escaping for d-quote
+                buf.append('\\');
+            }
+            escaped = false; //reset escape
+            buf.append(ch);
+            qi++;
+        }
+        val = buf.toString();
+        if (qi < ql) {
+            qi++; //skip the closing quote
+        } else { //unterminated
+            what = TT_EOF; //notify error
+        }
+    }
+
+    /**
+     * Consumes an unquoted word starting at qi and running up to the next
+     * separator, comparator character or whitespace. The words
+     * "or", "and", "not", "prox" and "sortby" (compared in lower case) are
+     * reported with their own token types; anything else is TT_WORD.
+     */
+    private void moveWord() {
+        what = TT_WORD;
+        buf.setLength(0); //reset buffer
+        while (qi < ql && !strchr("()/<>= \t\r\n", qs.charAt(qi))) {
+            buf.append(qs.charAt(qi));
+            qi++;
+        }
+        val = buf.toString();
+        String lower = val.toLowerCase();
+        if (lower.equals("or")) {
+            what = TT_OR;
+        } else if (lower.equals("and")) {
+            what = TT_AND;
+        } else if (lower.equals("not")) {
+            what = TT_NOT;
+        } else if (lower.equals("prox")) {
+            what = TT_PROX;
+        } else if (lower.equals("sortby")) {
+            what = TT_SORTBY;
+        }
+    }
+
+    /** Returns true if {@code ch} occurs anywhere in {@code s}. */
+    private static boolean strchr(String s, char ch) {
+        return s.indexOf(ch) >= 0;
+    }
+
+    /**
+     * Returns the text of the most recent word or quoted-string token.
+     * Separator and comparator tokens do not update it, so after one of
+     * those this still holds the text of an earlier token.
+     */
+    @Override
+    public String value() {
+        return val;
+    }
+
+    /**
+     * Returns the type of the current token: one of the TT_ constants,
+     * {@code '"'} for a quoted string, or the character itself for a
+     * single-character separator or comparator.
+     */
+    @Override
+    public int what() {
+        return what;
+    }
+
+    /** Renders the current token, quoting single-character tokens. */
+    @Override
+    public String render() {
+        return render(what, true);
+    }
+
+    /**
+     * Renders a token type as human-readable text.
+     *
+     * @param token      the token type to render
+     * @param quoteChars whether a single-character token is wrapped in single quotes
+     * @return the rendering; for TT_WORD and quoted strings it includes the
+     *         lexer's current value
+     */
+    @Override
+    public String render(int token, boolean quoteChars) {
+        switch (token) {
+            case TT_EOF:
+                return "EOF";
+            case TT_WORD:
+                return "word: " + val;
+            case '"':
+                return "string: \"" + val + "\"";
+            case TT_LE:
+                return "<=";
+            case TT_GE:
+                return ">=";
+            case TT_NE:
+                return "<>";
+            case TT_EQEQ:
+                return "==";
+            case TT_AND:
+                return "and";
+            case TT_NOT:
+                return "not";
+            case TT_OR:
+                return "or";
+            case TT_PROX:
+                return "prox";
+            case TT_SORTBY:
+                return "sortby";
+            default:
+                //a single character, such as '(' or '/' or relation
+                String res = String.valueOf((char) token);
+                if (quoteChars) {
+                    res = "'" + res + "'";
+                }
+                return res;
+        }
+    }
+
+    /** Returns the index in the query of the next character to be examined. */
+    @Override
+    public int pos() {
+        return qi;
+    }
+
+    /**
+     * Command-line driver: tokenizes a query and prints one rendered token
+     * per line, ending with "EOF". The query is taken from the single
+     * argument if given, otherwise from one read of standard input.
+     */
+    public static void main(String[] args) throws Exception {
+        if (args.length > 1) {
+            System.err.println("Usage: CQLLexer [<CQL-query>]");
+            System.err.println("If unspecified, query is read from stdin");
+            System.exit(1);
+        }
+
+        String cql;
+        if (args.length == 1) {
+            cql = args[0];
+        } else {
+            byte[] bytes = new byte[10000];
+            int nbytes = 0;
+            try {
+                // A single read of at most bytes.length bytes from standard input
+                nbytes = System.in.read(bytes);
+            } catch (java.io.IOException ex) {
+                System.err.println("Can't read query: " + ex.getMessage());
+                System.exit(2);
+            }
+            // Use only the bytes actually read; converting the whole array
+            // would append NUL characters to the query. read() returns -1 at
+            // end of stream, which is treated as an empty query.
+            cql = new String(bytes, 0, Math.max(nbytes, 0));
+        }
+
+        CQLTokenizer lexer = new CQLLexer(cql, true);
+        while (lexer.what() != TT_EOF) {
+            lexer.move();
+            System.out.println(lexer.render());
+        }
+    }
}
+++ /dev/null
-/*
- * Copyright (c) 1995-2014, Index Datassss
- * All rights reserved.
- * See the file LICENSE for details.
- */
-package org.z3950.zing.cql;
-
-/**
- *
- * @author jakub
- */
-public class CQLLexerSimple implements CQLLexer {
- private String qs;
- private int qi;
- private int ql;
- private int what = TT_NOTHING;
- private String val;
- private String lval;
- private StringBuilder buf = new StringBuilder();
-
- public CQLLexerSimple(String cql, boolean debug) {
- qs = cql;
- ql = cql.length();
- }
-
- @Override
- public void move() {
- //eat whitespace
- while (qi < ql && strchr(" \t\r\n", qs.charAt(qi)))
- qi++;
- //eof
- if (qi == ql) {
- what = TT_EOF;
- return;
- }
- //current char
- char c = qs.charAt(qi);
- //separators
- if (strchr("()/", c)) {
- what = c;
- qi++;
- //comparitor
- } else if (strchr("<>=", c)) {
- what = c;
- qi++;
- //two-char comparitor
- if (qi < ql) {
- char d = qs.charAt(qi);
- String comp = String.valueOf((char) c) + String.valueOf((char) d);
- if (comp.equals("==")) {
- what = TT_EQEQ;
- qi++;
- }
- else if (comp.equals("<=")) {
- what = TT_LE;
- qi++;
- }
- else if (comp.equals(">=")) {
- what = TT_GE;
- qi++;
- }
- else if (comp.equals("<>")) {
- what = TT_NE;
- qi++;
- }
- }
- //quoted string
- } else if (strchr("\"", c)) { //no single-quotes
- what = '"';
- //remember quote char
- char mark = c;
- qi++;
- boolean escaped = false;
- buf.setLength(0); //reset buffer
- while (qi < ql) {
- if (!escaped && qs.charAt(qi) == mark) //terminator
- break;
- if (escaped && strchr("*?^\\", qs.charAt(qi))) //no escaping for d-quote
- buf.append("\\");
- if (!escaped && qs.charAt(qi) == '\\') { //escape-char
- escaped = true;
- qi++;
- continue;
- }
- escaped = false; //reset escape
- buf.append(qs.charAt(qi));
- qi++;
- }
- val = buf.toString();
- lval = val.toLowerCase();
- if (qi < ql)
- qi++;
- else //unterminated
- what = TT_EOF; //notify error
- //unquoted string
- } else {
- what = TT_WORD;
- buf.setLength(0); //reset buffer
- while (qi < ql
- && !strchr("()/<>= \t\r\n", qs.charAt(qi))) {
- buf.append(qs.charAt(qi));
- qi++;
- }
- val = buf.toString();
- lval = val.toLowerCase();
- if (lval.equals("or")) what = TT_OR;
- else if (lval.equals("and")) what = TT_AND;
- else if (lval.equals("not")) what = TT_NOT;
- else if (lval.equals("prox")) what = TT_PROX;
- else if (lval.equals("sortby")) what = TT_SORTBY;
- }
- }
-
- private boolean strchr(String s, char ch) {
- return s.indexOf(ch) >= 0;
- }
-
- @Override
- public String value() {
- return val;
- }
-
- @Override
- public int what() {
- return what;
- }
-
- @Override
- public String render() {
- return render(what, true);
- }
-
- @Override
- public String render(int token, boolean quoteChars) {
- switch (token) {
- case TT_EOF:
- return "EOF";
- case TT_WORD:
- return "word: " + val;
- case '"':
- return "string: \"" + val + "\"";
- case TT_LE:
- return "<=";
- case TT_GE:
- return ">=";
- case TT_NE:
- return "<>";
- case TT_EQEQ:
- return "==";
- case TT_AND:
- return "and";
- case TT_NOT:
- return "not";
- case TT_OR:
- return "or";
- case TT_PROX:
- return "prox";
- case TT_SORTBY:
- return "sortby";
- default:
- //a single character, such as '(' or '/' or relation
- String res = String.valueOf((char) token);
- if (quoteChars)
- res = "'" + res + "'";
- return res;
- }
- }
-
- @Override
- public int pos() {
- return qi;
- }
-}
* >http://zing.z3950.org/cql/index.html</A>
*/
public class CQLParser {
- private CQLLexer lexer;
+ private CQLTokenizer lexer;
private final int compat; // When false, implement CQL 1.2
private final Set<String> customRelations = new HashSet<String>();
* tree representing the query. */
public CQLNode parse(String cql)
throws CQLParseException, IOException {
- lexer = new CQLLexerSimple(cql, LEXDEBUG);
+ lexer = new CQLLexer(cql, LEXDEBUG);
lexer.move();
debug("about to parseQuery()");
CQLNode root = parseTopLevelPrefixes("cql.serverChoice",
new CQLRelation(compat == V1POINT2 ? "=" : "scr"));
- if (lexer.what() != CQLLexer.TT_EOF)
+ if (lexer.what() != CQLTokenizer.TT_EOF)
throw new CQLParseException("junk after end: " + lexer.render(),
lexer.pos());
CQLNode node = parseQuery(index, relation);
if ((compat == V1POINT2 || compat == V1POINT1SORT) &&
- lexer.what() == CQLLexer.TT_SORTBY) {
+ lexer.what() == CQLTokenizer.TT_SORTBY) {
match(lexer.what());
debug("sortspec");
CQLSortNode sortnode = new CQLSortNode(node);
- while (lexer.what() != CQLLexer.TT_EOF) {
+ while (lexer.what() != CQLTokenizer.TT_EOF) {
String sortindex = matchSymbol("sort index");
ModifierSet ms = gatherModifiers(sortindex);
sortnode.addSortIndex(ms);
debug("in parseQuery()");
CQLNode term = parseTerm(index, relation);
- while (lexer.what() != CQLLexer.TT_EOF &&
+ while (lexer.what() != CQLTokenizer.TT_EOF &&
lexer.what() != ')' &&
- lexer.what() != CQLLexer.TT_SORTBY) {
- if (lexer.what() == CQLLexer.TT_AND ||
- lexer.what() == CQLLexer.TT_OR ||
- lexer.what() == CQLLexer.TT_NOT ||
- lexer.what() == CQLLexer.TT_PROX) {
+ lexer.what() != CQLTokenizer.TT_SORTBY) {
+ if (lexer.what() == CQLTokenizer.TT_AND ||
+ lexer.what() == CQLTokenizer.TT_OR ||
+ lexer.what() == CQLTokenizer.TT_NOT ||
+ lexer.what() == CQLTokenizer.TT_PROX) {
int type = lexer.what();
String val = lexer.value();
match(type);
ModifierSet ms = gatherModifiers(val);
CQLNode term2 = parseTerm(index, relation);
- term = ((type == CQLLexer.TT_AND) ? new CQLAndNode(term, term2, ms) :
- (type == CQLLexer.TT_OR) ? new CQLOrNode (term, term2, ms) :
- (type == CQLLexer.TT_NOT) ? new CQLNotNode(term, term2, ms) :
+ term = ((type == CQLTokenizer.TT_AND) ? new CQLAndNode(term, term2, ms) :
+ (type == CQLTokenizer.TT_OR) ? new CQLOrNode (term, term2, ms) :
+ (type == CQLTokenizer.TT_NOT) ? new CQLNotNode(term, term2, ms) :
new CQLProxNode(term, term2, ms));
} else {
throw new CQLParseException("expected boolean, got " +
ModifierSet ms = new ModifierSet(base);
while (lexer.what() == '/') {
match('/');
- if (lexer.what() != CQLLexer.TT_WORD)
+ if (lexer.what() != CQLTokenizer.TT_WORD)
throw new CQLParseException("expected modifier, "
+ "got " + lexer.render(),
lexer.pos());
debug("non-parenthesised term");
word = matchSymbol("index or term");
- while (lexer.what() == CQLLexer.TT_WORD && !isRelation()) {
+ while (lexer.what() == CQLTokenizer.TT_WORD && !isRelation()) {
word = word + " " + lexer.value();
- match(CQLLexer.TT_WORD);
+ match(CQLTokenizer.TT_WORD);
}
if (!isRelation())
break;
index = word;
- String relstr = (lexer.what() == CQLLexer.TT_WORD ?
+ String relstr = (lexer.what() == CQLTokenizer.TT_WORD ?
lexer.value() : lexer.render(lexer.what(), false));
relation = new CQLRelation(relstr);
match(lexer.what());
private boolean isRelation() {
debug("isRelation: checking what()=" + lexer.what() +
" (" + lexer.render() + ")");
- if (lexer.what() == CQLLexer.TT_WORD &&
+ if (lexer.what() == CQLTokenizer.TT_WORD &&
(lexer.value().indexOf('.') >= 0 ||
lexer.value().equals("any") ||
lexer.value().equals("all") ||
return (lexer.what() == '<' ||
lexer.what() == '>' ||
lexer.what() == '=' ||
- lexer.what() == CQLLexer.TT_LE ||
- lexer.what() == CQLLexer.TT_GE ||
- lexer.what() == CQLLexer.TT_NE ||
- lexer.what() == CQLLexer.TT_EQEQ);
+ lexer.what() == CQLTokenizer.TT_LE ||
+ lexer.what() == CQLTokenizer.TT_GE ||
+ lexer.what() == CQLTokenizer.TT_NE ||
+ lexer.what() == CQLTokenizer.TT_EQEQ);
}
private void match(int token)
throws CQLParseException, IOException {
debug("in matchSymbol()");
- if (lexer.what() == CQLLexer.TT_WORD ||
+ if (lexer.what() == CQLTokenizer.TT_WORD ||
lexer.what() == '"' ||
// The following is a complete list of keywords. Because
// they're listed here, they can be used unquoted as
// ### Instead, we should ask the lexer whether what we
// have is a keyword, and let the knowledge reside there.
(allowKeywordTerms &&
- lexer.what() == CQLLexer.TT_AND ||
- lexer.what() == CQLLexer.TT_OR ||
- lexer.what() == CQLLexer.TT_NOT ||
- lexer.what() == CQLLexer.TT_PROX ||
- lexer.what() == CQLLexer.TT_SORTBY)) {
+ lexer.what() == CQLTokenizer.TT_AND ||
+ lexer.what() == CQLTokenizer.TT_OR ||
+ lexer.what() == CQLTokenizer.TT_NOT ||
+ lexer.what() == CQLTokenizer.TT_PROX ||
+ lexer.what() == CQLTokenizer.TT_SORTBY)) {
String symbol = lexer.value();
match(lexer.what());
return symbol;
--- /dev/null
+/*
+ * Copyright (c) 1995-2014, Index Data
+ * All rights reserved.
+ * See the file LICENSE for details.
+ */
+package org.z3950.zing.cql;
+
+/**
+ * A tokenizer over a CQL query: token-type constants plus the operations
+ * for stepping through the tokens and inspecting the current one.
+ *
+ * @author jakub
+ */
+public interface CQLTokenizer {
+
+    // General token types
+    int TT_EOF = -1;
+    int TT_WORD = -3;
+    int TT_NOTHING = -4;
+
+    // Multi-character relations
+    int TT_LE = 1000;        // The "<=" relation
+    int TT_GE = 1001;        // The ">=" relation
+    int TT_NE = 1002;        // The "<>" relation
+    int TT_EQEQ = 1003;      // The "==" relation
+
+    // Keywords
+    int TT_AND = 1004;       // The "and" boolean
+    int TT_OR = 1005;        // The "or" boolean
+    int TT_NOT = 1006;       // The "not" boolean
+    int TT_PROX = 1007;      // The "prox" boolean
+    int TT_SORTBY = 1008;    // The "sortby" operator
+
+    /** Advances to the next token. */
+    void move();
+
+    /** Returns the text associated with the current token. */
+    String value();
+
+    /** Returns the type of the current token. */
+    int what();
+
+    /** Returns a printable rendering of the current token. */
+    String render();
+
+    /**
+     * Returns a printable rendering of the given token type.
+     *
+     * @param what  the token type to render
+     * @param quote whether single-character tokens are to be quoted
+     */
+    String render(int what, boolean quote);
+
+    /** Returns the tokenizer's current position in the query. */
+    int pos();
+
+}