From b684edf7793ad37c6901acadbbab96b031c743c4 Mon Sep 17 00:00:00 2001 From: Stegmujo Date: Wed, 2 Mar 2022 17:56:41 +0100 Subject: [PATCH 1/9] Alternative endpoint draft parsing json. --- .../citeplag/controller/BaseXController.java | 40 ++++++++++++++++--- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/citeplag/controller/BaseXController.java b/src/main/java/org/citeplag/controller/BaseXController.java index 6cc087f..e0dd22e 100644 --- a/src/main/java/org/citeplag/controller/BaseXController.java +++ b/src/main/java/org/citeplag/controller/BaseXController.java @@ -11,10 +11,8 @@ import org.citeplag.domain.MathUpdate; import org.citeplag.beans.BaseXGenericResponse; import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestMapping; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.bind.annotation.RestController; +import org.springframework.http.MediaType; +import org.springframework.web.bind.annotation.*; import javax.servlet.http.HttpServletRequest; import java.io.IOException; @@ -66,13 +64,43 @@ public MathRequest xQueryProcessing( return process(query, "xquery", request); } + /* @PostMapping("/mwsquery") + //@RequestMapping(consumes="application/json") @ApiOperation(value = "Run MWS query on BaseX") public MathRequest mwsProcessing( - @RequestParam String query, - HttpServletRequest request) { + @RequestParam String query + ,HttpServletRequest request) { + return process(query, "mws", request); } + */ + @PostMapping( + value = "mwsquery", + consumes = {MediaType.APPLICATION_JSON_VALUE}, + produces = {MediaType.APPLICATION_JSON_VALUE}) //tbd clarify produces + @ApiOperation(value = "Run MWS query on BaseX", consumes = MediaType.APPLICATION_JSON_VALUE) + public MathRequest mwsProcessing(@RequestBody com.fasterxml.jackson.databind.JsonNode complete_query, 
HttpServletRequest request){ // @RequestBody Object person) { + + Object field = complete_query.get("query"); + if(field==null){ + // TBD Return invalid data here + return process(null, "mws", request); + } + + String query = complete_query.get("query").textValue(); + return process(query, "mws", request); + } + + /* + @PostMapping(value="/mwsquery") + public MathRequest process(@RequestParam String query, @RequestBody com.fasterxml.jackson.databind.JsonNode request) { + // Just a test for json + return process(query, "mws", null); + + } + */ + private MathRequest process(String query, String type, HttpServletRequest request) { if (!startServerIfNecessary()) { From 599da221e6a717833505b9022a6ad8e532459388 Mon Sep 17 00:00:00 2001 From: Stegmujo Date: Wed, 2 Mar 2022 19:14:56 +0100 Subject: [PATCH 2/9] Working mwsquery with formulasearch --- .../citeplag/controller/BaseXController.java | 40 ++++++++++++++----- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/citeplag/controller/BaseXController.java b/src/main/java/org/citeplag/controller/BaseXController.java index e0dd22e..f71eb4f 100644 --- a/src/main/java/org/citeplag/controller/BaseXController.java +++ b/src/main/java/org/citeplag/controller/BaseXController.java @@ -11,11 +11,15 @@ import org.citeplag.domain.MathUpdate; import org.citeplag.beans.BaseXGenericResponse; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.configurationprocessor.json.JSONException; +import org.springframework.boot.configurationprocessor.json.JSONObject; import org.springframework.http.MediaType; import org.springframework.web.bind.annotation.*; import javax.servlet.http.HttpServletRequest; import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.InvalidPathException; import java.nio.file.Path; @@ -64,24 +68,38 @@ public MathRequest xQueryProcessing( 
return process(query, "xquery", request); } - /* + + @PostMapping("/mwsquery_old") + @ApiOperation(value = "Run MWS query on BaseX") + public MathRequest mwsProcessingOld(@RequestParam String query, HttpServletRequest request) { + return process(query, "mws", request); + } + + @PostMapping("/mwsquery") - //@RequestMapping(consumes="application/json") @ApiOperation(value = "Run MWS query on BaseX") - public MathRequest mwsProcessing( - @RequestParam String query - ,HttpServletRequest request) { + public MathRequest mwsProcessing(@RequestBody String data, HttpServletRequest request) { + String query = null; + try { + String result = java.net.URLDecoder.decode(data, StandardCharsets.UTF_8.name()); + JSONObject jsonObject = new JSONObject(result); + query = jsonObject.get("query").toString(); + } catch (Exception e) { + // not going to happen - value came from JDK's own StandardCharsets + e.printStackTrace(); + } return process(query, "mws", request); } - */ + + /* @PostMapping( value = "mwsquery", - consumes = {MediaType.APPLICATION_JSON_VALUE}, - produces = {MediaType.APPLICATION_JSON_VALUE}) //tbd clarify produces - @ApiOperation(value = "Run MWS query on BaseX", consumes = MediaType.APPLICATION_JSON_VALUE) + consumes = {MediaType.ALL_VALUE}, + produces = {MediaType.ALL_VALUE}) //tbd clarify produces + @ApiOperation(value = "Run MWS query on BaseX", consumes = MediaType.ALL_VALUE) public MathRequest mwsProcessing(@RequestBody com.fasterxml.jackson.databind.JsonNode complete_query, HttpServletRequest request){ // @RequestBody Object person) { - + // Problem with accept headers from mediawiki Object field = complete_query.get("query"); if(field==null){ // TBD Return invalid data here @@ -91,7 +109,7 @@ public MathRequest mwsProcessing(@RequestBody com.fasterxml.jackson.databind.Jso String query = complete_query.get("query").textValue(); return process(query, "mws", request); } - + */ /* @PostMapping(value="/mwsquery") public MathRequest process(@RequestParam String 
query, @RequestBody com.fasterxml.jackson.databind.JsonNode request) { From ac611f573129a01be565529ba9820226ed8f1444 Mon Sep 17 00:00:00 2001 From: Stegmujo Date: Thu, 3 Mar 2022 15:05:51 +0100 Subject: [PATCH 3/9] Refactor endpoint a bit. Ok for structured-search. --- .../citeplag/controller/BaseXController.java | 60 +++++-------------- 1 file changed, 15 insertions(+), 45 deletions(-) diff --git a/src/main/java/org/citeplag/controller/BaseXController.java b/src/main/java/org/citeplag/controller/BaseXController.java index f71eb4f..cab9731 100644 --- a/src/main/java/org/citeplag/controller/BaseXController.java +++ b/src/main/java/org/citeplag/controller/BaseXController.java @@ -11,14 +11,10 @@ import org.citeplag.domain.MathUpdate; import org.citeplag.beans.BaseXGenericResponse; import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.boot.configurationprocessor.json.JSONException; import org.springframework.boot.configurationprocessor.json.JSONObject; -import org.springframework.http.MediaType; import org.springframework.web.bind.annotation.*; - import javax.servlet.http.HttpServletRequest; import java.io.IOException; -import java.io.UnsupportedEncodingException; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.InvalidPathException; @@ -68,58 +64,32 @@ public MathRequest xQueryProcessing( return process(query, "xquery", request); } - - @PostMapping("/mwsquery_old") - @ApiOperation(value = "Run MWS query on BaseX") - public MathRequest mwsProcessingOld(@RequestParam String query, HttpServletRequest request) { - return process(query, "mws", request); - } - - @PostMapping("/mwsquery") @ApiOperation(value = "Run MWS query on BaseX") public MathRequest mwsProcessing(@RequestBody String data, HttpServletRequest request) { + String query = extractQueryFromData(data); + if(query==null){ return null; } + return process(query, "mws", request); + } + /** + * Extracting the query input from url 
encoded string of data. + * This is required to handle requests coming from FormulaSearch-extension. + * @param data url encoded string which container query as json. + * @return query as string or data + */ + private String extractQueryFromData(String data){ String query = null; try { - String result = java.net.URLDecoder.decode(data, StandardCharsets.UTF_8.name()); - JSONObject jsonObject = new JSONObject(result); - query = jsonObject.get("query").toString(); + String result = java.net.URLDecoder.decode(data, StandardCharsets.UTF_8.name()); + JSONObject jsonObject = new JSONObject(result); + query = jsonObject.get("query").toString(); } catch (Exception e) { - // not going to happen - value came from JDK's own StandardCharsets e.printStackTrace(); } - return process(query, "mws", request); + return query; } - /* - @PostMapping( - value = "mwsquery", - consumes = {MediaType.ALL_VALUE}, - produces = {MediaType.ALL_VALUE}) //tbd clarify produces - @ApiOperation(value = "Run MWS query on BaseX", consumes = MediaType.ALL_VALUE) - public MathRequest mwsProcessing(@RequestBody com.fasterxml.jackson.databind.JsonNode complete_query, HttpServletRequest request){ // @RequestBody Object person) { - // Problem with accept headers from mediawiki - Object field = complete_query.get("query"); - if(field==null){ - // TBD Return invalid data here - return process(null, "mws", request); - } - - String query = complete_query.get("query").textValue(); - return process(query, "mws", request); - } - */ - /* - @PostMapping(value="/mwsquery") - public MathRequest process(@RequestParam String query, @RequestBody com.fasterxml.jackson.databind.JsonNode request) { - // Just a test for json - return process(query, "mws", null); - - } - */ - - private MathRequest process(String query, String type, HttpServletRequest request) { if (!startServerIfNecessary()) { LOG.warn("Return null for request, because BaseX server is not running."); From 38f0e296f47d0b4cd7d68edfbc264c80d7e4107f Mon Sep 17 
00:00:00 2001 From: Stegmujo Date: Thu, 3 Mar 2022 17:10:51 +0100 Subject: [PATCH 4/9] adding local modules for lacast and formula-cloud-server --- .gitmodules | 3 +++ README.md | 4 ++++ pom.xml | 9 ++++++++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 237b7cb..3b9bfb3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -2,3 +2,6 @@ path = LaCASt url = git@github.com:ag-gipp/LaCASt.git branch = wikipedia +[submodule "formula-cloud-server"] + path = formula-cloud-server + url = git@github.com:ag-gipp/formula-cloud-server.git diff --git a/README.md b/README.md index 8475eb6..e16f1ae 100755 --- a/README.md +++ b/README.md @@ -17,6 +17,10 @@ Task 3 is executed via an external JS widget written by students at the HTW Berl ## Build ## +### Initialize the submodules ### +`git submodule update --init --recursive` + +### Do maven build ### Check-out this project and build it via Maven. In the `/target` directory you will find the executable server instance `mathpipeline.jar`. 
diff --git a/pom.xml b/pom.xml index 38547cc..b652590 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,14 @@ com.formulasearchengine vmext-demo - 1.0-SNAPSHOT + pom + 1.0 + + + + formula-cloud-server + LaCASt + org.springframework.boot From d006dd698b279677548db1f98ac76deb20d82366 Mon Sep 17 00:00:00 2001 From: Stegmujo Date: Thu, 3 Mar 2022 18:23:50 +0100 Subject: [PATCH 5/9] adding basex locally from com.formulasearchengine --- .../java/org/citeplag/basex/BaseXClient.java | 408 ++++++++++++++++ .../java/org/citeplag/basex/Benchmark.java | 142 ++++++ src/main/java/org/citeplag/basex/Client.java | 462 ++++++++++++++++++ src/main/java/org/citeplag/basex/Server.java | 78 +++ .../org/citeplag/basex/TexQueryGenerator.java | 121 +++++ .../org/citeplag/basex/types/Formula.java | 97 ++++ .../java/org/citeplag/basex/types/Hit.java | 57 +++ .../java/org/citeplag/basex/types/Qvar.java | 24 + .../java/org/citeplag/basex/types/Result.java | 76 +++ .../org/citeplag/basex/types/Results.java | 73 +++ .../java/org/citeplag/basex/types/Run.java | 89 ++++ 11 files changed, 1627 insertions(+) create mode 100644 src/main/java/org/citeplag/basex/BaseXClient.java create mode 100644 src/main/java/org/citeplag/basex/Benchmark.java create mode 100644 src/main/java/org/citeplag/basex/Client.java create mode 100644 src/main/java/org/citeplag/basex/Server.java create mode 100644 src/main/java/org/citeplag/basex/TexQueryGenerator.java create mode 100644 src/main/java/org/citeplag/basex/types/Formula.java create mode 100644 src/main/java/org/citeplag/basex/types/Hit.java create mode 100644 src/main/java/org/citeplag/basex/types/Qvar.java create mode 100644 src/main/java/org/citeplag/basex/types/Result.java create mode 100644 src/main/java/org/citeplag/basex/types/Results.java create mode 100644 src/main/java/org/citeplag/basex/types/Run.java diff --git a/src/main/java/org/citeplag/basex/BaseXClient.java b/src/main/java/org/citeplag/basex/BaseXClient.java new file mode 100644 index 0000000..a14e6ef 
--- /dev/null +++ b/src/main/java/org/citeplag/basex/BaseXClient.java @@ -0,0 +1,408 @@ +package org.citeplag.basex; + + +import java.io.*; +import java.net.*; +import java.nio.charset.*; +import java.security.*; +import java.util.*; + +/** + * Implementation of XQuery v3.1 compliant BaseXClient + * Taken from basex-examples on the BaseXDb Github repository. + * + * Java client for BaseX. + * Works with BaseX 7.0 and later + * + * Documentation: http://docs.basex.org/wiki/Clients + * + * (C) BaseX Team 2005-15, BSD License + */ +public final class BaseXClient { + /** UTF-8 charset. */ + private static final Charset UTF8 = Charset.forName("UTF-8"); + /** Output stream. */ + private final OutputStream out; + /** Input stream (buffered). */ + private final BufferedInputStream in; + + /** Socket. */ + private final Socket socket; + /** Command info. */ + private String info; + + /** + * Constructor. + * @param host server name + * @param port server port + * @param username user name + * @param password password + * @throws IOException Exception + */ + public BaseXClient(final String host, final int port, final String username, + final String password) throws IOException { + + socket = new Socket(); + socket.connect(new InetSocketAddress(host, port), 5000); + in = new BufferedInputStream(socket.getInputStream()); + out = socket.getOutputStream(); + + // receive server response + final String[] response = receive().split(":"); + final String code, nonce; + if(response.length > 1) { + // support for digest authentication + code = username + ':' + response[0] + ':' + password; + nonce = response[1]; + } else { + // support for cram-md5 (Version < 8.0) + code = password; + nonce = response[0]; + } + + send(username); + send(md5(md5(code) + nonce)); + + // receive success flag + if(!ok()) throw new IOException("Access denied."); + } + + /** + * Executes a command and serializes the result to an output stream. 
+ * @param command command + * @param output output stream + * @throws IOException Exception + */ + public void execute(final String command, final OutputStream output) throws IOException { + // send {Command}0 + send(command); + receive(in, output); + info = receive(); + if(!ok()) throw new IOException(info); + } + + /** + * Executes a command and returns the result. + * @param command command + * @return result + * @throws IOException Exception + */ + public String execute(final String command) throws IOException { + final ByteArrayOutputStream os = new ByteArrayOutputStream(); + execute(command, os); + return new String(os.toByteArray(), UTF8); + } + + /** + * Creates a query object. + * @param query query string + * @return query + * @throws IOException Exception + */ + public Query query(final String query) throws IOException { + return new Query(query); + } + + /** + * Creates a database. + * @param name name of database + * @param input xml input + * @throws IOException I/O exception + */ + public void create(final String name, final InputStream input) throws IOException { + send(8, name, input); + } + + /** + * Adds a document to a database. + * @param path path to resource + * @param input xml input + * @throws IOException I/O exception + */ + public void add(final String path, final InputStream input) throws IOException { + send(9, path, input); + } + + /** + * Replaces a document in a database. + * @param path path to resource + * @param input xml input + * @throws IOException I/O exception + */ + public void replace(final String path, final InputStream input) throws IOException { + send(12, path, input); + } + + /** + * Stores a binary resource in a database. + * @param path path to resource + * @param input xml input + * @throws IOException I/O exception + */ + public void store(final String path, final InputStream input) throws IOException { + send(13, path, input); + } + + /** + * Returns command information. 
+ * @return string info + */ + public String info() { + return info; + } + + /** + * Closes the session. + * @throws IOException Exception + */ + public void close() throws IOException { + send("exit"); + out.flush(); + socket.close(); + } + + /** + * Checks the next success flag. + * @return value of check + * @throws IOException Exception + */ + private boolean ok() throws IOException { + out.flush(); + return in.read() == 0; + } + + /** + * Returns the next received string. + * @return String result or info + * @throws IOException I/O exception + */ + private String receive() throws IOException { + final ByteArrayOutputStream os = new ByteArrayOutputStream(); + receive(in, os); + return new String(os.toByteArray(), UTF8); + } + + /** + * Sends a string to the server. + * @param string string to be sent + * @throws IOException I/O exception + */ + private void send(final String string) throws IOException { + out.write((string + '\0').getBytes(UTF8)); + } + + /** + * Receives a string and writes it to the specified output stream. + * @param input input stream + * @param output output stream + * @throws IOException I/O exception + */ + private static void receive(final InputStream input, final OutputStream output) + throws IOException { + for(int b; (b = input.read()) > 0;) { + // read next byte if 0xFF is received + output.write( b == 0xFF ? input.read() : b ); + } + } + + /** + * Sends a command, argument, and input. + * @param code command code + * @param path name, or path to resource + * @param input xml input + * @throws IOException I/O exception + */ + private void send(final int code, final String path, final InputStream input) throws IOException { + out.write(code); + send(path); + send(input); + } + + /** + * Sends an input stream to the server. 
+ * @param input xml input + * @throws IOException I/O exception + */ + private void send(final InputStream input) throws IOException { + final BufferedInputStream bis = new BufferedInputStream(input); + final BufferedOutputStream bos = new BufferedOutputStream(out); + for(int b; (b = bis.read()) != -1;) { + // 0x00 and 0xFF will be prefixed by 0xFF + if ( b == 0x00 || b == 0xFF ) bos.write( 0xFF ); + bos.write( b ); + } + bos.write(0); + bos.flush(); + info = receive(); + if(!ok()) throw new IOException(info); + } + + /** + * Returns an MD5 hash. + * @param pw String + * @return String + */ + private static String md5(final String pw) { + final StringBuilder sb = new StringBuilder(); + try { + final MessageDigest md = MessageDigest.getInstance( "MD5" ); + md.update( pw.getBytes() ); + for ( final byte b : md.digest() ) { + final String s = Integer.toHexString( b & 0xFF ); + if ( s.length() == 1 ) sb.append( '0' ); + sb.append( s ); + } + } catch(final NoSuchAlgorithmException ex) { + // should not occur + ex.printStackTrace(); + } + return sb.toString(); + } + + /** + * Inner class for iterative query execution. + */ + public class Query { + /** Query id. */ + private final String id; + /** Cached results. */ + private ArrayList cache; + /** Cache pointer. */ + private int pos; + + /** + * Standard constructor. + * @param query query string + * @throws IOException I/O exception + */ + Query(final String query) throws IOException { + id = exec(0, query); + } + + /** + * Binds a value to an external variable. + * @param name name of variable + * @param value value + * @throws IOException I/O exception + */ + public void bind(final String name, final String value) throws IOException { + bind(name, value, ""); + } + + /** + * Binds a value with the specified type to an external variable. 
+ * @param name name of variable + * @param value value + * @param type type (can be an empty string) + * @throws IOException I/O exception + */ + public void bind(final String name, final String value, final String type) throws IOException { + cache = null; + exec(3, id + '\0' + name + '\0' + value + '\0' + type); + } + + /** + * Binds a value to the context item. + * @param value value + * @throws IOException I/O exception + */ + public void context(final String value) throws IOException { + context(value, ""); + } + + /** + * Binds a value with the specified type to the context item. + * @param value value + * @param type type (can be an empty string) + * @throws IOException I/O exception + */ + public void context(final String value, final String type) throws IOException { + cache = null; + exec(14, id + '\0' + value + '\0' + type); + } + + /** + * Checks for the next item. + * @return result of check + * @throws IOException I/O exception + */ + public boolean more() throws IOException { + if(cache == null) { + out.write( 4 ); + send( id ); + cache = new ArrayList<>(); + final ByteArrayOutputStream os = new ByteArrayOutputStream(); + while ( in.read() > 0 ) { + receive( in, os ); + cache.add( os.toByteArray() ); + os.reset(); + } + if ( !ok() ) throw new IOException( receive() ); + pos = 0; + } + if(pos < cache.size()) return true; + cache = null; + return false; + } + + /** + * Returns the next item. + * @return item string + * @throws IOException I/O Exception + */ + public String next() throws IOException { + return more() ? new String(cache.set(pos++, null), UTF8) : null; + } + + /** + * Returns the whole result of the query. + * @return query result + * @throws IOException I/O Exception + */ + public String execute() throws IOException { + return exec(5, id); + } + + /** + * Returns query info in a string. 
+ * @return query info + * @throws IOException I/O exception + */ + public String info() throws IOException { + return exec(6, id); + } + + /** + * Returns serialization parameters in a string. + * @return query info + * @throws IOException I/O exception + */ + public String options() throws IOException { + return exec(7, id); + } + + /** + * Closes the query. + * @throws IOException I/O exception + */ + public void close() throws IOException { + exec(2, id); + } + + /** + * Executes the specified command. + * @param code command code + * @param arg argument + * @return resulting string + * @throws IOException I/O exception + */ + private String exec(final int code, final String arg) throws IOException { + out.write(code); + send(arg); + final String s = receive(); + if(!ok()) throw new IOException(receive()); + return s; + } + } +} + diff --git a/src/main/java/org/citeplag/basex/Benchmark.java b/src/main/java/org/citeplag/basex/Benchmark.java new file mode 100644 index 0000000..fdbb771 --- /dev/null +++ b/src/main/java/org/citeplag/basex/Benchmark.java @@ -0,0 +1,142 @@ +package org.citeplag.basex; + + +import com.formulasearchengine.mathmlquerygenerator.NtcirPattern; +import com.formulasearchengine.mathmlquerygenerator.NtcirTopicReader; +import org.citeplag.basex.Client; +import org.citeplag.basex.Server; +import org.apache.commons.cli.*; +import org.xml.sax.SAXException; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPathExpressionException; +import javax.xml.xquery.XQException; +import java.io.File; +import java.io.IOException; +import java.util.List; + +/** + * Parses user input and starts benchmark. 
This class contains the main method + * + * @author Tobias Uhlich + * @author Thanh Phuong Luu + * @author Moritz Schubotz + */ + +public class Benchmark { + //By default, MathMLQueryGenerator constructs XQueries for a DB2 instance, so we need to change that into basex format + //Search elements + public static final String BASEX_HEADER = "declare default element namespace \"http://www.w3.org/1998/Math/MathML\";\n" + + "for $m in //*:expr return \n"; + //Return URL of element containing matches for queries + public static final String BASEX_FOOTER = " data($m/@url) \n"; + //Return hit as XML with required NTCIR data and highlighting + public static final String NTCIR_FOOTER = + "{map:for-each($q,function($k,$v){for $value in $v return })}"; + + private final CommandLine line; + + public Benchmark( CommandLine line ) { + this.line = line; + } + + /** + * Program entry point + */ + public static void main( String[] args ) { + Options options = new Options(); + Option help = new Option( "help", "print this message" ); + //Option projecthelp = new Option( "projecthelp", "print project help information" ); + //Option version = new Option( "version", "print the version information and exit" ); + //Option quiet = new Option( "quiet", "be extra quiet" ); + //Option verbose = new Option( "verbose", "be extra verbose" ); + //Option debug = new Option( "debug", "print debugging information" ); + Option dataSource = OptionBuilder.withArgName( "file" ) + .hasArg() + .isRequired() + .withDescription("use given file for data source") + .withLongOpt("datasource") + .create("d"); + Option querySource = OptionBuilder.withArgName( "file" ) + .hasArg() + .isRequired() + .withDescription("use given file for query source") + .withLongOpt("querysource") + .create("q"); + Option resultSink = OptionBuilder.withArgName( "file" ) + .hasArg() + .withDescription("specify file for the output") + .withLongOpt("output") + .create("o"); + options.addOption( dataSource ) + .addOption(querySource) + 
.addOption(resultSink) + .addOption(help) + .addOption("c", "CSV", false, "Print CSV instead of XML output") + .addOption("i", "ignoreLength", false, "Includes matches were the matching is tree is longer" + + "than the search pattern. For example $x+y+z$ for the pattern $x+y$."); + CommandLineParser parser = new GnuParser(); + try { + CommandLine line = parser.parse( options, args ); + if ( line.hasOption( "help" ) ) { + HelpFormatter formatter = new HelpFormatter(); + formatter.printHelp( "java -jar FILENAME.jar", options ); + } else { + (new Benchmark( line )).run(); + } + + } catch ( ParseException exp ) { + System.err.println( "Parsing failed. Reason: " + exp.getMessage() ); + return; + } catch ( IOException e ) { + System.err.println( "IO Error: " + e.getMessage() ); + e.printStackTrace(); + } catch ( ParserConfigurationException e ) { + System.err.println( "Error parsing query file: " + e.getMessage() ); + e.printStackTrace(); + } catch ( SAXException e ) { + System.err.println( "XML Error in query file: " + e.getMessage() ); + e.printStackTrace(); + } catch ( XPathExpressionException e ) { + System.err.println( "XPath Error in query file: " + e.getMessage() ); + e.printStackTrace(); + } catch (XQException e ) { + System.err.println( "Error in connection to the server: " + e.getMessage() ); + e.printStackTrace(); + } + } + + private void run() throws IOException, ParserConfigurationException, SAXException, XPathExpressionException, XQException { + File f = new File( line.getOptionValue( "datasource" ) ); + Server srv = Server.getInstance(); + srv.startup(f); + File queries = new File( line.getOptionValue( "querysource" ) ); + final NtcirTopicReader ntcirTopicReader = new NtcirTopicReader( queries ); + ntcirTopicReader.setPathToRoot("//*:expr"); + ntcirTopicReader.setRestrictLength( !line.hasOption( "i" ) ); + ntcirTopicReader.setAddQvarMap( false ); + List patterns = ntcirTopicReader.extractPatterns(); + final Client client = new Client( patterns ); + 
srv.shutdown(); + String result; + if ( line.hasOption( "c" ) ) { + result = "CSV option has been disabled for now. Use https://github.com/physikerwelt/xstlprocJ/blob/master/test/transform.xsl"; + } else { + result = client.getXML(); + } + boolean written = false; + if ( line.hasOption( "output" ) ) { + try { + File dest = new File( line.getOptionValue( "output" ) ); + org.apache.commons.io.FileUtils.writeStringToFile( dest, result ); + written = true; + } catch ( Exception e ) { + System.err.println( "Could not print to file" + e.getMessage() ); + } + } + if ( !written ) { + System.out.println( result ); + } + } + +} diff --git a/src/main/java/org/citeplag/basex/Client.java b/src/main/java/org/citeplag/basex/Client.java new file mode 100644 index 0000000..6fec9ac --- /dev/null +++ b/src/main/java/org/citeplag/basex/Client.java @@ -0,0 +1,462 @@ +package org.citeplag.basex; + +import com.formulasearchengine.mathmlquerygenerator.NtcirPattern; +import com.formulasearchengine.mathmlquerygenerator.QVarXQueryGenerator; +import com.formulasearchengine.mathmlquerygenerator.XQueryGenerator; +import com.formulasearchengine.mathmltools.xmlhelper.XMLHelper; +import org.citeplag.basex.BaseXClient; +import org.citeplag.basex.Benchmark; +import org.citeplag.basex.Server; +import org.citeplag.basex.TexQueryGenerator; +import org.citeplag.basex.types.Hit; +import org.citeplag.basex.types.Result; +import org.citeplag.basex.types.Results; +import org.citeplag.basex.types.Run; +import com.thoughtworks.xstream.XStream; +import com.thoughtworks.xstream.io.xml.XmlFriendlyNameCoder; +import com.thoughtworks.xstream.io.xml.Xpp3Driver; +import net.xqj.basex.BaseXXQDataSource; +import org.intellij.lang.annotations.Language; +import org.w3c.dom.Document; +import org.w3c.dom.Node; + +import javax.xml.namespace.QName; +import javax.xml.stream.*; +import javax.xml.stream.events.Attribute; +import javax.xml.stream.events.StartElement; +import javax.xml.stream.events.XMLEvent; +import 
javax.xml.transform.TransformerException; +import javax.xml.xquery.*; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.regex.Pattern; + +/** + * Inputs NtcirPattern queries, runs them through the BaseX search engine, and then outputs results. + * Created by Moritz on 08.11.2014. + */ +public class Client { + public static final String USER = "admin"; + public static final String PASSWORD = "admin"; + private static final Pattern CR_PATTERN = Pattern.compile("\r"); + private Results results = new Results(); + private Run currentRun = new Run( "baseX" + System.currentTimeMillis(), "automated" ); + private Result currentResult = new Result( "NTCIR11-Math-" ); + private Long lastQueryDuration; + private boolean useXQ = true; + private boolean showTime = true; + + /** + * Constructs a new empty Client. Used for running individual queries. + */ + public Client() {} + + /** + * Constructs a new Client with the given queryset. This constructor will also search all queries immediately. 
+ * @param patterns List of NtcirPattern + */ + public Client(List patterns) throws XQException { + for (final NtcirPattern pattern : patterns) { + processPattern( pattern ); + } + results.addRun( currentRun ); + } + + /** + * @return Returns given Result as XML string, and shows time based on showTime + */ + public static String resultToXML( Result result ) { + //Use custom coder to disable underscore escaping so run_type is properly printed + final XStream stream = new XStream( new Xpp3Driver( new XmlFriendlyNameCoder( "_-", "_" ) ) ); + if ( !result.getShowTime() ) { + stream.omitField( Result.class, "ms" ); + } + stream.processAnnotations( Result.class ); + return "\n" + stream.toXML( result ); + } + + /** + * @return Returns given Results as XML string, and shows time based on showTime + */ + public static String resultsToXML( Results results ) { + //Use custom coder to disable underscore escaping so run_type is properly printed + final XStream stream = new XStream(new Xpp3Driver( new XmlFriendlyNameCoder( "_-", "_" ) ) ); + if ( !results.getShowTime() ) { + stream.omitField( Run.class, "ms" ); + stream.omitField( Result.class, "ms" ); + } + stream.processAnnotations( Results.class ); + return "\n" + stream.toXML( results ); + } + + /** + * @return the given XML string as an object of the given class. note that this method disables + * underscore as an escape character if the class is Results so the attribute "run_type" is printed correctly. 
+ */ + public static Object xmlToClass( String xml, Class convertClass ) { + final XStream stream; + if ( convertClass.equals( Results.class )) { + //Use custom coder to disable underscore escaping so run_type is properly printed + stream = new XStream( new Xpp3Driver( new XmlFriendlyNameCoder( "_-", "_" ) ) ); + } else { + stream = new XStream(); + } + stream.processAnnotations( convertClass ); + return stream.fromXML( xml ); + } + + private static XQConnection getXqConnection() throws XQException { + final Server srv = Server.getInstance(); + final XQDataSource xqs = new BaseXXQDataSource(); + //Other properties: description, logLevel, loginTimeout, readOnly + xqs.setProperty("serverName", Server.SERVER_NAME); + xqs.setProperty("port", String.valueOf(Server.PORT)); + xqs.setProperty("databaseName", Server.DATABASE_NAME); + xqs.setProperty("user", USER); + xqs.setProperty("password", PASSWORD); + + return xqs.getConnection(USER, PASSWORD); + } + + //Alternative API that enables XQuery v3.1 + private static BaseXClient getBaseXClient() throws IOException { + final Server srv = Server.getInstance(); + final BaseXClient session = new BaseXClient(Server.SERVER_NAME, Server.PORT, USER, PASSWORD); + session.execute("OPEN " + Server.DATABASE_NAME); + return session; + } + + /** + * @return Returns new StartElement with replaced value for given attribute + */ + public static StartElement replaceAttr(StartElement event, String attribute, String value) { + final XMLEventFactory eventFactory = XMLEventFactory.newInstance(); + final Iterator attributeIterator = event.getAttributes(); + final List attrs = new ArrayList<>(); + while (attributeIterator.hasNext()) { + final Attribute curAttr = attributeIterator.next(); + if (attribute.equals(curAttr.getName().getLocalPart())) { + attrs.add(eventFactory.createAttribute(new QName(attribute), value)); + } else { + attrs.add(curAttr); + } + } + return eventFactory.createStartElement(new QName(event.getName().getLocalPart()), 
attrs.iterator(), event.getNamespaces()); + } + + /** + * Runs a query with no timing or effects on {@link #currentResult} + * + * @param query XQuery string + * @return XQResult in string format + * @throws XQException + */ + static String directXQuery(String query) throws XQException { + final StringBuilder outputBuilder = new StringBuilder(); + final XQConnection conn = getXqConnection(); + try { + final XQPreparedExpression xqpe = conn.prepareExpression(query); + final XQResultSequence rs = xqpe.executeQuery(); + while (rs.next()) { + outputBuilder.append(CR_PATTERN.matcher(rs.getItemAsString(null)).replaceAll("")); + } + } finally { + conn.close(); + } + return outputBuilder.toString(); + } + + /** + * @return Returns results in XML format. + */ + public String getXML() { + results.setShowTime(showTime); + return resultsToXML(results); + } + + /** + * Setter for whether or not to show time in results. + * @param showTime Boolean for showing time or not + */ + public void setShowTime (boolean showTime) { + this.showTime = showTime; + results.setShowTime( showTime ); + } + + /** + * Setter for whether or not to use XQuery expression. + * @param useXQ Boolean for using XQuery expressions. + */ + public void setUseXQ (boolean useXQ) { + this.useXQ = useXQ; + } + + private void processPattern(NtcirPattern pattern) throws XQException { + currentResult = new Result( pattern.getNum() ); + currentResult.setShowTime( showTime ); + basex( pattern.getxQueryExpression() ); + currentRun.addResult( currentResult ); + } + + /** + * Wrapper around XQuery search method runQueryBaseXSimple() which handles exceptions and returns the length of time + * it took to run that query. + * @param query Query in XQuery string format. + * @return Time it took to run the query. 
+ */ + public Long basex(String query) throws XQException { + runQueryBaseXSimple( query ); + return lastQueryDuration; + } + + /** + * Connects with the BaseX database, sending the given query as an XQuery query and saves the + * result in the currentResult list. Assumes NTCIR_FOOTER is used as the result return type. + * @param query Query in XQuery string format. + * @param queryID ID number to mark this query (required for NTCIR search highlight format) + * @return Result in NTCIR_FOOTER XML format (not in full NTCIR format) + * @throws XQException When getXqConnection() fails to connect to the BaseX server, XQJ fails to process the query, + * or XQJ fails to execute the query. + * @throws XMLStreamException When the output fails to parse as XML + * @throws IOException When the client fails to open properly + * @throws TransformerException When the XML reader/writers fail + */ + protected Result runQueryNTCIR( String query, String queryID ) + throws XQException, XMLStreamException, IOException, TransformerException, java.io.UnsupportedEncodingException { + int score = 0; + int rank = 1; + if ( useXQ ) { + return null; + } else { + final BaseXClient session = getBaseXClient(); + try { + lastQueryDuration = System.nanoTime(); + final BaseXClient.Query querySession = session.query( query ); + lastQueryDuration = System.nanoTime() - lastQueryDuration; + currentResult.setTime( lastQueryDuration ); + currentResult.setShowTime( showTime ); + + while ( querySession.more() ) { + final String result = querySession.next(); + final byte[] byteArray = result.getBytes( "UTF-8" ); + final ByteArrayInputStream inputStream = new ByteArrayInputStream( byteArray ); + final XMLEventReader reader = XMLInputFactory.newFactory().createXMLEventReader( inputStream ); + final StringWriter hitWriter = new StringWriter(); + final XMLEventWriter writer = XMLOutputFactory.newInstance().createXMLEventWriter( hitWriter ); + + while ( reader.hasNext() ) { + final XMLEvent curEvent =
reader.nextEvent(); + switch ( curEvent.getEventType() ) { + case XMLStreamConstants.START_ELEMENT: + if ( "formula".equals( curEvent.asStartElement().getName().getLocalPart() ) ) { + writer.add( replaceAttr( curEvent.asStartElement(), "for", queryID ) ); + } else { + writer.add( curEvent ); + } + break; + case XMLStreamConstants.START_DOCUMENT: + //do nothing + break; + default: + writer.add( curEvent ); + break; + } + } + currentResult.addHit( (Hit) xmlToClass( hitWriter.toString(), Hit.class ) ); + } + } finally { + session.close(); + } + return currentResult; + } + } + + /** + * Connects with the BaseX database, sending the given query as an XQuery query and saves the + * result in the currentResult list. Assumes BASEX_FOOTER is used as the result return type. + * @param query Query in XQuery string format. + * @return Number of results. + * @throws XQException When getXqConnection() fails to connect to the BaseX server, XQJ fails to process the query, + * or XQJ fails to execute the query. 
+ */ + protected int runQueryBaseXSimple( String query ) throws XQException { + int score = 10; + int rank = 1; + if ( useXQ ) { + final XQConnection conn = getXqConnection(); + try { + final XQPreparedExpression xqpe = conn.prepareExpression( query ); + lastQueryDuration = System.nanoTime(); + final XQResultSequence rs = xqpe.executeQuery(); + lastQueryDuration = System.nanoTime() - lastQueryDuration; + currentResult.setTime( lastQueryDuration ); + currentResult.setShowTime( showTime ); + while ( rs.next() ) { + final String result = rs.getItemAsString( null ); + currentResult.addHit( new Hit( CR_PATTERN.matcher( result ).replaceAll( "" ), "", score, rank ) ); + rank++; + } + } finally { + conn.close(); + } + } else { + //TODO: This does not yet work +/* measurement = System.nanoTime(); + new Open("math").execute( Server.context ); + QueryProcessor proc = new QueryProcessor(query, Server.context ); + Iter iter = proc.iter(); + for(Item item; (item = iter.next()) != null;) { + Object o = item.toJava(); + String s; + if(o instanceof String){ + s = (String) o; + } else { + s = item.toString(); + } + currentResult.addHit( s, "", score, rank ); + rank++; + }*/ + } + return rank-1; + } + + /** + * Calls {@link #runQueryBaseXSimple(String)} and wraps the result with the NTCIR XML format. 
+ * This adds the result to {@link #currentResult} + * @param query XQuery string + * @throws XQException when the server xq connection fails + * @return NTCIR XML formatted result + */ + public Results runQueryNtcirWrap( String query ) throws XQException { + currentResult = new Result( "NTCIR11-Math-"); + currentResult.setShowTime( showTime ); + runQueryBaseXSimple( query ); + final Results resultsFrame = new Results(); + resultsFrame.setShowTime( showTime ); + if ( currentResult.getNumHits() != 0 ) { + final Run run = new Run( "", "" ); + run.setShowTime( showTime ); + run.addResult( currentResult ); + resultsFrame.addRun( run ); + } + return resultsFrame; + } + + /** + * Calls {@link #runQueryNtcirWrap(String)} given a MathML MathWebSearch XML document query + * @param mwsQuery Document in MathML MathWebSearch query format + * @throws XQException when the server xq connection fails + * @return NTCIR XML formatted result + */ + public Results runMWSQuery( Document mwsQuery ) throws XQException { + if ( mwsQuery == null ){ + throw new IllegalArgumentException( "Got empty MathML document" ); + } + final QVarXQueryGenerator generator = new QVarXQueryGenerator(mwsQuery); + generator.setPathToRoot("//*:expr"); + generator.setReturnFormat(Benchmark.BASEX_FOOTER ); + generator.setAddQvarMap( false ); + return runQueryNtcirWrap(generator.toString()); + } + + /** + * Calls {@link #runMWSQuery(Document)} given a Tex string. + * Converts the Tex string into MathML MathWebSearch XML document query format and then runs the search. 
+ * @param tex Tex string + * + * @throws XQException when the server xq connection fails + * @throws IOException when the tex to MathML conversion fails + * @return NTCIR XML formatted result + */ + public Results runTexQuery( String tex ) throws IOException, XQException { + if (tex == null || tex.isEmpty()){ + throw new IllegalArgumentException( "Got empty TeX query" ); + } + final TexQueryGenerator t = new TexQueryGenerator(); + final String mmlString = t.request(tex); + final Document doc = XMLHelper.string2Doc( mmlString, true ); + return runMWSQuery( doc ); + } + + /** + * Returns XQuery expression for matching formulae based on revision number + * @param rev Revision number to match + * @return XQuery expression + */ + private String getRevFormula( int rev ) { + return "expr[matches(@url, '" + rev + "#(.*)')]"; + } + + /** + * Shortcut call on {@link #directXQuery(String)} to count the number of formulae with specified revision number + * @param rev Revision number to count + * @return Number of formulae with specified revision number + */ + public int countRevisionFormula(int rev){ + try { + return Integer.parseInt( directXQuery( "count(//*:" + getRevFormula( rev ) + ")" + ) ); + } catch (final XQException e) { + e.printStackTrace(); + return 0; + } + } + + /** + * Shortcut call on {@link #directXQuery(String)} to count the total number of formulae + * @return Total number of formulae + */ + public int countAllFormula(){ + try { + return Integer.parseInt( directXQuery( "count(./*/*)" ) ); + } catch (final XQException e) { + e.printStackTrace(); + return 0; + } + } + + /** + * Shortcut call on {@link #directXQuery(String)} to delete all formulae with specified revision number + * @param rev Revision number + * @return Whether or not this operation succeeded + */ + public boolean deleteRevisionFormula(int rev){ + try { + directXQuery( "delete node //*:"+ getRevFormula( rev ) ); + return countRevisionFormula(rev) == 0; + } catch (XQException e) { + 
e.printStackTrace(); + return false; + } + } + + /** + * Inserts the first formula from a harvest XML file into the server database. + * @param n Node to start with + * @return Whether or not this operation succeeded + */ + public boolean updateFormula(Node n) { + try { + @Language("XQuery") final String xUpdate = "declare namespace mws=\"http://search.mathweb.org/ns\";\n" + + "declare variable $input external;\n" + + "for $e in $input/mws:expr\n" + + "return ( delete node //*[@url=$e/@url], insert node $e into /mws:harvest[1])"; + final XQConnection conn = getXqConnection(); + try { + final XQPreparedExpression xqpe = conn.prepareExpression( xUpdate ); + xqpe.bindNode( new QName( "input" ), n, null ); + xqpe.executeQuery(); + } finally { + conn.close(); + } + return true; + } catch (final XQException e ) { + e.printStackTrace(); + return false; + } + } +} diff --git a/src/main/java/org/citeplag/basex/Server.java b/src/main/java/org/citeplag/basex/Server.java new file mode 100644 index 0000000..d34e1c6 --- /dev/null +++ b/src/main/java/org/citeplag/basex/Server.java @@ -0,0 +1,78 @@ +package org.citeplag.basex; + +import java.io.File; +import java.io.IOException; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.basex.BaseXServer; +import org.jetbrains.annotations.NotNull; + +/** + * Singleton server for handling BaseX queries. + * Created by Moritz on 08.11.2014. + */ +public final class Server { + private static Server serverInstance; + public BaseXServer baseXServer; + public static final String SERVER_NAME = "localhost"; + public static final int PORT = 1984; + public static final String DATABASE_NAME = "math"; + private static final Logger log = LogManager.getLogger(Server.class.getName()); + + + private Server() { + } + + /** + * @return The only instance of Server. 
+ */ + @NotNull public static Server getInstance() { + synchronized (Server.class) { + if (serverInstance == null) { + serverInstance = new Server(); + } + } + return serverInstance; + } + + /** + * Shuts down the server if it is already running, and starts it with the specified data file. + * Schedules the monitor task as well. + * + * @param input The data file or directory to use. + * @throws IOException Thrown if it fails to read input + */ + public void startup(@NotNull File input) throws IOException { + shutdown(); + + /* [CG] If a client is used (as I initially proposed), the database will get lost + * once the client connection is closed. So we’ll have (at least) 2 options here: + * + * - Create a client, set MAINMEM to true and create database only close it if server is closed + * - Create main-memory database at startup (it will then be bound to the server process). + * + * I went for the second option... */ + + // "-d" for debug + baseXServer = new BaseXServer( "-p" + PORT, "-n" + SERVER_NAME, + "-c " + "set mainmem on;set intparse on;create db " + DATABASE_NAME + " " + input.getAbsolutePath()); + + /* [CG] I dropped all health checks. If something should be going wrong here, please give me a note; + * it should definitely be fixed! */ + + log.info("Import completed."); + } + + /** + * Shuts down the server. + * + * @throws IOException Thrown if server fails to shutdown.
+ */ + public void shutdown() throws IOException { + if (baseXServer != null) { + baseXServer.stop(); + baseXServer = null; + } + } +} diff --git a/src/main/java/org/citeplag/basex/TexQueryGenerator.java b/src/main/java/org/citeplag/basex/TexQueryGenerator.java new file mode 100644 index 0000000..e442e3d --- /dev/null +++ b/src/main/java/org/citeplag/basex/TexQueryGenerator.java @@ -0,0 +1,121 @@ +package org.citeplag.basex; + + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.formulasearchengine.mathmlconverters.latexml.LaTeXMLConverter; +import com.formulasearchengine.mathmlconverters.latexml.LaTeXMLServiceResponse; +import com.formulasearchengine.mathmlconverters.latexml.LateXMLConfig; +import com.google.common.collect.Lists; +import net.sf.saxon.trans.Err; +import net.xqj.basex.bin.L; + +import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.http.NameValuePair; +import org.apache.http.client.entity.UrlEncodedFormEntity; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.message.BasicNameValuePair; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.springframework.web.client.HttpClientErrorException; + +import java.io.IOException; +import java.io.InputStream; +import java.util.*; + +public class TexQueryGenerator { + + private List params = getDefaultParams(); + private LateXMLConfig lateXMLConfig = new LateXMLConfig() + //.setUrl("http://drmf-latexml.wmflabs.org") + //.setUrl("https://mathoid-beta.wmflabs.org") // bad request + .setUrl("https://wikimedia.org/api/rest_") + .setParams(updateParamFormat(this.getParams())); + private LaTeXMLServiceResponse serviceResponse; + + + List getParams() { + return params; + } + + @NotNull + private ArrayList getDefaultParams() { + 
ArrayList pDefault = new ArrayList<>(); + pDefault.add(new BasicNameValuePair("format", "xhtml")); + pDefault.add(new BasicNameValuePair("whatsin", "math")); + pDefault.add(new BasicNameValuePair("whatsout", "math")); + pDefault.add(new BasicNameValuePair("cmml", "")); + pDefault.add(new BasicNameValuePair("nodefaultresources", "")); + pDefault.add(new BasicNameValuePair("preload", "LaTeX.pool")); + pDefault.add(new BasicNameValuePair("preload", "article.cls")); + pDefault.add(new BasicNameValuePair("preload", "amsmath.sty")); + pDefault.add(new BasicNameValuePair("preload", "amsthm.sty")); + pDefault.add(new BasicNameValuePair("preload", "amstext.sty")); + pDefault.add(new BasicNameValuePair("preload", "amssymb.sty")); + pDefault.add(new BasicNameValuePair("preload", "eucal.sty")); + pDefault.add(new BasicNameValuePair("preload", "[dvipsnames]xcolor.sty")); + pDefault.add(new BasicNameValuePair("preload", "url.sty")); + pDefault.add(new BasicNameValuePair("preload", "hyperref.sty")); + pDefault.add(new BasicNameValuePair("preload", "mws.sty")); + pDefault.add(new BasicNameValuePair("preload", "texvc")); + return pDefault; + } + + void setParams(List params) { + this.params = params; + lateXMLConfig.setParams(updateParamFormat(params)); + } + + private Map updateParamFormat(List params) { + HashMap map = new HashMap<>(); + for (NameValuePair pair : params) { + if (map.containsKey(pair.getName())) { + Object oSetting = map.get(pair.getName()); + if (oSetting instanceof List) { + ((List) oSetting).add(pair.getValue()); + } else { + assert (oSetting instanceof String); + map.put(pair.getName(), Lists.newArrayList(pair.getValue(), oSetting)); + } + } else { + map.put(pair.getName(), pair.getValue()); + } + } + return map; + } + + String getLaTeXMLURL() { + return lateXMLConfig.getUrl(); + } + + Map getOb() { + HashMap hashMap = new HashMap(); + hashMap.put("status_code", serviceResponse.getStatusCode()); + hashMap.put("status", serviceResponse.getStatus()); + 
hashMap.put("log", serviceResponse.getLog()); + hashMap.put("result", serviceResponse.getResult()); + return hashMap; + } + + void setLaTeXMLURL(String laTeXMLURL) { + lateXMLConfig.setUrl(laTeXMLURL); + } + + String request(String tex) throws IOException, IllegalStateException { + LaTeXMLConverter converter = new LaTeXMLConverter(lateXMLConfig); + try { + serviceResponse = converter.convertLatexmlService(tex); + } catch (Exception e) { + throw new IOException("Tex request to MathML conversion server produced failed response.", e); + } + if (serviceResponse.getStatusCode() > 1) { + throw new IOException("Tex request to MathML conversion server produced failed response.", + new IOException(serviceResponse.getResult())); + } + return serviceResponse.getResult(); + + } +} diff --git a/src/main/java/org/citeplag/basex/types/Formula.java b/src/main/java/org/citeplag/basex/types/Formula.java new file mode 100644 index 0000000..4c1d63b --- /dev/null +++ b/src/main/java/org/citeplag/basex/types/Formula.java @@ -0,0 +1,97 @@ +package org.citeplag.basex.types; + + +import com.formulasearchengine.mathosphere.basex.types.Qvar; +import com.thoughtworks.xstream.annotations.XStreamAlias; +import com.thoughtworks.xstream.annotations.XStreamAsAttribute; +import com.thoughtworks.xstream.annotations.XStreamConverter; +import com.thoughtworks.xstream.annotations.XStreamImplicit; +import com.thoughtworks.xstream.converters.Converter; +import com.thoughtworks.xstream.converters.MarshallingContext; +import com.thoughtworks.xstream.converters.UnmarshallingContext; +import com.thoughtworks.xstream.io.HierarchicalStreamReader; +import com.thoughtworks.xstream.io.HierarchicalStreamWriter; + +import java.util.ArrayList; +import java.util.List; + +/** + * Stores formula in Ntcir format. + * Created by jjl4 on 6/24/15. 
+ */ +@XStreamAlias("formula") +public class Formula { + @XStreamAlias("id") + @XStreamAsAttribute + private String id; + + @XStreamAlias("for") + @XStreamAsAttribute + private String queryFormulaID; + + @XStreamAlias("xref") + @XStreamAsAttribute + private String filename; + + //This is a string so that "" strings are deserialized correctly + @XStreamAlias("score") + @XStreamAsAttribute + private String score; + + @XStreamAlias("qvar") + @XStreamImplicit + private List qvars; + + public Formula( String id, String queryFormulaID, String filenameAndFormulaID, Integer score ) { + this.id = id; + this.queryFormulaID = queryFormulaID; + this.filename = filenameAndFormulaID; + //Null assignment makes attribute disappear + this.score = score == null ? null : String.valueOf( score ); + qvars = new ArrayList<>(); + } + + public void addQvar( com.formulasearchengine.mathosphere.basex.types.Qvar qvar ) { + qvars.add( qvar ); + } + + public void setQvars( List qvars ) { + this.qvars = new ArrayList<>( qvars ); + } + + public List getQvars() { + return new ArrayList<>( qvars ); + } + + public void setScore( Integer score ) { + this.score = score == null ? "" : String.valueOf( score ); + } + + public Integer getScore() { + return score != null && score.isEmpty() ? 
null : Integer.valueOf( score ).intValue(); + } + + public String getId() { + return id; + } + + public void setId( String id ) { + this.id = id; + } + + public String getFor() { + return queryFormulaID; + } + + public void setFor( String queryFormulaID ) { + this.queryFormulaID = queryFormulaID; + } + + public String getXref() { + return filename; + } + + public void setXref( String filename ) { + this.filename = filename; + } +} diff --git a/src/main/java/org/citeplag/basex/types/Hit.java b/src/main/java/org/citeplag/basex/types/Hit.java new file mode 100644 index 0000000..9d1e2c4 --- /dev/null +++ b/src/main/java/org/citeplag/basex/types/Hit.java @@ -0,0 +1,57 @@ +package org.citeplag.basex.types; + +import org.citeplag.basex.types.Formula; +import com.thoughtworks.xstream.annotations.XStreamAlias; +import com.thoughtworks.xstream.annotations.XStreamAsAttribute; +import com.thoughtworks.xstream.annotations.XStreamImplicit; + +import java.util.ArrayList; +import java.util.List; + +/** + * Stores hits in Ntcir format. + * Created by jjl4 on 6/24/15. + */ +@XStreamAlias("hit") +public class Hit { + @XStreamAlias("id") + @XStreamAsAttribute + private final String id; + + @XStreamAlias("xref") + @XStreamAsAttribute + private final String filename; + + //These are strings so that "" strings are deserialized correctly + @XStreamAlias("score") + @XStreamAsAttribute + private final String score; + + @XStreamAlias("rank") + @XStreamAsAttribute + private final String rank; + + @XStreamImplicit + private List formulae; + + public Hit( String id, String filename, Integer score, Integer rank ) { + this.id = id; + this.filename = filename; + this.score = score == null ? "" : String.valueOf( score ); + this.rank = rank == null ? 
"" : String.valueOf( rank ); + this.formulae = new ArrayList<>(); + } + + public void addFormula( Formula formula ) { + formulae.add( formula ); + } + + public void setFormulae( List formulae ) { + this.formulae = new ArrayList<>( formulae ); + } + + public List getFormulae() { + return new ArrayList<>( formulae ); + } + +} diff --git a/src/main/java/org/citeplag/basex/types/Qvar.java b/src/main/java/org/citeplag/basex/types/Qvar.java new file mode 100644 index 0000000..1771ab5 --- /dev/null +++ b/src/main/java/org/citeplag/basex/types/Qvar.java @@ -0,0 +1,24 @@ +package org.citeplag.basex.types; + +import com.thoughtworks.xstream.annotations.XStreamAlias; +import com.thoughtworks.xstream.annotations.XStreamAsAttribute; + +/** + * Stores qvar in Ntcir format. + * Created by jjl4 on 6/24/15. + */ +@XStreamAlias("qvar") +public class Qvar { + @XStreamAlias("for") + @XStreamAsAttribute + private final String queryQvarID; + + @XStreamAlias("xref") + @XStreamAsAttribute + private final String qvarID; + + public Qvar( String queryQvarID, String qvarID ) { + this.queryQvarID = queryQvarID; + this.qvarID = qvarID; + } +} diff --git a/src/main/java/org/citeplag/basex/types/Result.java b/src/main/java/org/citeplag/basex/types/Result.java new file mode 100644 index 0000000..b4eec17 --- /dev/null +++ b/src/main/java/org/citeplag/basex/types/Result.java @@ -0,0 +1,76 @@ +package org.citeplag.basex.types; + +import org.citeplag.basex.types.Hit; +import com.thoughtworks.xstream.annotations.XStreamAlias; +import com.thoughtworks.xstream.annotations.XStreamAsAttribute; +import com.thoughtworks.xstream.annotations.XStreamImplicit; +import com.thoughtworks.xstream.annotations.XStreamOmitField; + +import java.util.ArrayList; +import java.util.List; + +/** + * Stores Result in Ntcir format. + * Created by jjl4 on 6/24/15. 
+ */ +@XStreamAlias("result") +public class Result { + @XStreamAlias("for") + @XStreamAsAttribute + private final String queryID; + + //This is a string so that "" strings are deserialized correctly + @XStreamAlias("runtime") + @XStreamAsAttribute + private String ms; + + @XStreamImplicit + private List hits; + + @XStreamOmitField + private boolean showTime = true; + + public Result( String queryIDNum, Long ms ) { + this.ms = ms == null ? "" : String.valueOf( ms ); + this.queryID = queryIDNum; + this.hits = new ArrayList<>(); + } + + public Result( String queryIDNum ) { + this.queryID = queryIDNum; + this.hits = new ArrayList<>(); + this.ms = ""; + } + + public void setShowTime( boolean showTime ) { + this.showTime = showTime; + } + + public boolean getShowTime() { + return showTime; + } + + public Long getTime() { + return ms != null && ms.isEmpty() ? null : Long.valueOf( ms ); + } + + public void setTime( Long ms ) { + this.ms = ms == null ? "" : String.valueOf( ms ); + } + + public void addHit( Hit hit ) { + hits.add( hit ); + } + + public void setHits( List hits ) { + this.hits = new ArrayList<>( hits ); + } + + public List getHits() { + return new ArrayList( hits ); + } + + public int getNumHits() { + return hits.size(); + } +} diff --git a/src/main/java/org/citeplag/basex/types/Results.java b/src/main/java/org/citeplag/basex/types/Results.java new file mode 100644 index 0000000..746f5c1 --- /dev/null +++ b/src/main/java/org/citeplag/basex/types/Results.java @@ -0,0 +1,73 @@ +package org.citeplag.basex.types; + +import org.citeplag.basex.types.Run; +import com.thoughtworks.xstream.annotations.XStreamAlias; +import com.thoughtworks.xstream.annotations.XStreamAsAttribute; +import com.thoughtworks.xstream.annotations.XStreamImplicit; +import com.thoughtworks.xstream.annotations.XStreamOmitField; + +import java.util.ArrayList; +import java.util.List; + +/** + * Stores results in NTCIR format. 
+ * + * @author Tobias Uhlich + * @author Thanh Phuong Luu + */ +@XStreamAlias("results") +public class Results { + @XStreamImplicit + private List runs; + + @XStreamAlias("xmlns") + @XStreamAsAttribute + private String xmlns="http://ntcir-math.nii.ac.jp/"; + + @XStreamOmitField + private boolean showTime = true; + + public Results() { + this.runs = new ArrayList<>(); + //hack b/c xstream does not support default values + this.xmlns = xmlns; + } + + public Results( List runs ) { + this.runs = new ArrayList( runs ); + this.xmlns = xmlns; + } + + public void setShowTime( boolean showTime ) { + this.showTime = showTime; + if ( runs != null ) { + for ( final Run run : runs ) { + run.setShowTime( showTime ); + } + } + } + + public boolean getShowTime() { + return this.showTime; + } + + public void addRun( Run run ) { + run.setShowTime( showTime ); + this.runs.add( run ); + } + + public void setRuns( List runs ) { + this.runs = new ArrayList<>( runs ); + for ( final Run run : runs ) { + run.setShowTime( showTime ); + } + } + + public List getRuns() { + return runs; + } + + public int getNumRuns() { + return runs.size(); + } +} diff --git a/src/main/java/org/citeplag/basex/types/Run.java b/src/main/java/org/citeplag/basex/types/Run.java new file mode 100644 index 0000000..9bbe124 --- /dev/null +++ b/src/main/java/org/citeplag/basex/types/Run.java @@ -0,0 +1,89 @@ +package org.citeplag.basex.types; + +import org.citeplag.basex.types.Result; +import com.thoughtworks.xstream.annotations.XStreamAlias; +import com.thoughtworks.xstream.annotations.XStreamAsAttribute; +import com.thoughtworks.xstream.annotations.XStreamImplicit; +import com.thoughtworks.xstream.annotations.XStreamOmitField; + +import java.util.ArrayList; +import java.util.List; + +/** + * Store Run in Ntcir format. + * Created by jjl4 on 6/24/15. 
+ */ +@XStreamAlias("run") +public class Run { + @XStreamAlias("runtag") + @XStreamAsAttribute + private final String runtag; + + //This is a string so that "" strings are deserialized correctly + @XStreamAlias("runtime") + @XStreamAsAttribute + private String ms; + + @XStreamAlias("run_type") + @XStreamAsAttribute + private final String type; + + @XStreamImplicit + private List results; + + @XStreamOmitField + private boolean showTime = true; + + public Run( String runtag, Long ms, String type ) { + this.runtag = runtag; + this.ms = ms == null ? "" : String.valueOf( ms ); + this.type = type; + this.results = new ArrayList<>(); + } + + public Run( String runtag, String type ) { + this.runtag = runtag; + this.type = type; + this.results = new ArrayList<>(); + this.ms = ""; + } + + public void setShowTime( boolean showTime ) { + this.showTime = showTime; + + if ( results != null ) { + for ( final Result result : results ) { + result.setShowTime( showTime ); + } + } + } + + public boolean getShowTime() { + return this.showTime; + } + + public void setTime( Long ms ) { + this.ms = ms == null ? 
"" : String.valueOf( ms ); + } + + public void addResult( Result result ) { + result.setShowTime( showTime ); + results.add( result ); + } + + public List getResults() { + return new ArrayList<>( results ); + } + + public void setResults( List results ) { + this.results = new ArrayList<>( results ); + + for ( final Result result : results ) { + result.setShowTime( showTime ); + } + } + + public int getNumResults() { + return results.size(); + } +} From c96fc1d74f2ef0fa4f5ddd1078df1f8eeb567226 Mon Sep 17 00:00:00 2001 From: Stegmujo Date: Thu, 3 Mar 2022 19:06:31 +0100 Subject: [PATCH 6/9] fixing the basex command --- src/main/java/org/citeplag/basex/Server.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/citeplag/basex/Server.java b/src/main/java/org/citeplag/basex/Server.java index d34e1c6..10d8153 100644 --- a/src/main/java/org/citeplag/basex/Server.java +++ b/src/main/java/org/citeplag/basex/Server.java @@ -56,7 +56,7 @@ public void startup(@NotNull File input) throws IOException { // "-d" for debug baseXServer = new BaseXServer( "-p" + PORT, "-n" + SERVER_NAME, - "-c " + "set mainmem on;set intparse on;create db " + DATABASE_NAME + " " + input.getAbsolutePath()); + "-c " + "set mainmem true\nset intparse on\n create db " + DATABASE_NAME + " " + input.getAbsolutePath()); /* [CG] I dropped all health checks. If something should be going wrong here, please give me a note; * it should definitely be fixed! 
*/ From 174b3f6c58fca4208596a746c047912864e357c7 Mon Sep 17 00:00:00 2001 From: Stegmujo Date: Thu, 3 Mar 2022 19:25:48 +0100 Subject: [PATCH 7/9] adding snapshot --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index b652590..1a9147d 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@ com.formulasearchengine vmext-demo pom - 1.0 + 1.0-SNAPSHOT From 7fe5eb03b5f73a00c219ef2317f1269143d141ac Mon Sep 17 00:00:00 2001 From: Stegmujo Date: Fri, 4 Mar 2022 20:02:47 +0100 Subject: [PATCH 8/9] fix ref in BaseXController --- src/main/java/org/citeplag/controller/BaseXController.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/citeplag/controller/BaseXController.java b/src/main/java/org/citeplag/controller/BaseXController.java index cab9731..83414ab 100644 --- a/src/main/java/org/citeplag/controller/BaseXController.java +++ b/src/main/java/org/citeplag/controller/BaseXController.java @@ -1,7 +1,7 @@ package org.citeplag.controller; import com.formulasearchengine.mathosphere.basex.Client; -import com.formulasearchengine.mathosphere.basex.Server; +import org.citeplag.basex.Server; import io.swagger.annotations.ApiOperation; import io.swagger.annotations.ApiParam; import org.apache.log4j.LogManager; From 8ea038db1d51896ae2862a4882dfd0da77b6a979 Mon Sep 17 00:00:00 2001 From: Stegmujo Date: Fri, 4 Mar 2022 20:07:12 +0100 Subject: [PATCH 9/9] upload working configuration for basex-config * MathController and application.yaml contain temporary fixes --- application.yaml | 4 +- my_harvests/math000000000000.xml | 73 +++++++++++++++++++ .../citeplag/controller/MathController.java | 23 +----- 3 files changed, 76 insertions(+), 24 deletions(-) create mode 100755 my_harvests/math000000000000.xml diff --git a/application.yaml b/application.yaml index 30d6c32..a9f2852 100644 --- a/application.yaml +++ b/application.yaml @@ -1,6 +1,6 @@ server: # servlet-path: /pipe # custom servlet-path - port: 8080 # 
default server port, if not set otherwise + port: 8097 # default server port, if not set otherwise # Math AST Renderer - Main URL mast.url: https://vmext.wmflabs.org/ @@ -27,7 +27,7 @@ translator: jarPath: "LaCASt/bin/latex-to-cas-translator.jar" basexserver: - harvestPath: "sampleHarvest.xml" + harvestPath: "/home/johannes/Repositories/vmext-demo/my_harvests/" # -db /opt/zbmath/empty-dump/ -tfidf /opt/zbmath/tfidf/ -index zbmath -esMaxHits 200 -minDF 25 -maxDF 50000 formulacloud: diff --git a/my_harvests/math000000000000.xml b/my_harvests/math000000000000.xml new file mode 100755 index 0000000..e2b2446 --- /dev/null +++ b/my_harvests/math000000000000.xml @@ -0,0 +1,73 @@ + + + + + + + E + = + + m + + + c + 2 + + + + + + + 𝐸 + + + 𝑚 + + superscript + 𝑐 + 2 + + + + + {\displaystyle{\displaystyle E=mc^{2}}} + + + + + + + + + E + = + + m + + + c + 2 + + + + + + + 𝐸 + + + 𝑚 + + superscript + 𝑐 + 2 + + + + + {\displaystyle{\displaystyle E=mc^{2}}} + + + + + \ No newline at end of file diff --git a/src/main/java/org/citeplag/controller/MathController.java b/src/main/java/org/citeplag/controller/MathController.java index a66901c..7b76a41 100755 --- a/src/main/java/org/citeplag/controller/MathController.java +++ b/src/main/java/org/citeplag/controller/MathController.java @@ -15,7 +15,6 @@ import gov.nist.drmf.interpreter.common.pojo.SemanticEnhancedAnnotationStatus; import gov.nist.drmf.interpreter.generic.GenericLatexSemanticEnhancer; import gov.nist.drmf.interpreter.generic.SemanticEnhancedDocumentBuilder; -import gov.nist.drmf.interpreter.generic.mediawiki.DefiningFormula; import gov.nist.drmf.interpreter.generic.mlp.pojo.MOIPresentations; import gov.nist.drmf.interpreter.generic.mlp.pojo.SemanticEnhancedDocument; import gov.nist.drmf.interpreter.pom.extensions.PrintablePomTaggedExpression; @@ -213,7 +212,7 @@ public TranslationResponse translation( SemanticLatexTranslator translator = cas.getTranslator(); try { - TranslationFeature feature = experimental ? 
new GenericReplacementTool() : null; + TranslationFeature feature = experimental ? null : null; TranslationInformation translationInf = translator.translateToObject(latex, label, feature); TranslationResponse tr = new TranslationResponse(); tr.setResult(translationInf.getTranslatedExpression()); @@ -273,26 +272,6 @@ public SemanticEnhancedDocument wikidataLoader( return builder.getDocumentFromWikidataItem(qid); } - @PostMapping( - value = "/suggestWikidataItemDefiningFormula", - produces = {MediaType.APPLICATION_JSON_VALUE} - ) - @ApiOperation( - value = "Suggests defining formula and its elements for a given Wikidata QID" - ) - @ApiResponses( - value = { - @ApiResponse(code = 200, message = "Scored Suggestions", response = DefiningFormula[].class), - @ApiResponse(code = 500, message = "Unable generate suggestions for given Wikidata QID") - } - ) - public List wikidataSuggestor( - @RequestParam() String qid - ) throws MediaWikiApiErrorException, IOException { - SemanticEnhancedDocumentBuilder builder = SemanticEnhancedDocumentBuilder.getDefaultBuilder(); - return builder.enhanceWikidataItem(qid); - } - @PostMapping("/appendTranslationsToDocument") @ApiOperation( value = "Adds translations to semantic LaTeX and CAS to each formulae in the provided document"