From c64a28bd78597eedda103cf1dca6d938d42618a0 Mon Sep 17 00:00:00 2001 From: Ostrzyciel Date: Sat, 31 Jan 2026 21:01:44 +0100 Subject: [PATCH 1/3] It works, let me check test coverage --- .../jelly/cli/command/rdf/RdfFromJelly.scala | 62 ++++++--- .../cli/command/rdf/RdfSerDesCommand.scala | 25 ++-- .../jelly/cli/command/rdf/RdfToJelly.scala | 16 ++- .../cli/command/rdf/util/RdfFormat.scala | 12 ++ .../cli/command/rdf/RdfFromJellySpec.scala | 125 +++++++++++++----- 5 files changed, 165 insertions(+), 75 deletions(-) diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala index 9c43619..cc1fda3 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala @@ -60,6 +60,12 @@ case class RdfFromJellyOptions( "Ignored otherwise. Take care with input size, as this option will load everything into memory.", ) combine: Boolean = false, + @HelpMessage( + "Discard the named graph information, treating the input as triples in the default graph. " + + "This allows you to convert a Jelly file containing quads to Turtle/N-Triples in a lossy manner. " + + "This option has no impact on frame boundaries. To merge frames, use the --combine option.", + ) + mergeGraphs: Boolean = false, @Recurse rdfPerformanceOptions: RdfPerformanceOptions = RdfPerformanceOptions(), ) extends HasJellyCommandOptions @@ -72,8 +78,14 @@ object RdfFromJelly extends RdfSerDesCommand[RdfFromJellyOptions, RdfFormat.Writ lazy val printUtil: RdfCommandPrintUtil[RdfFormat.Writeable] = RdfFromJellyPrint - val defaultAction: (InputStream, OutputStream) => Unit = - (in, out) => jellyToLang(in, StreamRDFWriter.getWriterStream(out, RdfFormat.NQuads.jenaLang)) + val defaultAction: WriteAction = + (in, out, opt) => + jellyToLang( + in, + StreamRDFWriter.getWriterStream(out, RdfFormat.NQuads.jenaLang), + RdfFormat.NQuads, + opt, + ) private def takeFrames: IndexRange = IndexRange(getOptions.takeFrames, "--take-frames") @@ -84,34 +96,34 @@ object RdfFromJelly extends RdfSerDesCommand[RdfFromJellyOptions, RdfFormat.Writ takeFrames val (inputStream, outputStream) = this.getIoStreamsFromOptions(remainingArgs.remaining.headOption, options.outputFile) - parseFormatArgs(inputStream, outputStream, options.outputFormat, options.outputFile) + parseFormatArgs(inputStream, outputStream, options.outputFormat, options.outputFile, options) override def matchFormatToAction( format: RdfFormat.Writeable, - ): Option[(InputStream, OutputStream) => Unit] = + ): Option[WriteAction] = (format, getOptions.combine) match case (j: RdfFormat.Jena.StreamWriteable, _) => - Some((in, out) => jellyToLang(in, StreamRDFWriter.getWriterStream(out, j.jenaLang))) + Some((in, out, opt) => + jellyToLang(in, StreamRDFWriter.getWriterStream(out, j.jenaLang), j, opt), + ) case (j: RdfFormat.Jena.BatchWriteable, true) => - Some((in, out) => - StreamRdfCombiningBatchWriter(out, j.jenaLang).runAndOutput(x => jellyToLang(in, x)), + Some((in, out, opt) => + StreamRdfCombiningBatchWriter(out, j.jenaLang).runAndOutput(x => + jellyToLang(in, x, j, opt), + ), ) case (j: RdfFormat.Jena.BatchWriteable, false) => - Some((in, out) => jellyToLang(in, StreamRdfBatchWriter(out, j.jenaLang))) + Some((in, out, opt) => jellyToLang(in, StreamRdfBatchWriter(out, j.jenaLang), j, opt)) case (RdfFormat.JellyText, _) => Some(jellyBinaryToText) /** This method reads the Jelly file, rewrites it to specified format and writes it to some output * stream - * @param jenaLang - * Language that jelly should be converted to - * @param inputStream - * InputStream - * @param outputStream - * OutputStream */ private def jellyToLang( inputStream: InputStream, writer: StreamRDF, + format: RdfFormat, + options: RdfFromJellyOptions, ): Unit = // Whether the output is active at this moment var outputEnabled = false @@ -125,7 +137,16 @@ object RdfFromJelly extends RdfSerDesCommand[RdfFromJellyOptions, RdfFormat.Writ } override def handleQuad(subject: Node, predicate: Node, `object`: Node, graph: Node): Unit = { - if outputEnabled then writer.quad(Quad.create(graph, subject, predicate, `object`)) + if outputEnabled then + if format.supportsQuads then writer.quad(Quad.create(graph, subject, predicate, `object`)) + else if options.mergeGraphs || Quad.isDefaultGraph(graph) then + writer.triple(Triple.create(subject, predicate, `object`)) + else + throw new CriticalException( + f"Encountered a quad in the input ($subject $predicate ${`object`} $graph), " + + f"but the output format ($format) does not support quads. Either choose a different output format " + + "or use the --merge-graphs option to merge all named graphs into the default graph.", + ) } } @@ -153,13 +174,12 @@ object RdfFromJelly extends RdfSerDesCommand[RdfFromJellyOptions, RdfFormat.Writ /** This method reads the Jelly file, rewrites it to Jelly text and writes it to some output * stream - * @param inputStream - * InputStream - * @param outputStream - * OutputStream */ - private def jellyBinaryToText(inputStream: InputStream, outputStream: OutputStream): Unit = - + private def jellyBinaryToText( + inputStream: InputStream, + outputStream: OutputStream, + opt: RdfFromJellyOptions, + ): Unit = inline def writeFrameToOutput(f: RdfStreamFrame, frameIndex: Int): Unit = // we want to write a comment to the file before each frame val comment = f"# Frame $frameIndex\n" diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfSerDesCommand.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfSerDesCommand.scala index b16072d..274bbf0 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfSerDesCommand.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfSerDesCommand.scala @@ -17,37 +17,28 @@ abstract class RdfSerDesCommand[ F <: RdfFormat: Typeable, ] extends JellyCommand[T]: + type WriteAction = (InputStream, OutputStream, T) => Unit + override final def group = "rdf" /** What is the default action if no formats specified */ - val defaultAction: (InputStream, OutputStream) => Unit + val defaultAction: WriteAction /** The print util responsible for handling the specific formats etc the command requires */ lazy val printUtil: RdfCommandPrintUtil[F] /** The method responsible for matching the format to a given action */ - def matchFormatToAction(format: F): Option[(InputStream, OutputStream) => Unit] + def matchFormatToAction(format: F): Option[WriteAction] /** This method takes care of proper error handling and takes care of the parameter priorities in * matching the input to a given format conversion - * - * @param inputStream - * InputStream - * @param outputStream - * OutputStream - * @param format - * Option[String] - * @param fileName - * Option[String] - * @throws JellyDeserializationError - * @throws JenaRiotException - * @throws InvalidJellyFile */ final def parseFormatArgs( inputStream: InputStream, outputStream: OutputStream, format: Option[String], fileName: Option[String], + opt: T, ): Unit = try { val explicitFormat = if (format.isDefined) RdfFormat.find(format.get) else None @@ -55,14 +46,14 @@ abstract class RdfSerDesCommand[ if (fileName.isDefined) RdfFormat.inferFormat(fileName.get) else None (explicitFormat, implicitFormat) match { case (Some(f: F), _) => - matchFormatToAction(f).get(inputStream, outputStream) + matchFormatToAction(f).get(inputStream, outputStream, opt) // If format explicitly defined but does not match any available actions or formats, we throw an error case (_, _) if format.isDefined => throw InvalidFormatSpecified(format.get, printUtil.validFormatsString) case (_, Some(f: F)) => - matchFormatToAction(f).get(inputStream, outputStream) + matchFormatToAction(f).get(inputStream, outputStream, opt) // If format not explicitly defined but implicitly not understandable we default to this - case (_, _) => defaultAction(inputStream, outputStream) + case (_, _) => defaultAction(inputStream, outputStream, opt) } } catch case e: RiotException => diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala index 84fd30b..4183eb1 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala @@ -76,8 +76,8 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable lazy val printUtil: RdfCommandPrintUtil[RdfFormat.Readable] = RdfToJellyPrint - val defaultAction: (InputStream, OutputStream) => Unit = - langToJelly(RdfFormat.NQuads, _, _) + val defaultAction: WriteAction = + langToJelly(RdfFormat.NQuads, _, _, _) private def loadOptionsFromFile(filename: String): RdfStreamOptions = val inputStream = new FileInputStream(filename) @@ -108,13 +108,14 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable outputStream, options.inputFormat, remainingArgs.remaining.headOption, + options, ) if !isQuietMode then checkAndWarnTypeCombination() override def matchFormatToAction( format: RdfFormat.Readable, - ): Option[(InputStream, OutputStream) => Unit] = format match { - case f: RdfFormat.Jena.Readable => Some(langToJelly(f, _, _)) + ): Option[WriteAction] = format match { + case f: RdfFormat.Jena.Readable => Some(langToJelly(f, _, _, _)) case f: RdfFormat.JellyText.type => Some(jellyTextToJelly) } @@ -130,6 +131,7 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable format: RdfFormat.Jena, inputStream: InputStream, outputStream: OutputStream, + opt: RdfToJellyOptions, ): Unit = val jellyOpt = getOptions.jellySerializationOptions.asRdfStreamOptions // Configure the writer @@ -202,7 +204,11 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable * @param outputStream * Jelly binary output stream */ - private def jellyTextToJelly(inputStream: InputStream, outputStream: OutputStream): Unit = + private def jellyTextToJelly( + inputStream: InputStream, + outputStream: OutputStream, + opt: RdfToJellyOptions, + ): Unit = if !isQuietMode then printLine( "WARNING: The Jelly text format is not stable and may change in incompatible " + diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfFormat.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfFormat.scala index 3aae663..1403e26 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfFormat.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfFormat.scala @@ -7,6 +7,8 @@ sealed trait RdfFormat: val fullName: String val cliOptions: List[String] val supportsBaseIri: Boolean + val supportsQuads: Boolean + override final def toString: String = fullName object RdfFormat: @@ -31,6 +33,7 @@ object RdfFormat: override val cliOptions: List[String] = List("nq", "nquads") override val jenaLang: Lang = RDFLanguages.NQUADS override val supportsBaseIri: Boolean = false + override val supportsQuads: Boolean = true case object NTriples extends RdfFormat.Jena.StreamWriteable, @@ -40,18 +43,21 @@ object RdfFormat: override val cliOptions: List[String] = List("nt", "ntriples") override val jenaLang: Lang = RDFLanguages.NTRIPLES override val supportsBaseIri: Boolean = false + override val supportsQuads: Boolean = false case object Turtle extends RdfFormat.Jena.StreamWriteable, RdfFormat.Jena.Readable: override val fullName: String = "Turtle" override val cliOptions: List[String] = List("ttl", "turtle") override val jenaLang: Lang = RDFLanguages.TURTLE override val supportsBaseIri: Boolean = true + override val supportsQuads: Boolean = false case object TriG extends RdfFormat.Jena.StreamWriteable, RdfFormat.Jena.Readable: override val fullName: String = "TriG" override val cliOptions: List[String] = List("trig") override val jenaLang: Lang = RDFLanguages.TRIG override val supportsBaseIri: Boolean = true + override val supportsQuads: Boolean = true case object RdfProto extends RdfFormat.Jena.StreamWriteable, @@ -61,6 +67,7 @@ object RdfFormat: override val cliOptions: List[String] = List("jenaproto", "jena-proto") override val jenaLang: Lang = RDFLanguages.RDFPROTO override val supportsBaseIri: Boolean = false + override val supportsQuads: Boolean = true case object Thrift extends RdfFormat.Jena.StreamWriteable, @@ -70,18 +77,21 @@ object RdfFormat: override val cliOptions: List[String] = List("jenathrift", "jena-thrift") override val jenaLang: Lang = RDFLanguages.RDFTHRIFT override val supportsBaseIri: Boolean = false + override val supportsQuads: Boolean = true case object RdfXml extends RdfFormat.Jena.Readable, RdfFormat.Jena.BatchWriteable: override val fullName: String = "RDF/XML" override val cliOptions: List[String] = List("rdfxml", "rdf-xml") override val jenaLang: Lang = RDFLanguages.RDFXML override val supportsBaseIri: Boolean = true + override val supportsQuads: Boolean = false case object JsonLd extends RdfFormat.Jena.Readable, RdfFormat.Jena.BatchWriteable: override val fullName: String = "JSON-LD" override val cliOptions: List[String] = List("jsonld", "json-ld") override val jenaLang: Lang = RDFLanguages.JSONLD override val supportsBaseIri: Boolean = true + override val supportsQuads: Boolean = true // We do not ever want to write or read from Jelly to Jelly // So better not have it as Writeable or Readable, just mark that it's integrated into Jena @@ -90,6 +100,7 @@ object RdfFormat: override val cliOptions: List[String] = List("jelly") override val jenaLang: Lang = JellyLanguage.JELLY override val supportsBaseIri: Boolean = false + override val supportsQuads: Boolean = true case object JellyText extends RdfFormat, @@ -100,6 +111,7 @@ object RdfFormat: override val cliOptions: List[String] = List("jelly-text") val extension = ".jelly.txt" override val supportsBaseIri: Boolean = false + override val supportsQuads: Boolean = true private val rdfFormats: List[RdfFormat] = List(NQuads, NTriples, JellyBinary, JellyText, Turtle, TriG, RdfProto, Thrift, RdfXml, JsonLd) diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJellySpec.scala b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJellySpec.scala index f108687..22ad6fa 100644 --- a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJellySpec.scala +++ b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJellySpec.scala @@ -4,15 +4,16 @@ import com.google.protobuf.InvalidProtocolBufferException import eu.neverblink.jelly.cli.* import eu.neverblink.jelly.cli.command.helpers.* import eu.neverblink.jelly.cli.command.rdf.util.RdfFormat +import eu.neverblink.jelly.convert.jena.riot.JellyFormat import eu.neverblink.jelly.core.proto.v1.{PhysicalStreamType, RdfStreamFrame} import eu.neverblink.jelly.core.{JellyOptions, JellyTranscoderFactory} import org.apache.jena.query.DatasetFactory import org.apache.jena.rdf.model.ModelFactory -import org.apache.jena.riot.RDFDataMgr +import org.apache.jena.riot.{RDFDataMgr, RDFWriter} import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec -import java.io.{ByteArrayInputStream, ByteArrayOutputStream} +import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File, FileOutputStream} import java.nio.file.attribute.PosixFilePermissions import java.nio.file.{Files, Paths} import scala.io.Source @@ -223,6 +224,7 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with TestFixtureHelper: for (lang, header) <- Seq( (RdfFormat.JsonLd, "\\{\n {4}\"@graph\":".r), (RdfFormat.RdfXml, " + RDFWriter + .source(testDs) + .format(JellyFormat.JELLY_BIG_ALL_FEATURES) + .build() + .output(FileOutputStream(File(j))) + + val e = intercept[ExitException] { + RdfFromJelly.runTestCommand( + List( + "rdf", + "from-jelly", + j, + "--out-format", + format.cliOptions.head, + ), + ) + } + val cause = e.getCause.asInstanceOf[CriticalException] + cause.getMessage should include(format.toString) + cause.getMessage should include("Encountered a quad") + cause.getMessage should include("--merge-graphs") + } + } + + f"merge graphs when converting to $format with the --merge-graphs option" in { + withEmptyJellyFile { j => + RDFWriter + .source(testDs) + .format(JellyFormat.JELLY_BIG_ALL_FEATURES) + .build() + .output(FileOutputStream(File(j))) + + val (out, err) = RdfFromJelly.runTestCommand( + List( + "rdf", + "from-jelly", + j, + "--out-format", + format.cliOptions.head, + "--merge-graphs", + ), + ) + + val newModel = ModelFactory.createDefaultModel() + RDFDataMgr.read(newModel, new ByteArrayInputStream(out.getBytes()), format.jenaLang) + flattenedModel.isIsomorphicWith(newModel) shouldBe true + } + } + } } From 89cc175d0d748b5a8bc7dcb092997174db021481 Mon Sep 17 00:00:00 2001 From: Ostrzyciel Date: Sat, 31 Jan 2026 21:06:43 +0100 Subject: [PATCH 2/3] Doc fixes --- .../jelly/cli/command/rdf/RdfFromJelly.scala | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala index cc1fda3..f9352dc 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala @@ -31,11 +31,11 @@ object RdfFromJellyPrint extends RdfCommandPrintUtil[RdfFormat.Writeable]: "If no input file is specified, the input is read from stdin.\n" + "If no output file is specified, the output is written to stdout.\n" + "If an error is detected, the program will exit with a non-zero code.\n" + - "Otherwise, the program will exit with code 0.\n" + + "Otherwise, the program will exit with code 0.\n\n" + "Note: this command works in a streaming manner where possible and scales well to\n" + "large files. Non-streaming formats (e.g. RDF/XML) by default work on a\n" + "frame-by-frame basis, but they can be combined into one dataset with the\n" + - "--combine option. RDF/XML will only serialize the default model.", + "--combine option.", ) @ArgsName("") case class RdfFromJellyOptions( @@ -56,14 +56,16 @@ case class RdfFromJellyOptions( ) takeFrames: String = "", @HelpMessage( - "Add to combine the results into one dataset, when using a non-streaming output format. " + - "Ignored otherwise. Take care with input size, as this option will load everything into memory.", + "Add to combine all stream frames into one dataset, when using a non-streaming output format. " + + "Ignored otherwise. Take care with input size, as this option will load everything into memory. " + + "Default: false.", ) combine: Boolean = false, @HelpMessage( "Discard the named graph information, treating the input as triples in the default graph. " + "This allows you to convert a Jelly file containing quads to Turtle/N-Triples in a lossy manner. " + - "This option has no impact on frame boundaries. To merge frames, use the --combine option.", + "This option has no impact on frame boundaries. To merge frames, use the --combine option. " + + "Default: false.", ) mergeGraphs: Boolean = false, @Recurse From 562a2fa5f94df8837f57037bd74079b39e3af329 Mon Sep 17 00:00:00 2001 From: Ostrzyciel Date: Sat, 31 Jan 2026 21:51:27 +0100 Subject: [PATCH 3/3] fix behavior inconsistency --- .../eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala | 6 ++++-- .../jelly/cli/command/rdf/RdfFromJellySpec.scala | 7 +++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala index f9352dc..6487222 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala @@ -140,8 +140,10 @@ object RdfFromJelly extends RdfSerDesCommand[RdfFromJellyOptions, RdfFormat.Writ override def handleQuad(subject: Node, predicate: Node, `object`: Node, graph: Node): Unit = { if outputEnabled then - if format.supportsQuads then writer.quad(Quad.create(graph, subject, predicate, `object`)) - else if options.mergeGraphs || Quad.isDefaultGraph(graph) then + if options.mergeGraphs then writer.triple(Triple.create(subject, predicate, `object`)) + else if format.supportsQuads then + writer.quad(Quad.create(graph, subject, predicate, `object`)) + else if Quad.isDefaultGraph(graph) then writer.triple(Triple.create(subject, predicate, `object`)) else throw new CriticalException( diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJellySpec.scala b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJellySpec.scala index 22ad6fa..c5c7b99 100644 --- a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJellySpec.scala +++ b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJellySpec.scala @@ -440,6 +440,13 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with TestFixtureHelper: } } + val allFormats = tripleFormats ++ Seq( + RdfFormat.TriG, + RdfFormat.JsonLd, + RdfFormat.NQuads, + ) + // --merge-graphs should also work for formats supporting quads + for format <- allFormats do f"merge graphs when converting to $format with the --merge-graphs option" in { withEmptyJellyFile { j => RDFWriter