Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@ object RdfFromJellyPrint extends RdfCommandPrintUtil[RdfFormat.Writeable]:
"If no input file is specified, the input is read from stdin.\n" +
"If no output file is specified, the output is written to stdout.\n" +
"If an error is detected, the program will exit with a non-zero code.\n" +
"Otherwise, the program will exit with code 0.\n" +
"Otherwise, the program will exit with code 0.\n\n" +
"Note: this command works in a streaming manner where possible and scales well to\n" +
"large files. Non-streaming formats (e.g. RDF/XML) by default work on a\n" +
"frame-by-frame basis, but they can be combined into one dataset with the\n" +
"--combine option. RDF/XML will only serialize the default model.",
"--combine option.",
)
@ArgsName("<file-to-convert>")
case class RdfFromJellyOptions(
Expand All @@ -56,10 +56,18 @@ case class RdfFromJellyOptions(
)
takeFrames: String = "",
@HelpMessage(
"Add to combine the results into one dataset, when using a non-streaming output format. " +
"Ignored otherwise. Take care with input size, as this option will load everything into memory.",
"Add to combine all stream frames into one dataset, when using a non-streaming output format. " +
"Ignored otherwise. Take care with input size, as this option will load everything into memory. " +
"Default: false.",
)
combine: Boolean = false,
@HelpMessage(
"Discard the named graph information, treating the input as triples in the default graph. " +
"This allows you to convert a Jelly file containing quads to Turtle/N-Triples in a lossy manner. " +
"This option has no impact on frame boundaries. To merge frames, use the --combine option. " +
"Default: false.",
)
mergeGraphs: Boolean = false,
@Recurse
rdfPerformanceOptions: RdfPerformanceOptions = RdfPerformanceOptions(),
) extends HasJellyCommandOptions
Expand All @@ -72,8 +80,14 @@ object RdfFromJelly extends RdfSerDesCommand[RdfFromJellyOptions, RdfFormat.Writ

lazy val printUtil: RdfCommandPrintUtil[RdfFormat.Writeable] = RdfFromJellyPrint

/** Fallback conversion used when neither an explicit nor an inferred output format applies.
  *
  * Writes the Jelly input as N-Quads. The command options are forwarded to `jellyToLang`
  * together with the N-Quads format descriptor. Only one definition is kept: the
  * `WriteAction`-typed one that matches the four-parameter `jellyToLang`.
  */
val defaultAction: WriteAction =
  (in, out, opt) =>
    jellyToLang(
      in,
      StreamRDFWriter.getWriterStream(out, RdfFormat.NQuads.jenaLang),
      RdfFormat.NQuads,
      opt,
    )

private def takeFrames: IndexRange = IndexRange(getOptions.takeFrames, "--take-frames")

Expand All @@ -84,34 +98,34 @@ object RdfFromJelly extends RdfSerDesCommand[RdfFromJellyOptions, RdfFormat.Writ
takeFrames
val (inputStream, outputStream) =
this.getIoStreamsFromOptions(remainingArgs.remaining.headOption, options.outputFile)
parseFormatArgs(inputStream, outputStream, options.outputFormat, options.outputFile)
parseFormatArgs(inputStream, outputStream, options.outputFormat, options.outputFile, options)

/** Selects the conversion routine for a writeable output format.
  *
  * The choice depends on the format and on the `combine` option read from `getOptions`:
  *   - stream-writeable Jena formats are written through a Jena stream writer;
  *   - batch-writeable Jena formats go through `StreamRdfCombiningBatchWriter` when `combine`
  *     is set, and through `StreamRdfBatchWriter` otherwise;
  *   - Jelly text is handled by `jellyBinaryToText`.
  *
  * Every action receives the command options as its third argument and passes them, along with
  * the format descriptor, to `jellyToLang`.
  *
  * @param format
  *   the output format to convert to
  * @return
  *   the action wrapped in `Some` for each case listed above
  */
override def matchFormatToAction(
  format: RdfFormat.Writeable,
): Option[WriteAction] =
  (format, getOptions.combine) match
    case (j: RdfFormat.Jena.StreamWriteable, _) =>
      Some((in, out, opt) =>
        jellyToLang(in, StreamRDFWriter.getWriterStream(out, j.jenaLang), j, opt),
      )
    case (j: RdfFormat.Jena.BatchWriteable, true) =>
      Some((in, out, opt) =>
        StreamRdfCombiningBatchWriter(out, j.jenaLang).runAndOutput(x =>
          jellyToLang(in, x, j, opt),
        ),
      )
    case (j: RdfFormat.Jena.BatchWriteable, false) =>
      Some((in, out, opt) => jellyToLang(in, StreamRdfBatchWriter(out, j.jenaLang), j, opt))
    case (RdfFormat.JellyText, _) => Some(jellyBinaryToText)

/** This method reads the Jelly file, rewrites it to specified format and writes it to some output
* stream
* @param jenaLang
* Language that jelly should be converted to
* @param inputStream
* InputStream
* @param outputStream
* OutputStream
*/
private def jellyToLang(
inputStream: InputStream,
writer: StreamRDF,
format: RdfFormat,
options: RdfFromJellyOptions,
): Unit =
// Whether the output is active at this moment
var outputEnabled = false
Expand All @@ -125,7 +139,18 @@ object RdfFromJelly extends RdfSerDesCommand[RdfFromJellyOptions, RdfFormat.Writ
}

/** Forwards one decoded quad to the writer according to what the output format can express.
  *
  * Nothing is written while `outputEnabled` is false. Otherwise exactly one branch applies:
  *   - `options.mergeGraphs` is set: the graph node is dropped and a triple is written;
  *   - `format.supportsQuads`: the quad is written unchanged;
  *   - the graph is the default graph: the statement is written as a triple;
  *   - anything else cannot be represented, so the conversion aborts.
  *
  * @throws CriticalException
  *   if a named-graph quad is encountered, the format does not support quads and
  *   `mergeGraphs` is not set
  */
override def handleQuad(subject: Node, predicate: Node, `object`: Node, graph: Node): Unit = {
  if outputEnabled then
    if options.mergeGraphs then writer.triple(Triple.create(subject, predicate, `object`))
    else if format.supportsQuads then
      writer.quad(Quad.create(graph, subject, predicate, `object`))
    else if Quad.isDefaultGraph(graph) then
      writer.triple(Triple.create(subject, predicate, `object`))
    else
      // No format specifiers are used, so the plain `s` interpolator is sufficient here.
      throw new CriticalException(
        s"Encountered a quad in the input ($subject $predicate ${`object`} $graph), " +
          s"but the output format ($format) does not support quads. Either choose a different output format " +
          "or use the --merge-graphs option to merge all named graphs into the default graph.",
      )
}
}

Expand Down Expand Up @@ -153,13 +178,12 @@ object RdfFromJelly extends RdfSerDesCommand[RdfFromJellyOptions, RdfFormat.Writ

/** This method reads the Jelly file, rewrites it to Jelly text and writes it to some output
* stream
* @param inputStream
* InputStream
* @param outputStream
* OutputStream
*/
private def jellyBinaryToText(inputStream: InputStream, outputStream: OutputStream): Unit =

private def jellyBinaryToText(
inputStream: InputStream,
outputStream: OutputStream,
opt: RdfFromJellyOptions,
): Unit =
inline def writeFrameToOutput(f: RdfStreamFrame, frameIndex: Int): Unit =
// we want to write a comment to the file before each frame
val comment = f"# Frame $frameIndex\n"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,52 +17,43 @@ abstract class RdfSerDesCommand[
F <: RdfFormat: Typeable,
] extends JellyCommand[T]:

type WriteAction = (InputStream, OutputStream, T) => Unit

override final def group = "rdf"

/** The action to run when no format is specified or inferred.
  *
  * It receives the input stream, the output stream and the command options.
  */
val defaultAction: WriteAction

/** The print util responsible for handling the specific formats etc the command requires */
lazy val printUtil: RdfCommandPrintUtil[F]

/** Matches a format to the action that performs the conversion for it.
  *
  * @param format
  *   the format to look up
  * @return
  *   the action for this format, if any
  */
def matchFormatToAction(format: F): Option[WriteAction]

/** This method takes care of proper error handling and takes care of the parameter priorities in
* matching the input to a given format conversion
*
* @param inputStream
* InputStream
* @param outputStream
* OutputStream
* @param format
* Option[String]
* @param fileName
* Option[String]
* @throws JellyDeserializationError
* @throws JenaRiotException
* @throws InvalidJellyFile
*/
final def parseFormatArgs(
inputStream: InputStream,
outputStream: OutputStream,
format: Option[String],
fileName: Option[String],
opt: T,
): Unit =
try {
val explicitFormat = if (format.isDefined) RdfFormat.find(format.get) else None
val implicitFormat =
if (fileName.isDefined) RdfFormat.inferFormat(fileName.get) else None
(explicitFormat, implicitFormat) match {
case (Some(f: F), _) =>
matchFormatToAction(f).get(inputStream, outputStream)
matchFormatToAction(f).get(inputStream, outputStream, opt)
// If format explicitly defined but does not match any available actions or formats, we throw an error
case (_, _) if format.isDefined =>
throw InvalidFormatSpecified(format.get, printUtil.validFormatsString)
case (_, Some(f: F)) =>
matchFormatToAction(f).get(inputStream, outputStream)
matchFormatToAction(f).get(inputStream, outputStream, opt)
// If format not explicitly defined but implicitly not understandable we default to this
case (_, _) => defaultAction(inputStream, outputStream)
case (_, _) => defaultAction(inputStream, outputStream, opt)
}
} catch
case e: RiotException =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable

lazy val printUtil: RdfCommandPrintUtil[RdfFormat.Readable] = RdfToJellyPrint

/** Fallback conversion: treats the input as N-Quads.
  *
  * `langToJelly` takes the format, both streams and the command options, so three placeholders
  * are needed for the result to be a `WriteAction`.
  */
val defaultAction: WriteAction =
  langToJelly(RdfFormat.NQuads, _, _, _)

private def loadOptionsFromFile(filename: String): RdfStreamOptions =
val inputStream = new FileInputStream(filename)
Expand Down Expand Up @@ -108,13 +108,14 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable
outputStream,
options.inputFormat,
remainingArgs.remaining.headOption,
options,
)
if !isQuietMode then checkAndWarnTypeCombination()

/** Selects the conversion routine for a readable input format.
  *
  * Jena-readable formats are converted by `langToJelly` with the format fixed; Jelly text is
  * converted by `jellyTextToJelly`. Both actions receive the command options as their last
  * argument.
  *
  * @param format
  *   the input format to convert from
  * @return
  *   the matching action wrapped in `Some`
  */
override def matchFormatToAction(
  format: RdfFormat.Readable,
): Option[WriteAction] = format match {
  case f: RdfFormat.Jena.Readable => Some(langToJelly(f, _, _, _))
  case f: RdfFormat.JellyText.type => Some(jellyTextToJelly)
}

Expand All @@ -130,6 +131,7 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable
format: RdfFormat.Jena,
inputStream: InputStream,
outputStream: OutputStream,
opt: RdfToJellyOptions,
): Unit =
val jellyOpt = getOptions.jellySerializationOptions.asRdfStreamOptions
// Configure the writer
Expand Down Expand Up @@ -202,7 +204,11 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable
* @param outputStream
* Jelly binary output stream
*/
private def jellyTextToJelly(inputStream: InputStream, outputStream: OutputStream): Unit =
private def jellyTextToJelly(
inputStream: InputStream,
outputStream: OutputStream,
opt: RdfToJellyOptions,
): Unit =
if !isQuietMode then
printLine(
"WARNING: The Jelly text format is not stable and may change in incompatible " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ sealed trait RdfFormat:
val fullName: String
val cliOptions: List[String]
val supportsBaseIri: Boolean
val supportsQuads: Boolean
override final def toString: String = fullName

object RdfFormat:

Expand All @@ -31,6 +33,7 @@ object RdfFormat:
override val cliOptions: List[String] = List("nq", "nquads")
override val jenaLang: Lang = RDFLanguages.NQUADS
override val supportsBaseIri: Boolean = false
override val supportsQuads: Boolean = true

case object NTriples
extends RdfFormat.Jena.StreamWriteable,
Expand All @@ -40,18 +43,21 @@ object RdfFormat:
override val cliOptions: List[String] = List("nt", "ntriples")
override val jenaLang: Lang = RDFLanguages.NTRIPLES
override val supportsBaseIri: Boolean = false
override val supportsQuads: Boolean = false

/** Turtle: stream-writeable and readable through Jena. Accepts a base IRI, but is flagged as
  * unable to carry quads.
  */
case object Turtle extends RdfFormat.Jena.StreamWriteable, RdfFormat.Jena.Readable:
  // Identification
  override val fullName: String = "Turtle"
  override val cliOptions: List[String] = List("ttl", "turtle")

  // Capabilities
  override val supportsQuads: Boolean = false
  override val supportsBaseIri: Boolean = true

  // Jena binding
  override val jenaLang: Lang = RDFLanguages.TURTLE

/** TriG: stream-writeable and readable through Jena. Accepts a base IRI and is flagged as able
  * to carry quads.
  */
case object TriG extends RdfFormat.Jena.StreamWriteable, RdfFormat.Jena.Readable:
  // Identification
  override val fullName: String = "TriG"
  override val cliOptions: List[String] = List("trig")

  // Capabilities
  override val supportsQuads: Boolean = true
  override val supportsBaseIri: Boolean = true

  // Jena binding
  override val jenaLang: Lang = RDFLanguages.TRIG

case object RdfProto
extends RdfFormat.Jena.StreamWriteable,
Expand All @@ -61,6 +67,7 @@ object RdfFormat:
override val cliOptions: List[String] = List("jenaproto", "jena-proto")
override val jenaLang: Lang = RDFLanguages.RDFPROTO
override val supportsBaseIri: Boolean = false
override val supportsQuads: Boolean = true

case object Thrift
extends RdfFormat.Jena.StreamWriteable,
Expand All @@ -70,18 +77,21 @@ object RdfFormat:
override val cliOptions: List[String] = List("jenathrift", "jena-thrift")
override val jenaLang: Lang = RDFLanguages.RDFTHRIFT
override val supportsBaseIri: Boolean = false
override val supportsQuads: Boolean = true

/** RDF/XML: readable through Jena and written in batch (non-streaming) mode. Accepts a base
  * IRI, but is flagged as unable to carry quads.
  */
case object RdfXml extends RdfFormat.Jena.Readable, RdfFormat.Jena.BatchWriteable:
  // Identification
  override val fullName: String = "RDF/XML"
  override val cliOptions: List[String] = List("rdfxml", "rdf-xml")

  // Capabilities
  override val supportsQuads: Boolean = false
  override val supportsBaseIri: Boolean = true

  // Jena binding
  override val jenaLang: Lang = RDFLanguages.RDFXML

/** JSON-LD: readable through Jena and written in batch (non-streaming) mode. Accepts a base IRI
  * and is flagged as able to carry quads.
  */
case object JsonLd extends RdfFormat.Jena.Readable, RdfFormat.Jena.BatchWriteable:
  // Identification
  override val fullName: String = "JSON-LD"
  override val cliOptions: List[String] = List("jsonld", "json-ld")

  // Capabilities
  override val supportsQuads: Boolean = true
  override val supportsBaseIri: Boolean = true

  // Jena binding
  override val jenaLang: Lang = RDFLanguages.JSONLD

// We do not ever want to write or read from Jelly to Jelly
// So better not have it as Writeable or Readable, just mark that it's integrated into Jena
Expand All @@ -90,6 +100,7 @@ object RdfFormat:
override val cliOptions: List[String] = List("jelly")
override val jenaLang: Lang = JellyLanguage.JELLY
override val supportsBaseIri: Boolean = false
override val supportsQuads: Boolean = true

case object JellyText
extends RdfFormat,
Expand All @@ -100,6 +111,7 @@ object RdfFormat:
override val cliOptions: List[String] = List("jelly-text")
val extension = ".jelly.txt"
override val supportsBaseIri: Boolean = false
override val supportsQuads: Boolean = true

/** Every known format, in the fixed order given here. */
private val rdfFormats: List[RdfFormat] =
  List(
    NQuads,
    NTriples,
    JellyBinary,
    JellyText,
    Turtle,
    TriG,
    RdfProto,
    Thrift,
    RdfXml,
    JsonLd,
  )
Expand Down
Loading