Commit cc28095f authored by Yann Mombrun

Fix #10 and #9. Do not remove all native content

Let the tmpDelete flag be configurable.
Move some parts of the code to better remove temporary files.
Do not remove content that should not be removed (only remove the normalised PDF).
parent 2e577282
Pipeline #1920 passed with stages
in 3 minutes and 12 seconds
......@@ -156,7 +156,7 @@ public class PopplerPdfToHtml {
final ExecuteWatchdog watchdog = new ExecuteWatchdog(120 * 1000);
final Executor executor = new DefaultExecutor();
final String result;
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
try (final ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
executor.setStreamHandler(new PumpStreamHandler(baos));
if (!outputDir.exists() && !outputDir.mkdirs()) {
......
......@@ -30,6 +30,7 @@ import java.util.List;
import java.util.Map.Entry;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.LogFactory;
import org.junit.Assert;
import org.junit.Assume;
import org.junit.Before;
......@@ -91,6 +92,7 @@ public final class PopplerNormaliserTest {
* Exception tests
*
* @throws Exception
* If anything wrong occurs... And it should!
*/
@Test(expected = InvalidParameterException.class)
public void testNullProcessArgs() throws Exception {
......@@ -102,6 +104,7 @@ public final class PopplerNormaliserTest {
* Exception tests
*
* @throws Exception
* If anything wrong occurs... And it should!
*/
@Test(expected = InvalidParameterException.class)
public void testNullResourceProcessArgs() throws Exception {
......@@ -109,6 +112,10 @@ public final class PopplerNormaliserTest {
}
/**
* @throws Exception
* If anything wrong occurs... And it should!
*/
@Test(expected = ContentNotAvailableException.class)
public void testNoContent() throws Exception {
final Document doc = new WebLabMarshaller().unmarshal(new File("src/test/resources/corpus/resourceTest.xml"), Document.class);
......@@ -118,7 +125,10 @@ public final class PopplerNormaliserTest {
}
/**
* @throws Exception
* If anything wrong occurs... And it should!
*/
@Test(expected = ContentNotAvailableException.class)
public void testBadContentMime() throws Exception {
final Document doc = new WebLabMarshaller().unmarshal(new File("src/test/resources/corpus/resourceTest.xml"), Document.class);
......@@ -133,6 +143,10 @@ public final class PopplerNormaliserTest {
}
/**
* @throws Exception
* If anything wrong occurs...
*/
@Test
public void testWithContent() throws Exception {
try {
......@@ -153,38 +167,60 @@ public final class PopplerNormaliserTest {
InsufficientResourcesException, InvalidParameterException, ServiceNotConfiguredException, UnexpectedException, UnsupportedRequestException {
final Document doc = new WebLabMarshaller().unmarshal(new File("src/test/resources/corpus/resourceTest.xml"), Document.class);
final URI uri;
try (FileInputStream input = new FileInputStream(inputFile)) {
uri = ContentManager.getInstance().create(input);
final URI validContentKept; // PDF file to be used and not removed
try (final FileInputStream input = new FileInputStream(inputFile)) {
validContentKept = ContentManager.getInstance().create(input);
}
final URI invalidContentKept; // Non-PDF file to let poppler fail if used; should still exist after the test
try (final FileInputStream input = new FileInputStream(new File("pom.xml"))) {
invalidContentKept = ContentManager.getInstance().create(input);
}
final URI validContentRemoved; // PDF file to be used and removed
try (final FileInputStream input = new FileInputStream(inputFile)) {
validContentRemoved = ContentManager.getInstance().create(input);
}
final WProcessingAnnotator wpa = new WProcessingAnnotator(doc);
final DublinCoreAnnotator dca = new DublinCoreAnnotator(doc);
switch (formatOption) {
case 1: {
new WProcessingAnnotator(doc).writeNormalisedContent(uri);
final DublinCoreAnnotator dca = new DublinCoreAnnotator(doc);
dca.writeFormat("application/pdf");
dca.startInnerAnnotatorOn(uri);
wpa.writeNativeContent(invalidContentKept);
wpa.writeNormalisedContent(validContentRemoved);
dca.startInnerAnnotatorOn(validContentRemoved);
dca.writeFormat("application/pdf");
wpa.writeNormalisedContent(invalidContentKept);
dca.startInnerAnnotatorOn(invalidContentKept);
dca.writeFormat("application/xml");
LogFactory.getLog(this.getClass()).debug(validContentRemoved + " should be used and removed.");
}
break;
case 2: {
new WProcessingAnnotator(doc).writeNativeContent(uri);
final DublinCoreAnnotator dca = new DublinCoreAnnotator(doc);
wpa.writeNativeContent(validContentKept);
dca.writeFormat("application/pdf");
dca.startInnerAnnotatorOn(uri);
dca.startInnerAnnotatorOn(validContentKept);
dca.writeFormat("application/pdf");
wpa.writeNormalisedContent(invalidContentKept);
ContentManager.getInstance().delete(validContentRemoved);
LogFactory.getLog(this.getClass()).debug(validContentKept + " should be used and kept.");
}
break;
case 3: {
new WProcessingAnnotator(doc).writeNormalisedContent(uri);
final DublinCoreAnnotator dca = new DublinCoreAnnotator(doc);
dca.startInnerAnnotatorOn(uri);
wpa.writeNativeContent(invalidContentKept);
wpa.writeNormalisedContent(validContentRemoved);
wpa.writeNormalisedContent(validContentKept);
dca.startInnerAnnotatorOn(validContentKept);
dca.writeFormat("application/xml");
LogFactory.getLog(this.getClass()).debug(validContentRemoved + " should be used and removed.");
}
break;
case 4: {
new WProcessingAnnotator(doc).writeNativeContent(uri);
final DublinCoreAnnotator dca = new DublinCoreAnnotator(doc);
dca.startInnerAnnotatorOn(uri);
wpa.writeNativeContent(validContentKept);
wpa.writeNormalisedContent(invalidContentKept);
dca.startInnerAnnotatorOn(invalidContentKept);
dca.writeFormat("application/xml");
ContentManager.getInstance().delete(validContentRemoved);
LogFactory.getLog(this.getClass()).debug(validContentKept + " should be used and kept.");
}
break;
default:
......@@ -208,6 +244,10 @@ public final class PopplerNormaliserTest {
Assert.assertTrue(entry.getKey().getStart() >= 0);
Assert.assertTrue(entry.getKey().getEnd() < ((Text) entry.getValue()).getContent().length());
}
Assert.assertNotNull(ContentManager.getInstance().readLocalExistingFile(validContentKept, doc, null));
Assert.assertNotNull(ContentManager.getInstance().readLocalExistingFile(invalidContentKept, doc, null));
Assert.assertNull(ContentManager.getInstance().readLocalExistingFile(validContentRemoved, doc, null));
}
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment