Skip to content

Commit

Permalink
#20 Extract entries async
Browse files Browse the repository at this point in the history
  • Loading branch information
oleg-cherednik committed Dec 29, 2024
1 parent 41d6959 commit 8853980
Show file tree
Hide file tree
Showing 9 changed files with 263 additions and 77 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package ru.olegcherednik.zip4jvm.engine.unzip;

import ru.olegcherednik.zip4jvm.io.in.DataInput;

import lombok.RequiredArgsConstructor;
import org.apache.commons.io.IOUtils;

import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.function.Supplier;

/**
* @param <T> {@link DataInput} definition
* @author Oleg Cherednik
* @since 28.12.2024
*/
@RequiredArgsConstructor
public class DataInputThreadLocal<T extends DataInput> extends ThreadLocal<T> {

    /** Factory used to create a data input for a thread that has none yet. */
    private final Supplier<T> dataInputSup;
    /** Every data input created through {@link #get()}; kept so {@link #release()} can close them all. */
    private final List<T> dataInputs = new CopyOnWriteArrayList<>();

    /**
     * Quietly closes every data input created by this instance so far and forgets them.
     */
    public void release() {
        for (T dataInput : dataInputs)
            IOUtils.closeQuietly(dataInput);

        dataInputs.clear();
    }

    // ---------- ThreadLocal ----------

    /**
     * Returns the data input bound to the calling thread. When the thread has none, a new one is
     * taken from the supplier, bound to the thread and remembered for {@link #release()}.
     *
     * @return data input of the calling thread
     */
    @Override
    public T get() {
        T existing = super.get();

        if (existing != null)
            return existing;

        T created = dataInputSup.get();
        set(created);
        dataInputs.add(created);
        return created;
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
import ru.olegcherednik.zip4jvm.model.password.PasswordProvider;
import ru.olegcherednik.zip4jvm.model.settings.UnzipSettings;
import ru.olegcherednik.zip4jvm.model.src.SrcZip;
import ru.olegcherednik.zip4jvm.utils.quitely.Quietly;

import java.io.IOException;
import java.nio.file.Path;
import java.util.Collection;
import java.util.Collections;
Expand All @@ -47,7 +47,19 @@ public final class UnzipEngine implements ZipFile.Reader {
/**
 * Reads the zip model of {@code srcZip} and picks the extract engine that matches {@code settings}.
 *
 * @param srcZip   zip archive to read
 * @param settings unzip settings; provide the password provider, the charset customizer and the
 *                 async configuration used to choose the extract engine
 */
public UnzipEngine(SrcZip srcZip, UnzipSettings settings) {
    PasswordProvider passwordProvider = settings.getPasswordProvider();
    zipModel = ZipModelBuilder.read(srcZip, settings.getCharsetCustomizer(), passwordProvider);
    // the factory decides between the sequential and the async engine; no debug output to stdout here
    unzipExtractEngine = createUnzipExtractEngine(settings, zipModel);
}

/**
 * Creates the extract engine for the given settings: the sequential {@link UnzipExtractEngine}
 * when async threads are switched off ({@link UnzipSettings#ASYNC_THREADS_OFF}), otherwise an
 * {@link UnzipExtractAsyncEngine} configured with the requested amount of threads.
 *
 * @param settings unzip settings
 * @param zipModel zip model the engine works on
 * @return extract engine instance
 */
private static UnzipExtractEngine createUnzipExtractEngine(UnzipSettings settings, ZipModel zipModel) {
    PasswordProvider passwordProvider = settings.getPasswordProvider();
    boolean async = settings.getAsyncThreads() != UnzipSettings.ASYNC_THREADS_OFF;

    if (async)
        return new UnzipExtractAsyncEngine(passwordProvider, zipModel, settings.getAsyncThreads());

    return new UnzipExtractEngine(passwordProvider, zipModel);
}

// ---------- ZipFile.Reader ----------
Expand Down Expand Up @@ -105,9 +117,9 @@ public ZipFile.Entry next() {
};
}

public static RandomAccessDataInput createRandomAccessDataInput(SrcZip srcZip) throws IOException {
return srcZip.isSolid() ? new SolidRandomAccessDataInput(srcZip)
: new SplitRandomAccessDataInput(srcZip);
/**
 * Creates a random access data input for {@code srcZip}: a solid one when the archive is solid,
 * a split one otherwise. The creation runs inside {@code Quietly.doRuntime}, so this method
 * declares no checked exception.
 *
 * @param srcZip zip archive
 * @return random access data input for the archive
 */
public static RandomAccessDataInput createRandomAccessDataInput(SrcZip srcZip) {
    return Quietly.doRuntime(() -> {
        if (srcZip.isSolid())
            return new SolidRandomAccessDataInput(srcZip);

        return new SplitRandomAccessDataInput(srcZip);
    });
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package ru.olegcherednik.zip4jvm.engine.unzip;

import ru.olegcherednik.zip4jvm.io.in.file.consecutive.ConsecutiveAccessDataInput;
import ru.olegcherednik.zip4jvm.model.ZipModel;
import ru.olegcherednik.zip4jvm.model.entry.ZipEntry;
import ru.olegcherednik.zip4jvm.model.password.PasswordProvider;
import ru.olegcherednik.zip4jvm.utils.quitely.Quietly;
import ru.olegcherednik.zip4jvm.utils.quitely.functions.RunnableWithException;

import java.nio.file.Path;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.ForkJoinWorkerThread;
import java.util.concurrent.atomic.AtomicInteger;

/**
* @author Oleg Cherednik
* @since 28.12.2024
*/
/**
 * Extract engine that unpacks the selected entries concurrently. Every entry becomes a task on a
 * dedicated {@link ForkJoinPool}; each worker thread reads the archive through its own
 * {@link ConsecutiveAccessDataInput} obtained from a {@link DataInputThreadLocal}.
 */
public class UnzipExtractAsyncEngine extends UnzipExtractEngine {

    /** Requested parallelism; a value {@code <= 0} means "use the number of available processors". */
    protected final int totalThreads;

    public UnzipExtractAsyncEngine(PasswordProvider passwordProvider, ZipModel zipModel, int totalThreads) {
        super(passwordProvider, zipModel);
        this.totalThreads = totalThreads;
    }

    // ---------- UnzipExtractEngine ----------

    /**
     * Submits one extraction task per selected entry and blocks until all of them are finished.
     * When {@code map} is {@code null} every entry is extracted under its own name; otherwise only
     * entries whose file name is a key of {@code map} are extracted, using the mapped value as the
     * name resolved against {@code dstDir}.
     * <p>
     * The per-thread data inputs are closed only after every submitted task has completed, also
     * when one of the tasks (or the submission loop itself) fails; the first failure in submission
     * order is then rethrown by {@link CompletableFuture#join()}.
     *
     * @param dstDir destination directory
     * @param map    entry file name to destination file name mapping; may be {@code null}
     */
    @Override
    protected void extractEntry(Path dstDir, Map<String, String> map) {
        List<CompletableFuture<Void>> tasks = new LinkedList<>();
        Iterator<ZipEntry> it = zipModel.absOffsAscIterator();

        DataInputThreadLocal<ConsecutiveAccessDataInput> threadLocalDataInput =
                new DataInputThreadLocal<>(this::createConsecutiveDataInput);
        ExecutorService executor = createExecutor();

        try {
            while (it.hasNext()) {
                ZipEntry zipEntry = it.next();

                if (map != null && !map.containsKey(zipEntry.getFileName()))
                    continue;

                String fileName = Optional.ofNullable(map)
                                          .map(m -> m.get(zipEntry.getFileName()))
                                          .orElse(zipEntry.getFileName());
                Path file = dstDir.resolve(fileName);

                // NOTE(review): a worker re-uses one input that is only moved with seekForward();
                // this presumably relies on a worker never getting an entry located before one it
                // has already processed - confirm against the pool's scheduling order
                CompletableFuture<Void> task = createCompletableFuture(
                        () -> extractEntry(file, zipEntry, threadLocalDataInput.get()), executor);

                tasks.add(task);
            }
        } finally {
            // workers must not be reading any more when their inputs get closed, and no worker
            // may create a fresh input after release(); so wait for all tasks first
            awaitCompletion(tasks);
            executor.shutdown();
            threadLocalDataInput.release();
        }

        // all tasks are done at this point; join() only propagates a failure, if there was one
        tasks.forEach(CompletableFuture::join);
    }

    /**
     * Creates the pool the extraction tasks run on. Parallelism is {@link #totalThreads}, or the
     * number of available processors when {@link #totalThreads} is not positive. Worker threads
     * are named {@code zip4jvm-extract-N}, with {@code N} zero-padded to the width of the
     * parallelism value.
     *
     * @return new executor; the caller is responsible for shutting it down
     */
    protected ExecutorService createExecutor() {
        AtomicInteger counter = new AtomicInteger();
        int parallelism = totalThreads <= 0 ? Runtime.getRuntime().availableProcessors() : totalThreads;
        // builds e.g. "zip4jvm-extract-%02d" for a two-digit parallelism
        String format = String.format("zip4jvm-extract-%%0%dd", String.valueOf(parallelism).length());

        ForkJoinPool.ForkJoinWorkerThreadFactory factory = pool -> {
            ForkJoinWorkerThread thread = ForkJoinPool.defaultForkJoinWorkerThreadFactory.newThread(pool);
            thread.setName(String.format(format, counter.incrementAndGet()));
            return thread;
        };

        return new ForkJoinPool(parallelism, factory, null, false);
    }

    /**
     * Schedules {@code task} on {@code executor}, running it through {@code Quietly.doRuntime}.
     *
     * @param task     extraction task
     * @param executor executor to run the task on
     * @return future that completes when the task has finished
     */
    protected CompletableFuture<Void> createCompletableFuture(RunnableWithException task, Executor executor) {
        return CompletableFuture.runAsync(() -> Quietly.doRuntime(task), executor);
    }

    /** Blocks until every given task has completed, normally or exceptionally, without throwing. */
    private static void awaitCompletion(List<CompletableFuture<Void>> tasks) {
        CompletableFuture.allOf(tasks.toArray(new CompletableFuture<?>[0]))
                         .exceptionally(e -> null)
                         .join();
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import ru.olegcherednik.zip4jvm.model.password.PasswordProvider;
import ru.olegcherednik.zip4jvm.model.src.SrcZip;
import ru.olegcherednik.zip4jvm.utils.ZipUtils;
import ru.olegcherednik.zip4jvm.utils.quitely.Quietly;
import ru.olegcherednik.zip4jvm.utils.time.DosTimestampConverterUtils;

import lombok.RequiredArgsConstructor;
Expand Down Expand Up @@ -105,31 +106,31 @@ protected List<ZipEntry> getEntriesByPrefix(String prefix) {
.collect(Collectors.toList());
}

// ----------

/**
 * Extracts entries of the zip model into {@code dstDir}, visiting them through
 * {@code zipModel.absOffsAscIterator()} and reading all of them from a single
 * {@link ConsecutiveAccessDataInput}. When {@code map} is {@code null} every entry is extracted
 * under its own file name; otherwise only entries whose file name is a key of {@code map} are
 * extracted, and the mapped value is used as the name resolved against {@code dstDir}.
 * An {@link IOException} is rethrown wrapped into a {@link Zip4jvmException}.
 * <p>
 * NOTE(review): this span is taken from a diff view, so the previous and the new variant of
 * several statements follow each other (e.g. two {@code try} headers, two {@code fileName}
 * declarations); read it as a change listing rather than as compilable code.
 *
 * @param dstDir destination directory
 * @param map    entry file name to destination file name mapping; may be {@code null}
 */
protected void extractEntry(Path dstDir, Map<String, String> map) {
try (ConsecutiveAccessDataInput in = createConsecutiveDataInput(zipModel.getSrcZip())) {
try (ConsecutiveAccessDataInput in = createConsecutiveDataInput()) {
Iterator<ZipEntry> it = zipModel.absOffsAscIterator();

while (it.hasNext()) {
ZipEntry zipEntry = it.next();

if (map == null || map.containsKey(zipEntry.getFileName())) {
in.seekForward(zipEntry.getLocalFileHeaderAbsOffs());
if (map != null && !map.containsKey(zipEntry.getFileName()))
continue;

String fileName = Optional.ofNullable(map)
.map(m -> m.get(zipEntry.getFileName()))
.orElse(zipEntry.getFileName());
Path file = dstDir.resolve(fileName);

String fileName = Optional.ofNullable(map)
.map(m -> m.get(zipEntry.getFileName()))
.orElse(zipEntry.getFileName());
Path file = dstDir.resolve(fileName);
extractEntry(file, zipEntry, in);
}
extractEntry(file, zipEntry, in);
}
} catch (IOException e) {
throw new Zip4jvmException(e);
}
}

protected void extractEntry(Path file, ZipEntry zipEntry, DataInput in) throws IOException {
protected void extractEntry(Path file, ZipEntry zipEntry, ConsecutiveAccessDataInput in) throws IOException {
in.seekForward(zipEntry.getLocalFileHeaderAbsOffs());

if (zipEntry.isSymlink())
extractSymlink(file, zipEntry, in);
else if (zipEntry.isDirectory())
Expand All @@ -142,7 +143,7 @@ else if (zipEntry.isDirectory())
setFileLastModifiedTime(file, zipEntry);
}

protected static void extractSymlink(Path symlink, ZipEntry zipEntry, DataInput in) throws IOException {
protected void extractSymlink(Path symlink, ZipEntry zipEntry, DataInput in) throws IOException {
String target = IOUtils.toString(zipEntry.createInputStream(in), Charsets.UTF_8);

if (target.startsWith("/"))
Expand All @@ -154,7 +155,7 @@ else if (target.contains(":"))
ZipSymlinkEngine.createRelativeSymlink(symlink, symlink.getParent().resolve(target));
}

protected static void extractEmptyDirectory(Path dir) throws IOException {
/**
 * Creates the directory {@code dir}, including any parent directories that do not exist yet.
 *
 * @param dir directory to create
 * @throws IOException if the directory cannot be created
 */
protected void extractEmptyDirectory(Path dir) throws IOException {
Files.createDirectories(dir);
}

Expand All @@ -164,17 +165,24 @@ protected void extractRegularFile(Path file, ZipEntry zipEntry, DataInput in) th
ZipUtils.copyLarge(zipEntry.createInputStream(in), getOutputStream(file));
}

protected static void setFileAttributes(Path path, ZipEntry zipEntry) throws IOException {
/**
 * Creates a new consecutive access data input for the source zip of the current zip model:
 * a solid one when the archive is solid, a split one otherwise. The creation runs inside
 * {@code Quietly.doRuntime}, so this method declares no checked exception.
 *
 * @return new data input; the caller is responsible for closing it
 */
public ConsecutiveAccessDataInput createConsecutiveDataInput() {
    SrcZip srcZip = zipModel.getSrcZip();

    return Quietly.doRuntime(() -> {
        if (srcZip.isSolid())
            return new SolidConsecutiveAccessDataInput(srcZip);

        return new SplitConsecutiveAccessDataInput(srcZip);
    });
}

/**
 * Applies the external file attributes of {@code zipEntry} to {@code path}; does nothing when
 * the entry has no external file attributes.
 *
 * @param path     extracted file or directory
 * @param zipEntry entry the attributes are taken from
 * @throws IOException if the attributes cannot be applied
 */
protected void setFileAttributes(Path path, ZipEntry zipEntry) throws IOException {
    if (zipEntry.getExternalFileAttributes() == null)
        return;

    zipEntry.getExternalFileAttributes().apply(path);
}

private static void setFileLastModifiedTime(Path path, ZipEntry zipEntry) throws IOException {
/**
 * Sets the last modified time of {@code path} to the entry's last modified time, converted from
 * the DOS representation to Java milliseconds.
 *
 * @param path     extracted file or directory
 * @param zipEntry entry the timestamp is taken from
 * @throws IOException if the timestamp cannot be set
 */
protected void setFileLastModifiedTime(Path path, ZipEntry zipEntry) throws IOException {
    FileTime fileTime = FileTime.fromMillis(
            DosTimestampConverterUtils.dosToJavaTime(zipEntry.getLastModifiedTime()));
    Files.setLastModifiedTime(path, fileTime);
}

protected static OutputStream getOutputStream(Path file) throws IOException {
protected OutputStream getOutputStream(Path file) throws IOException {
Path parent = file.getParent();

if (!Files.exists(parent))
Expand All @@ -184,12 +192,4 @@ protected static OutputStream getOutputStream(Path file) throws IOException {
return Files.newOutputStream(file);
}

// ---------- static ----------

/**
 * Creates a consecutive access data input for {@code srcZip}: a solid one when the archive is
 * solid, a split one otherwise.
 *
 * @param srcZip zip archive
 * @return new data input for the archive
 * @throws IOException if the data input cannot be created
 */
public static ConsecutiveAccessDataInput createConsecutiveDataInput(SrcZip srcZip) throws IOException {
    if (srcZip.isSolid())
        return new SolidConsecutiveAccessDataInput(srcZip);

    return new SplitConsecutiveAccessDataInput(srcZip);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ public class SolidConsecutiveAccessDataInput extends BaseConsecutiveAccessDataIn
private final InputStream in;

/**
 * Opens the disk with number {@code 0} of {@code srcZip} as a buffered input stream and takes
 * the byte order from {@code srcZip}.
 *
 * @param srcZip zip archive to read
 * @throws IOException if the file cannot be opened
 */
public SolidConsecutiveAccessDataInput(SrcZip srcZip) throws IOException {
    byteOrder = srcZip.getByteOrder();
    in = new BufferedInputStream(Files.newInputStream(srcZip.getDiskByNo(0).getPath()));
}
Expand Down
Loading

0 comments on commit 8853980

Please sign in to comment.