Skip to content

Commit

Permalink
#20 Extract entries async
Browse files Browse the repository at this point in the history
  • Loading branch information
oleg-cherednik authored Dec 30, 2024
1 parent 41d6959 commit 33be9ef
Show file tree
Hide file tree
Showing 15 changed files with 349 additions and 147 deletions.
1 change: 1 addition & 0 deletions misc/pmd/multithreading.xml
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,6 @@
<exclude name="AvoidSynchronizedAtMethodLevel"/>
<exclude name="UseConcurrentHashMap"/>
<exclude name="AvoidUsingVolatile"/>
<exclude name="DoNotUseThreads"/>
</rule>
</ruleset>
Original file line number Diff line number Diff line change
Expand Up @@ -128,26 +128,27 @@ private void copyPayload(Path dir, ZipEntry zipEntry, ZipEntryBlock.LocalFileHea

Block content = diagLocalFileHeader.getContent();
long size = zipEntry.getCompressedSize();
long offs = content.getDiskOffs() + content.getSize();
// TODO here we should use SrcZip methods
long absOffs = content.getDiskOffs() + content.getSize();

EncryptionMethod encryptionMethod = zipEntry.getEncryptionMethod();

if (encryptionMethod.isAes()) {
AesEncryptionHeaderBlock block = (AesEncryptionHeaderBlock) encryptionHeaderBlock;

offs += block.getSalt().getSize();
offs += block.getPasswordChecksum().getSize();
absOffs += block.getSalt().getSize();
absOffs += block.getPasswordChecksum().getSize();

size -= block.getSalt().getSize();
size -= block.getPasswordChecksum().getSize();
size -= block.getMac().getSize();
} else if (encryptionMethod == EncryptionMethod.PKWARE) {
PkwareEncryptionHeaderBlock block = (PkwareEncryptionHeaderBlock) encryptionHeaderBlock;
offs += block.getSize();
absOffs += block.getSize();
size -= block.getSize();
}

Utils.copyLarge(blockModel.getZipModel(), dir.resolve("payload" + EXT_DATA), offs, size);
Utils.copyLarge(blockModel.getZipModel(), dir.resolve("payload" + EXT_DATA), absOffs, absOffs, size);
}

private EncryptionHeaderDecompose encryptionHeader(EncryptionMethod encryptionMethod,
Expand Down
21 changes: 6 additions & 15 deletions src/main/java/ru/olegcherednik/zip4jvm/decompose/Utils.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import ru.olegcherednik.zip4jvm.model.ZipModel;
import ru.olegcherednik.zip4jvm.model.block.Block;
import ru.olegcherednik.zip4jvm.model.entry.ZipEntry;
import ru.olegcherednik.zip4jvm.utils.ValidationUtils;

import lombok.AccessLevel;
import lombok.NoArgsConstructor;
Expand Down Expand Up @@ -50,30 +49,22 @@ public static void print(Path file, Consumer<PrintStream> consumer) throws FileN
}

/**
 * Copies the bytes covered by the given {@code block} from the source zip into the file {@code out}.
 * <p>
 * Delegates to {@link #copyLarge(ZipModel, Path, long, long, long)} using the block's disk offset,
 * absolute offset and size.
 *
 * @param zipModel model of the zip archive to read from
 * @param out      destination file
 * @param block    block describing the region to copy
 * @throws IOException if reading the source or writing the destination fails
 */
public static void copyLarge(ZipModel zipModel, Path out, Block block) throws IOException {
    // a single call only: the disk offset positions the read, the absolute offset selects the disk
    copyLarge(zipModel, out, block.getDiskOffs(), block.getAbsOffs(), block.getSize());
}

public static void copyLarge(ZipModel zipModel, Path out, long offs, long size) throws IOException {
Path file = zipModel.getSrcZip().getDiskByAbsOffs(offs).getPath();
public static void copyLarge(ZipModel zipModel, Path out, long diskOffs, long absOffs, long size)
throws IOException {
Path file = zipModel.getSrcZip().getDiskByAbsOffs(absOffs).getPath();

try (InputStream fis = Files.newInputStream(file);
OutputStream fos = Files.newOutputStream(out)) {
long skipBytes = fis.skip(offs);
assert skipBytes == offs;
long skipBytes = fis.skip(diskOffs);
assert skipBytes == diskOffs;

IOUtils.copyLarge(fis, fos, 0, size);
}
}

/**
 * Writes the slice of {@code buf} described by {@code block} into the file {@code out}.
 * <p>
 * The block's absolute offset and size are used as the start index and length within {@code buf};
 * both are validated to fit into an {@code int} before being narrowed.
 *
 * @param out   destination file
 * @param buf   buffer holding the data
 * @param block block whose absolute offset and size select the slice of {@code buf}
 * @throws IOException if the destination cannot be written
 */
public static void copyByteArray(Path out, byte[] buf, Block block) throws IOException {
    long from = block.getAbsOffs();
    ValidationUtils.requireLessOrEqual(from, Integer.MAX_VALUE, "block.absoluteOffs");

    long length = block.getSize();
    ValidationUtils.requireLessOrEqual(length, Integer.MAX_VALUE, "block.size");

    try (OutputStream target = Files.newOutputStream(out)) {
        target.write(buf, (int) from, (int) length);
    }
}

/**
 * Writes the whole content of {@code buf} into the file {@code out}, creating the file or replacing
 * its previous content.
 *
 * @param out destination file
 * @param buf bytes to write
 * @throws IOException if the destination cannot be written
 */
public static void copyByteArray(Path out, byte[] buf) throws IOException {
    try (OutputStream target = Files.newOutputStream(out)) {
        target.write(buf);
    }
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package ru.olegcherednik.zip4jvm.engine.unzip;

import ru.olegcherednik.zip4jvm.io.in.DataInput;
import ru.olegcherednik.zip4jvm.io.in.file.consecutive.ConsecutiveAccessDataInput;

import lombok.RequiredArgsConstructor;
import org.apache.commons.io.IOUtils;

import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.ExecutorService;
import java.util.function.Supplier;

/**
 * Hands out one {@link ConsecutiveAccessDataInput} per calling thread and remembers every instance
 * it has created, so that all of them can be closed at once with {@link #release()}.
 * <p>
 * This class is designed to be used with a custom {@link ExecutorService} only. The per-thread slot
 * belongs to this holder instance, so two holders never see each other's {@link DataInput}, even
 * when the same worker thread serves both of them.
 * <p>
 * {@link #release()} closes all created instances, but it cannot clear the per-thread slots of other
 * threads. I.e. after invoking {@link #release()} and then invoking {@link #get()} on the same holder
 * from a thread that already used it, the returned {@link DataInput} will not be {@literal null}, but
 * it will be closed and not available for reuse. Create a new holder instead of reusing a released one.
 *
 * @author Oleg Cherednik
 * @since 28.12.2024
 */
@RequiredArgsConstructor
public class ConsecutiveAccessDataInputHolder {

    /**
     * Per-holder (not static) slot: a static slot would leak an input created by one holder into
     * every other holder used later on the same thread.
     */
    private final ThreadLocal<ConsecutiveAccessDataInput> threadLocal = new ThreadLocal<>();

    /** All inputs created by this holder; written from worker threads, read in {@link #release()}. */
    private final List<ConsecutiveAccessDataInput> dataInputs = new CopyOnWriteArrayList<>();

    /** Factory invoked once per thread on the first {@link #get()} call of that thread. */
    private final Supplier<ConsecutiveAccessDataInput> dataInputSupplier;

    /**
     * Closes every {@link ConsecutiveAccessDataInput} created by this holder, ignoring close failures,
     * and forgets them.
     */
    public void release() {
        // the per-thread slots of other threads cannot be cleared from here
        dataInputs.forEach(IOUtils::closeQuietly);
        dataInputs.clear();
    }

    /**
     * Returns the {@link ConsecutiveAccessDataInput} bound to the current thread, creating it with the
     * supplier on first use.
     *
     * @return the input of the current thread
     */
    public ConsecutiveAccessDataInput get() {
        ConsecutiveAccessDataInput in = threadLocal.get();

        if (in == null) {
            in = dataInputSupplier.get();
            threadLocal.set(in);
            dataInputs.add(in);
        }

        return in;
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
import ru.olegcherednik.zip4jvm.model.password.PasswordProvider;
import ru.olegcherednik.zip4jvm.model.settings.UnzipSettings;
import ru.olegcherednik.zip4jvm.model.src.SrcZip;
import ru.olegcherednik.zip4jvm.utils.quitely.Quietly;

import java.io.IOException;
import java.nio.file.Path;
import java.util.Collection;
import java.util.Collections;
Expand All @@ -47,7 +47,17 @@ public final class UnzipEngine implements ZipFile.Reader {
/**
 * Reads the zip model from {@code srcZip} and selects the extract engine according to the settings.
 *
 * @param srcZip   source zip archive
 * @param settings unzip settings providing the password provider, charset customizer and the
 *                 async threads option
 */
public UnzipEngine(SrcZip srcZip, UnzipSettings settings) {
    PasswordProvider passwordProvider = settings.getPasswordProvider();
    zipModel = ZipModelBuilder.read(srcZip, settings.getCharsetCustomizer(), passwordProvider);
    // assigned exactly once: the factory decides between the sequential and the async engine
    unzipExtractEngine = createUnzipExtractEngine(settings, zipModel);
}

/**
 * Chooses the extract engine implementation for the given settings.
 *
 * @param settings unzip settings; {@code getAsyncThreads()} equal to
 *                 {@code UnzipSettings.ASYNC_THREADS_OFF} selects the sequential engine
 * @param zipModel model of the zip archive to extract from
 * @return an {@link UnzipExtractEngine}, or an {@link UnzipExtractAsyncEngine} when async
 *         extraction is enabled
 */
private static UnzipExtractEngine createUnzipExtractEngine(UnzipSettings settings, ZipModel zipModel) {
    PasswordProvider provider = settings.getPasswordProvider();
    int asyncThreads = settings.getAsyncThreads();

    return asyncThreads == UnzipSettings.ASYNC_THREADS_OFF
           ? new UnzipExtractEngine(provider, zipModel)
           : new UnzipExtractAsyncEngine(provider, zipModel, asyncThreads);
}

// ---------- ZipFile.Reader ----------
Expand Down Expand Up @@ -105,9 +115,9 @@ public ZipFile.Entry next() {
};
}

public static RandomAccessDataInput createRandomAccessDataInput(SrcZip srcZip) throws IOException {
return srcZip.isSolid() ? new SolidRandomAccessDataInput(srcZip)
: new SplitRandomAccessDataInput(srcZip);
public static RandomAccessDataInput createRandomAccessDataInput(SrcZip srcZip) {
return Quietly.doRuntime(() -> srcZip.isSolid() ? new SolidRandomAccessDataInput(srcZip)
: new SplitRandomAccessDataInput(srcZip));
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package ru.olegcherednik.zip4jvm.engine.unzip;

import ru.olegcherednik.zip4jvm.model.ZipModel;
import ru.olegcherednik.zip4jvm.model.entry.ZipEntry;
import ru.olegcherednik.zip4jvm.model.password.PasswordProvider;
import ru.olegcherednik.zip4jvm.utils.quitely.Quietly;
import ru.olegcherednik.zip4jvm.utils.quitely.functions.RunnableWithException;

import org.apache.commons.collections4.CollectionUtils;

import java.nio.file.Path;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.ForkJoinWorkerThread;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * {@link UnzipExtractEngine} that extracts entries concurrently: every entry becomes a task submitted
 * to an executor created by {@link #createExecutor()}, and each worker thread reads through its own
 * data input obtained from a {@link ConsecutiveAccessDataInputHolder}.
 *
 * @author Oleg Cherednik
 * @since 28.12.2024
 */
public class UnzipExtractAsyncEngine extends UnzipExtractEngine {

    /** Parallelism passed to the executor; always positive. */
    protected final int totalThreads;

    /**
     * @param passwordProvider password provider passed to the parent engine
     * @param zipModel         model of the zip archive to extract from
     * @param totalThreads     desired number of threads; a value {@code <= 0} means "use the number
     *                         of available processors"
     */
    public UnzipExtractAsyncEngine(PasswordProvider passwordProvider, ZipModel zipModel, int totalThreads) {
        super(passwordProvider, zipModel);
        this.totalThreads = totalThreads <= 0 ? Runtime.getRuntime().availableProcessors() : totalThreads;
    }

    // ---------- UnzipExtractEngine ----------

    /**
     * Submits one extraction task per entry (in the order of {@code zipModel.absOffsAscIterator()})
     * and waits for all of them.
     *
     * @param dstDir directory the entries are extracted into
     */
    @Override
    protected void extractAllEntries(Path dstDir) {
        List<CompletableFuture<Void>> tasks = new LinkedList<>();
        Iterator<ZipEntry> it = zipModel.absOffsAscIterator();

        ConsecutiveAccessDataInputHolder dataInputHolder =
                new ConsecutiveAccessDataInputHolder(this::createConsecutiveDataInput);
        ExecutorService executor = createExecutor();

        try {
            while (it.hasNext()) {
                ZipEntry zipEntry = it.next();
                Path file = dstDir.resolve(zipEntry.getFileName());

                tasks.add(submit(file, zipEntry, dataInputHolder, executor));
            }

            tasks.forEach(CompletableFuture::join);
        } finally {
            shutdown(tasks, executor, dataInputHolder);
        }
    }

    /**
     * Submits one extraction task per entry for which {@code getFileName(zipEntry, prefixes)} returns
     * a non-null name, and waits for all of them.
     *
     * @param dstDir   directory the entries are extracted into
     * @param prefixes not empty set of prefixes used to select the entries
     */
    @Override
    protected void extractEntryByPrefix(Path dstDir, Set<String> prefixes) {
        assert CollectionUtils.isNotEmpty(prefixes);

        List<CompletableFuture<Void>> tasks = new LinkedList<>();
        Iterator<ZipEntry> it = zipModel.absOffsAscIterator();

        ConsecutiveAccessDataInputHolder dataInputHolder =
                new ConsecutiveAccessDataInputHolder(this::createConsecutiveDataInput);
        ExecutorService executor = createExecutor();

        try {
            while (it.hasNext()) {
                ZipEntry zipEntry = it.next();
                String fileName = getFileName(zipEntry, prefixes);

                if (fileName != null)
                    tasks.add(submit(dstDir.resolve(fileName), zipEntry, dataInputHolder, executor));
            }

            tasks.forEach(CompletableFuture::join);
        } finally {
            shutdown(tasks, executor, dataInputHolder);
        }
    }

    // ----------

    /**
     * Creates the executor used for a single extraction run. Worker threads are named
     * {@code zip4jvm-extract-NN}, where the counter is zero-padded to the number of digits of
     * {@link #totalThreads}.
     *
     * @return a new {@link ForkJoinPool} with parallelism {@link #totalThreads}
     */
    protected ExecutorService createExecutor() {
        AtomicInteger counter = new AtomicInteger();
        String format = String.format("zip4jvm-extract-%%0%dd", String.valueOf(totalThreads).length());

        ForkJoinPool.ForkJoinWorkerThreadFactory factory = pool -> {
            ForkJoinWorkerThread thread = ForkJoinPool.defaultForkJoinWorkerThreadFactory.newThread(pool);
            thread.setName(String.format(format, counter.incrementAndGet()));
            return thread;
        };

        return new ForkJoinPool(totalThreads, factory, null, false);
    }

    /**
     * Wraps the given task into a {@link CompletableFuture} running on {@code executor}; the task is
     * executed through {@code Quietly.doRuntime}.
     *
     * @param task     task to run
     * @param executor executor to run the task on
     * @return future that completes when the task finishes
     */
    protected CompletableFuture<Void> createCompletableFuture(RunnableWithException task, Executor executor) {
        return CompletableFuture.runAsync(() -> Quietly.doRuntime(task), executor);
    }

    /** Schedules the extraction of one entry; the data input is resolved on the worker thread. */
    private CompletableFuture<Void> submit(Path file,
                                           ZipEntry zipEntry,
                                           ConsecutiveAccessDataInputHolder dataInputHolder,
                                           Executor executor) {
        return createCompletableFuture(() -> extractEntry(file, zipEntry, dataInputHolder.get()), executor);
    }

    /**
     * Common cleanup for both extraction methods: waits until every submitted task has settled, then
     * shuts the executor down and closes the data inputs.
     */
    private static void shutdown(List<CompletableFuture<Void>> tasks,
                                 ExecutorService executor,
                                 ConsecutiveAccessDataInputHolder dataInputHolder) {
        try {
            // After a failure some tasks may still be running; ExecutorService.shutdown() does not wait
            // for them, so the inputs must not be closed until all tasks are done. Failures are ignored
            // here because the first one is already propagating from the caller's join().
            CompletableFuture.allOf(tasks.toArray(new CompletableFuture<?>[0]))
                             .exceptionally(e -> null)
                             .join();
        } finally {
            executor.shutdown();
            dataInputHolder.release();
        }
    }

}
Loading

0 comments on commit 33be9ef

Please sign in to comment.