Skip to content

Commit

Permalink
Spanify work 1 (#812)
Browse files Browse the repository at this point in the history
* Add GetString(ReadOnlySpan<byte>) polyfill

* Add ArrayPoolBufferWriter

* Use Utf8.IsValid & char.IsAsciiHexDigit on NET8.0+

* Optimize HexTokenizer

* Eliminate various Tuple allocations

* Eliminate List allocation in CrossReferenceTable

* Eliminate various allocations in Ascii85Filter

* Spanify HexToken

* Spanify Palette

* Spanify various Cmap & font methods

* Spanify Type1Charstring classes

* Spanify PdfDocEncoding.TryConvertBytesToString

* Spanify OctalHelpers.FromOctalDigits

* Add missing braces

* React to HexToken.Byte type changes

* Cleanup

* [Tests] React to span changes

* Add ArgumentNullException check back to Type1CharstringDecryptedBytes

* Remove unsafe code

* Seal HexToken

* Avoid allocation when passing an empty span
  • Loading branch information
iamcarbon authored Apr 1, 2024
1 parent e789691 commit f62929e
Show file tree
Hide file tree
Showing 41 changed files with 429 additions and 266 deletions.
148 changes: 148 additions & 0 deletions src/UglyToad.PdfPig.Core/ArrayPoolBufferWriter.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
using System;
using System.Buffers;

namespace UglyToad.PdfPig.Core;

/// <summary>
/// An <see cref="IBufferWriter{T}"/> whose backing array is rented from <see cref="ArrayPool{T}.Shared"/>.
/// Dispose the writer to hand the rented array back to the pool.
/// </summary>
/// <typeparam name="T">The type of the items written to the buffer.</typeparam>
public sealed class ArrayPoolBufferWriter<T> : IBufferWriter<T>, IDisposable
{
    private const int DefaultBufferSize = 256;

    // Smallest capacity requested from the pool whenever the buffer has to grow.
    private const int MinimumGrowSize = 512;

    private T[] buffer;
    private int position;

    /// <summary>
    /// Constructs an ArrayPoolBufferWriter with the default initial buffer size.
    /// </summary>
    public ArrayPoolBufferWriter()
        : this(DefaultBufferSize)
    {
    }

    /// <summary>
    /// Constructs an ArrayPoolBufferWriter.
    /// </summary>
    /// <param name="size">The minimum size of the initial buffer.</param>
    public ArrayPoolBufferWriter(int size)
    {
        buffer = ArrayPool<T>.Shared.Rent(size);
        position = 0;
    }

    /// <summary>
    /// Advances the current position, committing <paramref name="count"/> items
    /// previously written to the span or memory handed out by this writer.
    /// </summary>
    /// <param name="count">The number of items to commit.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="count"/> is negative.</exception>
    /// <exception cref="InvalidOperationException">Advancing would move past the end of the buffer.</exception>
    public void Advance(int count)
    {
        if (count < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(count), "Cannot advance by a negative number of items.");
        }

        // Without this check the position could move beyond the rented array and
        // every later WrittenSpan / GetSpan call would fail far away from the real cause.
        if (count > RemainingCapacity)
        {
            throw new InvalidOperationException("Cannot advance past the end of the buffer.");
        }

        position += count;
    }

    /// <summary>
    /// Writes the provided value.
    /// </summary>
    /// <param name="value">The item to append.</param>
    public void Write(T value)
    {
        GetSpan(1)[0] = value;

        position += 1;
    }

    /// <summary>
    /// Writes the provided values.
    /// </summary>
    /// <param name="values">The items to append.</param>
    public void Write(ReadOnlySpan<T> values)
    {
        values.CopyTo(GetSpan(values.Length));

        position += values.Length;
    }

    /// <summary>
    /// Returns a writeable block of memory, starting at the current position, that can be written to.
    /// </summary>
    /// <param name="sizeHint">The minimum number of items required; 0 requests at least one item.</param>
    public Memory<T> GetMemory(int sizeHint = 0)
    {
        EnsureCapacity(sizeHint);

        return buffer.AsMemory(position);
    }

    /// <summary>
    /// Returns a span, starting at the current position, that can be written to.
    /// </summary>
    /// <param name="sizeHint">The minimum number of items required; 0 requests at least one item.</param>
    public Span<T> GetSpan(int sizeHint = 0)
    {
        EnsureCapacity(sizeHint);

        return buffer.AsSpan(position);
    }

    /// <summary>
    /// Returns the number of items written to the buffer.
    /// </summary>
    public int WrittenCount => position;

    /// <summary>
    /// Returns the committed data as Memory.
    /// </summary>
    public ReadOnlyMemory<T> WrittenMemory => buffer.AsMemory(0, position);

    /// <summary>
    /// Returns the committed data as a Span.
    /// </summary>
    public ReadOnlySpan<T> WrittenSpan => buffer.AsSpan(0, position);

    private int RemainingCapacity => buffer.Length - position;

    // Grows the backing array (copying the committed items) when fewer than sizeHint items remain.
    private void EnsureCapacity(int sizeHint)
    {
        if (sizeHint is 0)
        {
            sizeHint = 1;
        }

        if (sizeHint <= RemainingCapacity)
        {
            return;
        }

        // checked: fail loudly instead of wrapping to a negative size on overflow.
        var required = checked(position + sizeHint);
        var newBuffer = ArrayPool<T>.Shared.Rent(Math.Max(required, MinimumGrowSize));

        // The buffer is empty after Dispose (or when constructed with size 0);
        // there is then nothing to copy and nothing to return to the pool.
        if (buffer.Length != 0)
        {
            Array.Copy(buffer, 0, newBuffer, 0, position);
            ArrayPool<T>.Shared.Return(buffer);
        }

        buffer = newBuffer;
    }

    /// <summary>
    /// Resets the internal state so the instance can be reused before disposal.
    /// </summary>
    /// <param name="clearArray">When true the whole backing array is cleared as well.</param>
    public void Reset(bool clearArray = false)
    {
        position = 0;

        if (clearArray)
        {
            buffer.AsSpan().Clear();
        }
    }

    /// <summary>
    /// Disposes the buffer and returns any rented memory to the pool.
    /// Calling this more than once is safe.
    /// </summary>
    public void Dispose()
    {
        if (buffer.Length != 0)
        {
            ArrayPool<T>.Shared.Return(buffer);
            buffer = [];
        }

        // Keep position consistent with the now-empty buffer so WrittenSpan / WrittenMemory
        // do not throw, and a later write does not expose stale pool contents as written data.
        position = 0;
    }
}
2 changes: 1 addition & 1 deletion src/UglyToad.PdfPig.Core/OctalHelpers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public static short CharacterToShort(this char c)
/// <summary>
/// Read an integer from octal digits.
/// </summary>
public static int FromOctalDigits(short[] octal)
public static int FromOctalDigits(ReadOnlySpan<short> octal)
{
int sum = 0;
for (int i = octal.Length - 1; i >= 0; i--)
Expand Down
28 changes: 2 additions & 26 deletions src/UglyToad.PdfPig.Core/OtherEncodings.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
namespace UglyToad.PdfPig.Core
{
using System.Collections.Generic;
using System.Linq;
using System;
using System.Text;

/// <summary>
Expand Down Expand Up @@ -30,31 +29,8 @@ public static byte[] StringAsLatin1Bytes(string s)
/// <summary>
/// Convert the bytes to string using the ISO 8859-1 encoding.
/// </summary>
public static string BytesAsLatin1String(IReadOnlyList<byte> bytes)
public static string BytesAsLatin1String(ReadOnlySpan<byte> bytes)
{
if (bytes == null)
{
return null;
}

if (bytes is byte[] arr)
{
return BytesAsLatin1String(arr);
}

return BytesAsLatin1String(bytes.ToArray());
}

/// <summary>
/// Convert the bytes to string using the ISO 8859-1 encoding.
/// </summary>
public static string BytesAsLatin1String(byte[] bytes)
{
if (bytes == null)
{
return null;
}

return Iso88591.GetString(bytes);
}
}
Expand Down
3 changes: 2 additions & 1 deletion src/UglyToad.PdfPig.Core/PdfDocEncoding.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
namespace UglyToad.PdfPig.Core
{
using System;
using System.Collections.Generic;

/// <summary>
Expand Down Expand Up @@ -263,7 +264,7 @@ static PdfDocEncoding()
/// Try to convert raw bytes to a PdfDocEncoding encoded string. If unsupported characters are encountered
/// meaning we cannot safely round-trip the value to bytes this will instead return false.
/// </summary>
public static bool TryConvertBytesToString(byte[] bytes, out string result)
public static bool TryConvertBytesToString(ReadOnlySpan<byte> bytes, out string result)
{
result = null;
if (bytes.Length == 0)
Expand Down
19 changes: 19 additions & 0 deletions src/UglyToad.PdfPig.Core/Polyfills/EncodingExtensions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#if NETFRAMEWORK || NETSTANDARD2_0

namespace System.Text;

/// <summary>
/// Polyfill providing a span based GetString for targets whose
/// <see cref="Encoding"/> only accepts arrays.
/// </summary>
internal static class EncodingExtensions
{
    /// <summary>
    /// Decodes <paramref name="bytes"/> into a string using <paramref name="encoding"/>.
    /// An empty span yields <see cref="string.Empty"/> without allocating.
    /// </summary>
    public static string GetString(this Encoding encoding, ReadOnlySpan<byte> bytes)
    {
        // NOTE: the array copy could be avoided by introducing unsafe code.
        return bytes.Length == 0
            ? string.Empty
            : encoding.GetString(bytes.ToArray());
    }
}

#endif
18 changes: 15 additions & 3 deletions src/UglyToad.PdfPig.Core/ReadHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
using System.Globalization;
using System.Text;

#if NET8_0_OR_GREATER
using System.Text.Unicode;
#endif

/// <summary>
/// Helper methods for reading from PDF files.
/// </summary>
Expand All @@ -20,8 +24,8 @@ public static class ReadHelper
/// </summary>
public const byte AsciiCarriageReturn = 13;

private static readonly HashSet<int> EndOfNameCharacters = new HashSet<int>
{
private static readonly HashSet<int> EndOfNameCharacters =
[
' ',
AsciiCarriageReturn,
AsciiLineFeed,
Expand All @@ -35,7 +39,7 @@ public static class ReadHelper
'(',
0,
'\f'
};
];

private static readonly int MaximumNumberStringLength = long.MaxValue.ToString("D").Length;

Expand Down Expand Up @@ -269,14 +273,21 @@ public static bool IsSpace(int c)
/// </summary>
public static bool IsHex(char ch)
{
#if NET8_0_OR_GREATER
    return char.IsAsciiHexDigit(ch);
#else
    // Compare against the ASCII range explicitly: char.IsDigit also accepts non-ASCII
    // Unicode decimal digits, which are not hex digits and would make this branch
    // disagree with char.IsAsciiHexDigit used on newer targets.
    return (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
#endif
}

/// <summary>
/// Whether the given input bytes are valid UTF8.
/// </summary>
public static bool IsValidUtf8(byte[] input)
{
#if NET8_0_OR_GREATER
return Utf8.IsValid(input);
#else
try
{
var d = Encoding.UTF8.GetDecoder();
Expand All @@ -290,6 +301,7 @@ public static bool IsValidUtf8(byte[] input)
{
return false;
}
#endif
}

private static StringBuilder ReadStringNumber(IInputBytes reader)
Expand Down
7 changes: 5 additions & 2 deletions src/UglyToad.PdfPig.Core/UglyToad.PdfPig.Core.csproj
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<Project Sdk="Microsoft.NET.Sdk">
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFrameworks>netstandard2.0;net462;net471;net6.0;net8.0</TargetFrameworks>
<LangVersion>12</LangVersion>
Expand All @@ -17,7 +17,10 @@
</ItemGroup>
<ItemGroup Condition="'$(TargetFramework)'=='netstandard2.0' or '$(TargetFramework)'=='net462' OR '$(TargetFramework)'=='net471'">
<PackageReference Include="Microsoft.Bcl.HashCode" Version="1.1.1" />
</ItemGroup>
</ItemGroup>
<ItemGroup Condition="'$(TargetFramework)'=='netstandard2.0' or '$(TargetFramework)'=='net462' or '$(TargetFramework)'=='net471'">
<PackageReference Include="System.Memory" Version="4.5.5" />
</ItemGroup>
<ItemGroup>
<None Include="..\pdfpig.snk" Link="pdfpig.snk" />
</ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ public static class TextEdgesExtractor
/// <summary>
/// Functions used to define left, middle and right edges.
/// </summary>
private static readonly Tuple<EdgeType, Func<PdfRectangle, double>>[] edgesFuncs = new Tuple<EdgeType, Func<PdfRectangle, double>>[]
{
private static readonly Tuple<EdgeType, Func<PdfRectangle, double>>[] edgesFuncs =
[
Tuple.Create<EdgeType, Func<PdfRectangle, double>>(EdgeType.Left, x => Math.Round(x.Left, 0)), // use BoundingBox's left coordinate
Tuple.Create<EdgeType, Func<PdfRectangle, double>>(EdgeType.Mid, x => Math.Round(x.Left + x.Width / 2, 0)), // use BoundingBox's mid coordinate
Tuple.Create<EdgeType, Func<PdfRectangle, double>>(EdgeType.Right, x => Math.Round(x.Right, 0)) // use BoundingBox's right coordinate
};
];

/// <summary>
/// Get the text edges.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
namespace UglyToad.PdfPig.Fonts.Type1.CharStrings
{
using System;
using System.Collections.Generic;
using Commands;
using Commands.Arithmetic;
using Commands.Hint;
using Commands.PathConstruction;
using Commands.StartFinishOutline;
using Core;
using System;
using System.Collections.Generic;

/// <summary>
/// Decodes a set of CharStrings to their corresponding Type 1 BuildChar operations.
Expand Down Expand Up @@ -73,11 +73,11 @@ public static Type1CharStrings Parse(IReadOnlyList<Type1CharstringDecryptedBytes
return new Type1CharStrings(charStringResults, charStringIndexToName, subroutineResults);
}

private static IReadOnlyList<Union<double, LazyType1Command>> ParseSingle(IReadOnlyList<byte> charStringBytes)
private static IReadOnlyList<Union<double, LazyType1Command>> ParseSingle(ReadOnlySpan<byte> charStringBytes)
{
var interpreted = new List<Union<double, LazyType1Command>>();

for (var i = 0; i < charStringBytes.Count; i++)
for (var i = 0; i < charStringBytes.Length; i++)
{
var b = charStringBytes[i];

Expand All @@ -104,7 +104,7 @@ private static IReadOnlyList<Union<double, LazyType1Command>> ParseSingle(IReadO
return interpreted;
}

private static int InterpretNumber(byte b, IReadOnlyList<byte> bytes, ref int i)
private static int InterpretNumber(byte b, ReadOnlySpan<byte> bytes, ref int i)
{
if (b >= 32 && b <= 246)
{
Expand All @@ -128,7 +128,7 @@ private static int InterpretNumber(byte b, IReadOnlyList<byte> bytes, ref int i)
return result;
}

public static LazyType1Command GetCommand(byte v, IReadOnlyList<byte> bytes, ref int i)
public static LazyType1Command GetCommand(byte v, ReadOnlySpan<byte> bytes, ref int i)
{
switch (v)
{
Expand Down
Loading

0 comments on commit f62929e

Please sign in to comment.