Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: (#3387) use collation to determine if string type is compatible #3400

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
535 changes: 449 additions & 86 deletions sqlx-mysql/src/collation.rs

Large diffs are not rendered by default.

19 changes: 14 additions & 5 deletions sqlx-mysql/src/protocol/text/column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::str::from_utf8;
use bitflags::bitflags;
use bytes::{Buf, Bytes};

use crate::collation::Collation;
use crate::error::Error;
use crate::io::Decode;
use crate::io::MySqlBufExt;
Expand Down Expand Up @@ -62,6 +63,10 @@ bitflags! {
}

// https://dev.mysql.com/doc/internals/en/com-query-response.html#column-type
// dead link ^
// https://dev.mysql.com/doc/dev/mysql-server/9.0.0/page_protocol_com_query_response_text_resultset_column_definition.html
// the "type of the column as defined in enum_field_types"
// https://dev.mysql.com/doc/dev/mysql-server/9.0.0/field__types_8h.html
Comment on lines 65 to +69
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

feel free to suggest which of these links we should keep - the original link here was dead when i tried it.


#[derive(Debug, Copy, Clone, PartialEq)]
#[cfg_attr(feature = "offline", derive(serde::Serialize, serde::Deserialize))]
Expand Down Expand Up @@ -112,8 +117,7 @@ pub(crate) struct ColumnDefinition {
table: Bytes,
alias: Bytes,
name: Bytes,
#[allow(unused)]
pub(crate) collation: u16,
pub(crate) collation: Collation,
pub(crate) max_size: u32,
pub(crate) r#type: ColumnType,
pub(crate) flags: ColumnFlags,
Expand Down Expand Up @@ -156,7 +160,7 @@ impl Decode<'_, Capabilities> for ColumnDefinition {
table,
alias,
name,
collation,
collation: Collation::try_from(collation)?,
max_size,
r#type: ColumnType::try_from_u16(type_id)?,
flags: ColumnFlags::from_bits_truncate(flags),
Expand All @@ -166,8 +170,13 @@ impl Decode<'_, Capabilities> for ColumnDefinition {
}

impl ColumnType {
pub(crate) fn name(self, flags: ColumnFlags, max_size: Option<u32>) -> &'static str {
let is_binary = flags.contains(ColumnFlags::BINARY);
pub(crate) fn name(
self,
collation: Collation,
flags: ColumnFlags,
max_size: Option<u32>,
) -> &'static str {
let is_binary = collation == Collation::binary;
let is_unsigned = flags.contains(ColumnFlags::UNSIGNED);
let is_enum = flags.contains(ColumnFlags::ENUM);

Expand Down
11 changes: 9 additions & 2 deletions sqlx-mysql/src/type_info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,18 @@ use std::fmt::{self, Display, Formatter};

pub(crate) use sqlx_core::type_info::*;

use crate::protocol::text::{ColumnDefinition, ColumnFlags, ColumnType};
use crate::{
collation::Collation,
protocol::text::{ColumnDefinition, ColumnFlags, ColumnType},
};

/// Type information for a MySql type.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "offline", derive(serde::Serialize, serde::Deserialize))]
pub struct MySqlTypeInfo {
pub(crate) r#type: ColumnType,
pub(crate) flags: ColumnFlags,
pub(crate) collation: Collation,

// [max_size] for integer types, this is (M) in BIT(M) or TINYINT(M)
#[cfg_attr(feature = "offline", serde(default))]
Expand All @@ -20,6 +24,7 @@ impl MySqlTypeInfo {
pub(crate) const fn binary(ty: ColumnType) -> Self {
Self {
r#type: ty,
collation: Collation::binary,
flags: ColumnFlags::BINARY,
max_size: None,
}
Expand All @@ -38,6 +43,7 @@ impl MySqlTypeInfo {
Self {
r#type: ColumnType::String,
flags: ColumnFlags::ENUM,
collation: Collation::binary,
max_size: None,
}
}
Expand All @@ -60,6 +66,7 @@ impl MySqlTypeInfo {
Self {
r#type: column.r#type,
flags: column.flags,
collation: column.collation,
max_size: Some(column.max_size),
}
}
Expand All @@ -77,7 +84,7 @@ impl TypeInfo for MySqlTypeInfo {
}

fn name(&self) -> &str {
self.r#type.name(self.flags, self.max_size)
self.r#type.name(self.collation, self.flags, self.max_size)
}
}

Expand Down
2 changes: 2 additions & 0 deletions sqlx-mysql/src/types/bool.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::collation::Collation;
use crate::decode::Decode;
use crate::encode::{Encode, IsNull};
use crate::error::BoxDynError;
Expand All @@ -12,6 +13,7 @@ impl Type<MySql> for bool {
// MySQL has no actual `BOOLEAN` type, the type is an alias of `TINYINT(1)`
MySqlTypeInfo {
flags: ColumnFlags::BINARY | ColumnFlags::UNSIGNED,
collation: Collation::binary,
max_size: Some(1),
r#type: ColumnType::Tiny,
}
Expand Down
4 changes: 3 additions & 1 deletion sqlx-mysql/src/types/str.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::collation::Collation;
use crate::decode::Decode;
use crate::encode::{Encode, IsNull};
use crate::error::BoxDynError;
Expand All @@ -12,6 +13,7 @@ impl Type<MySql> for str {
MySqlTypeInfo {
r#type: ColumnType::VarString, // VARCHAR
flags: ColumnFlags::empty(),
collation: Collation::utf8mb3_bin,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@DrewMcArthur

I think using utf8mb3_bin regardless of session state is misleading.
In my opinion, the only difference needed in this context is whether it is binary or string.
How about using an enum like this one?

CollationType {
    Binary,
    String
}

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was one of the pieces I wasn't sure about, what the default should be for some of these. I think you're right, _bin probably isn't right & I'll update this to utf8mb3_general_ci if you think that'd be correct.

As for changing the type of MySqlTypeInfo's collation field to an enum, I think the source of the issue on #3387 is that there are three cases being compressed into the ColumnFlag::binary flag:

  • a VARCHAR has a collation like utf8_general_ci, and the ColumnFlag::binary is false, so this is compatible with a Rust String
  • a BLOB has a collation of binary, ColumnFlag::binary is true, so it's not compatible with a Rust String, which is also correct
  • a VARCHAR has a collation of *_bin, so ColumnFlag::binary is true, so it's marked incompatible when we should be able to decode this into a String. This is the case that needs to be fixed.

This could be handled with that enum, if the value is String for both VARCHAR cases, but I feel like that could also be confusing to have a CollationType::String (as opposed to binary) while simultaneously having ColumnFlag::binary = true.

I think my preference is to keep track of each unique collation here, since it's only a byte, but I'll defer to you and @abonander to decide what makes more sense.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
collation: Collation::utf8mb3_bin,
collation: Collation::utf8mb3_general_ci,

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

^ @alu (sorry, I'm terrible at remembering to directly @ people in replies here)

max_size: None,
}
}
Expand All @@ -28,7 +30,7 @@ impl Type<MySql> for str {
| ColumnType::String
| ColumnType::VarString
| ColumnType::Enum
) && !ty.flags.contains(ColumnFlags::BINARY)
) && ty.collation != Collation::binary
}
}

Expand Down
2 changes: 2 additions & 0 deletions sqlx-mysql/src/types/uint.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::collation::Collation;
use crate::decode::Decode;
use crate::encode::{Encode, IsNull};
use crate::error::BoxDynError;
Expand All @@ -10,6 +11,7 @@ fn uint_type_info(ty: ColumnType) -> MySqlTypeInfo {
MySqlTypeInfo {
r#type: ty,
flags: ColumnFlags::BINARY | ColumnFlags::UNSIGNED,
collation: Collation::binary,
max_size: None,
}
}
Expand Down
72 changes: 72 additions & 0 deletions tests/mysql/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -384,3 +384,75 @@ CREATE TEMPORARY TABLE user_login (

Ok(())
}

mod string_collation_tests {
use super::*;

#[sqlx_macros::test]
async fn test_binary_string_collation() -> anyhow::Result<()> {
let mut conn = new::<MySql>().await?;

conn.execute(
r#"
CREATE TEMPORARY TABLE strings (
id INT PRIMARY KEY AUTO_INCREMENT,
char_bin CHAR(50) COLLATE utf8mb4_bin,
varchar_bin VARCHAR(255) COLLATE utf8mb4_bin,
text_bin TEXT COLLATE utf8mb4_bin
);
"#,
)
.await?;

// Insert sample data
conn.execute(
r#"
INSERT INTO strings (char_bin, varchar_bin, text_bin) VALUES
('char_binary', 'varchar_binary', 'text_binary');
"#,
)
.await?;

// Query the data back
let row: (String, String, String) = sqlx::query_as(
r#"
SELECT char_bin, varchar_bin, text_bin FROM strings
"#,
)
.fetch_one(&mut conn)
.await?;

// Assert the values
assert_eq!(row.0, "char_binary");
assert_eq!(row.1, "varchar_binary");
assert_eq!(row.2, "text_binary");

Ok(())
}

#[sqlx_macros::test]
async fn test_blob_to_string_err() -> anyhow::Result<()> {
let mut conn = new::<MySql>().await?;
conn.execute(
r#"
CREATE TEMPORARY TABLE blobs (
id INT PRIMARY KEY AUTO_INCREMENT,
myblob BLOB
);
"#,
)
.await?;

// Insert sample data
conn.execute("INSERT INTO blobs (myblob) VALUES ('0x243524598');")
.await?;

let res: Result<(i32, String), sqlx::Error> =
sqlx::query_as("SELECT id, myblob FROM blobs")
.fetch_one(&mut conn)
.await;

assert!(res.is_err());
Ok(())
}
}