Skip to content

Commit

Permalink
GH-72: Add url preview crawler
Browse files Browse the repository at this point in the history
  • Loading branch information
SetZero committed Jul 4, 2023
1 parent d683896 commit d6aab2a
Show file tree
Hide file tree
Showing 11 changed files with 164 additions and 12 deletions.
2 changes: 2 additions & 0 deletions src-tauri/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ directories = "5.0.1"
num-traits = "0.2"
brotli = "3.3.4"
webbrowser = "0.8.10"
reqwest = "0.11"
scraper = "0.17.1"

[dev-dependencies]
tempfile = "3.5.0"
Expand Down
13 changes: 13 additions & 0 deletions src-tauri/src/commands/helper.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
use tauri::regex::Regex;

/// Matches `pattern` against `body` and returns the text of capture group 1.
///
/// # Errors
///
/// * The `Debug` rendering of the regex error when `pattern` does not compile.
/// * `"regex not found"` when `pattern` does not match `body`, or when the
///   match has no first capture group.
pub(crate) fn extract_og_property(body: &str, pattern: &str) -> Result<String, String> {
    let regex = match Regex::new(pattern) {
        Ok(compiled) => compiled,
        Err(e) => return Err(format!("{e:?}")),
    };

    // Only the first capture group is of interest; the full match is ignored.
    match regex.captures(body).and_then(|groups| groups.get(1)) {
        Some(group) => Ok(group.as_str().to_owned()),
        None => Err("regex not found".to_string()),
    }
}
40 changes: 40 additions & 0 deletions src-tauri/src/commands/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// clippy is detecting '_ as a underscore binding, which it shouldn't
#![allow(clippy::used_underscore_binding)]

mod helper;

use std::{
borrow::BorrowMut,
collections::HashMap,
Expand All @@ -17,11 +19,15 @@ use crate::{
},
};
use base64::{engine::general_purpose, Engine};
use reqwest::header;
use serde_json::json;
use tauri::State;
use tokio::sync::Mutex;
use tracing::{error, info, trace};
use webbrowser::{Browser, BrowserOptions};

use self::helper::extract_og_property;

pub struct ConnectionState {
pub connection: Mutex<Option<Connection>>,
pub window: Mutex<tauri::Window>,
Expand Down Expand Up @@ -330,3 +336,37 @@ pub fn open_browser(url: &str) -> Result<(), String> {

Ok(())
}

/// Downloads `url` and returns its Open Graph title, description and image
/// as a JSON object string with the keys `title`, `description` and `image`.
///
/// The values are pulled out of the raw HTML with a regular expression that
/// expects a `<meta>` tag whose `property` attribute appears before a
/// non-empty `content` attribute.
///
/// # Errors
///
/// * `"Failed to fetch website"` when the HTTP request fails.
/// * `"Failed to read website body"` when the response body cannot be read
///   as text.
/// * The error from [`extract_og_property`] as soon as any one of the three
///   properties cannot be extracted; no partial result is returned.
#[tauri::command]
pub async fn get_open_graph_data_from_website(url: &str) -> Result<String, String> {
    let response = match reqwest::get(url).await {
        Ok(response) => response,
        Err(_) => return Err(String::from("Failed to fetch website")),
    };
    let html = match response.text().await {
        Ok(text) => text,
        Err(_) => return Err(String::from("Failed to read website body")),
    };

    // The three lookups differ only in the property name, so the pattern is
    // assembled in one place.
    let og_content = |name: &str| -> Result<String, String> {
        let pattern =
            format!(r#"<meta[^>]+property=["']og:{name}["'][^>]+content=["']([^"']+)["']"#);
        extract_og_property(&html, &pattern)
    };

    // Looked up in this order; the first failure aborts the command.
    let title = og_content("title")?;
    let description = og_content("description")?;
    let image = og_content("image")?;

    Ok(json!({
        "title": title,
        "description": description,
        "image": image,
    })
    .to_string())
}
9 changes: 5 additions & 4 deletions src-tauri/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ use tracing::Level;
use tracing_subscriber::fmt;

use crate::commands::{
change_user_state, connect_to_server, get_audio_devices, get_server_list, like_message, logout,
open_browser, save_server, send_message, set_user_image, unzip_data_from_utf8,
zip_data_to_utf8,
change_user_state, connect_to_server, get_audio_devices, get_open_graph_data_from_website,
get_server_list, like_message, logout, open_browser, save_server, send_message, set_user_image,
unzip_data_from_utf8, zip_data_to_utf8,
};

fn init_logging() {
Expand Down Expand Up @@ -70,7 +70,8 @@ fn main() {
get_audio_devices,
zip_data_to_utf8,
unzip_data_from_utf8,
open_browser
open_browser,
get_open_graph_data_from_website
])
.run(tauri::generate_context!())
.expect("error while running tauri application");
Expand Down
2 changes: 2 additions & 0 deletions src/components/ChatMessage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ const parseMessage = (message: string | undefined) => {
return messageParser;
}

console.log(message);

return message;
}
const parseUI = (message: string | undefined) => {
Expand Down
73 changes: 73 additions & 0 deletions src/components/UrlPreview.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import { Box, Button, Card, CardActionArea, CardActions, CardContent, CardMedia, Link, Typography } from "@mui/material";
import "./styles/UserInfo.css";
import "./styles/common.css"
import { useEffect, useMemo, useState } from "react";
import { invoke } from "@tauri-apps/api";
import { s } from "@tauri-apps/api/app-373d24a3";

interface UrlPreviewProps {
href: string;
}


interface UrlPreviewData {
title: string,
description: string,
image: string
}

/**
 * Renders a link as an Open Graph preview card.
 *
 * The preview data is requested from the backend command
 * `get_open_graph_data_from_website`. While the request is pending, or when
 * it fails (request error, missing Open Graph tags, unparsable response), the
 * component falls back to a plain link to `props.href`.
 */
function UrlPreview(props: UrlPreviewProps) {

    const [urlPreviewData, setUrlPreviewData] = useState<UrlPreviewData | undefined>(undefined);

    useEffect(() => {
        // Guards against updating state after unmount or after `href` changed
        // while an older request was still in flight.
        let cancelled = false;

        // Drop the preview of a previous href until the new one has loaded.
        setUrlPreviewData(undefined);

        invoke<string>('get_open_graph_data_from_website', { url: props.href })
            .then((data) => {
                if (!cancelled) {
                    setUrlPreviewData(JSON.parse(data));
                }
            })
            .catch(() => {
                // The backend rejects when the page cannot be fetched or has no
                // Open Graph tags; a JSON.parse failure also lands here. In all
                // cases the plain link is shown instead of a card.
                if (!cancelled) {
                    setUrlPreviewData(undefined);
                }
            });

        return () => {
            cancelled = true;
        };
    }, [props.href]);

    const createOpenGraphData = useMemo(() => {
        if (urlPreviewData) {
            return (
                <Card sx={{ maxWidth: 345 }}>
                    <CardActionArea onClick={() => window.open(props.href, '_blank')}>
                        <CardMedia
                            component="img"
                            height="140"
                            image={urlPreviewData.image}
                            alt={urlPreviewData.title}
                        />
                        <CardContent>
                            <Typography gutterBottom variant="h5" component="div">
                                {urlPreviewData.title}
                            </Typography>
                            <Typography variant="body2" color="text.secondary">
                                {urlPreviewData.description}
                            </Typography>
                        </CardContent>
                    </CardActionArea>
                    <CardActions>
                        <Button size="small" color="primary" href={props.href} target="_blank" sx={{fontSize: 9, textTransform: 'lowercase'}}>
                            {props.href}
                        </Button>
                    </CardActions>
                </Card>
            )
        } else {
            return (
                <Box>
                    <Link href={props.href}>{props.href}</Link>
                </Box>
            );
        }
    }, [urlPreviewData, props.href]);

    return (
        <Box>
            {createOpenGraphData}
        </Box>
    );
}

export default UrlPreview;
22 changes: 20 additions & 2 deletions src/helper/DOMHelper.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,28 @@
/*
 * Listen for the return message once the tweet has been loaded and apply the
 * reported height to the referenced element.
 *
 * Messages from any origin other than https://twitframe.com are ignored.
 * NOTE(review): the handler treats `event.data.element` as an object with a
 * `style` property — confirm against what twitframe actually sends back.
 */
window.addEventListener("message", (event) => {
    if (event.origin !== "https://twitframe.com") return;

    // The payload is untyped; skip messages that do not carry both fields
    // instead of throwing inside the handler.
    const { element, height } = event.data ?? {};
    if (!element?.style || height === undefined || height === null) return;

    element.style.height = `${height}px`;
});

/**
 * Creates a borderless, transparent iframe that embeds `url`.
 *
 * The iframe is at least 480x270 px, allows fullscreen and carries the CSS
 * class `embedded-iframe`. It is returned detached; the caller inserts it
 * into the document.
 *
 * @param url address loaded into the iframe
 * @returns the configured iframe element
 */
export function createEmbeddedIFrame(url: string): Node {
    const iframe = document.createElement('iframe');
    iframe.src = url;
    iframe.style.minWidth = '480px';
    iframe.style.minHeight = '270px';
    iframe.style.border = 'none';
    iframe.style.background = 'transparent';
    iframe.style.colorScheme = 'auto';
    iframe.className = 'embedded-iframe';
    iframe.allowFullscreen = true;

    iframe.onload = (el) => {
        console.log("iframe loaded", el);
        // Height query towards twitframe is not enabled yet:
        //iframe.contentWindow?.postMessage({ element: el.target, query: "height" },
        //    "https://twitframe.com");
    }

    return iframe;
}
8 changes: 4 additions & 4 deletions src/helper/MessageParser.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ class DOMMessageParser {
{ regex: /https:\/\/www\.twitch\.tv\/videos\/([0-9]+)/, replacement: 'https://player.twitch.tv/?video=$1&parent=' + (window.location.hostname), inline: true },
{ regex: /https:\/\/clips\.twitch\.tv\/([a-zA-Z0-9_-]+)/, replacement: 'https://clips.twitch.tv/embed?clip=$1&parent=' + (window.location.hostname), inline: true },
{ regex: /https:\/\/www\.twitch\.tv\/([a-zA-Z0-9_-]+)/, replacement: 'https://player.twitch.tv/?channel=$1&parent=' + (window.location.hostname), inline: true },
{ regex: /https:\/\/twitter\.com\/([a-zA-Z0-9_]+)\/status\/([0-9]+)/, replacement: 'https://twitframe.com/show?url=https://twitter.com/$1/status/$2', inline: true },
{ regex: /https:\/\/twitter\.com\/([a-zA-Z0-9_]+)/, replacement: 'https://twitframe.com/show?url=https://twitter.com/$1', inline: true },
{ regex: /https:\/\/twitter\.com\/([a-zA-Z0-9_]+)\/status\/([0-9]+)/, replacement: 'https://twitframe.com/show?url=https://twitter.com/$1/status/$2&theme=dark', inline: true },
{ regex: /https:\/\/twitter\.com\/([a-zA-Z0-9_]+)/, replacement: 'https://twitframe.com/show?url=https://twitter.com/$1&theme=dark', inline: true },
{ regex: /https:\/\/giphy.com\/gifs\/.*-([a-zA-Z0-9_-]+)/, replacement: 'https://giphy.com/embed/$1', inline: true },
];

Expand Down Expand Up @@ -76,7 +76,7 @@ class MessageParser {
}

parseLinks() {
const regex = /(?<!\S)((?:https?:\/\/)(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-zA-Z0-9()]{2,20}\b[-a-zA-Z0-9()@:%_\+.~#?&\/=]*)/;
const regex = /(?<!\S)((?:https?:\/\/)(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-zA-Z0-9()]{2,20}\b[-a-zA-Z0-9()@:%_\+.,~#?&\/=]*)/;
this.input = this.input.replace(regex, '<a href="$1" target="_blank">$1</a>');

return this;
Expand All @@ -103,7 +103,7 @@ class MessageParser {
}

parseMarkdown() {
this.input = marked.parse(this.input);
this.input = marked.parseInline(this.input);
this.input = DOMPurify.sanitize(this.input);

return this;
Expand Down
5 changes: 5 additions & 0 deletions src/helper/MessageUIHelper.tsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { Container } from '@mui/material';
import parse, { DOMNode, HTMLReactParserOptions, domToReact } from 'html-react-parser';
import LightBoxImage from '../components/LightBoxImage';
import UrlPreview from '../components/UrlPreview';

export default class MessageUIHelper {
private input: string;
Expand All @@ -17,6 +18,10 @@ export default class MessageUIHelper {
return (<Container >
<LightBoxImage src={attribs.src} />
</Container>);
case 'a':
return (<Container>
<UrlPreview href={attribs.href} />
</Container>)
}
},
};
Expand Down
1 change: 0 additions & 1 deletion ~/.tauri/fakeapp.key

This file was deleted.

1 change: 0 additions & 1 deletion ~/.tauri/fakeapp.key.pub

This file was deleted.

0 comments on commit d6aab2a

Please sign in to comment.