From 99d2b4a429d1be3f7b1d28dd3b30f0eef6f5dfb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Courivaud?= Date: Tue, 11 Jun 2024 13:25:16 +0200 Subject: [PATCH 1/7] add dagster deployment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphaël Courivaud --- .../workflows/github-actions-data-stack.yml | 11 ++ .gitignore | 3 + analytics/dagster/.nux/nux.yaml | 1 + analytics/dagster/Dockerfile | 21 ++++ analytics/dagster/dagster.yaml | 15 +++ .../dagster/data/most_frequent_words.json | 1 + analytics/dagster/data/topstories.csv | 101 ++++++++++++++++++ analytics/dagster/data/topstory_ids.json | 1 + analytics/dagster/requirements.txt | 3 + analytics/dagster/src/__init__.py | 1 + .../src/__pycache__/__init__.cpython-310.pyc | Bin 0 -> 174 bytes .../__pycache__/definitions.cpython-310.pyc | Bin 0 -> 467 bytes analytics/dagster/src/assets/__init__.py | 0 .../__pycache__/__init__.cpython-310.pyc | Bin 0 -> 141 bytes .../__pycache__/hackernews.cpython-310.pyc | Bin 0 -> 3443 bytes analytics/dagster/src/assets/hackernews.py | 96 +++++++++++++++++ analytics/dagster/src/definitions.py | 16 +++ analytics/dagster/workspace.yaml | 2 + analytics/docker-compose.yml | 27 +++++ 19 files changed, 299 insertions(+) create mode 100644 .github/workflows/github-actions-data-stack.yml create mode 100644 analytics/dagster/.nux/nux.yaml create mode 100644 analytics/dagster/Dockerfile create mode 100644 analytics/dagster/dagster.yaml create mode 100644 analytics/dagster/data/most_frequent_words.json create mode 100644 analytics/dagster/data/topstories.csv create mode 100644 analytics/dagster/data/topstory_ids.json create mode 100644 analytics/dagster/requirements.txt create mode 100644 analytics/dagster/src/__init__.py create mode 100644 analytics/dagster/src/__pycache__/__init__.cpython-310.pyc create mode 100644 analytics/dagster/src/__pycache__/definitions.cpython-310.pyc create mode 100644 analytics/dagster/src/assets/__init__.py create mode 100644 analytics/dagster/src/assets/__pycache__/__init__.cpython-310.pyc create mode 100644 analytics/dagster/src/assets/__pycache__/hackernews.cpython-310.pyc create mode 100644 analytics/dagster/src/assets/hackernews.py create mode 100644 analytics/dagster/src/definitions.py create mode 100644 analytics/dagster/workspace.yaml create mode 100644 analytics/docker-compose.yml diff --git a/.github/workflows/github-actions-data-stack.yml b/.github/workflows/github-actions-data-stack.yml new file mode 100644 index 000000000..d35514fb1 --- /dev/null +++ b/.github/workflows/github-actions-data-stack.yml @@ -0,0 +1,11 @@ +name: Data Stack CI + +on: [push] + +jobs: + deploy-dagster: + uses: ./.github/workflows/deploy.yml + with: + app: dagster + branch: main + secrets: inherit diff --git a/.gitignore b/.gitignore index 1db1155bd..d3911e790 100644 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,6 @@ yarn-error.log* /talisman_report .clever.json + +# Data Stack +analytics/dagster/storage/ diff --git a/analytics/dagster/.nux/nux.yaml b/analytics/dagster/.nux/nux.yaml new file mode 100644 index 000000000..1ba1db1b2 --- /dev/null +++ b/analytics/dagster/.nux/nux.yaml @@ -0,0 +1 @@ +seen: 1 diff --git a/analytics/dagster/Dockerfile b/analytics/dagster/Dockerfile new file mode 100644 index 000000000..f68ce7418 --- /dev/null +++ b/analytics/dagster/Dockerfile @@ -0,0 +1,21 @@ +FROM python:3.10-slim + +RUN pip install dagster-webserver dagster-postgres dagster-aws + +# Install dependencies +COPY requirements.txt . +RUN pip install -r requirements.txt + +ENV DAGSTER_HOME=/opt/dagster/dagster_home/ + +RUN mkdir -p $DAGSTER_HOME + +COPY dagster.yaml workspace.yaml $DAGSTER_HOME + +COPY src/ $DAGSTER_HOME + +WORKDIR $DAGSTER_HOME + +EXPOSE 3000 + +ENTRYPOINT ["dagster-webserver", "-h", "0.0.0.0", "-p", "3000"] diff --git a/analytics/dagster/dagster.yaml b/analytics/dagster/dagster.yaml new file mode 100644 index 000000000..fedb16205 --- /dev/null +++ b/analytics/dagster/dagster.yaml @@ -0,0 +1,15 @@ +storage: + postgres: + postgres_db: + username: + env: DAGSTER_PG_USERNAME + password: + env: DAGSTER_PG_PASSWORD + hostname: + env: DAGSTER_PG_HOST + db_name: + env: DAGSTER_PG_DB + port: 5432 + +telemetry: + enabled: false \ No newline at end of file diff --git a/analytics/dagster/data/most_frequent_words.json b/analytics/dagster/data/most_frequent_words.json new file mode 100644 index 000000000..1fae8bcd2 --- /dev/null +++ b/analytics/dagster/data/most_frequent_words.json @@ -0,0 +1 @@ +{"\u2013": 9, "new": 6, "hn": 5, "why": 5, "ai": 4, "show": 4, "from": 4, "macos": 3, "language": 3, "server": 3, "at": 3, "video": 3, "may": 3, "be": 3, "using": 3, "pdf": 3, "vision": 2, "sequoia": 2, "virtual": 2, "arm": 2, "exploring": 2, "apple's": 2, "models": 2, "just": 2, "fast": 2} \ No newline at end of file diff --git a/analytics/dagster/data/topstories.csv b/analytics/dagster/data/topstories.csv new file mode 100644 index 000000000..5ed79db95 --- /dev/null +++ b/analytics/dagster/data/topstories.csv @@ -0,0 +1,101 @@ +,by,descendants,id,kids,score,time,title,type,url,text +0,RafelMri,53.0,40643207,"[40644828, 40644869, 40643824, 40644023, 40644285, 40644054, 40644785, 40643908, 40644404, 40644751, 40644237, 40644218, 40643964, 40643821, 40644052, 40644577, 40643808, 40644093, 40644132, 40644401, 40643772, 40643855, 40644181, 40644433, 40644157]",110,1718087804,POV-Ray – The Persistence of Vision Raytracer,story,http://www.povray.org/, +1,throwaway-blaze,10.0,40643181,"[40644333, 40643182, 40643552, 40644698, 40643829]",85,1718087509,macOS Sequoia to Allow iCloud Logins in Virtual Machines on ARM Macs,story,https://developer.apple.com/documentation/virtualization/using_icloud_with_macos_virtual_machines?language=objc, +2,crowdhailer,34.0,40643167,"[40644331, 40644427, 40644849, 40644435, 40644350, 40644494, 40643997, 40644572]",87,1718087389,"Exploring Gleam, a type-safe language on the BEAM",story,https://christopher.engineering/en/blog/gleam-overview/, +3,2bit,359.0,40639506,"[40641415, 40640114, 40640837, 40640364, 40640302, 40644140, 40644666, 40642536, 40642453, 40640241, 40640131, 40643955, 40640261, 40639998, 40640011, 40643632, 40642527, 40642607, 40640155, 40642413, 40640123, 40642070, 40641060, 40642211, 40640868, 40640907, 40640008, 40639882, 40639995, 40640421, 40639955, 40643131, 40642757, 40641541]",708,1718055731,Apple's On-Device and Server Foundation Models,story,https://machinelearning.apple.com/research/introducing-apple-foundation-models, +4,ulrischa,6.0,40642871,"[40644867, 40644862, 40644521, 40643882, 40644105, 40643945]",67,1718084967,"NanoGPT: The simplest, fastest repository for training medium-sized GPTs",story,https://github.com/karpathy/nanoGPT, +5,calpaterson,21.0,40642476,"[40644318, 40644069, 40643994, 40644710, 40644185]",65,1718081515,DuckDB Isn't Just Fast,story,https://csvbase.com/blog/6, +6,skilled,5.0,40630952,"[40644871, 40644868, 40644860, 40630960]",24,1718002727,OpenWorm – creating a virtual organism in a computer,story,https://openworm.org/, +7,hggh,8.0,40631796,"[40644791, 40643619, 40644764, 40643440, 40643660, 40644673]",66,1718012865,Free Quality SoundFonts (Sf2),story,https://sites.google.com/site/soundfonts4u/, +8,serhack_,235.0,40639606,"[40642804, 40644008, 40640713, 40642152, 40642435, 40640489, 40641439, 40640535, 40640843, 40644592, 40644593, 40640988, 40643483, 40644027, 40641137, 40643295, 40641113, 40642905, 40640001, 40641043, 40641799, 40643533, 40642002, 40644364, 40640959, 40641965, 40640993, 40641564, 40642434, 40642455, 40640096, 40641546, 40640943, 40641359, 40640717, 40640351, 40641790, 40641232, 40642907, 40640823, 40643485, 40640528, 40641907, 40640704, 40641197, 40640240]",463,1718056387,Private Cloud Compute: A new frontier for AI privacy in the cloud,story,https://security.apple.com/blog/private-cloud-compute/, +9,craydandy,93.0,40623864,"[40638690, 40639279, 40639063, 40644599, 40638665, 40639961, 40643096, 40642193, 40642132, 40639073, 40638207, 40639521, 40643858, 40640207, 40641936, 40637550, 40640771, 40642358, 40642552, 40640141, 40643049, 40642367, 40643704, 40641620, 40640304, 40639457, 40639372, 40638602, 40639684, 40644254, 40639496, 40640169, 40639145, 40639572, 40640022, 40637833, 40638418, 40641759]",492,1717934539,I built an ROV to solve missing person cases,story,https://suanto.com/2024/06/06/the-time-I-built-an-ROV-01/, +10,ingve,34.0,40639628,"[40643006, 40642448, 40642573, 40644556, 40643662, 40641861, 40641849, 40644696, 40641783, 40642048, 40643464, 40643407, 40641940]",128,1718056513,"Engage your audience: get to the point, use story structure, force specificity",story,https://iandanielstewart.com/2024/06/09/engage-your-audience-by-getting-to-the-point-using-story-structure-and-forcing-specificity/, +11,Sephr,34.0,40639450,"[40643121, 40641976, 40643737, 40642087, 40643963, 40643579, 40642391, 40639455]",84,1718055438,Big Tech's role in enabling link fraud – take 2,story,https://eligrey.com/blog/link-fraud/,"I posted an earlier draft of this article to Hacker News 4 months ago which was well received [1], but it didn't garner much discussion around the core issue.

Dialog around link fraud complacency was likely sidetracked due to a lack of real-world examples to better illustrate the technical problem at hand. To better illustrate the issue, I've added an examples section citing over 20 cases of link fraud on Google Search, Bing, and X.

I hope that this thread can spark a meaningful dialog around law and security as it pertains to Big Tech's role in enabling link fraud. Society can overcome this issue through concerted efforts to raise awareness and enforce existing legislation.

1. https://news.ycombinator.com/item?id=39003929" +12,pantalaimon,1.0,40644323,[40644711],7,1718099031,Exploring TrustZone-M on the NRF9160,story,https://lenas-fieldnotes.de/minimal-tz/, +13,igpay,88.0,40635397,"[40636495, 40644517, 40637466, 40640101, 40639853, 40636066, 40635895, 40636504, 40637214, 40640921, 40636049, 40636394, 40643155, 40641686, 40639473, 40641182, 40640116, 40638177, 40640345, 40639583, 40635916, 40636849, 40636238, 40641700, 40639382, 40637787, 40636255, 40639414, 40638047, 40638078, 40639491, 40639676, 40635781, 40639083, 40636337, 40641581, 40639255, 40642734, 40636567, 40636569, 40636082]",258,1718037605,Show HN: Probabilistic Tic-Tac-Toe,story,https://www.csun.io/2024/06/08/probabilistic-tic-tac-toe.html, +14,drx,416.0,40636292,"[40638134, 40637883, 40637666, 40644842, 40636823, 40636860, 40636579, 40636905, 40644320, 40637926, 40636666, 40637135, 40637867, 40637479, 40637063, 40643303, 40639957, 40642783, 40637855, 40636781, 40644002, 40636726, 40636994, 40636891, 40637965, 40636519, 40636851, 40636869, 40642244, 40639549, 40638628, 40639176, 40636412, 40638501, 40636901, 40637332, 40636895, 40637127, 40638405, 40636416, 40637628, 40638247, 40637029, 40639615, 40637764, 40643982, 40639966, 40636868, 40638858, 40636762, 40637798, 40638860, 40636769, 40638243, 40637682, 40639310, 40637752, 40637531, 40636426, 40636937, 40636429, 40636643, 40642227, 40636978, 40637198]",186,1718042650,Apple unveils 'Passwords' manager app at WWDC 2024,story,https://www.zdnet.com/article/forget-lastpass-apple-unveils-passwords-manager-app-at-wwdc-2024/, +15,MaxLeiter,103.0,40640927,"[40641762, 40644830, 40642780, 40641971, 40642635, 40641815, 40644278, 40644783, 40641869, 40643724, 40643786, 40641775, 40643740, 40643719, 40642084, 40644675, 40641874, 40642222, 40642991, 40641938, 40641993, 40644735, 40644112, 40642839, 40642040, 40642925, 40641744, 40641391, 40642425, 40642158, 40642085, 40642077]",151,1718065705,Ship Something Every Day,story,https://maxleiter.com/blog/ship-every-day, +16,rbanffy,0.0,40632064,,14,1718015793,A Short History of CP/M-86 – By Bradford Morgan White,story,https://www.abortretry.fail/p/a-short-history-of-cpm-86, +17,wglb,20.0,40640833,"[40641073, 40642216, 40643147, 40642953]",79,1718064900,Siberia's 'mammoth graveyard' reveals 800-year human interactions with mammoths,story,https://phys.org/news/2024-06-siberia-mammoth-graveyard-reveals-year.html, +18,xorvoid,18.0,40633003,"[40641284, 40644160, 40644576, 40644752, 40642137, 40643267, 40643153, 40643550, 40642234]",104,1718023668,Forsp: A Forth+Lisp Hybrid Lambda Calculus Language,story,https://xorvoid.com/forsp.html, +19,zdw,53.0,40643071,"[40644062, 40644110, 40643846, 40644102, 40643826, 40643962, 40643954, 40644447, 40644170, 40643728, 40643953, 40644353, 40643918, 40644715]",74,1718086696,Spotify: Droppin' Some Fake Beats,story,https://lcamtuf.substack.com/p/spotify-droppin-some-fake-beats, +20,gradus_ad,20.0,40638764,"[40640605, 40643008, 40640411, 40642101, 40641220, 40642614]",122,1718052124,The new math of how large-scale order emerges,story,https://www.quantamagazine.org/the-new-math-of-how-large-scale-order-emerges-20240610/, +21,nextworddev,11.0,40641848,"[40644853, 40644786, 40644591, 40644679, 40644470]",26,1718075359,Apple's AI Strategy in a Nutshell,story,https://nextword.substack.com/p/apples-ai-strategy-in-a-nutshell, +22,terramex,1069.0,40636844,"[40637094, 40639568, 40637108, 40637125, 40637043, 40637067, 40637267, 40636560, 40638786, 40638242, 40637013, 40638509, 40637903, 40636706, 40636682, 40644337, 40636970, 40636775, 40636631, 40637554, 40644013, 40639132, 40637159, 40636918, 40637450, 40637150, 40636916, 40639332, 40644161, 40637088, 40636974, 40637059, 40636990, 40637041, 40636626, 40636928, 40639726, 40639010, 40637286, 40637103, 40637481, 40637201, 40636744, 40643518, 40638257, 40636859, 40637425, 40637979, 40639129, 40636603, 40636942, 40637028, 40637053, 40636734, 40640389, 40640126, 40639678, 40637096, 40638154, 40641824, 40636923, 40640274, 40637192, 40639419, 40639375, 40640775, 40638585, 40636757, 40636904, 40637623, 40636845, 40637580, 40642597, 40638723, 40637886, 40638557, 40637142, 40637075, 40636709, 40639569, 40636702, 40639263, 40638722, 40636948, 40636998, 40638593, 40637534, 40644244, 40637342, 40643065, 40636853, 40639050, 40636608, 40640785, 40636919, 40642433, 40640628, 40637265, 40638780, 40636587, 40637901, 40637206, 40638885, 40639618, 40636557, 40639731, 40636939, 40636960, 40640067, 40638727, 40639264, 40637071, 40636712, 40638339, 40636834, 40636722, 40636906, 40638498, 40637120, 40637707, 40639625, 40639195, 40638724, 40639702, 40640735, 40637273, 40636785, 40642182, 40641292, 40637777, 40637533, 40640760, 40642952, 40640222, 40638515, 40637238, 40636780, 40641024, 40636968, 40641169, 40638575, 40640902, 40637110, 40640215, 40639910, 40639537, 40638631, 40637780, 40637568, 40637500, 40637129, 40637060, 40636982, 40636953, 40636627, 40636716, 40637182, 40636796, 40636585, 40636864, 40642821, 40637005, 40637571, 40641904, 40637097, 40641908, 40637515, 40637006, 40636606, 40636979, 40636629, 40638845, 40636700, 40639304]",952,1718045327,"Apple Intelligence for iPhone, iPad, and Mac",story,https://www.apple.com/newsroom/2024/06/introducing-apple-intelligence-for-iphone-ipad-and-mac/, +23,throwup238,251.0,40637102,"[40640212, 40644805, 40644731, 40639609, 40639927, 40640380, 40639280, 40640673, 40642275, 40639507, 40639777, 40639901, 40644564, 40640209, 40640005, 40641802, 40642100, 40642270, 40640423, 40641657]",307,1718046026,Deterioration of local community a major driver of loss of play-based childhood,story,https://www.afterbabel.com/p/community-based-childhood, +24,geek_at,100.0,40632745,"[40636265, 40638625, 40636550, 40637021, 40641386, 40636419, 40636184, 40638348, 40636820, 40644280, 40638581, 40638607, 40640660, 40639771, 40636342, 40636115, 40636861, 40637769, 40636595, 40636073, 40641739, 40639951, 40635975, 40636647, 40638457, 40641888, 40637465, 40637980, 40636180, 40636609, 40636531, 40638633, 40636446, 40636470, 40643877]",178,1718021696,Sending emails to my three-year-old,story,https://blog.haschek.at/2024/leaving-a-digital-legacy.html, +25,ca98am79,70.0,40636883,"[40638720, 40639914, 40641540, 40640703, 40638761, 40643274, 40644109, 40643868, 40639197, 40639831, 40640305, 40638781, 40639973, 40639832, 40639585, 40639217, 40640283, 40637922, 40643843, 40641114, 40638537, 40638680]",174,1718045453,The rarest move in chess [video],story,https://www.youtube.com/watch?v=iDnW0WiCqNc, +26,getwiththeprog,11.0,40641704,"[40643067, 40643746, 40643396, 40642840, 40643352, 40642769]",86,1718073516,Biodiversity enhances immune regulation among daycare children,story,https://www.science.org/doi/10.1126/sciadv.aba2578, +27,oidar,44.0,40641795,"[40642251, 40642584, 40642460, 40644545, 40644148, 40643221, 40642223, 40643180, 40642598]",71,1718074517,Blackmagic Cine Immersive Capture for Vision Pro 8160x7200 Resolution per Eye,story,https://www.newsshooter.com/2024/06/10/blackmagic-ursa-cine-immersive-capture-content-for-apple-vision-pro-with-8160-x-7200-resolution-per-eye/, +28,belter,27.0,40633902,"[40634985, 40636336, 40637243, 40638077, 40639703, 40634789, 40637361, 40638375, 40634858, 40640877]",207,1718028864,Mexican Computers: A Brief Technical and Historical Overview,story,https://arxiv.org/abs/2406.04912, +29,skeptrune,,40639032,,1,1718053243,Trieve (YC W24) Is Hiring a DevRel Software Engineer,job,https://www.ycombinator.com/companies/trieve/jobs/2jeeXLs-developer-relations-software-engineer, +30,Anon84,6.0,40640424,"[40643665, 40642810, 40643600, 40643170]",56,1718061486,The Geometry of Categorical and Hierarchical Concepts in Large Language Models,story,https://arxiv.org/abs/2406.01506, +31,indigodaddy,4.0,40641615,"[40642778, 40643014]",39,1718072630,Genes protective during Black Death may now be increasing autoimmune disorders (2022),story,https://www.health.harvard.edu/blog/genes-protective-during-the-black-death-may-now-be-increasing-autoimmune-disorders-202212012859, +32,nequo,2.0,40641932,"[40643995, 40643850]",15,1718076150,The Common Lisp Cookbook (2007),story,https://cl-cookbook.sourceforge.net/index.html, +33,dp-hackernews,0.0,40644454,,6,1718100453,Humans May Be Able to Grow New Teeth Within Just 6 Years,story,https://www.popularmechanics.com/science/health/a60952102/tooth-regrowth-human-trials-japan/, +34,CTOSian,2.0,40631614,"[40643076, 40642574]",30,1718010848,An Arduino interface for 8088 CPUs,story,https://github.com/dbalsom/arduino_8088, +35,drdee,10.0,40637374,"[40644444, 40639874, 40641312, 40643251, 40640756, 40643608, 40640375, 40641889]",48,1718046819,Pyrophone,story,https://en.wikipedia.org/wiki/Pyrophone, +36,unlog,48.0,40632533,"[40638029, 40637652, 40632569, 40643806, 40644001, 40638402, 40639800, 40638597, 40635752, 40636252, 40634593, 40639035, 40638598]",170,1718020020,"Show HN: Crawl a modern website to a zip, serve the website from the zip",story,https://github.com/potahtml/mpa-archive, +37,jandrewrogers,36.0,40637785,"[40640782, 40642475, 40644328, 40643595, 40644221, 40641380, 40642064, 40640980, 40640938, 40640942, 40641142, 40642114, 40640066, 40641364]",98,1718048369,Possible exposure of Earth to dense interstellar medium 2-3M years ago,story,https://www.nature.com/articles/s41550-024-02279-8, +38,082349872349872,0.0,40631466,,7,1718009035,The Last Mathematical Testament of Galois,story,https://www.ias.ac.in/article/fulltext/reso/004/10/0093-0100, +39,classichasclass,19.0,40635697,"[40636983, 40636547, 40636962, 40636524]",69,1718039392,pico9918: A replacement TMS9918A/TMS9929A VDP using a Raspberry Pi Pico,story,https://github.com/visrealm/pico9918, +40,YeGoblynQueenne,5.0,40631573,"[40643301, 40643469]",32,1718010281,Wooden bowling arm that bested Australian cricketer in 1909 rebuilt,story,https://www.theguardian.com/technology/article/2024/jun/10/wooden-bowling-arm-john-venn-machine-rebuilt-cambridge-australia-1909-cricket-team, +41,anarbadalov,63.0,40635789,"[40642151, 40643717, 40636578, 40639561, 40637038, 40636619, 40639597]",56,1718039923,How David Bohm and Hugh Everett changed quantum theory,story,https://daily.jstor.org/how-two-rebel-physicists-changed-quantum-theory/, +42,alishobeiri,34.0,40633773,"[40641405, 40643693, 40635533, 40635793, 40639259, 40642660, 40635673, 40635861, 40636097, 40635754, 40635527, 40635574]",128,1718027962,Show HN: Thread – AI-powered Jupyter Notebook built using React,story,https://github.com/squaredtechnologies/thread,"Hey HN, we're building Thread (https://thread.dev/) an open-source Jupyter Notebook that has a bunch of AI features built in. The easiest way to think of Thread is if the chat interface of OpenAI code interpreter was fused into a Jupyter Notebook development environment where you could still edit code or re-run cells. To check it out, you can see a video demo here: https://www.youtube.com/watch?v=Jq1_eoO6w-c

We initially got the idea when building Vizly (https://vizly.fyi/) a tool that lets non-technical users ask questions from their data. While Vizly is powerful at performing data transformations, as engineers, we often felt that natural language didn't give us enough freedom to edit the code that was generated or to explore the data further for ourselves. That is what gave us the inspiration to start Thread.

We made Thread a pip package (`pip install thread-dev`) because we wanted to make Thread as easily accessible as possible. While there are a lot of notebooks that improve on the notebook development experience, they are often cloud hosted tools that are hard to access as an individual contributor unless your company has signed an enterprise agreement.

With Thread, we are hoping to bring the power of LLMs to the local notebook development environment while blending the editing experience that you can get in a cloud hosted notebook. We have many ideas on the roadmap but instead of building in a vacuum (which we have made the mistake of before) our hope was to get some initial feedback to see if others are as interested in a tool like this as we are.

Would love to hear your feedback and see what you think!" +43,skilled,88.0,40642801,"[40643264, 40643412, 40643328, 40643716, 40643387, 40644439, 40643186, 40643224, 40643354, 40643139, 40643567]",102,1718084413,British duo arrested for SMS phishing via homemade cell tower,story,https://www.cityoflondon.police.uk/news/city-of-london/news/2024/june/two-people-arrested-in-connection-with-investigation-into-homemade-mobile-antenna-used-to-send-thousands-of-smishing-text-messages-to-the-public/, +44,ColinWright,0.0,40644605,,3,1718101983,When Water Flows Uphill [video],story,https://www.youtube.com/watch?v=zzKgnNGqxMw, +45,MaysonL,5.0,40632397,"[40641990, 40638586, 40641983]",43,1718018816,Water,story,https://johncarlosbaez.wordpress.com/2013/11/29/water/, +46,diodorus,0.0,40615002,,5,1717816539,Policing the Chôra: Law Enforcement in Ptolemaic Egypt (2005) [pdf],story,https://www.u.arizona.edu/~jbausch1/cv/BauschatzFullDissertation.pdf, +47,kieto,1.0,40644111,[40644771],9,1718097150,Raspberry Pi IPO,story,https://www.raspberrypi.com/news/raspberry-pi-ipo/, +48,ksec,98.0,40630699,"[40633994, 40630785, 40632875, 40631951, 40634697, 40632445, 40631981, 40643030, 40631980, 40641488, 40633014, 40631681, 40636971, 40637023, 40634549]",224,1717999811,The Magical Mystery Merge Or Why we run FreeBSD-current at Netflix (2023) [pdf],story,https://people.freebsd.org/~gallatin/talks/OpenFest2023.pdf, +49,skilled,9.0,40643499,"[40643971, 40644029, 40643981, 40643851]",12,1718090324,Meta says European data is essential for culturally relevant AI,story,https://stackdiary.com/meta-says-european-data-is-essential-for-culturally-relevant-ai/, +50,magnio,46.0,40622209,"[40632366, 40634152, 40632555, 40632872, 40635193, 40641063, 40633995, 40632986, 40632025, 40635590, 40632960, 40637426, 40634035, 40633658, 40632656]",250,1717910091,"Scratchapixel 4.0, Learn Computer Graphics Programming",story,https://www.scratchapixel.com/index.html, +51,TangerineDream,1.0,40644459,[40644778],5,1718100494,Why curl closes PRs on GitHub,story,https://daniel.haxx.se/blog/2024/06/11/why-curl-closes-prs-on-github/, +52,hindsightbias,9.0,40627113,"[40641173, 40643233, 40641097, 40640806, 40641090]",35,1717963179,Researchers demonstrate the first chip-based 3D printer,story,https://news.mit.edu/2024/researchers-demonstrate-first-chip-based-3d-printer-0606, +53,stareatgoats,75.0,40638386,"[40639352, 40641079, 40639256, 40639531, 40639478, 40641377, 40640886]",175,1718050593,Controversial pesticide research all but vanished from a major conference,story,https://usrtk.org/bees-neonics/entomological-society-america-corporate-partners/, +54,diggan,1.0,40640534,[40644845],12,1718062255,Jank's new persistent string is fast,story,https://jank-lang.org/blog/2023-12-30-fast-string/, +55,isaacfrond,7.0,40643744,"[40644657, 40644684, 40644391, 40644003, 40643970]",11,1718093025,The word 'bot' is increasingly being used as an insult on social media,story,https://www.newscientist.com/article/2434742-the-word-bot-is-increasingly-being-used-as-an-insult-on-social-media/, +56,davidbarker,288.0,40636854,"[40637054, 40638579, 40638081, 40640958, 40637251, 40637530, 40637878, 40640378, 40637540, 40637521, 40637476, 40639082, 40637936, 40640636, 40638397, 40643694, 40643379, 40641428, 40638009, 40638566, 40637340, 40639766, 40638132, 40642538, 40640848, 40638203, 40637735, 40640187, 40638472, 40639971, 40639891, 40637835, 40637607, 40637495, 40643845, 40638404, 40637433, 40639350]",293,1718045347,macOS Sequoia Preview,story,https://www.apple.com/macos/macos-sequoia-preview/, +57,jordigh,0.0,40636122,,29,1718041744,Nvidia-patch: removes restriction simultaneous video encoding sessions,story,https://github.com/keylase/nvidia-patch, +58,thunderbong,4.0,40637303,"[40643039, 40641822, 40642389, 40639992]",31,1718046610,SQLSync: A collaborative offline-first wrapper around SQLite,story,https://github.com/orbitinghail/sqlsync, +59,ibobev,0.0,40641388,,10,1718070220,Python and OpenGL for Scientific Visualization,story,https://www.labri.fr/perso/nrougier/python-opengl/, +60,mkurz,138.0,40631439,"[40631903, 40631596, 40631652, 40631627, 40631605, 40631927, 40631891, 40631645, 40631762, 40631599, 40632911, 40633018, 40635744, 40631622, 40632138, 40634717, 40631913, 40631582, 40631677, 40631578, 40631587, 40631588, 40631595]",205,1718008722,WebKit fix: Quirk news.ycombinator to skip TextAutoSizing,story,https://github.com/WebKit/WebKit/commit/84ae355619354ee1bfa7daaa1fc95565a6726be3, +61,lnyan,9.0,40631585,"[40641138, 40641102, 40642078, 40641614, 40641146, 40641144]",14,1718010471,Filmed.js: film strip image effect,story,https://www.netzgesta.de/filmed/, +62,TamTech,2.0,40643259,[40643951],10,1718088187,The 'Dead Internet Theory',story,https://theconversation.com/the-dead-internet-theory-makes-eerie-claims-about-an-ai-run-web-the-truth-is-more-sinister-229609, +63,skilled,37.0,40622671,"[40633660, 40633996, 40635496, 40634384, 40633068, 40632857, 40634262, 40633937, 40634902, 40633188, 40635405, 40642828, 40635360, 40622680, 40633860, 40634852]",108,1717918301,PiDP-10 – a modern replica of the PDP-10,story,https://obsolescence.dev/pidp10.html, +64,goles,33.0,40619311,"[40627217, 40628315, 40627354, 40635222, 40627219, 40637678, 40636949]",85,1717870329,Western Pennsylvania dirt is used in the infields of most MLB stadiums (2017),story,https://www.post-gazette.com/sports/pirates/2017/08/31/baseball-infield-dirt-mix-prices-duraedge-pnc-park-wrigley-field-duraedge-slippery-rock-soil-pennsylvania/stories/201708310111, +65,jstanley,1.0,40638445,[40643707],30,1718050769,Printable Popup Horizontal Sundials,story,https://www.blocklayer.com/sundial-popeng, +66,letmutex,4.0,40640635,"[40643010, 40643105]",10,1718063038,Effective substring in Rust,story,https://letmutex.com/article/effective-substring-in-rust, +67,matt_d,21.0,40627563,"[40635746, 40638371, 40639932, 40638648, 40635770, 40636509]",54,1717966635,Deep Dive into Ownership in Mojo,story,https://www.modular.com/blog/deep-dive-into-ownership-in-mojo, +68,adomasm3,11.0,40637089,"[40640838, 40641094]",23,1718045992,"Phthalates, Toxic Plastic Additives, Are Everywhere: What's the Acceptable Limit",story,https://molecularspec.substack.com/p/phthalates-in-food-assessing-intake, +69,_Microft,62.0,40626807,"[40634063, 40628369, 40630764, 40628540, 40627486, 40627796, 40627270, 40631169, 40630288, 40630768, 40627106, 40634963, 40628760, 40631912, 40627828, 40634976, 40628881, 40628468, 40631804, 40634076, 40627702, 40631037, 40632909, 40631410]",597,1717960988,Designing a Lego orrery,story,https://marian42.de/article/orrery/, +70,skilled,21.0,40623497,"[40632764, 40633231, 40633583, 40642456, 40633561, 40634618, 40633546]",67,1717929627,Fingerprinting VPNs with Custom Router Firmware [pdf],story,https://censorbib.nymity.ch/pdf/Almutairi2024a.pdf, +71,padolsey,146.0,40632773,"[40634342, 40634094, 40634238, 40644255, 40635146, 40643393, 40633252, 40633460, 40633182, 40633280, 40634328, 40633218, 40634040, 40633874, 40633789, 40633236, 40633138, 40633222, 40634087, 40634939, 40633197, 40633728, 40633348, 40633545, 40633544, 40633972, 40633779, 40633200, 40634515, 40635088, 40634260, 40639669, 40633806, 40633784, 40633413]",429,1718021986,Show HN: Markdown HN profiles at {user}.at.hn,story,https://at.hn,"Very opportunistic toy project as I saw the domain was up for grabs: 'at.hn' is a little site where people can have their own subdomains for whatever their HN username is (opt-in only by adding a slug to your bio). It doesn't really do much. Just shows your HN bio rendered as markdown plus meta stuff. I'm thinking of adding an aggregated user listing on the homepage so people can explore profiles. There's a bunch of interesting people on HN but discoverability is a bit longwinded. I'm wondering what other features people want. Otherwise shall likely leave it as-is. I remember hnbadges was a thing for a while, but can't remember what happened to it. Did people like that? Anyway, at.hn's on github if people want to contribute. - https://github.com/padolsey/at.hn" +72,rudolfwinestock,174.0,40641361,"[40643491, 40643036, 40642066, 40643232, 40642104, 40642060, 40643631, 40644557, 40642524, 40643549, 40642402, 40643143, 40643904, 40642701, 40643462, 40643371, 40643732, 40642559, 40642827, 40643410, 40642674, 40643316, 40643894, 40643241, 40642523, 40641832, 40643047, 40643152, 40643991]",243,1718069975,Noam Chomsky 'no longer able to talk' after 'medical event',story,https://www.independent.co.uk/arts-entertainment/books/news/noam-chomsky-health-update-tributes-b2559831.html, +73,zeristor,19.0,40622999,"[40638576, 40641030, 40638530, 40640784, 40640859, 40640694, 40638153, 40623013, 40639223]",52,1717923633,From Steampunk to Solarpunk (2008),story,http://republicofthebees.blogspot.com/2008/04/from-steampunk-to-solarpunk.html, +74,tanelpoder,0.0,40641091,,9,1718067484,eBPF BCC to libbpf conversion guide,story,https://nakryiko.com/posts/bcc-to-libbpf-howto-guide/, +75,klaussilveira,0.0,40639742,,9,1718057183,Spilo: High Availability PostgreSQL cluster using Docker,story,https://github.com/zalando/spilo, +76,skilled,128.0,40631223,"[40643112, 40641945, 40642732, 40640794, 40641755, 40641262, 40643429, 40642962, 40643738, 40640968, 40640799, 40641086, 40641034, 40641946, 40640693]",110,1718006015,Anti-Cheat Expert: all your pixels are belong to us,story,https://invlpg.dev/post/ace_screenshots/, +77,ingve,5.0,40630656,"[40639631, 40640054, 40639456, 40640422, 40639552]",37,1717999209,The Engine of the Future,story,https://c0de517e.com/014_future_engines.htm, +78,puzzledpenguin,72.0,40634042,"[40634871, 40634563, 40643941, 40635315, 40634557, 40634839, 40638685, 40635354, 40635823, 40635143, 40640779, 40634915, 40640677, 40638647, 40634929, 40640862, 40638003, 40634587, 40635148, 40639701, 40635962, 40635776, 40635687, 40636986, 40634383, 40635710, 40635080, 40635851, 40634599, 40635713]",275,1718029827,23words.com,story,https://23words.com, +79,ndsipa_pomu,0.0,40643998,,3,1718095913,AI trained on photos from kids' entire childhood without their consent,story,https://arstechnica.com/tech-policy/2024/06/ai-trained-on-photos-from-kids-entire-childhood-without-their-consent/, +80,belter,7.0,40638741,"[40642230, 40640734, 40639673, 40639092, 40639759]",36,1718052024,The British Newspaper Archive,story,https://www.britishnewspaperarchive.co.uk/, +81,cyberlimerence,0.0,40643454,,8,1718089924,Inside Mexico’s anti-avocado militias,story,https://www.theguardian.com/news/article/2024/jun/11/inside-mexico-anti-avocado-militias, +82,dlazaro,5.0,40634269,[40640336],24,1718031177,Creating Perfect Font Fallbacks in CSS,story,https://www.aleksandrhovhannisyan.com/blog/perfect-font-fallbacks/, +83,rachofsunshine,254.0,40634774,"[40637454, 40635310, 40635569, 40635650, 40635549, 40636225, 40644432, 40635344, 40642762, 40640065, 40635435, 40636522, 40635656, 40641021, 40640983, 40635722, 40636564, 40635510, 40638937, 40635632, 40642134, 40637939, 40637864, 40635447, 40636096, 40638746, 40636646, 40635786, 40641296, 40640356, 40635554, 40638210, 40637438, 40639430, 40638675, 40638089, 40638054, 40637575, 40638520, 40639990, 40636098, 40635307, 40641261, 40636772, 40639433]",203,1718034153,Why Triplebyte Failed,story,https://www.otherbranch.com/blog/why-triplebyte-failed, +84,zshrc,115.0,40642328,"[40642727, 40642617, 40642936, 40643191, 40642858, 40642723]",124,1718079989,macOS 15.0 supports Nested Virtualization on M3 chips,story,https://developer.apple.com/documentation/virtualization/vzgenericplatformconfiguration/4360553-isnestedvirtualizationsupported, +85,thesuperbigfrog,19.0,40642272,"[40642731, 40643035, 40643531, 40643288, 40643562, 40642610, 40642686]",22,1718079401,Google is ready to fill free streaming TV channels with ads,story,https://www.theverge.com/2024/6/10/24175676/google-fast-ads-streaming-tv-network, +86,rbanffy,143.0,40631558,"[40631874, 40633095, 40632603, 40632813, 40633490, 40633149, 40632950, 40636187, 40632597, 40633134, 40632146, 40637931, 40631902, 40633371, 40634071, 40632151, 40637921, 40634841, 40632695, 40632431, 40635018, 40639828, 40632841, 40631840, 40631810, 40632299]",174,1718010142,The Mythical Non-Roboticist: Wouldn't it be great if everyone could do robotics?,story,https://spectrum.ieee.org/the-mythical-non-roboticist, +87,todsacerdoti,73.0,40622191,"[40633266, 40631494, 40634567, 40630745, 40634705, 40643687, 40638550, 40635879, 40632928, 40634407, 40631059, 40633670, 40636310, 40633589, 40631672, 40631454]",151,1717909634,Dmv.org,story,https://computer.rip/2024-06-08-dmv.org.html, +88,lnyan,150.0,40634465,"[40635258, 40634735, 40634646, 40634754, 40635238, 40636729, 40635124, 40638931, 40634501, 40637834, 40635259, 40635343, 40636178, 40635351, 40634665, 40635472, 40636538, 40636142, 40637868, 40635896]",233,1718032162,"Gainax, known for 'Evangelion' anime production, goes bankrupt",story,https://www.japantimes.co.jp/business/2024/06/08/evangelion-anime-production-company-bankrupt/, +89,carllippert,50.0,40641116,"[40641544, 40642831, 40641648, 40641561, 40642113, 40641406, 40643225, 40642763, 40642486, 40641654]",59,1718067658,Back To Atoms: Why we can stop building SaaS and build the future instead.,story,https://carllippert.com/back-to-atoms/, +90,segasaturn,5.0,40634186,[40639318],21,1718030644,"0patch – Security Patches for Windows 7, 8, 10, Server 2008, Server 2012",story,https://0patch.com/, +91,hackernj,8.0,40640076,"[40640905, 40641494]",57,1718059225,Wild elephants may have names that other elephants use to call them,story,https://www.npr.org/2024/06/07/nx-s1-4994426/wild-elephants-individual-names, +92,typeofhuman,1.0,40641443,[40643502],18,1718070765,New York Times Responds to Source Code Leak,story,https://www.securityweek.com/new-york-times-responds-to-source-code-leak/, +93,whereistimbo,2.0,40633871,[40638951],26,1718028602,EbookFoundation/Free-Programming-Books,story,https://github.com/EbookFoundation/free-programming-books/blob/main/books/free-programming-books-langs.md, +94,vyrotek,3.0,40638990,"[40642274, 40640767, 40641217]",13,1718053030,Pixel-Composer – Node based VFX compositor for pixel art,story,https://pixel-composer.com, +95,zhengiszen,131.0,40640499,"[40641412, 40640929, 40640975, 40640948, 40640994, 40640960, 40640982, 40642311, 40641288, 40641688, 40642247, 40640992, 40640896, 40641842, 40642204]",129,1718062009,Intel pauses work on $25B Israel fab,story,https://www.theregister.com/2024/06/10/intel_israeli_fab/, +96,aarondf,0.0,40636079,,15,1718041505,SQLSync – collaborative offline-first wrapper around SQLite,story,https://sqlsync.dev/, +97,bookofjoe,85.0,40625959,"[40625962, 40629914, 40634767, 40629578, 40628085, 40638122, 40635553, 40632086, 40628532, 40637225, 40632160, 40626116, 40629847]",100,1717953678,A new world of DIY medical testing,story,https://www.washingtonpost.com/technology/2024/06/09/home-health-tests-doctors-fda/, +98,fanf2,53.0,40626969,"[40627849, 40631545, 40628142, 40628172, 40631576, 40628624, 40627830, 40632258, 40633365, 40628350, 40631074, 40630851, 40628212, 40630249, 40629557, 40631094, 40628251]",312,1717962123,Libtree: Ldd as a tree saying why a library is found or not,story,https://github.com/haampie/libtree, +99,rbanffy,1.0,40639299,[40642392],12,1718054634,Django Enhancement Proposal 14: Background Workers,story,https://www.djangoproject.com/weblog/2024/may/29/django-enhancement-proposal-14-background-workers/, diff --git a/analytics/dagster/data/topstory_ids.json b/analytics/dagster/data/topstory_ids.json new file mode 100644 index 000000000..5c9ee3b91 --- /dev/null +++ b/analytics/dagster/data/topstory_ids.json @@ -0,0 +1 @@ +[40643207, 40643181, 40643167, 40639506, 40642871, 40642476, 40630952, 40631796, 40639606, 40623864, 40639628, 40639450, 40644323, 40635397, 40636292, 40640927, 40632064, 40640833, 40633003, 40643071, 40638764, 40641848, 40636844, 40637102, 40632745, 40636883, 40641704, 40641795, 40633902, 40639032, 40640424, 40641615, 40641932, 40644454, 40631614, 40637374, 40632533, 40637785, 40631466, 40635697, 40631573, 40635789, 40633773, 40642801, 40644605, 40632397, 40615002, 40644111, 40630699, 40643499, 40622209, 40644459, 40627113, 40638386, 40640534, 40643744, 40636854, 40636122, 40637303, 40641388, 40631439, 40631585, 40643259, 40622671, 40619311, 40638445, 40640635, 40627563, 40637089, 40626807, 40623497, 40632773, 40641361, 40622999, 40641091, 40639742, 40631223, 40630656, 40634042, 40643998, 40638741, 40643454, 40634269, 40634774, 40642328, 40642272, 40631558, 40622191, 40634465, 40641116, 40634186, 40640076, 40641443, 40633871, 40638990, 40640499, 40636079, 40625959, 40626969, 40639299] \ No newline at end of file diff --git a/analytics/dagster/requirements.txt b/analytics/dagster/requirements.txt new file mode 100644 index 000000000..57e66a4a0 --- /dev/null +++ b/analytics/dagster/requirements.txt @@ -0,0 +1,3 @@ +matplotlib +pandas +requests \ No newline at end of file diff --git a/analytics/dagster/src/__init__.py b/analytics/dagster/src/__init__.py new file mode 100644 index 000000000..5577505ee --- /dev/null +++ b/analytics/dagster/src/__init__.py @@ -0,0 +1 @@ +from .definitions import defs as defs \ No newline at end of file diff --git a/analytics/dagster/src/__pycache__/__init__.cpython-310.pyc b/analytics/dagster/src/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3673b6f4797cbf0765afe2fdcae2c80e4ad672c5 GIT binary patch literal 174 zcmd1j<>g`kf;5ASG*uw|7{oyaOhAqU5Elyoi4=wu#vF!R#wbQch7_h?22JLdj6h*c z##<~YscFT2noPI2fsD+&%#zIfyy7B|=9LUZEI@&008UFCv*S+ literal 0 HcmV?d00001 diff --git a/analytics/dagster/src/__pycache__/definitions.cpython-310.pyc b/analytics/dagster/src/__pycache__/definitions.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2c7c087b51743f8555228cb02ff3658a802df375 GIT binary patch literal 467 zcmYjNyH3L}6tx{UX#$0c0kP3F2sQlxgcukROE)i;iEo-f99MQ3+Lf8#K;oCYGW8c& ziroirEg$PX&XLd6gF%LTe2nXAg%SD+$^W1}x%1aPc#ufq7AefIB8;erMT#C1GA@@(xDQBK{_JycECEE?TecQ!3+ zkgqNL72QyV0pP+~Sny~4l?mv+elXxYU9t!p`PhBF#%r3wf;(I0v5Y}XRVH9l{ z>|BbbU2+4J0bBnHI_kaHPh?I5A;9-_q34TYFL`)*trZm3l*QKjZaL%m()Dk44sU}) TJM?gYjuS#M9B&BCaJKmYWNm`H literal 0 HcmV?d00001 diff --git a/analytics/dagster/src/assets/__init__.py b/analytics/dagster/src/assets/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/analytics/dagster/src/assets/__pycache__/__init__.cpython-310.pyc b/analytics/dagster/src/assets/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1dc98a97d53eb5816bb0bb4cdded214f55050b64 GIT binary patch literal 141 zcmd1j<>g`kf@69aX(0MBh(HF6K#l_t7qb9~6oz01O-8?!3`HPe1o6v2Kfj# D-`XD& literal 0 HcmV?d00001 diff --git a/analytics/dagster/src/assets/__pycache__/hackernews.cpython-310.pyc b/analytics/dagster/src/assets/__pycache__/hackernews.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a7855bbdb80d15899113345f757f2a4b8b9af1be GIT binary patch literal 3443 zcmbVO-ESMm5x?C#9*@T(CE2np*|KqQKO77XcZW9-Xn$||$USJR>?vgs| zKIq-0WD!RofZRu;DEinMAoCXVz0dtOxqH+u6C9+28zT zi$=q@;Q3p7yZ`rwW&Hyur=JQ;eguE{7igHpNNPp%oO5do-sn3lU=1U&1QJU|qRC#DSYV4r;tV7Atk#)Bu-)2h5ncW9zT)!k&s&Z@_mPxb z{p1PnjZ{+P9~ZgePn2HzELL13ahi~Ji{ zIztUqr0z10!u8S{?c1`+UTjD%e^WyzZF;pEE4lhfx&fHJq z=5SvJM~)d?0m1TF9$!|qsb9i);+F2T&U~{w z06F)7sK&GjZx?3Pba4hL!0LaZEO>^w+!+5EL7OO^$q+HY!Xh`Q-4Ee#--z3}@GHoM zcU73h`=Np}4!?ITlpI0_TOkYjT=llYI89AHn0@~T!p%MdX91LIX(Beq@BDAL#;u)k z#bESs88!<%&av}Jp~7+V$ZZLR#-jb=>}hO6(zbhYw?(6RKFYcRoDxjx>QM0AgztR~ zNs%4#SZhfb)k|oclNK=(3;3x;oV)Q55YM!o0`Bd!7-%QS`-P@MrfYW~nSLsO)FM|E zUHF`Ea3dB8O*5vQZ633z4r^H~c382Ox2obIimYIR(A8C8XjMj4bQUraJ^%wvS>NQ% zyjew2|0K;Xz+7H|CU66?{?f5MvO;{aNLLB{(x*P5)TN8W`Nn-I+8`D=pQnX-EgA4E zUM0h?A+7^RhOY=D0x&J3m^R~&q=H*kS!G6pVyfG)M9l2wapr=iQIn2 zIv^8I)gJi_%GWbYrrNZwg3_BdX0T>PO&lS^gWLiX%QNikGc%hmvSpJ(wdv9f7+#$@ zuqHK-I$PG*IksY?mZ#^++H?g|YT~o=r9VACvmhx~pWBnVT4ihO0#HM95~#K5h0-g7 zqgv80Vf!~|3R1)QbBGa-R3IXLuiIc zK9@*T3I(U=zmmsVbW${qOH#X4*Cf_9XpLx$Jl)q+0WcH5X}=KKhKiz{Jt#b|2J49w zm*L2DMd265&Gx$+Z~gG*hi|vOczE^7hDoeL%&yl!Lg59%`5ycwf-bR2>tSs|O2Vu) z3tF;f{eeEl0!obzaTBBrXrb=Y_BHJc<3#)ily9-5r|>3l`%_enKy)oigO~2h@l^~@ z_c%OV6Q1_1wCKgDyxl(8`I@C@I$CQiMRg^>c@3s5x_QbbqREqgH^#e_#@QB&Z~qLo zNmSxl1 zblr98eiOLsaLw^y&9$2_TC@Y&1j1i-#6$FtiW+pAt+V0*^a4wmI0Fr&dtsufmjEbu zz-hA89`55vyYpWB0tMY6@I)-l!JE3Lt4DV None: + """Get up to 100 top stories from the HackerNews topstories endpoint. + + API Docs: https://github.com/HackerNews/API#new-top-and-best-stories + """ + newstories_url = "https://hacker-news.firebaseio.com/v0/topstories.json" + top_new_story_ids = requests.get(newstories_url).json()[:100] + + os.makedirs("data", exist_ok=True) + with open("data/topstory_ids.json", "w") as f: + json.dump(top_new_story_ids, f) + + +@asset(deps=[topstory_ids], group_name="hackernews", compute_kind="HackerNews API") +def topstories(context: AssetExecutionContext) -> MaterializeResult: + """Get items based on story ids from the HackerNews items endpoint. It may take 30 seconds to fetch all 100 items. + + API Docs: https://github.com/HackerNews/API#items + """ + with open("data/topstory_ids.json", "r") as f: + topstory_ids = json.load(f) + + results = [] + for item_id in topstory_ids: + item = requests.get(f"https://hacker-news.firebaseio.com/v0/item/{item_id}.json").json() + results.append(item) + + if len(results) % 20 == 0: + context.log.info(f"Got {len(results)} items so far.") + + df = pd.DataFrame(results) + df.to_csv("data/topstories.csv") + + return MaterializeResult( + metadata={ + "num_records": len(df), # Metadata can be any key-value pair + "preview": MetadataValue.md(df.head().to_markdown()), + # The `MetadataValue` class has useful static methods to build Metadata + } + ) + + +@asset(deps=[topstories], group_name="hackernews", compute_kind="Plot") +def most_frequent_words(context: AssetExecutionContext) -> MaterializeResult: + """Get the top 25 most frequent words in the titles of the top 100 HackerNews stories.""" + stopwords = ["a", "the", "an", "of", "to", "in", "for", "and", "with", "on", "is"] + + topstories = pd.read_csv("data/topstories.csv") + + # loop through the titles and count the frequency of each word + word_counts = {} + for raw_title in topstories["title"]: + title = raw_title.lower() + for word in title.split(): + cleaned_word = word.strip(".,-!?:;()[]'\"-") + if cleaned_word not in stopwords and len(cleaned_word) > 0: + word_counts[cleaned_word] = word_counts.get(cleaned_word, 0) + 1 + + # Get the top 25 most frequent words + top_words = { + pair[0]: pair[1] + for pair in sorted(word_counts.items(), key=lambda x: x[1], reverse=True)[:25] + } + + # Make a bar chart of the top 25 words + plt.figure(figsize=(10, 6)) + plt.bar(list(top_words.keys()), list(top_words.values())) + plt.xticks(rotation=45, ha="right") + plt.title("Top 25 Words in Hacker News Titles") + plt.tight_layout() + + # Convert the image to a saveable format + buffer = BytesIO() + plt.savefig(buffer, format="png") + image_data = base64.b64encode(buffer.getvalue()) + + # Convert the image to Markdown to preview it within Dagster + md_content = f"![img](data:image/png;base64,{image_data.decode()})" + + with open("data/most_frequent_words.json", "w") as f: + json.dump(top_words, f) + + # Attach the Markdown content as metadata to the asset + return MaterializeResult(metadata={"plot": MetadataValue.md(md_content)}) \ No newline at end of file diff --git a/analytics/dagster/src/definitions.py b/analytics/dagster/src/definitions.py new file mode 100644 index 000000000..ec2ebeea2 --- /dev/null +++ b/analytics/dagster/src/definitions.py @@ -0,0 +1,16 @@ +from dagster import ( + Definitions, + ScheduleDefinition, + define_asset_job, + load_assets_from_package_module, +) + +from . import assets + +daily_refresh_schedule = ScheduleDefinition( + job=define_asset_job(name="all_assets_job"), cron_schedule="0 0 * * *" +) + +defs = Definitions( + assets=load_assets_from_package_module(assets), schedules=[daily_refresh_schedule] +) \ No newline at end of file diff --git a/analytics/dagster/workspace.yaml b/analytics/dagster/workspace.yaml new file mode 100644 index 000000000..92341baec --- /dev/null +++ b/analytics/dagster/workspace.yaml @@ -0,0 +1,2 @@ +load_from: + - python_module: src \ No newline at end of file diff --git a/analytics/docker-compose.yml b/analytics/docker-compose.yml new file mode 100644 index 000000000..ff7a6f83b --- /dev/null +++ b/analytics/docker-compose.yml @@ -0,0 +1,27 @@ +services: + dagster: + build: + context: dagster/ + dockerfile: Dockerfile + volumes: + - ./dagster:/opt/dagster/dagster_home + env_file: + - .env + ports: + - 3000:3000 + + postgres: + image: postgres:latest + ports: + - 54322:5432 + env_file: + - .env + environment: + - POSTGRES_PASSWORD=${DAGSTER_PG_PASSWORD} + - POSTGRES_USER=${DAGSTER_PG_USERNAME} + - POSTGRES_DB=${DAGSTER_PG_DB} + volumes: + - dagster-postgres:/var/lib/postgresql/data +volumes: + dagster-postgres: + driver: local \ No newline at end of file From c1ff41ef22a1565e00c5f03076b2960d033037ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Courivaud?= Date: Tue, 11 Jun 2024 13:25:36 +0200 Subject: [PATCH 2/7] remove pycache and pyc file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphaël Courivaud --- .../src/__pycache__/__init__.cpython-310.pyc | Bin 174 -> 0 bytes .../src/__pycache__/definitions.cpython-310.pyc | Bin 467 -> 0 bytes .../assets/__pycache__/__init__.cpython-310.pyc | Bin 141 -> 0 bytes .../__pycache__/hackernews.cpython-310.pyc | Bin 3443 -> 0 bytes 4 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 analytics/dagster/src/__pycache__/__init__.cpython-310.pyc delete mode 100644 analytics/dagster/src/__pycache__/definitions.cpython-310.pyc delete mode 100644 analytics/dagster/src/assets/__pycache__/__init__.cpython-310.pyc delete mode 100644 analytics/dagster/src/assets/__pycache__/hackernews.cpython-310.pyc diff --git a/analytics/dagster/src/__pycache__/__init__.cpython-310.pyc b/analytics/dagster/src/__pycache__/__init__.cpython-310.pyc deleted file mode 100644 index 3673b6f4797cbf0765afe2fdcae2c80e4ad672c5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 174 zcmd1j<>g`kf;5ASG*uw|7{oyaOhAqU5Elyoi4=wu#vF!R#wbQch7_h?22JLdj6h*c z##<~YscFT2noPI2fsD+&%#zIfyy7B|=9LUZEI@&008UFCv*S+ diff --git a/analytics/dagster/src/__pycache__/definitions.cpython-310.pyc b/analytics/dagster/src/__pycache__/definitions.cpython-310.pyc deleted file mode 100644 index 2c7c087b51743f8555228cb02ff3658a802df375..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 467 zcmYjNyH3L}6tx{UX#$0c0kP3F2sQlxgcukROE)i;iEo-f99MQ3+Lf8#K;oCYGW8c& ziroirEg$PX&XLd6gF%LTe2nXAg%SD+$^W1}x%1aPc#ufq7AefIB8;erMT#C1GA@@(xDQBK{_JycECEE?TecQ!3+ zkgqNL72QyV0pP+~Sny~4l?mv+elXxYU9t!p`PhBF#%r3wf;(I0v5Y}XRVH9l{ z>|BbbU2+4J0bBnHI_kaHPh?I5A;9-_q34TYFL`)*trZm3l*QKjZaL%m()Dk44sU}) TJM?gYjuS#M9B&BCaJKmYWNm`H diff --git a/analytics/dagster/src/assets/__pycache__/__init__.cpython-310.pyc b/analytics/dagster/src/assets/__pycache__/__init__.cpython-310.pyc deleted file mode 100644 index 1dc98a97d53eb5816bb0bb4cdded214f55050b64..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 141 zcmd1j<>g`kf@69aX(0MBh(HF6K#l_t7qb9~6oz01O-8?!3`HPe1o6v2Kfj# D-`XD& diff --git a/analytics/dagster/src/assets/__pycache__/hackernews.cpython-310.pyc b/analytics/dagster/src/assets/__pycache__/hackernews.cpython-310.pyc deleted file mode 100644 index a7855bbdb80d15899113345f757f2a4b8b9af1be..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3443 zcmbVO-ESMm5x?C#9*@T(CE2np*|KqQKO77XcZW9-Xn$||$USJR>?vgs| zKIq-0WD!RofZRu;DEinMAoCXVz0dtOxqH+u6C9+28zT zi$=q@;Q3p7yZ`rwW&Hyur=JQ;eguE{7igHpNNPp%oO5do-sn3lU=1U&1QJU|qRC#DSYV4r;tV7Atk#)Bu-)2h5ncW9zT)!k&s&Z@_mPxb z{p1PnjZ{+P9~ZgePn2HzELL13ahi~Ji{ zIztUqr0z10!u8S{?c1`+UTjD%e^WyzZF;pEE4lhfx&fHJq z=5SvJM~)d?0m1TF9$!|qsb9i);+F2T&U~{w z06F)7sK&GjZx?3Pba4hL!0LaZEO>^w+!+5EL7OO^$q+HY!Xh`Q-4Ee#--z3}@GHoM zcU73h`=Np}4!?ITlpI0_TOkYjT=llYI89AHn0@~T!p%MdX91LIX(Beq@BDAL#;u)k z#bESs88!<%&av}Jp~7+V$ZZLR#-jb=>}hO6(zbhYw?(6RKFYcRoDxjx>QM0AgztR~ zNs%4#SZhfb)k|oclNK=(3;3x;oV)Q55YM!o0`Bd!7-%QS`-P@MrfYW~nSLsO)FM|E zUHF`Ea3dB8O*5vQZ633z4r^H~c382Ox2obIimYIR(A8C8XjMj4bQUraJ^%wvS>NQ% zyjew2|0K;Xz+7H|CU66?{?f5MvO;{aNLLB{(x*P5)TN8W`Nn-I+8`D=pQnX-EgA4E zUM0h?A+7^RhOY=D0x&J3m^R~&q=H*kS!G6pVyfG)M9l2wapr=iQIn2 zIv^8I)gJi_%GWbYrrNZwg3_BdX0T>PO&lS^gWLiX%QNikGc%hmvSpJ(wdv9f7+#$@ zuqHK-I$PG*IksY?mZ#^++H?g|YT~o=r9VACvmhx~pWBnVT4ihO0#HM95~#K5h0-g7 zqgv80Vf!~|3R1)QbBGa-R3IXLuiIc zK9@*T3I(U=zmmsVbW${qOH#X4*Cf_9XpLx$Jl)q+0WcH5X}=KKhKiz{Jt#b|2J49w zm*L2DMd265&Gx$+Z~gG*hi|vOczE^7hDoeL%&yl!Lg59%`5ycwf-bR2>tSs|O2Vu) z3tF;f{eeEl0!obzaTBBrXrb=Y_BHJc<3#)ily9-5r|>3l`%_enKy)oigO~2h@l^~@ z_c%OV6Q1_1wCKgDyxl(8`I@C@I$CQiMRg^>c@3s5x_QbbqREqgH^#e_#@QB&Z~qLo zNmSxl1 zblr98eiOLsaLw^y&9$2_TC@Y&1j1i-#6$FtiW+pAt+V0*^a4wmI0Fr&dtsufmjEbu zz-hA89`55vyYpWB0tMY6@I)-l!JE3Lt4DV Date: Tue, 11 Jun 2024 13:25:56 +0200 Subject: [PATCH 3/7] add pycache and pyc exclusion in gitignore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphaël Courivaud --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index d3911e790..26436ffc3 100644 --- a/.gitignore +++ b/.gitignore @@ -35,4 +35,6 @@ yarn-error.log* .clever.json # Data Stack +.pyc +__pycache__ analytics/dagster/storage/ From 2da8b25557bba61258cbc48ecb8d792a74b8981c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Courivaud?= Date: Tue, 11 Jun 2024 13:30:53 +0200 Subject: [PATCH 4/7] specify analytics directory to trigger the pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphaël Courivaud --- .github/workflows/github-actions-data-stack.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/github-actions-data-stack.yml b/.github/workflows/github-actions-data-stack.yml index d35514fb1..2960bdaff 100644 --- a/.github/workflows/github-actions-data-stack.yml +++ b/.github/workflows/github-actions-data-stack.yml @@ -1,6 +1,8 @@ name: Data Stack CI on: [push] + paths: + - analytics/** jobs: deploy-dagster: From de050d7376ce057a01d96f407fcac95e0a13ee54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Courivaud?= Date: Tue, 11 Jun 2024 13:34:18 +0200 Subject: [PATCH 5/7] fix github action definition of modified directory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphaël Courivaud --- .github/workflows/github-actions-data-stack.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/github-actions-data-stack.yml b/.github/workflows/github-actions-data-stack.yml index 2960bdaff..5a4400771 100644 --- a/.github/workflows/github-actions-data-stack.yml +++ b/.github/workflows/github-actions-data-stack.yml @@ -1,7 +1,8 @@ name: Data Stack CI -on: [push] - paths: +on: + push: + paths: - analytics/** jobs: From 1461c77ab4337339b26bd2bdf9f012ea792f1140 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Courivaud?= Date: Tue, 11 Jun 2024 14:47:18 +0200 Subject: [PATCH 6/7] fix lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphaël Courivaud --- analytics/dagster/src/__init__.py | 2 +- analytics/dagster/src/assets/hackernews.py | 6 ++++-- analytics/dagster/src/definitions.py | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/analytics/dagster/src/__init__.py b/analytics/dagster/src/__init__.py index 5577505ee..3c25b881e 100644 --- a/analytics/dagster/src/__init__.py +++ b/analytics/dagster/src/__init__.py @@ -1 +1 @@ -from .definitions import defs as defs \ No newline at end of file +from .definitions import defs as defs diff --git a/analytics/dagster/src/assets/hackernews.py b/analytics/dagster/src/assets/hackernews.py index de008a19d..11059a0c1 100644 --- a/analytics/dagster/src/assets/hackernews.py +++ b/analytics/dagster/src/assets/hackernews.py @@ -34,7 +34,9 @@ def topstories(context: AssetExecutionContext) -> MaterializeResult: results = [] for item_id in topstory_ids: - item = requests.get(f"https://hacker-news.firebaseio.com/v0/item/{item_id}.json").json() + item = requests.get( + f"https://hacker-news.firebaseio.com/v0/item/{item_id}.json" + ).json() results.append(item) if len(results) % 20 == 0: @@ -93,4 +95,4 @@ def most_frequent_words(context: AssetExecutionContext) -> MaterializeResult: json.dump(top_words, f) # Attach the Markdown content as metadata to the asset - return MaterializeResult(metadata={"plot": MetadataValue.md(md_content)}) \ No newline at end of file + return MaterializeResult(metadata={"plot": MetadataValue.md(md_content)}) diff --git a/analytics/dagster/src/definitions.py b/analytics/dagster/src/definitions.py index ec2ebeea2..3070b3e35 100644 --- a/analytics/dagster/src/definitions.py +++ b/analytics/dagster/src/definitions.py @@ -13,4 +13,4 @@ defs = Definitions( assets=load_assets_from_package_module(assets), schedules=[daily_refresh_schedule] -) \ No newline at end of file +) From a9060cca6ec897ebe49812e81642fc2a230908bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Courivaud?= Date: Tue, 11 Jun 2024 16:52:55 +0200 Subject: [PATCH 7/7] add alias to application and update configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphaël Courivaud --- .github/workflows/github-actions-data-stack.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/github-actions-data-stack.yml b/.github/workflows/github-actions-data-stack.yml index 5a4400771..78e97818e 100644 --- a/.github/workflows/github-actions-data-stack.yml +++ b/.github/workflows/github-actions-data-stack.yml @@ -9,6 +9,6 @@ jobs: deploy-dagster: uses: ./.github/workflows/deploy.yml with: - app: dagster + app: dagster-production branch: main secrets: inherit