The following code snippets may be executed in a Microsoft Fabric notebook. These scripts primarily leverage the Semantic Link (a.k.a. SemPy) and Semantic Link Labs libraries.
This repo is intended to help business intelligence analysts/developers and data scientists become more familiar with notebooks (and therefore Python) in Microsoft Fabric and the potential benefits of using the Semantic Link/Labs libraries.
Semantic-link library version 0.7.2 or higher
Load the Semantic Link and Semantic Link Labs libraries inside of your notebook
%pip install semantic-link-labs
import sempy_labs as labs
import sempy.fabric as fabric
An even better way to ensure the Semantic-link library is available in your workspace/notebooks is to load it as a library in a custom Fabric environment. If you do this, you will not have to run the above '%pip install semantic-link' code every time in your notebook. Please follow the steps below to create an environment and load the Semantic-Link library.
- Navigate to your Fabric workspace
- Click 'New' -> More options
- Within 'Data Science', click 'Environment'
- Name your environment, click 'Create'
- Within 'Public Libraries', click 'Add from PyPI'
- Enter 'Semantic-link' in the text box under 'Library'
- Click 'Save' at the top right of the screen
- Click 'Publish' at the top right of the screen
- Click 'Publish All'
Update your notebook to use the new environment (must wait for the environment to finish publishing)
- Navigate to your Notebook
- Select your newly created environment within the 'Environment' drop down in the navigation bar at the top of the notebook
import sempy
import sempy.fabric as fabric
import sempy_labs as labs
import pandas as pd
import numpy as np
import pyspark
import pyspark.sql.functions as F
import pyarrow.dataset as ds
import pyarrow.parquet as pq
import os
from delta.tables import DeltaTable
from powerbiclient import Report
import sempy
import sempy.fabric
dir(sempy.fabric)
import inspect
import sempy
import sempy.fabric as fabric
all_members = inspect.getmembers(fabric) #Enter the library alias
for name, member in all_members:
if inspect.isfunction(member):
print(name)
import inspect
import sempy
import sempy.fabric as fabric
functions = inspect.getmembers(fabric,inspect.isfunction) #Enter the library alias
for name, func in functions:
signature = inspect.signature(func)
parameters = list(signature.parameters.keys())
print(f"{name} : {parameters}\n")
import sempy
import sempy.fabric as fabric
help(fabric.list_datasets) #Replace 'list_datasets' within any function shown in dir(sempy.fabric) output
import pkg_resources
library_name = 'semantic-link' #Enter the name of the library
version = pkg_resources.get_distribution(library_name).version
version
import requests
library_name = 'semantic-link' #Enter the name of the library
url = f"https://pypi.org/pypi/{library_name}/json"
response = requests.get(url)
data = response.json()
latest_version = data["info"]["version"]
print(f"The latest version of '{library_name}' is: {latest_version}")
import pkg_resources
import requests
library_name = 'semantic-link' #Enter the name of the library
version = pkg_resources.get_distribution(library_name).version
url = f"https://pypi.org/pypi/{library_name}/json"
response = requests.get(url)
data = response.json()
latest_version = data["info"]["version"]
if version == latest_version:
print(f"You have the latest version of '{library_name}' installed.")
else:
print(f"A new version '{latest_version}' of the '{library_name}' library is available.")
import pkg_resources
import pandas as pd
installed_packages = [pkg.key for pkg in pkg_resources.working_set]
title = 'Installed Packages'
df = pd.DataFrame({title: installed_packages})
display(df)
Refresh the TOM cache. If the semantic model has been updated during your notebook session, run this script to ensure you get the latest model metadata.
import sempy
import sempy.fabric as fabric
fabric.refresh_tom_cache()
import sempy
import sempy.fabric as fabric
x = fabric.list_capacities()
x
import sempy
import sempy.fabric as fabric
x = fabric.get_artifact_id()
x
import sempy
import sempy.fabric as fabric
x = fabric.get_lakehouse_id()
x
import sempy
import sempy.fabric as fabric
import pandas as pd
def get_sql_endpoint(workspaceName = None, lakehouseName = None):
if workspaceName == None:
workspaceID = fabric.get_workspace_id()
else:
workspaceID = fabric.resolve_workspace_id(workspaceName)
if lakehouseName == None:
lakehouseID = fabric.get_lakehouse_id()
else:
dfItems = fabric.list_items()
dfItems = dfItems[dfItems['Display Name'] == lakehouseName and dfItems['Type'] == 'Lakehouse']
lakehouseID = dfItems['Id'].iloc[0]
workspaceID = fabric.get_workspace_id()
lakehouseID = fabric.get_lakehouse_id()
client = fabric.FabricRestClient()
response = client.get(f"/v1/workspaces/{workspaceID}/lakehouses/{lakehouseID}")
responseJson = response.json()
sqlEndpoint = responseJson['properties']['sqlEndpointProperties']['connectionString']
return sqlEndpoint
get_sql_endpoint()
Show a list of the tables in your lakehouse using the List Tables API
import sempy
import sempy.fabric as fabric
import pandas as pd
import json
def get_lakehouse_tables(workspaceId = None, lakehouseId = None):
if workspaceId == None:
workspaceId = fabric.get_workspace_id()
if lakehouseId == None:
lakehouseId = fabric.get_lakehouse_id()
client = fabric.FabricRestClient()
wName = fabric.resolve_workspace_name(workspaceId)
response = client.get(f"/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/tables")
tableList = response.json()['data']
dfItems = fabric.list_items()
dfItems = dfItems[dfItems['Id'] == lakehouseId]
lakehouseName = dfItems['Display Name'].iloc[0]
df = pd.DataFrame({'Workspace Name': [], 'Lakehouse Name': [], 'Table Name': [], 'Type': [], 'Location': [], 'Format': []})
for t in tableList:
tName = t['name']
tType = t['type']
tLocation = t['location']
tFormat = t['format']
new_data = {'Workspace Name': wName, 'Lakehouse Name': lakehouseName, 'Table Name': tName, 'Type': tType, 'Location': tLocation, 'Format': tFormat}
df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
return df
get_lakehouse_tables()
import sempy
import sempy.fabric as fabric
import pandas as pd
import os
def load_table(tablename, source, mode = None, workspaceId = None, lakehouseId = None):
#Mode values: Overwrite; Append
if workspaceId == None:
workspaceId = fabric.get_workspace_id()
if lakehouseId == None:
lakehouseId = fabric.get_lakehouse_id()
if mode == None:
mode = 'Overwrite'
if not mode in ['Overwrite', 'Append']:
return print("Invalid Mode value. Mode must be either 'Overwrite' or 'Append'.")
payload = None
file_extension = os.path.splitext(source)[1]
if file_extension == '.csv':
payload = {
"relativePath": source,
"pathType": "File",
"mode": mode,
"formatOptions": {
"format": "Csv",
"header": "true",
"delimiter": ","
}
}
elif file_extension == '.parquet':
payload = {
"relativePath": source,
"pathType": "File",
"mode": mode,
"formatOptions": {
"format": "Parquet",
"header": "true"
}
}
else:
return print("Invalid file extension. Only .csv and .parquet are supported.")
client = fabric.FabricRestClient()
response = client.post(f"/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/tables/{tableName}/load",json= payload)
return response
load_table(
tablename = 'TestFile' #Enter the name of the table to be created
,source = 'Files/Folder/Myfilename.parquet' #Enter the file path of your Parquet file
)
import sempy
import sempy.fabric as fabric
workspace = None #Enter the name of your workspace
df = fabric.list_items(workspace=workspace)
df
import sempy
import sempy.fabric as fabric
df = fabric.list_workspaces().sort_values(by='Name', ascending=True)
df
import sempy
import sempy.fabric as fabric
workspace_name = '' #Enter the workspace name to be used as a filter
dfW = fabric.list_workspaces()
dfW_filt = dfW[dfW['Name'].isin([workspace_name])]
dfW_filt
import sempy
import sempy.fabric as fabric
workspaceName = '' #Enter the workspace name to be used as a filter
x = fabric.list_workspaces()
filter_condition = [workspaceName]
x = x[x['Name'].isin(filter_condition)]
y = x["Id"].iloc[0]
z = fabric.resolve_workspace_name(y)
z
import sempy
import sempy.fabric as fabric
import pandas as pd
dfC = fabric.list_capacities()
dfW = fabric.list_workspaces().sort_values(by='Name', ascending=True)
dfC.rename(columns={'Id': 'Capacity Id'}, inplace=True)
dfC['Capacity Id'] = dfC['Capacity Id'].str.upper()
x = pd.merge(dfW, dfC[['Capacity Id', 'Sku', 'Region', 'State']], on='Capacity Id', how='inner')
x
import sempy
import sempy.fabric as fabric
workspace = '' #Enter the workspace name
id = fabric.resolve_workspace_id(workspace=workspace)
id
import sempy
import sempy.fabric as fabric
id = '' #Enter the workspace ID
x = fabric.resolve_workspace_name(id)
x
import sempy
import sempy.fabric as fabric
fabric.get_workspace_id()
import sempy_labs as labs
labs.assign_workspace_to_capacity(capacity_name='new capacity', workspace='Workspace to be moved')
import sempy_labs as labs
labs.add_user_to_workspace(email_address='[email protected]', role_name='Member', workspace='test workspace')
import sempy
import sempy.fabric as fabric
x = fabric.list_apps()
x
import sempy
import sempy.fabric as fabric
x = fabric.list_gateways()
x
import sempy
import sempy.fabric as fabric
x = fabric.list_reports()
x
import sempy_labs.report as rep
report_name = '' #Enter the report name
dataset_name = '' #Enter the dataset name
report_workspace = None
dataset_workspace = None
for item in report_list:
report_rebind(report=report_name, dataset=dataset_name, report_workspace=report_workspace, dataset_workspace=dataset_workspace)
- First run this code to create the report_rebind function.
report_list = ['', '', '', '', ''] #Enter your list of reports
datasetName = '' #Enter the dataset name
for item in report_list:
report_rebind(item, datasetName)
import sempy
import sempy.fabric as fabric
from powerbiclient import Report
def launch_report(reportName):
dfR = fabric.list_reports()
filtered_df = dfR[(dfR['Name'] == reportName)]
report_id = filtered_df['Id'].iloc[0]
report = Report(group_id=None, report_id=report_id)
return report
launch_report('') #Enter report name
import sempy
import sempy.fabric as fabric
import json
import pandas as pd
def list_dashboards(workspaceName=None):
df = pd.DataFrame(columns=['Dashboard ID', 'Dashboard Name', 'Read Only', 'Web URL', 'Embed URL', 'Data Classification'])
if workspaceName == 'None':
groupId = fabric.get_workspace_id()
else:
groupId = fabric.resolve_workspace_id(workspaceName)
client = fabric.PowerBIRestClient()
response = client.get(f"/v1.0/myorg/groups/{groupId}/dashboards")
responseJson = response.json()
for v in responseJson['value']:
dashboardID = v['id']
displayName = v['displayName']
isReadOnly = v['isReadOnly']
webURL = v['webUrl']
embedURL = v['embedUrl']
dataClass = v['dataClassification']
new_data = {'Dashboard ID': dashboardID, 'Dashboard Name': displayName, 'Read Only': isReadOnly, 'Web URL': webURL, 'Embed URL': embedURL, 'Data Classification': dataClass}
df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
df['Read Only'] = df['Read Only'].astype(bool)
return df
list_dashboards()
import sempy
import sempy.fabric as fabric
x = fabric.list_datasets()
x
import sempy
import sempy.fabric as fabric
x = fabric.list_datasets(additional_xmla_properties=['Model.DefaultMode', 'Model.DirectLakeBehavior', 'CompatibilityLevel'])
x
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter your dataset name
x = fabric.list_datasets()
x = x[x['Dataset Name'] == datasetName]
datasetID = x["Dataset ID"].values[0]
datasetID
Shows the TMSL for a given dataset
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter your dataset name
workspaceName = '' #Enter your workspace name
x = fabric.get_tmsl(datasetName, workspaceName)
print(x)
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
x = fabric.list_datasources(datasetName)
x
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
x = fabric.list_tables(datasetName)
x
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
x = fabric.list_columns(datasetName)
x
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
x = fabric.list_partitions(datasetName)
x
import sempy
import sempy.fabric as fabric
import pandas as pd
datasetName = '' #Enter dataset name
x = fabric.list_partitions(datasetName)
def format_sql_query(value):
return value.replace('\n', '<br>').replace('\t', ' ')
x_styled = x.style.format({'Query': format_sql_query})
x_styled
List the tables of a Direct Lake semantic model and show the lakehouse table from which it derives ('Query')
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
dfP = fabric.list_partitions(datasetName)
dfP_filtered = dfP[dfP['Mode'] == 'DirectLake'][['Table Name', 'Query']]
dfP_filtered
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
x = fabric.list_measures(datasetName)
x
List the measures within a given dataset (semantic model) and properly format the Measure Expression column
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
x = fabric.list_measures(datasetName)
def format_sql_query(value):
return value.replace('\n', '<br>').replace('\t', ' ')
x_styled = x.style.format({'Measure Expression': format_sql_query})
x_styled
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
x = fabric.list_hierarchies(datasetName)
x
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
x = fabric.list_relationships(datasetName)
x
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
relationships = fabric.list_relationships(datasetName)
plot_relationship_metadata(relationships)
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
x = fabric.get_roles(datasetName)
x
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
x = fabric.get_roles(dataset = datasetName, include_members = True)
x
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
x = fabric.get_row_level_security_permissions(datasetName)
x
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
x = fabric.list_calculation_items(datasetName)
x
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
x = fabric.list_perspectives(datasetName)
x
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
x = fabric.list_translations(datasetName)
x
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
x = fabric.list_expressions(datasetName)
x
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
x = fabric.list_annotations(datasetName)
x
import sempy
import sempy.fabric as fabric
def create_blank_semantic_model(datasetName, compatibilityLevel = 1604, workspaceName = None):
if workspaceName == None:
workspaceId = fabric.get_workspace_id()
workspaceName = fabric.resolve_workspace_name(workspaceId)
tmsl = f'''
{{
"createOrReplace": {{
"object": {{
"database": '{datasetName}'
}},
"database": {{
"name": '{datasetName}',
"compatibilityLevel": {compatibilityLevel},
"model": {{
"culture": "en-US",
"defaultPowerBIDataSourceVersion": "powerBI_V3"
}}
}}
}}
}}
'''
return fabric.execute_tmsl(script = tmsl, workspace = workspaceName)
create_blank_semantic_model('') #Enter semantic model name to be created
Valid options for refresh_type: 'full', 'automatic', 'dataOnly', 'calculate', 'clearValues', 'defragment'. Default is 'automatic'.
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
fabric.refresh_dataset(dataset = datasetName, refresh_type = 'full')
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
my_objects = [
{"table": "tableName1"}, #Update 'tableName1' with your table name
{"table": "tableName2"} #Update 'tableName2' with your table name
]
fabric.refresh_dataset(dataset = datasetName, refresh_type = 'full', objects = my_objects)
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
my_objects = [
{"table": "table1", "partition": "partition1"}, #Update 'table1' with your table name and 'partition1' with the partition name
{"table": "table1", "partition": "partition2"} #Update 'table1' with your table name and 'partition2' with the partition name
]
fabric.refresh_dataset(dataset = datasetName, refresh_type = 'full', objects = my_objects)
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
my_objects = [
{"table": "table1"}, #Update 'table1' with your table name
{"table": "table2", "partition": "partition2"} #Update 'table2' with your table name and 'partition2' with the partition name
]
fabric.refresh_dataset(dataset = datasetName, refresh_type = 'full', objects = my_objects)
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
x = fabric.list_refresh_requests(datasetName)
x
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
tableName = '' #Enter table name
rowLimit = 100
x = fabric.read_table(datasetName,tableName,False,rowLimit)
x
Connect to the Tabular Object Model (TOM); prints each table name
import sempy
import sempy.fabric as fabric
workspaceName = '' #Enter workspace name
datasetName = '' #Enter dataset name
tom_server = fabric.create_tom_server(readonly=True, workspace=workspaceName)
m = tom_server.Databases.GetByName(datasetName).Model
for t in m.Tables:
print(t.Name)
import sempy
import sempy.fabric as fabric
sempy.fabric._client._utils._init_analysis_services()
import Microsoft.AnalysisServices.Tabular as TOM
def create_measure(datasetName, tableName, measureName, measureExpression, workspaceName = None):
if workspaceName == None:
workspaceID = fabric.get_workspace_id()
workspaceName = fabric.resolve_workspace_name(workspaceID)
dfM = fabric.list_measures(dataset = datasetName, workspace = workspaceName)
mExists = any(r['Measure Name'] == measureName for i, r in dfM.iterrows())
if mExists:
print(f"The '{measureName}' measure already exists in the '{datasetName}' semantic model within the '{workspaceName}' workspace.")
return
dfT = fabric.list_tables(dataset = datasetName, workspace = workspaceName)
if not any(r['Name'] == tableName for i, r in dfT.iterrows()):
print(f"The '{tableName}' table does not exist in the '{datasetName}' semantic model within the '{workspaceName}' workspace.")
return
tom_server = fabric.create_tom_server(readonly=False, workspace=workspaceName)
m = tom_server.Databases.GetByName(datasetName).Model
measure = TOM.Measure()
measure.Name = measureName
measure.Expression = measureExpression
m.Tables[tableName].Measures.Add(measure)
print(f"The '{measureName}' measure has been added to the '{tableName}' table in the '{datasetName} semantic model within the '{workspaceName}' workspace.")
m.SaveChanges()
import sempy
import sempy.fabric as fabric
sempy.fabric._client._utils._init_analysis_services()
from sempy.fabric._cache import _get_or_create_workspace_client
from sempy.fabric._client._connection_mode import ConnectionMode
import Microsoft.AnalysisServices.Tabular as TOM
workspaceName = '' #Enter workspace name
datasetName = '' #Enter dataset name
tom_server = _get_or_create_workspace_client(workspaceName).get_dataset_client(datasetName, ConnectionMode.XMLA)._get_connected_dataset_server(readonly=False)
mName = '' #Enter measure name
mExpr = '' #Enter DAX expression
measure = TOM.Measure()
measure.Name = mName
measure.Expression = mExpr
for d in tom_server.Databases:
if d.Name == datasetName:
print(f"Updating {d.Name}...")
m = d.Model
for t in m.Tables:
for ms in t.Measures:
if ms.Name == mName:
ms.Expression = mExpr
print(f"The DAX expression for '{mName}' has been updated.")
m.SaveChanges()
import sempy
import sempy.fabric as fabric
from sempy.fabric._client import DatasetXmlaClient
from sempy.fabric._cache import _get_or_create_workspace_client
sempy.fabric._client._utils._init_analysis_services()
workspaceName = '' #Enter workspace name
datasetName = '' #Enter dataset name
tom_server = _get_or_create_workspace_client(workspaceName)._create_tom_server(False)
for d in tom_server.Databases:
if d.Name == datasetName:
m = d.Model
for t in m.Tables:
for ms in t.Measures:
print(ms.Name + ' : ' + ms.Expression)
tom_server.Disconnect(True)
tom_server.Dispose()
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
x = fabric.evaluate_dax(
datasetName,
"""
EVALUATE
SUMMARIZECOLUMNS(
"Header Name",[Measure Name] // update with actual measure name
)
""")
x
Run Dynamic Management Views (DMVs) via evaluate_dax()
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
x = fabric.evaluate_dax(
datasetName,
"""
SELECT
MEASURE_GROUP_NAME AS [TableName]
,ATTRIBUTE_NAME AS [ColumnName]
,DATATYPE AS [DataType]
,DICTIONARY_SIZE AS [DictionarySize]
,DICTIONARY_ISRESIDENT AS [IsResident]
,DICTIONARY_TEMPERATURE AS [Temperature]
,DICTIONARY_LAST_ACCESSED AS [LastAccessed]
FROM $SYSTEM.DISCOVER_STORAGE_TABLE_COLUMNS
WHERE [COLUMN_TYPE] = 'BASIC_DATA'
AND NOT [ISROWNUMBER]
ORDER BY [DICTIONARY_TEMPERATURE] DESC
""")
x
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
x = fabric.evaluate_dax(
datasetName,
"""
// Select specific columns and rename columns
EVALUATE
SELECTCOLUMNS(INFO.TABLES(),"Table Name", [Name],"Hidden", [IsHidden],"Data Category", [DataCategory],[Description])
""")
x
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
x = fabric.evaluate_dax(
datasetName,
"""
// Filter by multiple columns
EVALUATE
FILTER(INFO.TABLES(),[Name] = "TableName" && [IsHidden] = False) // Update filter with the actual table name
""")
x
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
x = fabric.evaluate_dax(
datasetName,
"""
// Select specific columns and filter
EVALUATE
FILTER(SELECTCOLUMNS(INFO.TABLES(),[Name],[IsHidden],[DataCategory],[Description]),[Name] = "TableName") // Update filter with the actual table name
""")
x
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
x = fabric.evaluate_dax(
datasetName,
"""
// View specific columns and order the table by a column in descending order
EVALUATE
SELECTCOLUMNS(COLUMNSTATISTICS(),"Table Name",[Table Name], [Column Name], [Cardinality])
ORDER BY [Cardinality] DESC
""")
x
import sempy
import sempy.fabric as fabric
x = fabric.evaluate_measure(
"DatasetName", #Enter your dataset name
"MeasureName", #Enter the measure name from your dataset
["'TableName'[ColumnName]", "TableName[ColumnName]"]) # Enter columns
x
%load_ext sempy
%%dax "DatasetName" -w "WorkspaceName" # Enter DatasetName & WorkspaceName
EVALUATE
SUMMARIZECOLUMNS(
"Header Name",[Measure Name] // Update with actual measure name
)
Run Dynamic Management Views (DMVs) using DAX cell magic
%%dax "DatasetName" -w "WorkspaceName" # Enter DatasetName & WorkspaceName
SELECT * FROM $SYSTEM.DISCOVER_SESSIONS
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
dfC = fabric.list_columns(datasetName, extended=True)
dfC_filt = dfC[dfC['Is Resident']]
total_size_sum = dfC_filt['Total Size'].sum()
total_size_sum
import sempy
import sempy.fabric as fabric
datasetName = '' #Enter dataset name
dfC = fabric.list_columns(datasetName, extended=True).sort_values(by='Temperature', ascending=False)
dfC
import pandas as pd
url = 'https://learn.microsoft.com/power-bi/enterprise/directlake-overview'
tables = pd.read_html(url)
df = tables[0]
df['Fabric/Power BI SKUs'] = df['Fabric/Power BI SKUs'].str.split('/')
df = df.explode('Fabric/Power BI SKUs', ignore_index=True)
df
import sempy
import sempy.fabric as fabric
import pandas as pd
def get_sku_size(workspaceName = None):
if workspaceName == None:
workspaceId = fabric.get_workspace_id()
workspaceName = fabric.resolve_workspace_name(workspaceId)
dfC = fabric.list_capacities()
dfW = fabric.list_workspaces().sort_values(by='Name', ascending=True)
dfC.rename(columns={'Id': 'Capacity Id'}, inplace=True)
dfCW = pd.merge(dfW, dfC[['Capacity Id', 'Sku', 'Region', 'State']], on='Capacity Id', how='inner')
sku_value = dfCW.loc[dfCW['Name'] == workspaceName, 'Sku'].iloc[0]
return sku_value
sku_value = get_sku_size()
def get_directlake_guardrails(skuSize):
url = 'https://learn.microsoft.com/power-bi/enterprise/directlake-overview'
tables = pd.read_html(url)
df = tables[0]
df['Fabric/Power BI SKUs'] = df['Fabric/Power BI SKUs'].str.split('/')
df = df.explode('Fabric/Power BI SKUs', ignore_index=True)
filtered_df = df[df['Fabric/Power BI SKUs'] == sku_value]
return filtered_df
get_directlake_guardrails(sku_value)
The documentation for Direct Lake migration has been moved here.