import requests
from datetime import datetime, timedelta
import os
from delta import *
import pyspark
from pyspark.sql.functions import col, expr , round
# Build a Spark session with Delta Lake support enabled.
# The two config entries register the Delta SQL extension and set the Delta
# catalog implementation as the session's `spark_catalog`.
builder = (
    pyspark.sql.SparkSession.builder
    .appName("MyApp")
    .config(
        "spark.sql.extensions",
        "io.delta.sql.DeltaSparkSessionExtension",
    )
    .config(
        "spark.sql.catalog.spark_catalog",
        "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    )
)

# Pass the builder through the helper imported from `delta`, then create the
# session (or obtain an existing one) via getOrCreate().
spark = configure_spark_with_delta_pip(builder).getOrCreate()
Apache Spark
Apache Spark Frequently Used Commands
| Action | Syntax |
| --- | --- |
| Show DataFrame | `df.show()` |
| Stop Spark session | `spark.stop()` |
| Count entries in DataFrame | `df.count()` |
| Write to Delta table | `df.write.format("delta").mode("overwrite").save("/venv/storage")` |
| Read from Delta table | `df = spark.read.format("delta").load("/venv/storage")` |
| Remove duplicates | `df = df.dropDuplicates()` |
0 Comments