# Run each of these queries, preferably in a separate cmd cell for separate
# analysis.
#
# NOTE(review): `spark` is not defined in this script; it is assumed to be the
# SparkSession already provided by the notebook environment -- confirm.

# Create a temporary SQL view for querying flight information.
FlightTable = spark.read.parquet('/mnt/flightdata/parquet/flights')
FlightTable.createOrReplaceTempView('FlightTable')

# Create a temporary SQL view for querying airline code information.
AirlineCodes = spark.read.parquet('/mnt/flightdata/parquet/airlinecodes')
AirlineCodes.createOrReplaceTempView('AirlineCodes')

# Using Spark SQL, query the parquet-backed view to return the total number of
# flights in January and February 2016.
out1 = spark.sql("SELECT * FROM FlightTable WHERE Month=1 and Year= 2016")
NumJan2016Flights = out1.count()
out2 = spark.sql("SELECT * FROM FlightTable WHERE Month=2 and Year= 2016")
NumFeb2016Flights = out2.count()
print("Jan 2016: ", NumJan2016Flights, " Feb 2016: ", NumFeb2016Flights)
Total = NumJan2016Flights + NumFeb2016Flights
print("Total flights combined: ", Total)

# List the distinct origin cities of flights whose origin state is Texas.
# NOTE(review): the query returns OriginCityName values (cities), which the
# label below calls "airports" -- confirm that is the intended column.
out = spark.sql(
    "SELECT distinct(OriginCityName) FROM FlightTable where OriginStateName = 'Texas'"
)
# DataFrame.show() prints the rows itself and returns None, so it must not be
# passed to print(): print the label first, then render the table.
print('Airports in Texas: ')
out.show(100)

# Find all airlines that fly from Texas (the filter is on the origin state
# only, so the label says "from" rather than "to/from").
out1 = spark.sql(
    "SELECT distinct(Reporting_Airline) FROM FlightTable WHERE OriginStateName='Texas'"
)
print('Airlines that fly from Texas: ')
# Second argument False disables truncation of long cell values.
out1.show(100, False)
DISCLAIMER WARNING: The content on this site should not be considered investment advice. Investing i
Ransom Where? US Doubles Down on Finding Ransomware Actors as Demands Hit New Highs and Attacks Hit
By itself, a single bit is kind of useless, as it can only represent one of two things. Imagine if y