{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Analyzing Windows RPC Methods & Other Functions Via GraphFrames"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* **Author:** Roberto Rodriguez (@Cyb3rWard0g)\n",
    "* **Project:** Infosec Jupyter Book\n",
    "* **Public Organization:** Open Threat Research\n",
    "* **License:** Creative Commons Attribution-ShareAlike 4.0 International\n",
    "* **Reference:** https://github.com/Cyb3rWard0g/WinRpcFunctions"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Import Libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.sql import SparkSession\n",
    "from pyspark.sql.functions import *\n",
    "from graphframes import *"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Initialize Spark Session"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the Spark session for this notebook (getOrCreate reuses a running one):\n",
    "# case-sensitive SQL identifiers and 4g of driver memory.\n",
    "spark = (\n",
    "    SparkSession.builder\n",
    "    .appName(\"WinRPC\")\n",
    "    .config(\"spark.sql.caseSensitive\", \"True\")\n",
    "    .config(\"spark.driver.memory\", \"4g\")\n",
    "    .getOrCreate()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "            <div>\n",
       "                <p><b>SparkSession - in-memory</b></p>\n",
       "                \n",
       "        <div>\n",
       "            <p><b>SparkContext</b></p>\n",
       "\n",
       "            <p><a href=\"http://192.168.1.232:4040\">Spark UI</a></p>\n",
       "\n",
       "            <dl>\n",
       "              <dt>Version</dt>\n",
       "                <dd><code>v3.0.0</code></dd>\n",
       "              <dt>Master</dt>\n",
       "                <dd><code>local[*]</code></dd>\n",
       "              <dt>AppName</dt>\n",
       "                <dd><code>WinRPC</code></dd>\n",
       "            </dl>\n",
       "        </div>\n",
       "        \n",
       "            </div>\n",
       "        "
      ],
      "text/plain": [
       "<pyspark.sql.session.SparkSession at 0x11a219250>"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "spark"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Download and Decompress JSON File"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--2020-07-21 15:01:41--  https://github.com/Cyb3rWard0g/WinRpcFunctions/raw/master/win10_1909/AllRpcFuncMaps.zip\n",
      "Resolving github.com (github.com)... 140.82.113.3\n",
      "Connecting to github.com (github.com)|140.82.113.3|:443... connected.\n",
      "HTTP request sent, awaiting response... 302 Found\n",
      "Location: https://raw.githubusercontent.com/Cyb3rWard0g/WinRpcFunctions/master/win10_1909/AllRpcFuncMaps.zip [following]\n",
      "--2020-07-21 15:01:41--  https://raw.githubusercontent.com/Cyb3rWard0g/WinRpcFunctions/master/win10_1909/AllRpcFuncMaps.zip\n",
      "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n",
      "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 26891116 (26M) [application/zip]\n",
      "Saving to: ‘AllRpcFuncMaps.zip’\n",
      "\n",
      "AllRpcFuncMaps.zip  100%[===================>]  25.64M  4.33MB/s    in 6.1s    \n",
      "\n",
      "2020-07-21 15:01:47 (4.22 MB/s) - ‘AllRpcFuncMaps.zip’ saved [26891116/26891116]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# -nc (no-clobber): skip the download when AllRpcFuncMaps.zip already exists, so that\n",
    "# re-running this cell does not leave a second copy named AllRpcFuncMaps.zip.1.\n",
    "! wget -nc https://github.com/Cyb3rWard0g/WinRpcFunctions/raw/master/win10_1909/AllRpcFuncMaps.zip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Archive:  AllRpcFuncMaps.zip\n",
      "  inflating: AllRpcFuncMaps.json     \n"
     ]
    }
   ],
   "source": [
    "# -o: overwrite AllRpcFuncMaps.json without prompting, so that re-running this cell\n",
    "# does not stop at unzip's interactive \"replace?\" question.\n",
    "! unzip -o AllRpcFuncMaps.zip"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read JSON File as Spark DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 9.34 ms, sys: 5.12 ms, total: 14.5 ms\n",
      "Wall time: 1min 8s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Path of the JSON file extracted from AllRpcFuncMaps.zip by the previous cell.\n",
    "rpc_maps_path = 'AllRpcFuncMaps.json'\n",
    "# No schema is passed to the reader here.\n",
    "df = spark.read.json(rpc_maps_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create Temporary SQL View"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Register the DataFrame as the SQL view that the vertex and edge queries select from.\n",
    "df.createOrReplaceTempView(name='RPCMaps')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create GraphFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One vertex row per distinct (FunctionName, FunctionType, Module) combination.\n",
    "# NOTE(review): the vertex id is the function name alone, so a name that occurs with more\n",
    "# than one Module/FunctionType yields several rows sharing the same id - confirm intended.\n",
    "vertices_query = '''\n",
    "SELECT FunctionName AS id, FunctionType, Module\n",
    "FROM RPCMaps\n",
    "GROUP BY FunctionName, FunctionType, Module\n",
    "'''\n",
    "vertices = spark.sql(vertices_query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One directed edge per distinct (CalledBy, FunctionName) pair: caller -> callee.\n",
    "edges_query = '''\n",
    "SELECT CalledBy AS src, FunctionName AS dst\n",
    "FROM RPCMaps\n",
    "'''\n",
    "edges = spark.sql(edges_query).dropDuplicates()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Combine the vertex and edge DataFrames into the graph queried in the rest of the notebook.\n",
    "g = GraphFrame(v=vertices, e=edges)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "GraphFrame(v:[id: string, FunctionType: string ... 1 more field], e:[src: string, dst: string])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "g"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Motif Finding"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Motif finding refers to searching for structural patterns in a graph.\n",
    "\n",
    "GraphFrame motif finding uses a simple Domain-Specific Language (DSL) for expressing structural queries. For example, `graph.find(\"(a)-[e]->(b); (b)-[e2]->(a)\")` searches for pairs of vertices a, b connected by edges in both directions. It returns a DataFrame of all such structures in the graph, with columns for each of the named elements (vertices or edges) in the motif."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Basic Motif Queries"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Which chains of three vertices start at an RPC function and end at the external function `LoadLibraryExW`?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Two-hop chains a -> b -> c whose first vertex is an RPC function and whose last\n",
    "# vertex is the external function LoadLibraryExW.\n",
    "loadLibrary = (\n",
    "    g.find(\"(a)-[]->(b); (b)-[]->(c)\")\n",
    "    .filter(\"a.FunctionType = 'RPCFunction'\")\n",
    "    .filter(\"c.FunctionType = 'ExtFunction'\")\n",
    "    .filter(\"c.id = 'LoadLibraryExW'\")\n",
    "    .dropDuplicates()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---------------------------------------+----------------------------------------+----------+--------------+\n",
      "|Module                                 |id                                      |id        |id            |\n",
      "+---------------------------------------+----------------------------------------+----------+--------------+\n",
      "|c:/Windows/System32/appinfo.dll        |RAiLaunchProcessWithIdentity            |Open      |LoadLibraryExW|\n",
      "|C:/Windows/System32/UserDataService.dll|UdmSvcImpl_GetContactRevisionEnum       |Initialize|LoadLibraryExW|\n",
      "|c:/Windows/System32/lsm.dll            |RpcWaitAsyncNotification                |Initialize|LoadLibraryExW|\n",
      "|c:/Windows/System32/lsm.dll            |RpcWaitAsyncNotification                |Initialize|LoadLibraryExW|\n",
      "|C:/Windows/System32/PhoneService.dll   |PhoneSvcImpl_PhoneRpcGetShouldMuteKeypad|Initialize|LoadLibraryExW|\n",
      "|C:/Windows/System32/UserDataService.dll|UdmSvcImpl_ToggleContactMaintenance     |Initialize|LoadLibraryExW|\n",
      "|C:/Windows/System32/UserDataService.dll|UdmSvcImpl_EmptyEmailFolder             |Initialize|LoadLibraryExW|\n",
      "|C:/Windows/System32/UserDataService.dll|UdmSvcImpl_EmptyEmailFolder             |Initialize|LoadLibraryExW|\n",
      "|c:/Windows/System32/vpnike.dll         |VpnikeCreateIDPayload                   |Initialize|LoadLibraryExW|\n",
      "|c:/Windows/System32/vpnike.dll         |VpnikeCreateIDPayload                   |Initialize|LoadLibraryExW|\n",
      "+---------------------------------------+----------------------------------------+----------+--------------+\n",
      "only showing top 10 rows\n",
      "\n",
      "CPU times: user 6.63 ms, sys: 3.24 ms, total: 9.87 ms\n",
      "Wall time: 37.8 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Project the columns of interest first and de-duplicate afterwards: rows that differ only\n",
    "# in vertex fields that are not displayed would otherwise be printed as repeated lines.\n",
    "# The aliases replace three column headers that would all read \"id\".\n",
    "loadLibrary.selectExpr(\n",
    "    \"a.Module AS Module\",\n",
    "    \"a.id AS RpcFunction\",\n",
    "    \"b.id AS CalledFunction\",\n",
    "    \"c.id AS ExtFunction\",\n",
    ").distinct().show(10, truncate=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "What if we also filter our graph query by a specific module? What about Lsasrv.dll?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same two-hop pattern, restricted to RPC functions whose module path ends in\n",
    "# lsasrv.dll (compared in lower case).\n",
    "loadLibrary = (\n",
    "    g.find(\"(a)-[]->(b); (b)-[]->(c)\")\n",
    "    .filter(\"a.FunctionType = 'RPCFunction'\")\n",
    "    .filter(\"lower(a.Module) LIKE '%lsasrv.dll'\")\n",
    "    .filter(\"c.FunctionType = 'ExtFunction'\")\n",
    "    .filter(\"c.id = 'LoadLibraryExW'\")\n",
    "    .dropDuplicates()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+------------------------------+----------------------------------+-------------------------+--------------+\n",
      "|Module                        |id                                |id                       |id            |\n",
      "+------------------------------+----------------------------------+-------------------------+--------------+\n",
      "|c:/Windows/System32/lsasrv.dll|DsRolerGetPrimaryDomainInformation|LsapDbOpenObject         |LoadLibraryExW|\n",
      "|c:/Windows/System32/lsasrv.dll|LsarQueryTrustedDomainInfoByName  |LsapLoadLsaDbExtensionDll|LoadLibraryExW|\n",
      "|c:/Windows/System32/lsasrv.dll|LsarOpenPolicy2                   |LsapDbOpenObject         |LoadLibraryExW|\n",
      "|c:/Windows/System32/lsasrv.dll|DsRolerGetPrimaryDomainInformation|LsapDbOpenObject         |LoadLibraryExW|\n",
      "|c:/Windows/System32/lsasrv.dll|LsarCreateSecret                  |LsapDbDereferenceObject  |LoadLibraryExW|\n",
      "|c:/Windows/System32/lsasrv.dll|LsarEnumerateAccountsWithUserRight|LsapDbDereferenceObject  |LoadLibraryExW|\n",
      "|c:/Windows/System32/lsasrv.dll|LsarLookupSids                    |LsapLookupSids           |LoadLibraryExW|\n",
      "|c:/Windows/System32/lsasrv.dll|LsarQueryTrustedDomainInfoByName  |LsapDbOpenObject         |LoadLibraryExW|\n",
      "|c:/Windows/System32/lsasrv.dll|LsarSetTrustedDomainInfoByName    |LsapDbDereferenceObject  |LoadLibraryExW|\n",
      "|c:/Windows/System32/lsasrv.dll|LsarOpenAccount                   |LsapLoadLsaDbExtensionDll|LoadLibraryExW|\n",
      "+------------------------------+----------------------------------+-------------------------+--------------+\n",
      "only showing top 10 rows\n",
      "\n",
      "CPU times: user 4.95 ms, sys: 2.65 ms, total: 7.6 ms\n",
      "Wall time: 23 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Project the columns of interest first and de-duplicate afterwards: rows that differ only\n",
    "# in vertex fields that are not displayed would otherwise be printed as repeated lines.\n",
    "# The aliases replace three column headers that would all read \"id\".\n",
    "loadLibrary.selectExpr(\n",
    "    \"a.Module AS Module\",\n",
    "    \"a.id AS RpcFunction\",\n",
    "    \"b.id AS CalledFunction\",\n",
    "    \"c.id AS ExtFunction\",\n",
    ").distinct().show(10, truncate=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Breadth-first search (BFS)\n",
    "\n",
    "Breadth-first search (BFS) finds the shortest path(s) from one vertex (or a set of vertices) to another vertex (or a set of vertices). The beginning and end vertices are specified as Spark DataFrame expressions."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Shortest Path from an RPC Method to LoadLibraryExW"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Breadth-first search from any RPC function to the external function LoadLibraryExW,\n",
    "# limited to paths of at most three edges.\n",
    "rpc_start = \"FunctionType = 'RPCFunction'\"\n",
    "load_library_target = \"id = 'LoadLibraryExW' and FunctionType = 'ExtFunction'\"\n",
    "loadLibraryBFS = g.bfs(\n",
    "    fromExpr=rpc_start,\n",
    "    toExpr=load_library_target,\n",
    "    maxPathLength=3,\n",
    ").dropDuplicates()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------------------------------+--------------------------------------------+\n",
      "|Module                                |e0                                          |\n",
      "+--------------------------------------+--------------------------------------------+\n",
      "|C:/Windows/System32/appmgmts.dll      |[ARPRemoveApp, LoadLibraryExW]              |\n",
      "|c:/Windows/System32/nlasvc.dll        |[operator(), LoadLibraryExW]                |\n",
      "|c:/Windows/System32/lsasrv.dll        |[LsarQueryInformationPolicy, LoadLibraryExW]|\n",
      "|C:/Windows/System32/tellib.dll        |[operator(), LoadLibraryExW]                |\n",
      "|C:/Windows/System32/tellib.dll        |[operator(), LoadLibraryExW]                |\n",
      "|C:/Windows/System32/debugregsvc.dll   |[s_MergeEtlFiles, LoadLibraryExW]           |\n",
      "|c:/Windows/System32/samsrv.dll        |[SamrCloseHandle, LoadLibraryExW]           |\n",
      "|C:/Windows/System32/appmgmts.dll      |[GetManagedApps, LoadLibraryExW]            |\n",
      "|C:/Windows/System32/debugregsvc.dll   |[s_MergeEtlFiles, LoadLibraryExW]           |\n",
      "|C:/Windows/System32/WaaSMedicAgent.exe|[LoadPluginLibrary, LoadLibraryExW]         |\n",
      "+--------------------------------------+--------------------------------------------+\n",
      "only showing top 10 rows\n",
      "\n",
      "CPU times: user 2.73 ms, sys: 1.58 ms, total: 4.31 ms\n",
      "Wall time: 13.5 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Only from.Module and e0 are displayed, so de-duplicate after projecting: BFS rows that\n",
    "# differ only in fields that are not shown would otherwise be printed as repeated lines.\n",
    "loadLibraryBFS.select(\"from.Module\", \"e0\").distinct().show(10, truncate=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "PySpark_Python3",
   "language": "python",
   "name": "pyspark3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}