Parsing SQL

Parsing SQL is easy with the SDK.

Use the json_message method to parse an SQL statement to a JSON format.

Use the xml_message method to parse an SQL statement to an XML format.

Example 1:

In this example we parse a query that includes a JOIN, GROUP BY, WHERE, HAVING and ORDER BY clause. from flowhigh.utils.converter import FlowHighSubmissionClass

fh = FlowHighSubmissionClass.from_sql("""
SELECT
	customer.id,
	customer.date_of_sales,
	SUM(sales.amount) AS amount_of_sales,
	COUNT(customer.orders) AS count_of_orders
FROM customer
JOIN sales
ON customer.id = sales.id
WHERE customer.date_of_sales > '2020-01-01'
GROUP BY customer.id, customer.date_of_sales
HAVING COUNT(customer.orders) > 1
ORDER BY customer.date_of_sales""")

print(fh.json_message)

JSON Output:

{
	"version": "1.0",
	"status": "OK",
	"ts": "2023-01-25T14:16:51.698Z",
	"statement": [
       {
        "pos": "0-333",
        "ds": [
          {
            "pos": "0-333",
            "type": "root",
            "subType": "inline",
            "out": {
              "exprs": [
                {
                  "pos": "8-11",
                  "dboref": "C1",
                  "eltype": "attr"
                },
                {
                  "pos": "22-22",
                  "dboref": "C2",
                  "eltype": "attr"
                },
                {
                  "pos": "47-36",
                  "alias": "amount_of_sales",
                  "type": "fagg",
                  "name": "SUM",
                  "exprs": [
                    {
                      "pos": "51-12",
                      "dboref": "C3",
                      "eltype": "attr"
                    }
                  ],
                  "eltype": "func"
                },
                {
                  "pos": "86-41",
                  "alias": "count_of_orders",
                  "type": "fagg",
                  "name": "COUNT",
                  "exprs": [
                    {
                      "pos": "92-15",
                      "dboref": "C4",
                      "eltype": "attr"
                    }
                  ],
                  "eltype": "func"
                }
              ],
              "eltype": "out"
            },
            "in": {
              "exprs": [
                {
                  "pos": "133-8",
                  "dboref": "T1",
                  "eltype": "ds"
                },
                {
                  "type": "inner",
                  "definedAs": "explicit",
                  "ds": {
                    "pos": "147-5",
                    "dboref": "T2",
                    "eltype": "ds"
                  },
                  "op": {
                    "pos": "156-22",
                    "exprs": [
                      {
                        "pos": "156-11",
                        "dboref": "C1",
                        "eltype": "attr"
                      },
                      {
                        "pos": "170-8",
                        "dboref": "C5",
                        "eltype": "attr"
                      }
                    ],
                    "type": "EQ",
                    "eltype": "op"
                  },
                  "eltype": "join"
                }
              ],
              "eltype": "in"
            },
            "modifiers": [
              {
                "type": "filtreg",
                "op": {
                  "pos": "185-37",
                  "exprs": [
                    {
                      "pos": "185-22",
                      "dboref": "C2",
                      "eltype": "attr"
                    },
                    {
                      "value": "'2020-01-01'",
                      "eltype": "const"
                    }
                  ],
                  "type": "GT",
                  "eltype": "op"
                },
                "eltype": "filter"
              },
              {
                "type": "aggreg",
                "exprs": [
                  {
                    "pos": "232-11",
                    "dboref": "C1",
                    "eltype": "attr"
                  },
                  {
                    "pos": "245-22",
                    "dboref": "C2",
                    "eltype": "attr"
                  }
                ],
                "eltype": "agg"
              },
              {
                "type": "filtagg",
                "op": {
                  "pos": "275-26",
                  "exprs": [
                    {
                      "pos": "275-22",
                      "type": "fagg",
                      "name": "COUNT",
                      "exprs": [
                        {
                          "pos": "281-15",
                          "dboref": "C4",
                          "eltype": "attr"
                        }
                      ],
                      "eltype": "func"
                    },
                    {
                      "value": 1,
                      "eltype": "const"
                    }
                  ],
                  "type": "GT",
                  "eltype": "op"
                },
                "eltype": "filter"
              },
              {
                "exprs": [
                  {
                    "pos": "311-22",
                    "direction": "asc",
                    "dboref": "C2",
                    "eltype": "attr"
                  }
                ],
                "eltype": "sort"
              }
            ],
            "eltype": "ds"
          }
        ],
        "rawInput": "SELECT\n\tcustomer.id,\n\tcustomer.date_of_sales,\n\tSUM(sales.amount) AS amount_of_sales,\n\tCOUNT(customer.orders) AS count_of_orders\nFROM customer\nJOIN sales\nON customer.id = sales.id\nWHERE customer.date_of_sales > '2020-01-01'\nGROUP BY customer.id, customer.date_of_sales\nHAVING COUNT(customer.orders) > 1\nORDER BY customer.date_of_sales",
        "eltype": "statement"
      }
   ],
  "eltype": "parSeQL"
}

Example 2:

In this example we will parse multiple statements (SELECT,MERGE,CTAS) at the same time.

from flowhigh.utils.converter import FlowHighSubmissionClass

fh = FlowHighSubmissionClass.from_sql("""
MERGE INTO members m
  USING (
  SELECT id, date
  FROM signup
  WHERE DATEDIFF(day, CURRENT_DATE(), signup.date::DATE) < -30) s ON m.id = s.id
  WHEN MATCHED THEN UPDATE SET m.fee = 40;
SELECT order FROM orders;
CREATE TABLE sales AS
SELECT * FROM sale;""")

print(fh.json_message)

JSON Output:

{
	"version": "1.0",
	"status": "OK",
	"ts": "2023-01-25T19:44:58.133Z",
	"statement": [
      {
          "pos": "0-185",
          "ds": [
            {
              "pos": "33-92",
              "alias": "s",
              "type": "view",
              "subType": "inline",
              "fullref": "s",
              "out": {
                "exprs": [
                  {
                    "pos": "40-2",
                    "dboref": "C1",
                    "eltype": "attr"
                  },
                  {
                    "pos": "44-4",
                    "dboref": "C2",
                    "eltype": "attr"
                  }
                ],
                "eltype": "out"
              },
              "in": {
                "exprs": [
                  {
                    "pos": "56-6",
                    "type": "table",
                    "subType": "physical",
                    "refds": "signup",
                    "fullref": "signup",
                    "eltype": "ds"
                  }
                ],
                "eltype": "in"
              },
              "modifiers": [
                {
                  "type": "filtreg",
                  "op": {
                    "pos": "71-54",
                    "exprs": [
                      {
                        "pos": "71-48",
                        "type": "fscalar",
                        "name": "DATEDIFF",
                        "subType": "datetime",
                        "exprs": [
                          {
                            "pos": "80-3",
                            "dboref": "C3",
                            "eltype": "attr"
                          },
                          {
                            "pos": "85-14",
                            "type": "fscalar",
                            "name": "CURRENT_DATE",
                            "subType": "context",
                            "eltype": "func"
                          },
                          {
                            "pos": "101-17",
                            "expr": {
                              "pos": "101-1",
                              "dboref": "C2",
                              "eltype": "attr"
                            },
                            "eltype": "cast",
                            "dataType": "DATE"
                          }
                        ],
                        "eltype": "func"
                      },
                      {
                        "value": -30,
                        "eltype": "const"
                      }
                    ],
                    "type": "LT",
                    "eltype": "op"
                  },
                  "eltype": "filter"
                }
              ],
              "eltype": "ds"
            },
            {
              "pos": "11-7",
              "alias": "m",
              "type": "table",
              "subType": "physical",
              "action": "update",
              "refds": "members",
              "fullref": "members",
              "out": {
                "exprs": [
                  {
                    "pos": "175-10",
                    "exprs": [
                      {
                        "pos": "175-5",
                        "dboref": "C4",
                        "eltype": "attr"
                      },
                      {
                        "value": 40,
                        "eltype": "const"
                      }
                    ],
                    "type": "ASSIGN",
                    "eltype": "op"
                  }
                ],
                "eltype": "out"
              },
              "in": {
                "exprs": [
                  {
                    "refTo": "33-92",
                    "eltype": "ds"
                  }
                ],
                "eltype": "in"
              },
              "modifiers": [
                {
                  "op": {
                    "pos": "132-11",
                    "exprs": [
                      {
                        "pos": "132-4",
                        "dboref": "C1",
                        "eltype": "attr"
                      },
                      {
                        "pos": "139-4",
                        "dboref": "C1",
                        "eltype": "attr"
                      }
                    ],
                    "type": "EQ",
                    "eltype": "op"
                  },
                  "eltype": "filter"
                }
              ],
              "eltype": "ds"
            }
          ],
          "rawInput": "MERGE INTO members m\n  USING (\n  SELECT id, date\n  FROM signup\n  WHERE DATEDIFF(day, CURRENT_DATE(), signup.date::DATE) < -30) s ON m.id = s.id\n  WHEN MATCHED THEN UPDATE SET m.fee = 40",
          "type": "merge",
          "eltype": "mergeStatement"
        },
        {
          "pos": "187-24",
          "ds": [
            {
              "pos": "187-24",
              "type": "root",
              "subType": "inline",
              "out": {
                "exprs": [
                  {
                    "pos": "7-5",
                    "dboref": "C5",
                    "eltype": "attr"
                  }
                ],
                "eltype": "out"
              },
              "in": {
                "exprs": [
                  {
                    "pos": "205-6",
                    "dboref": "T1",
                    "eltype": "ds"
                  }
                ],
                "eltype": "in"
              },
              "eltype": "ds"
            }
          ],
          "rawInput": "SELECT order FROM orders",
          "eltype": "statement"
        },
        {
          "pos": "213-40",
          "ds": [
            {
              "pos": "235-18",
              "subType": "inline",
              "out": {
                "exprs": [
                  {
                    "pos": "29-1",
                    "refds": "sale",
                    "fullref": "sale.*",
                    "eltype": "asterisk"
                  }
                ],
                "eltype": "out"
              },
              "in": {
                "exprs": [
                  {
                    "pos": "249-4",
                    "dboref": "T3",
                    "eltype": "ds"
                  }
                ],
                "eltype": "in"
              },
              "eltype": "ds"
            },
            {
              "pos": "213-40",
              "action": "create_table_global_transient",
              "refds": "sales",
              "fullref": "sales",
              "out": {
                "exprs": [
                  {
                    "pos": "29-1",
                    "refds": "sale",
                    "fullref": "sale.*",
                    "eltype": "asterisk"
                  }
                ],
                "eltype": "out"
              },
              "in": {
                "exprs": [
                  {
                    "refTo": "235-18",
                    "eltype": "ds"
                  }
                ],
                "eltype": "in"
              },
              "eltype": "ds"
            }
          ],
          "antiPatterns": [
            {
              "type": "AP_05",
              "pos": [
                "242-1"
              ],
              "eltype": "antiPattern"
            }
          ],
          "rawInput": "CREATE TABLE sales AS\nSELECT * FROM sale",
          "type": "create table",
          "eltype": "createTableStatement"
        }
	],
	"eltype": "parSeQL"
}

You can view more examples on how to use the SDK on the FlowHigh SDK page.