package de.superx.bianalysis.sqlgeneration; import java.util.List; import java.util.StringJoiner; import de.superx.bianalysis.ColumnElement; import de.superx.bianalysis.ReportMetadata; import de.superx.bianalysis.models.DimensionAttribute; import de.superx.bianalysis.models.Filter; import de.superx.bianalysis.models.Measure; /** * Lets consider the following example for the SQL Generation: * * Dimensions: X, Y, Z with one attribute each * Attributes * - X: DA * - values: DA1, DA1 * - Y: DB * - values: DB1, DB1 * - Z: DC * - values: DC1, DC1 * Measures: * - M1: count on col_a * - M2: sum on col_b * * For the simplest use case (all attributes and measures selected without any * filters or bridge tables) the generated table would look like this: * * +---------+-----------------------+----------------------+ * | | DA1 | DA2 | * | |-----------+-----------+-----------+----------+ * | DC | DB1 | DB2 | DB1 | DB2 | * | |-----+-----+-----+-----+-----+-----+-----+----+ * | | M1 | M2 | M1 | M2 | M1 | M2 | M1 | M2 | * +=========+=====+=====+=====+=====+=====+=====+=====+====+ * | DC1 | | | | | | | | | * +---------+-----+-----+-----+-----+-----+-----+-----+----+ * | DC2 | | | | | | | | | * +---------+-----+-----+-----+-----+-----+-----+-----+----+ * * and the generated SQL would look like this: * * SELECT * DC, * COUNT(col_a) FILTER (WHERE DA = 'DA1' AND DB = 'DB1') as "col0", * SUM(col_b) FILTER (WHERE DA = 'DA1' AND DB = 'DB1') as "col1", * COUNT(col_a) FILTER (WHERE DA = 'DA1' AND DB = 'DB2') as "col2", * SUM(col_b) FILTER (WHERE DA = 'DA1' AND DB = 'DB2') as "col3", * COUNT(col_a) FILTER (WHERE DA = 'DA2' AND DB = 'DB1') as "col4", * SUM(col_b) FILTER (WHERE DA = 'DA2' AND DB = 'DB1') as "col5", * COUNT(col_a) FILTER (WHERE DA = 'DA2' AND DB = 'DB2') as "col6", * SUM(col_b) FILTER (WHERE DA = 'DA2' AND DB = 'DB2') as "col7" * FROM * presentation.fact_table * JOIN presentation.dim_a * ON fact_table.dim_a = dim_a.id * JOIN presentation.dim_b * ON fact_table.dim_b = dim_b.id * JOIN presentation.dim_a * ON fact_table.dim_c = dim_c.id * GROUP BY dim_c.DC * * * !! Special Cases: * * 1. Filtering Attributes * User Filtered to see only DA with values DA1. * In this case the select section would shrink to the following four columns: * * COUNT(col_a) FILTER (WHERE DA = 'DA1' AND DB = 'DB1') as "col0", * SUM(col_b) FILTER (WHERE DA = 'DA1' AND DB = 'DB1') as "col1", * COUNT(col_a) FILTER (WHERE DA = 'DA1' AND DB = 'DB2') as "col2", * SUM(col_b) FILTER (WHERE DA = 'DA1' AND DB = 'DB2') as "col3", * COUNT(col_a) FILTER (WHERE DA = 'DA2' AND DB = 'DB1') as "col4" * * and the following where clause would be appended: * * WHERE dim_a.DA IN ('DA1') * * 2. Measures with Build-In-Filter * Consider the measure M1 should only count the values DA1 of attribute DA. * In this case the filter condition is prepended to the selection section * of the specific columns for this measure: * * COUNT(col_a) FILTER (WHERE col_a IN ('DA1') AND DA = 'DA1' AND DB = 'DB1') as "col0", * ... * COUNT(col_a) FILTER (WHERE col_a IN ('DA1') AND DA = 'DA1' AND DB = 'DB2') as "col2", * ... * COUNT(col_a) FILTER (WHERE col_a IN ('DA1') AND DA = 'DA2' AND DB = 'DB1') as "col4", * ... * COUNT(col_a) FILTER (WHERE col_a IN ('DA1') AND DA = 'DA2' AND DB = 'DB2') as "col6", * ... * * The filter condition for a measure can be either an IN or NOT IN condition. * */ public class SQLGenerator { public ReportMetadata reportMetadata; public List columnElements; public char formatSql = ' '; public static String PRESENTATION_SCHEMA = "presentation_user"; private final static String HIERARCHY_MODEL_SUFFIX = "_hierarchy"; private String presentationSchema = "presentation"; public SQLGenerator(ReportMetadata reportMetadata, List columnElements, String presentationSchema) { this.reportMetadata = reportMetadata; this.columnElements = columnElements; this.presentationSchema = presentationSchema; } public SQLGenerator(ReportMetadata reportMetadata, List columnElements) { this.reportMetadata = reportMetadata; this.columnElements = columnElements; } public SQLGenerator(ReportMetadata reportMetadata) { this.reportMetadata = reportMetadata; } public String buildFormattedSqlStatement() { formatSql = '\n'; return buildSqlStatement(); } public String buildSqlStatement() { StringBuilder statement = new StringBuilder(); statement.append("SELECT "); statement.append(buildSelectSection()); statement.append(formatSql + "FROM " + presentationSchema + "." + reportMetadata.factTable.getTablename() ); statement.append(buildJoinSection()); statement.append(buildFilterSection()); statement.append(buildGroupBySection()); statement.append(buildOrderBySection()); return statement.toString(); } public String buildSelectSection() { StringJoiner columns = new StringJoiner(", "); String dimensionAttributesStatement = selectDimensionAttributes(); if (dimensionAttributesStatement != null && !dimensionAttributesStatement.isBlank() ) { columns.add(dimensionAttributesStatement); } StringJoiner measuresStatementJoiner = new StringJoiner(", "); columnElements.forEach((columnElement) -> { measuresStatementJoiner.add(selectMeasure(columnElement)); }); String measuresStatement = measuresStatementJoiner.toString(); if (measuresStatement != null && !measuresStatement.isBlank()) { columns.add(measuresStatement); } return columns.toString(); } public String selectDimensionAttributes() { if (reportMetadata.leftDimensionAttributes == null) { return null; } StringJoiner columns = new StringJoiner(", "); for (DimensionAttribute attribute : reportMetadata.leftDimensionAttributes) { String columnName = attribute.getColumnname(); String tableAlias = attribute.getDimensionTableAlias(); String columnAlias = attribute.getDimensionColumnAlias(); if(attribute.isHierarchy()) { // Build select expressions for each hierarchy level (ancestor node), // assigning aliases col0, col1, etc. StringBuilder resultBuilder = new StringBuilder(); for (int i = reportMetadata.minBridgeLvl; i < reportMetadata.maxBridgeLvl; i++) { resultBuilder .append(attribute.getDimensionTableAlias()) .append(".ancestor_") .append(columnName) .append('[').append(i + 1).append(']') .append(" AS \"col").append(i).append("\""); if (i < reportMetadata.maxBridgeLvl - 1) { resultBuilder.append(", "); } } columns.add(resultBuilder.toString()); } else { columns.add(String.format("%s.%s AS %s", tableAlias, columnName, columnAlias)); String sortOrderColumn = attribute.getSortOrderColumn(); if (sortOrderColumn != null) { columns.add(String.format("%s.%s AS %s_%s", tableAlias, sortOrderColumn, columnAlias, sortOrderColumn)); } } } return columns.toString(); } public String getMeasureTablePart(String factTableTablename, Measure measure, List dimensionAttributes) { String result = ""; String tableCol = factTableTablename + "." + measure.getColumnname(); if(measure.getAggregationType().equals("sum")) { result = "SUM(" + tableCol + ")"; } else if (measure.getAggregationType().equals("count")) { result = "COUNT(" + tableCol + ")"; } else if (measure.getAggregationType().equals("distinct-count")) { result = "COUNT(distinct(" + tableCol + "))"; } else if (measure.getAggregationType().equals("avg")) { result = "AVG(" + tableCol + ")"; } else if (measure.getAggregationType().equals("min")) { result = "MIN(" + tableCol + ")"; } else if (measure.getAggregationType().equals("max")) { result = "MAX(" + tableCol + ")"; } else if (measure.getAggregationType().equals("std")) { result = "STDDEV_SAMP(" + tableCol + ")"; } else if (measure.getAggregationType().equals("var")) { result = "VAR_SAMP(" + tableCol + ")"; } return result; } public String selectMeasure(ColumnElement columnElement) { String factTableTablename = reportMetadata.factTable.getTablename(); StringBuilder measureSelect = new StringBuilder(); Measure measure = columnElement.measure; measureSelect.append(getMeasureTablePart(factTableTablename, measure, reportMetadata.leftDimensionAttributes));//todo topdimen hinzufügen if ( measure.filterCondition != null ) { // if there exists a filter condition for a specific measure, prepend it to the column filter condition measureSelect.append(formatSql+ "FILTER (WHERE " + measure.filterCondition); if (columnElement.dimensionAttributeFilter != null) { measureSelect.append(" AND " + columnElement.dimensionAttributeFilter); } measureSelect.append(")"); } else if (columnElement.dimensionAttributeFilter != null) { measureSelect.append(formatSql + "FILTER (WHERE " + columnElement.dimensionAttributeFilter + ")"); } if (measureSelect.length() != 0) { measureSelect.append(" AS \"col" + columnElement.columnNumber + "\""); } return measureSelect.toString(); } public String buildJoinSection() { StringBuilder statement = new StringBuilder(); for (DimensionAttribute attr : reportMetadata.getUniqueDimensionAttributes()) { String joinColumn = "id"; if( attr.getDimIdJoinColumn() != null && !attr.getDimIdJoinColumn().isBlank()) { // Hierarchy dimension models must always be joined on an id column. // See the "hierarchy_dim.sql" dbt macro for implementation details. // For other models, the default join column can be customized in the metadata JSON files // using the "id_column" attribute. joinColumn = attr.getDimIdJoinColumn(); } String dimensionTable = attr.getTablename(); boolean isTopAttribute = reportMetadata.topDimensionAttributes.contains(attr); if(attr.isHierarchy() && !isTopAttribute) { // Hierarchy dimension tables use a dedicated join suffix. // For example, dim_orgunit is joined as dim_orgunit_hierarchy. // This hierarchy table contains all node paths in the hierarchy tree. // For additional details, see the "hierarchy_dim.sql" macro. dimensionTable += HIERARCHY_MODEL_SUFFIX; } String join = String.format( " JOIN " + presentationSchema + ".%s AS %s ON %s.%s = %s.%s", dimensionTable, attr.getDimensionTableAlias(), reportMetadata.factTable.getTablename(), attr.getJoincolumn(), attr.getDimensionTableAlias(), joinColumn ); statement.append(join); /* TODO userinput for histroical keys: 1. is_current 2. last_known 3. specific date: (ANY_DATE BETWEEN %s.valid_from AND %s.valid_to) */ if(attr.isHistorical()) { String currentFilter = String.format( " AND %s.is_current = true ", attr.getDimensionTableAlias() ); statement.append(currentFilter); } } return statement.toString(); } public String buildFilterSection() { if (reportMetadata.filters == null || reportMetadata.filters.size() <= 0) { return ""; } StringBuilder statement = new StringBuilder(" WHERE "); StringJoiner groups = new StringJoiner(" AND "); for (Filter filter : reportMetadata.filters) { if(reportMetadata.isHierarchyFilter(filter)) { StringBuilder resultBuilder = new StringBuilder(); for (int i = reportMetadata.minBridgeLvl; i < reportMetadata.maxBridgeLvl; i++) { resultBuilder .append(filter.dimensionTableAlias) .append(".ancestor_") .append(filter.columnname) .append('[').append(i).append("] IN (") .append(filter.getValues()) .append(')'); if (i < reportMetadata.maxBridgeLvl - 1) { resultBuilder.append(" OR "); } } if(!resultBuilder.isEmpty()) { groups.add(resultBuilder.toString()); } } else { groups.add(filter.dimensionTableAlias + "." + filter.columnname + " IN (" + filter.getValues() + ")"); } } statement.append(groups.toString()); if(groups.length() == 0) { return ""; } return statement.toString(); } public String buildGroupBySection() { if(reportMetadata.leftDimensionAttributes == null || reportMetadata.leftDimensionAttributes.size() <= 0) { return ""; } StringBuilder statement = new StringBuilder("GROUP BY ROLLUP ("); StringJoiner groups = new StringJoiner(", "); for (DimensionAttribute attr : reportMetadata.leftDimensionAttributes) { if(attr.isHierarchy()) { // TODO: what is happening here? int numOfHierarchyAttributes = reportMetadata.getHierarchyAttributes().size(); for (int i = 0; i < numOfHierarchyAttributes; i++) { for (int j = 0; j < reportMetadata.maxBridgeLvl; j++) { if(j < reportMetadata.minBridgeLvl) { continue; } groups.add("col"+(j + (i * reportMetadata.maxBridgeLvl))); } } } else { groups.add(attr.getDimensionTableAlias() + "." + attr.getColumnname()); if(attr.getSortOrderColumn() != null) { groups.add(attr.getDimensionTableAlias() + "." + attr.getSortOrderColumn()); } } } statement.append(groups.toString()); if(groups.length() == 0) { return ""; } statement.append(")"); return formatSql + statement.toString(); } public StringJoiner buildOrderBySection() { StringJoiner orderCols = new StringJoiner(", ", " ORDER BY ", ""); orderCols.setEmptyValue(""); for (DimensionAttribute attr : reportMetadata.leftDimensionAttributes) { if(attr.isHierarchy()) { for (int i = reportMetadata.minBridgeLvl; i < reportMetadata.maxBridgeLvl; i++) { orderCols.add("col" + i); } continue; } if(attr.getSortOrderColumn() != null) { orderCols.add(attr.getDimensionTableAlias() + "." + attr.getSortOrderColumn()); } else { orderCols.add(attr.getDimensionColumnAlias()); } } return orderCols; } }