SuperX-Kernmodul
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

387 lines
16 KiB

package de.superx.bianalysis.sqlgeneration;
import java.util.List;
import java.util.StringJoiner;
import de.superx.bianalysis.ColumnElement;
import de.superx.bianalysis.ReportMetadata;
import de.superx.bianalysis.models.DimensionAttribute;
import de.superx.bianalysis.models.Filter;
import de.superx.bianalysis.models.Measure;
/**
* Let's consider the following example for the SQL generation:
*
* Dimensions: X, Y, Z with one attribute each
* Attributes
* - X: DA
* - values: DA1, DA2
* - Y: DB
* - values: DB1, DB2
* - Z: DC
* - values: DC1, DC2
* Measures:
* - M1: count on col_a
* - M2: sum on col_b
*
* For the simplest use case (all attributes and measures selected without any
* filters or bridge tables) the generated table would look like this:
*
* +---------+-----------------------+-----------------------+
* |         |          DA1          |          DA2          |
* |         |-----------+-----------+-----------+-----------+
* |   DC    |    DB1    |    DB2    |    DB1    |    DB2    |
* |         |-----+-----+-----+-----+-----+-----+-----+-----+
* |         | M1  | M2  | M1  | M2  | M1  | M2  | M1  | M2  |
* +=========+=====+=====+=====+=====+=====+=====+=====+=====+
* | DC1     |     |     |     |     |     |     |     |     |
* +---------+-----+-----+-----+-----+-----+-----+-----+-----+
* | DC2     |     |     |     |     |     |     |     |     |
* +---------+-----+-----+-----+-----+-----+-----+-----+-----+
*
* and the generated SQL would look like this:
*
* SELECT
* DC,
* COUNT(col_a) FILTER (WHERE DA = 'DA1' AND DB = 'DB1') as "col0",
* SUM(col_b) FILTER (WHERE DA = 'DA1' AND DB = 'DB1') as "col1",
* COUNT(col_a) FILTER (WHERE DA = 'DA1' AND DB = 'DB2') as "col2",
* SUM(col_b) FILTER (WHERE DA = 'DA1' AND DB = 'DB2') as "col3",
* COUNT(col_a) FILTER (WHERE DA = 'DA2' AND DB = 'DB1') as "col4",
* SUM(col_b) FILTER (WHERE DA = 'DA2' AND DB = 'DB1') as "col5",
* COUNT(col_a) FILTER (WHERE DA = 'DA2' AND DB = 'DB2') as "col6",
* SUM(col_b) FILTER (WHERE DA = 'DA2' AND DB = 'DB2') as "col7"
* FROM
* presentation.fact_table
* JOIN presentation.dim_a
* ON fact_table.dim_a = dim_a.id
* JOIN presentation.dim_b
* ON fact_table.dim_b = dim_b.id
* JOIN presentation.dim_c
* ON fact_table.dim_c = dim_c.id
* GROUP BY dim_c.DC
*
*
* !! Special Cases:
*
* 1. Filtering Attributes
* The user filtered attribute DA to show only the value DA1.
* In this case the select section would shrink to the following four columns:
*
* COUNT(col_a) FILTER (WHERE DA = 'DA1' AND DB = 'DB1') as "col0",
* SUM(col_b) FILTER (WHERE DA = 'DA1' AND DB = 'DB1') as "col1",
* COUNT(col_a) FILTER (WHERE DA = 'DA1' AND DB = 'DB2') as "col2",
* SUM(col_b) FILTER (WHERE DA = 'DA1' AND DB = 'DB2') as "col3"
*
* and the following where clause would be appended:
*
* WHERE dim_a.DA IN ('DA1')
*
* 2. Measures with a Built-In Filter
* Consider a measure M1 that should only count the values DA1 of attribute DA.
* In this case the filter condition is prepended to the selection section
* of the specific columns for this measure:
*
* COUNT(col_a) FILTER (WHERE col_a IN ('DA1') AND DA = 'DA1' AND DB = 'DB1') as "col0",
* ...
* COUNT(col_a) FILTER (WHERE col_a IN ('DA1') AND DA = 'DA1' AND DB = 'DB2') as "col2",
* ...
* COUNT(col_a) FILTER (WHERE col_a IN ('DA1') AND DA = 'DA2' AND DB = 'DB1') as "col4",
* ...
* COUNT(col_a) FILTER (WHERE col_a IN ('DA1') AND DA = 'DA2' AND DB = 'DB2') as "col6",
* ...
*
* The filter condition for a measure can be either an IN or NOT IN condition.
*
*/
public class SQLGenerator {

    /** Report definition (fact table, dimension attributes, filters, bridge levels) the SQL is built from. */
    public ReportMetadata reportMetadata;

    /** Measure columns of the report; stays {@code null} when the single-argument constructor is used. */
    public List<ColumnElement> columnElements;

    /**
     * Separator emitted in front of the FROM, FILTER and GROUP BY keywords.
     * A blank by default; {@link #buildFormattedSqlStatement()} switches it to a line break.
     */
    public char formatSql = ' ';

    /** Schema name constant; it is not referenced inside this class. */
    public static String PRESENTATION_SCHEMA = "presentation_user";

    /** Suffix appended to the table name of hierarchy dimensions in the join section. */
    private final static String HIERARCHY_MODEL_SUFFIX = "_hierarchy";

    /** Schema that qualifies the fact table and all joined dimension tables. */
    private String presentationSchema = "presentation";

    /**
     * Creates a generator that reads its tables from the given schema.
     *
     * @param reportMetadata     report definition
     * @param columnElements     measure columns to select
     * @param presentationSchema schema used to qualify the fact and dimension tables
     */
    public SQLGenerator(ReportMetadata reportMetadata, List<ColumnElement> columnElements,
            String presentationSchema) {
        this.reportMetadata = reportMetadata;
        this.columnElements = columnElements;
        this.presentationSchema = presentationSchema;
    }

    /**
     * Creates a generator that uses the default schema {@code "presentation"}.
     *
     * @param reportMetadata report definition
     * @param columnElements measure columns to select
     */
    public SQLGenerator(ReportMetadata reportMetadata, List<ColumnElement> columnElements) {
        this.reportMetadata = reportMetadata;
        this.columnElements = columnElements;
    }

    /**
     * Creates a generator without measure columns that uses the default schema.
     *
     * @param reportMetadata report definition
     */
    public SQLGenerator(ReportMetadata reportMetadata) {
        this.reportMetadata = reportMetadata;
    }

    /**
     * Builds the statement with line breaks in front of the FROM, FILTER and GROUP BY keywords.
     * The line-break separator stays set on this instance, so later build calls are formatted too.
     *
     * @return the complete SQL statement
     */
    public String buildFormattedSqlStatement() {
        formatSql = '\n';
        return buildSqlStatement();
    }

    /**
     * Assembles the complete statement: SELECT, FROM, JOINs, WHERE, GROUP BY and ORDER BY.
     *
     * @return the complete SQL statement
     */
    public String buildSqlStatement() {
        StringBuilder statement = new StringBuilder();
        statement.append("SELECT ");
        statement.append(buildSelectSection());
        statement.append(formatSql + "FROM " + presentationSchema + "." + reportMetadata.factTable.getTablename());
        statement.append(buildJoinSection());
        statement.append(buildFilterSection());
        statement.append(buildGroupBySection());
        statement.append(buildOrderBySection());
        return statement.toString();
    }

    /**
     * Builds the comma-separated select list: first the left dimension attributes,
     * then one aggregate expression per column element.
     *
     * @return the select list without the leading SELECT keyword
     */
    public String buildSelectSection() {
        StringJoiner columns = new StringJoiner(", ");
        String dimensionAttributesStatement = selectDimensionAttributes();
        if (dimensionAttributesStatement != null && !dimensionAttributesStatement.isBlank()) {
            columns.add(dimensionAttributesStatement);
        }
        StringJoiner measuresStatementJoiner = new StringJoiner(", ");
        // columnElements is never assigned by the single-argument constructor.
        if (columnElements != null) {
            for (ColumnElement columnElement : columnElements) {
                measuresStatementJoiner.add(selectMeasure(columnElement));
            }
        }
        String measuresStatement = measuresStatementJoiner.toString();
        if (!measuresStatement.isBlank()) {
            columns.add(measuresStatement);
        }
        return columns.toString();
    }

    /**
     * Builds the select expressions for the left dimension attributes.
     * Hierarchy attributes yield one {@code ancestor_<column>[level + 1] AS "col<level>"} expression
     * per bridge level; other attributes yield the column itself plus its sort order column, if any.
     *
     * @return the comma-separated expressions, or {@code null} when there are no left dimension attributes
     */
    public String selectDimensionAttributes() {
        if (reportMetadata.leftDimensionAttributes == null) {
            return null;
        }
        StringJoiner columns = new StringJoiner(", ");
        for (DimensionAttribute attribute : reportMetadata.leftDimensionAttributes) {
            String columnName = attribute.getColumnname();
            String tableAlias = attribute.getDimensionTableAlias();
            String columnAlias = attribute.getDimensionColumnAlias();
            if (attribute.isHierarchy()) {
                // One select expression per hierarchy level (ancestor node), aliased col<level>.
                StringJoiner levels = new StringJoiner(", ");
                for (int i = reportMetadata.minBridgeLvl; i < reportMetadata.maxBridgeLvl; i++) {
                    levels.add(tableAlias + ".ancestor_" + columnName
                            + "[" + (i + 1) + "]"
                            + " AS \"col" + i + "\"");
                }
                // An empty level range must not leave an empty entry (a dangling ", ") in the select list.
                if (levels.length() > 0) {
                    columns.add(levels.toString());
                }
            } else {
                columns.add(String.format("%s.%s AS %s", tableAlias, columnName, columnAlias));
                String sortOrderColumn = attribute.getSortOrderColumn();
                if (sortOrderColumn != null) {
                    columns.add(String.format("%s.%s AS %s_%s",
                            tableAlias, sortOrderColumn, columnAlias, sortOrderColumn));
                }
            }
        }
        return columns.toString();
    }

    /**
     * Maps the aggregation type of a measure to the SQL aggregate call on its fact table column.
     *
     * @param factTableTablename  table name used to qualify the measure column
     * @param measure             measure providing the column name and the aggregation type
     * @param dimensionAttributes currently unused
     * @return the aggregate expression, or an empty string for an unknown aggregation type
     */
    public String getMeasureTablePart(String factTableTablename, Measure measure, List<DimensionAttribute> dimensionAttributes) {
        String tableCol = factTableTablename + "." + measure.getColumnname();
        return switch (measure.getAggregationType()) {
            case "sum" -> "SUM(" + tableCol + ")";
            case "count" -> "COUNT(" + tableCol + ")";
            case "distinct-count" -> "COUNT(distinct(" + tableCol + "))";
            case "avg" -> "AVG(" + tableCol + ")";
            case "min" -> "MIN(" + tableCol + ")";
            case "max" -> "MAX(" + tableCol + ")";
            case "std" -> "STDDEV_SAMP(" + tableCol + ")";
            case "var" -> "VAR_SAMP(" + tableCol + ")";
            default -> "";
        };
    }

    /**
     * Builds the select expression of one measure column: the aggregate call, an optional
     * {@code FILTER (WHERE ...)} clause and the {@code "col<columnNumber>"} alias.
     *
     * @param columnElement column providing the measure, the attribute filter and the column number
     * @return the select expression; empty when neither an aggregate nor a filter was produced
     */
    public String selectMeasure(ColumnElement columnElement) {
        String factTableTablename = reportMetadata.factTable.getTablename();
        StringBuilder measureSelect = new StringBuilder();
        Measure measure = columnElement.measure;
        // TODO: also pass the top dimension attributes
        measureSelect.append(getMeasureTablePart(factTableTablename, measure, reportMetadata.leftDimensionAttributes));
        if (measure.filterCondition != null) {
            // A filter condition defined on the measure itself is placed in front of the column filter.
            measureSelect.append(formatSql + "FILTER (WHERE " + measure.filterCondition);
            if (columnElement.dimensionAttributeFilter != null) {
                measureSelect.append(" AND " + columnElement.dimensionAttributeFilter);
            }
            measureSelect.append(")");
        } else if (columnElement.dimensionAttributeFilter != null) {
            measureSelect.append(formatSql + "FILTER (WHERE " + columnElement.dimensionAttributeFilter + ")");
        }
        if (measureSelect.length() != 0) {
            measureSelect.append(" AS \"col" + columnElement.columnNumber + "\"");
        }
        return measureSelect.toString();
    }

    /**
     * Builds one JOIN clause per unique dimension attribute, joining the dimension table
     * (aliased) to the fact table.
     *
     * @return the concatenated JOIN clauses, each starting with a blank
     */
    public String buildJoinSection() {
        StringBuilder statement = new StringBuilder();
        String factTable = reportMetadata.factTable.getTablename();
        for (DimensionAttribute attr : reportMetadata.getUniqueDimensionAttributes()) {
            String joinColumn = "id";
            if (attr.getDimIdJoinColumn() != null
                    && !attr.getDimIdJoinColumn().isBlank()) {
                // Hierarchy dimension models must always be joined on an id column.
                // See the "hierarchy_dim.sql" dbt macro for implementation details.
                // For other models, the default join column can be customized in the metadata JSON files
                // using the "id_column" attribute.
                joinColumn = attr.getDimIdJoinColumn();
            }
            String dimensionTable = attr.getTablename();
            // A missing top attribute list simply means that the attribute is not a top attribute.
            boolean isTopAttribute = reportMetadata.topDimensionAttributes != null
                    && reportMetadata.topDimensionAttributes.contains(attr);
            if (attr.isHierarchy() && !isTopAttribute) {
                // Hierarchy dimension tables use a dedicated join suffix.
                // For example, dim_orgunit is joined as dim_orgunit_hierarchy.
                // This hierarchy table contains all node paths in the hierarchy tree.
                // For additional details, see the "hierarchy_dim.sql" macro.
                dimensionTable += HIERARCHY_MODEL_SUFFIX;
            }
            String tableAlias = attr.getDimensionTableAlias();
            // The schema is passed as an argument so that a '%' in its name cannot corrupt the pattern.
            String join = String.format(
                    " JOIN %s.%s AS %s ON %s.%s = %s.%s",
                    presentationSchema,
                    dimensionTable,
                    tableAlias,
                    factTable,
                    attr.getJoincolumn(),
                    tableAlias,
                    joinColumn
            );
            statement.append(join);
            /* TODO user input for historical keys:
               1. is_current
               2. last_known
               3. specific date: (ANY_DATE BETWEEN %s.valid_from AND %s.valid_to)
            */
            if (attr.isHistorical()) {
                String currentFilter = String.format(
                        " AND %s.is_current = true ",
                        tableAlias
                );
                statement.append(currentFilter);
            }
        }
        return statement.toString();
    }

    /**
     * Builds the WHERE clause from the report filters. The conditions of the individual filters
     * are combined with AND; a hierarchy filter expands to one IN condition per bridge level,
     * combined with OR.
     *
     * @return the WHERE clause starting with a blank, or an empty string when no condition applies
     */
    public String buildFilterSection() {
        if (reportMetadata.filters == null || reportMetadata.filters.isEmpty()) {
            return "";
        }
        StringJoiner groups = new StringJoiner(" AND ");
        for (Filter filter : reportMetadata.filters) {
            if (reportMetadata.isHierarchyFilter(filter)) {
                // NOTE(review): the select section reads ancestor_<column>[i + 1] for level i, while
                // this condition reads index [i]; confirm which index base is intended.
                StringJoiner levels = new StringJoiner(" OR ");
                for (int i = reportMetadata.minBridgeLvl; i < reportMetadata.maxBridgeLvl; i++) {
                    levels.add(filter.dimensionTableAlias + ".ancestor_" + filter.columnname
                            + "[" + i + "] IN (" + filter.getValues() + ")");
                }
                if (levels.length() > 0) {
                    // AND binds tighter than OR, so the OR chain needs its own parentheses
                    // before it is combined with the other filters.
                    groups.add("(" + levels + ")");
                }
            } else {
                groups.add(filter.dimensionTableAlias + "." + filter.columnname + " IN (" + filter.getValues() + ")");
            }
        }
        if (groups.length() == 0) {
            return "";
        }
        return " WHERE " + groups;
    }

    /**
     * Builds the {@code GROUP BY ROLLUP (...)} clause over the left dimension attributes.
     *
     * @return the clause prefixed with {@link #formatSql}, or an empty string when there is nothing to group by
     */
    public String buildGroupBySection() {
        if (reportMetadata.leftDimensionAttributes == null || reportMetadata.leftDimensionAttributes.size() <= 0) {
            return "";
        }
        StringBuilder statement = new StringBuilder("GROUP BY ROLLUP (");
        StringJoiner groups = new StringJoiner(", ");
        for (DimensionAttribute attr : reportMetadata.leftDimensionAttributes) {
            if (attr.isHierarchy()) {
                // Adds the aliases col<j + i * maxBridgeLvl> for every hierarchy attribute i and every
                // level j in [minBridgeLvl, maxBridgeLvl).
                // NOTE(review): selectDimensionAttributes() emits the aliases col<level> without a
                // per-attribute offset; confirm the intended numbering for more than one hierarchy attribute.
                int numOfHierarchyAttributes = reportMetadata.getHierarchyAttributes().size();
                for (int i = 0; i < numOfHierarchyAttributes; i++) {
                    for (int j = 0; j < reportMetadata.maxBridgeLvl; j++) {
                        if (j < reportMetadata.minBridgeLvl) {
                            continue;
                        }
                        groups.add("col" + (j + (i * reportMetadata.maxBridgeLvl)));
                    }
                }
            } else {
                groups.add(attr.getDimensionTableAlias() + "." + attr.getColumnname());
                if (attr.getSortOrderColumn() != null) {
                    groups.add(attr.getDimensionTableAlias() + "." + attr.getSortOrderColumn());
                }
            }
        }
        if (groups.length() == 0) {
            return "";
        }
        statement.append(groups.toString());
        statement.append(")");
        return formatSql + statement.toString();
    }

    /**
     * Builds the ORDER BY clause over the left dimension attributes. Hierarchy attributes are
     * ordered by their level aliases, other attributes by their sort order column when present
     * and by their column alias otherwise.
     *
     * @return a joiner that renders as {@code " ORDER BY ..."}, or as an empty string when there is nothing to order by
     */
    public StringJoiner buildOrderBySection() {
        StringJoiner orderCols = new StringJoiner(", ", " ORDER BY ", "");
        orderCols.setEmptyValue("");
        // Same null tolerance as the select and group-by sections.
        if (reportMetadata.leftDimensionAttributes == null) {
            return orderCols;
        }
        for (DimensionAttribute attr : reportMetadata.leftDimensionAttributes) {
            if (attr.isHierarchy()) {
                for (int i = reportMetadata.minBridgeLvl; i < reportMetadata.maxBridgeLvl; i++) {
                    orderCols.add("col" + i);
                }
                continue;
            }
            if (attr.getSortOrderColumn() != null) {
                orderCols.add(attr.getDimensionTableAlias() + "." + attr.getSortOrderColumn());
            } else {
                orderCols.add(attr.getDimensionColumnAlias());
            }
        }
        return orderCols;
    }
}