You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
932 lines
24 KiB
932 lines
24 KiB
<?xml version="1.0" encoding="UTF-8"?><transformation> |
|
<info> |
|
<name>wiki2xhtml</name> |
|
<description/> |
|
<extended_description/> |
|
<trans_version/> |
|
<trans_type>Normal</trans_type> |
|
<trans_status>0</trans_status> |
|
<directory>/</directory> |
|
<parameters> |
|
<parameter> |
|
<name>document_id</name> |
|
<default_value>68</default_value> |
|
<description/> |
|
</parameter> |
|
</parameters> |
|
<log> |
|
<trans-log-table> |
|
<connection/> |
|
<schema/> |
|
<table/> |
|
<size_limit_lines/> |
|
<interval/> |
|
<timeout_days/> |
|
<field> |
|
<id>ID_BATCH</id> |
|
<enabled>Y</enabled> |
|
<name>ID_BATCH</name> |
|
</field> |
|
<field> |
|
<id>CHANNEL_ID</id> |
|
<enabled>Y</enabled> |
|
<name>CHANNEL_ID</name> |
|
</field> |
|
<field> |
|
<id>TRANSNAME</id> |
|
<enabled>Y</enabled> |
|
<name>TRANSNAME</name> |
|
</field> |
|
<field> |
|
<id>STATUS</id> |
|
<enabled>Y</enabled> |
|
<name>STATUS</name> |
|
</field> |
|
<field> |
|
<id>LINES_READ</id> |
|
<enabled>Y</enabled> |
|
<name>LINES_READ</name> |
|
<subject/> |
|
</field> |
|
<field> |
|
<id>LINES_WRITTEN</id> |
|
<enabled>Y</enabled> |
|
<name>LINES_WRITTEN</name> |
|
<subject/> |
|
</field> |
|
<field> |
|
<id>LINES_UPDATED</id> |
|
<enabled>Y</enabled> |
|
<name>LINES_UPDATED</name> |
|
<subject/> |
|
</field> |
|
<field> |
|
<id>LINES_INPUT</id> |
|
<enabled>Y</enabled> |
|
<name>LINES_INPUT</name> |
|
<subject/> |
|
</field> |
|
<field> |
|
<id>LINES_OUTPUT</id> |
|
<enabled>Y</enabled> |
|
<name>LINES_OUTPUT</name> |
|
<subject/> |
|
</field> |
|
<field> |
|
<id>LINES_REJECTED</id> |
|
<enabled>Y</enabled> |
|
<name>LINES_REJECTED</name> |
|
<subject/> |
|
</field> |
|
<field> |
|
<id>ERRORS</id> |
|
<enabled>Y</enabled> |
|
<name>ERRORS</name> |
|
</field> |
|
<field> |
|
<id>STARTDATE</id> |
|
<enabled>Y</enabled> |
|
<name>STARTDATE</name> |
|
</field> |
|
<field> |
|
<id>ENDDATE</id> |
|
<enabled>Y</enabled> |
|
<name>ENDDATE</name> |
|
</field> |
|
<field> |
|
<id>LOGDATE</id> |
|
<enabled>Y</enabled> |
|
<name>LOGDATE</name> |
|
</field> |
|
<field> |
|
<id>DEPDATE</id> |
|
<enabled>Y</enabled> |
|
<name>DEPDATE</name> |
|
</field> |
|
<field> |
|
<id>REPLAYDATE</id> |
|
<enabled>Y</enabled> |
|
<name>REPLAYDATE</name> |
|
</field> |
|
<field> |
|
<id>LOG_FIELD</id> |
|
<enabled>Y</enabled> |
|
<name>LOG_FIELD</name> |
|
</field> |
|
<field> |
|
<id>EXECUTING_SERVER</id> |
|
<enabled>N</enabled> |
|
<name>EXECUTING_SERVER</name> |
|
</field> |
|
<field> |
|
<id>EXECUTING_USER</id> |
|
<enabled>N</enabled> |
|
<name>EXECUTING_USER</name> |
|
</field> |
|
<field> |
|
<id>CLIENT</id> |
|
<enabled>N</enabled> |
|
<name>CLIENT</name> |
|
</field> |
|
</trans-log-table> |
|
<perf-log-table> |
|
<connection/> |
|
<schema/> |
|
<table/> |
|
<interval/> |
|
<timeout_days/> |
|
<field> |
|
<id>ID_BATCH</id> |
|
<enabled>Y</enabled> |
|
<name>ID_BATCH</name> |
|
</field> |
|
<field> |
|
<id>SEQ_NR</id> |
|
<enabled>Y</enabled> |
|
<name>SEQ_NR</name> |
|
</field> |
|
<field> |
|
<id>LOGDATE</id> |
|
<enabled>Y</enabled> |
|
<name>LOGDATE</name> |
|
</field> |
|
<field> |
|
<id>TRANSNAME</id> |
|
<enabled>Y</enabled> |
|
<name>TRANSNAME</name> |
|
</field> |
|
<field> |
|
<id>STEPNAME</id> |
|
<enabled>Y</enabled> |
|
<name>STEPNAME</name> |
|
</field> |
|
<field> |
|
<id>STEP_COPY</id> |
|
<enabled>Y</enabled> |
|
<name>STEP_COPY</name> |
|
</field> |
|
<field> |
|
<id>LINES_READ</id> |
|
<enabled>Y</enabled> |
|
<name>LINES_READ</name> |
|
</field> |
|
<field> |
|
<id>LINES_WRITTEN</id> |
|
<enabled>Y</enabled> |
|
<name>LINES_WRITTEN</name> |
|
</field> |
|
<field> |
|
<id>LINES_UPDATED</id> |
|
<enabled>Y</enabled> |
|
<name>LINES_UPDATED</name> |
|
</field> |
|
<field> |
|
<id>LINES_INPUT</id> |
|
<enabled>Y</enabled> |
|
<name>LINES_INPUT</name> |
|
</field> |
|
<field> |
|
<id>LINES_OUTPUT</id> |
|
<enabled>Y</enabled> |
|
<name>LINES_OUTPUT</name> |
|
</field> |
|
<field> |
|
<id>LINES_REJECTED</id> |
|
<enabled>Y</enabled> |
|
<name>LINES_REJECTED</name> |
|
</field> |
|
<field> |
|
<id>ERRORS</id> |
|
<enabled>Y</enabled> |
|
<name>ERRORS</name> |
|
</field> |
|
<field> |
|
<id>INPUT_BUFFER_ROWS</id> |
|
<enabled>Y</enabled> |
|
<name>INPUT_BUFFER_ROWS</name> |
|
</field> |
|
<field> |
|
<id>OUTPUT_BUFFER_ROWS</id> |
|
<enabled>Y</enabled> |
|
<name>OUTPUT_BUFFER_ROWS</name> |
|
</field> |
|
</perf-log-table> |
|
<channel-log-table> |
|
<connection/> |
|
<schema/> |
|
<table/> |
|
<timeout_days/> |
|
<field> |
|
<id>ID_BATCH</id> |
|
<enabled>Y</enabled> |
|
<name>ID_BATCH</name> |
|
</field> |
|
<field> |
|
<id>CHANNEL_ID</id> |
|
<enabled>Y</enabled> |
|
<name>CHANNEL_ID</name> |
|
</field> |
|
<field> |
|
<id>LOG_DATE</id> |
|
<enabled>Y</enabled> |
|
<name>LOG_DATE</name> |
|
</field> |
|
<field> |
|
<id>LOGGING_OBJECT_TYPE</id> |
|
<enabled>Y</enabled> |
|
<name>LOGGING_OBJECT_TYPE</name> |
|
</field> |
|
<field> |
|
<id>OBJECT_NAME</id> |
|
<enabled>Y</enabled> |
|
<name>OBJECT_NAME</name> |
|
</field> |
|
<field> |
|
<id>OBJECT_COPY</id> |
|
<enabled>Y</enabled> |
|
<name>OBJECT_COPY</name> |
|
</field> |
|
<field> |
|
<id>REPOSITORY_DIRECTORY</id> |
|
<enabled>Y</enabled> |
|
<name>REPOSITORY_DIRECTORY</name> |
|
</field> |
|
<field> |
|
<id>FILENAME</id> |
|
<enabled>Y</enabled> |
|
<name>FILENAME</name> |
|
</field> |
|
<field> |
|
<id>OBJECT_ID</id> |
|
<enabled>Y</enabled> |
|
<name>OBJECT_ID</name> |
|
</field> |
|
<field> |
|
<id>OBJECT_REVISION</id> |
|
<enabled>Y</enabled> |
|
<name>OBJECT_REVISION</name> |
|
</field> |
|
<field> |
|
<id>PARENT_CHANNEL_ID</id> |
|
<enabled>Y</enabled> |
|
<name>PARENT_CHANNEL_ID</name> |
|
</field> |
|
<field> |
|
<id>ROOT_CHANNEL_ID</id> |
|
<enabled>Y</enabled> |
|
<name>ROOT_CHANNEL_ID</name> |
|
</field> |
|
</channel-log-table> |
|
<step-log-table> |
|
<connection/> |
|
<schema/> |
|
<table/> |
|
<timeout_days/> |
|
<field> |
|
<id>ID_BATCH</id> |
|
<enabled>Y</enabled> |
|
<name>ID_BATCH</name> |
|
</field> |
|
<field> |
|
<id>CHANNEL_ID</id> |
|
<enabled>Y</enabled> |
|
<name>CHANNEL_ID</name> |
|
</field> |
|
<field> |
|
<id>LOG_DATE</id> |
|
<enabled>Y</enabled> |
|
<name>LOG_DATE</name> |
|
</field> |
|
<field> |
|
<id>TRANSNAME</id> |
|
<enabled>Y</enabled> |
|
<name>TRANSNAME</name> |
|
</field> |
|
<field> |
|
<id>STEPNAME</id> |
|
<enabled>Y</enabled> |
|
<name>STEPNAME</name> |
|
</field> |
|
<field> |
|
<id>STEP_COPY</id> |
|
<enabled>Y</enabled> |
|
<name>STEP_COPY</name> |
|
</field> |
|
<field> |
|
<id>LINES_READ</id> |
|
<enabled>Y</enabled> |
|
<name>LINES_READ</name> |
|
</field> |
|
<field> |
|
<id>LINES_WRITTEN</id> |
|
<enabled>Y</enabled> |
|
<name>LINES_WRITTEN</name> |
|
</field> |
|
<field> |
|
<id>LINES_UPDATED</id> |
|
<enabled>Y</enabled> |
|
<name>LINES_UPDATED</name> |
|
</field> |
|
<field> |
|
<id>LINES_INPUT</id> |
|
<enabled>Y</enabled> |
|
<name>LINES_INPUT</name> |
|
</field> |
|
<field> |
|
<id>LINES_OUTPUT</id> |
|
<enabled>Y</enabled> |
|
<name>LINES_OUTPUT</name> |
|
</field> |
|
<field> |
|
<id>LINES_REJECTED</id> |
|
<enabled>Y</enabled> |
|
<name>LINES_REJECTED</name> |
|
</field> |
|
<field> |
|
<id>ERRORS</id> |
|
<enabled>Y</enabled> |
|
<name>ERRORS</name> |
|
</field> |
|
<field> |
|
<id>LOG_FIELD</id> |
|
<enabled>N</enabled> |
|
<name>LOG_FIELD</name> |
|
</field> |
|
</step-log-table> |
|
<metrics-log-table> |
|
<connection/> |
|
<schema/> |
|
<table/> |
|
<timeout_days/> |
|
<field> |
|
<id>ID_BATCH</id> |
|
<enabled>Y</enabled> |
|
<name>ID_BATCH</name> |
|
</field> |
|
<field> |
|
<id>CHANNEL_ID</id> |
|
<enabled>Y</enabled> |
|
<name>CHANNEL_ID</name> |
|
</field> |
|
<field> |
|
<id>LOG_DATE</id> |
|
<enabled>Y</enabled> |
|
<name>LOG_DATE</name> |
|
</field> |
|
<field> |
|
<id>METRICS_DATE</id> |
|
<enabled>Y</enabled> |
|
<name>METRICS_DATE</name> |
|
</field> |
|
<field> |
|
<id>METRICS_CODE</id> |
|
<enabled>Y</enabled> |
|
<name>METRICS_CODE</name> |
|
</field> |
|
<field> |
|
<id>METRICS_DESCRIPTION</id> |
|
<enabled>Y</enabled> |
|
<name>METRICS_DESCRIPTION</name> |
|
</field> |
|
<field> |
|
<id>METRICS_SUBJECT</id> |
|
<enabled>Y</enabled> |
|
<name>METRICS_SUBJECT</name> |
|
</field> |
|
<field> |
|
<id>METRICS_TYPE</id> |
|
<enabled>Y</enabled> |
|
<name>METRICS_TYPE</name> |
|
</field> |
|
<field> |
|
<id>METRICS_VALUE</id> |
|
<enabled>Y</enabled> |
|
<name>METRICS_VALUE</name> |
|
</field> |
|
</metrics-log-table> |
|
</log> |
|
<maxdate> |
|
<connection/> |
|
<table/> |
|
<field/> |
|
<offset>0.0</offset> |
|
<maxdiff>0.0</maxdiff> |
|
</maxdate> |
|
<size_rowset>10000</size_rowset> |
|
<sleep_time_empty>50</sleep_time_empty> |
|
<sleep_time_full>50</sleep_time_full> |
|
<unique_connections>N</unique_connections> |
|
<feedback_shown>Y</feedback_shown> |
|
<feedback_size>50000</feedback_size> |
|
<using_thread_priorities>Y</using_thread_priorities> |
|
<shared_objects_file/> |
|
<capture_step_performance>N</capture_step_performance> |
|
<step_performance_capturing_delay>1000</step_performance_capturing_delay> |
|
<step_performance_capturing_size_limit>100</step_performance_capturing_size_limit> |
|
<dependencies/> |
|
<partitionschemas/> |
|
<slaveservers/> |
|
<clusterschemas/> |
|
<created_user>-</created_user> |
|
<created_date>2017/08/22 11:23:11.075</created_date> |
|
<modified_user>-</modified_user> |
|
<modified_date>2017/08/22 11:23:11.075</modified_date> |
|
<key_for_session_key>H4sIAAAAAAAAAAMAAAAAAAAAAAA=</key_for_session_key> |
|
<is_key_private>N</is_key_private> |
|
</info> |
|
<notepads/> |
|
<order> |
|
<hop> |
|
<from>wiki2html</from> |
|
<to>Update src_xml</to> |
|
<enabled>Y</enabled> |
|
</hop> |
|
<hop> |
|
<from>getSrcTitle</from> |
|
<to>DownloadSrc</to> |
|
<enabled>Y</enabled> |
|
</hop> |
|
<hop> |
|
<from>saveSrc</from> |
|
<to>Block this step until steps finish</to> |
|
<enabled>Y</enabled> |
|
</hop> |
|
<hop> |
|
<from>Block this step until steps finish</from> |
|
<to>wiki2html</to> |
|
<enabled>Y</enabled> |
|
</hop> |
|
<hop> |
|
<from>DownloadSrc</from> |
|
<to>save src_api_xml</to> |
|
<enabled>Y</enabled> |
|
</hop> |
|
<hop> |
|
<from>save src_api_xml</from> |
|
<to>apiDownload2src</to> |
|
<enabled>Y</enabled> |
|
</hop> |
|
<hop> |
|
<from>apiDownload2src</from> |
|
<to>saveSrc</to> |
|
<enabled>Y</enabled> |
|
</hop> |
|
</order> |
|
<step> |
|
<name>Block this step until steps finish</name> |
|
<type>BlockUntilStepsFinish</type> |
|
<description/> |
|
<distribute>Y</distribute> |
|
<custom_distribution/> |
|
<copies>1</copies> |
|
<partitioning> |
|
<method>none</method> |
|
<schema_name/> |
|
</partitioning> |
|
<steps> |
|
<step> |
|
<name>saveSrc</name> |
|
<CopyNr>0</CopyNr> |
|
</step> |
|
</steps> |
|
<cluster_schema/> |
|
<remotesteps> |
|
<input/> |
|
<output/> |
|
</remotesteps> |
|
<GUI> |
|
<xloc>384</xloc> |
|
<yloc>288</yloc> |
|
<draw>Y</draw> |
|
</GUI> |
|
</step> |
|
<step> |
|
<name>DownloadSrc</name> |
|
<type>UserDefinedJavaClass</type> |
|
<description/> |
|
<distribute>Y</distribute> |
|
<custom_distribution/> |
|
<copies>1</copies> |
|
<partitioning> |
|
<method>none</method> |
|
<schema_name/> |
|
</partitioning> |
|
<definitions> |
|
<definition> |
|
<class_type>TRANSFORM_CLASS</class_type> |
|
<class_name>Processor</class_name> |
|
<class_source>import org.apache.commons.httpclient.Cookie; |
|
import org.apache.commons.httpclient.HttpClient; |
|
import org.apache.commons.httpclient.HttpException; |
|
import org.apache.commons.httpclient.HttpState; |
|
import org.apache.commons.httpclient.NameValuePair; |
|
import org.apache.commons.httpclient.cookie.CookiePolicy; |
|
import org.apache.commons.httpclient.methods.GetMethod; |
|
import org.apache.commons.httpclient.methods.PostMethod; |
|
import java.io.InputStream; |
|
import java.io.IOException; |
|
import java.net.URLEncoder; |
|
|
|
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws java.io.IOException,KettleException { |
|
|
|
Object[] r = getRow(); |
|
if (r == null) { |
|
setOutputDone(); |
|
return false; |
|
} |
|
|
|
String host=get(Fields.In, "hostname").getString(r);// e.g."wiki.his.de"; |
|
String api_path=get(Fields.In, "api_path").getString(r);// e.g."wiki.his.de"; |
|
String cookieName=get(Fields.In, "cookie_name").getString(r); //"wikidb_session"; |
|
String cookieValue=get(Fields.In, "cookie_value").getString(r);//"dr1brbd7saa7jismgsk2a3cf80"; //getParameter("cookie"); |
|
|
|
String title= get(Fields.In, "src_title").getString(r); |
|
try{ |
|
title=URLEncoder.encode(title, "UTF-8"); |
|
} |
|
catch (Exception e) { |
|
} |
|
String strURL="https://"+host+"/"+api_path+"/api.php?action=query&titles="+title+"&prop=revisions&rvprop=content&format=xml"; |
|
HttpState initialState = null; |
|
initialState = authenticateWithCookie(host, cookieName, cookieValue); |
|
Object[] outputRow = createOutputRow(r, data.outputRowMeta.size()); |
|
String responseBody =""; |
|
//try { |
|
responseBody = getPageContent(strURL, initialState); |
|
|
|
//} catch (HttpException e) { |
|
// TODO Auto-generated catch block |
|
//get(Fields.Out, "src_text").setValue(outputRow,"Nicht lesbar: "+ strURL); |
|
// e.printStackTrace(); |
|
// } catch (IOException e) { |
|
// TODO Auto-generated catch block |
|
//get(Fields.Out, "src_text").setValue(outputRow,"Nicht lesbar: "+ strURL); |
|
// e.printStackTrace(); |
|
// } |
|
|
|
get(Fields.Out, "src_api_xml").setValue(outputRow, responseBody); |
|
|
|
putRow(data.outputRowMeta, outputRow); |
|
return true; |
|
} |
|
public static HttpState authenticateWithCookie(String host, String cookieName, String cookieValue) { |
|
HttpState initialState = new HttpState(); |
|
if(cookieName != null){ |
|
Cookie mycookie = new Cookie(host, cookieName, cookieValue, "/", null, false); |
|
initialState.addCookie(mycookie); |
|
} |
|
return initialState; |
|
} |
|
public static String getPageContent(String strURL, HttpState initialState) throws IOException, HttpException { |
|
|
|
int c; |
|
String responseBodyString=""; |
|
HttpClient httpclient = new HttpClient(); |
|
httpclient.getHttpConnectionManager(). |
|
getParams().setConnectionTimeout(30000); |
|
httpclient.setState(initialState); |
|
|
|
httpclient.getParams().setCookiePolicy(CookiePolicy.RFC_2109); |
|
GetMethod httpget = new GetMethod(strURL); |
|
httpget.addRequestHeader("Content-Type","text/xml; charset=UTF-8"); |
|
// Execute HTTP GET |
|
//der Inhalt steht in textarea id=wpTextbox1 |
|
int result = httpclient.executeMethod(httpget); |
|
//System.out.println("Response status code: " + result); |
|
InputStream responseBody = null; |
|
|
|
//responseBody = httpget.getResponseBodyAsStream(); //getResponseBodyAsString(); |
|
responseBodyString=httpget.getResponseBodyAsString(); |
|
/*while ((c = responseBody.read()) != -1) { |
|
responseBodyString+= (char) c; |
|
}*/ |
|
httpget.releaseConnection(); |
|
return responseBodyString; |
|
} |
|
</class_source> |
|
</definition> |
|
</definitions> |
|
<fields/> |
|
<clear_result_fields>N</clear_result_fields> |
|
<info_steps/> |
|
<target_steps/> |
|
<usage_parameters/> |
|
<cluster_schema/> |
|
<remotesteps> |
|
<input/> |
|
<output/> |
|
</remotesteps> |
|
<GUI> |
|
<xloc>48</xloc> |
|
<yloc>176</yloc> |
|
<draw>Y</draw> |
|
</GUI> |
|
</step> |
|
<step> |
|
<name>Update src_xml</name> |
|
<type>Update</type> |
|
<description/> |
|
<distribute>Y</distribute> |
|
<custom_distribution/> |
|
<copies>1</copies> |
|
<partitioning> |
|
<method>none</method> |
|
<schema_name/> |
|
</partitioning> |
|
<connection>eduetl</connection> |
|
<skip_lookup>N</skip_lookup> |
|
<commit>100</commit> |
|
<use_batch>N</use_batch> |
|
<error_ignored>N</error_ignored> |
|
<ignore_flag_field/> |
|
<lookup> |
|
<schema/> |
|
<table>kb_document_source</table> |
|
<key> |
|
<name>document_id</name> |
|
<field>document_id</field> |
|
<condition>=</condition> |
|
<name2/> |
|
</key> |
|
<value> |
|
<name>src_xml</name> |
|
<rename>contentHtml</rename> |
|
</value> |
|
<value> |
|
<name>last_input</name> |
|
<rename>last_input</rename> |
|
</value> |
|
<value> |
|
<name>src_headers</name> |
|
<rename>headersStr</rename> |
|
</value> |
|
<value> |
|
<name>src_header_levels</name> |
|
<rename>headersLevelsStr</rename> |
|
</value> |
|
<value> |
|
<name>internal_hyperlinks</name> |
|
<rename>internalHyperlinksStr</rename> |
|
</value> |
|
<value> |
|
<name>template_options</name> |
|
<rename>templateOptionsStr</rename> |
|
</value> |
|
</lookup> |
|
<cluster_schema/> |
|
<remotesteps> |
|
<input/> |
|
<output/> |
|
</remotesteps> |
|
<GUI> |
|
<xloc>560</xloc> |
|
<yloc>144</yloc> |
|
<draw>Y</draw> |
|
</GUI> |
|
</step> |
|
<step> |
|
<name>apiDownload2src</name> |
|
<type>XSLT</type> |
|
<description/> |
|
<distribute>Y</distribute> |
|
<custom_distribution/> |
|
<copies>1</copies> |
|
<partitioning> |
|
<method>none</method> |
|
<schema_name/> |
|
</partitioning> |
|
<xslfilename>${Internal.Transformation.Filename.Directory}/mediawikiapi_page2text.xsl</xslfilename> |
|
<fieldname>src_api_xml</fieldname> |
|
<resultfieldname>result_text</resultfieldname> |
|
<xslfilefield/> |
|
<xslfilefielduse>N</xslfilefielduse> |
|
<xslfieldisafile>N</xslfieldisafile> |
|
<xslfactory>JAXP</xslfactory> |
|
<parameters/> |
|
<outputproperties> |
|
<outputproperty> |
|
<name>method</name> |
|
<value>text</value> |
|
</outputproperty> |
|
</outputproperties> |
|
<cluster_schema/> |
|
<remotesteps> |
|
<input/> |
|
<output/> |
|
</remotesteps> |
|
<GUI> |
|
<xloc>240</xloc> |
|
<yloc>224</yloc> |
|
<draw>Y</draw> |
|
</GUI> |
|
</step> |
|
<step> |
|
<name>getSrcTitle</name> |
|
<type>TableInput</type> |
|
<description/> |
|
<distribute>Y</distribute> |
|
<custom_distribution/> |
|
<copies>1</copies> |
|
<partitioning> |
|
<method>none</method> |
|
<schema_name/> |
|
</partitioning> |
|
<connection>eduetl</connection> |
|
<sql>select S.document_id, S.src_title, S.src_text,S.src_api_xml, |
|
S.src_url,W.hostname,W.api_path,W.cookie_name, W.cookie_value, today() as last_input |
|
from kb_document_source S, kb_webconnection W |
|
where W.id=S.webconnection_id |
|
and S.document_id=${document_id}</sql> |
|
<limit>0</limit> |
|
<lookup/> |
|
<execute_each_row>N</execute_each_row> |
|
<variables_active>Y</variables_active> |
|
<lazy_conversion_active>N</lazy_conversion_active> |
|
<cluster_schema/> |
|
<remotesteps> |
|
<input/> |
|
<output/> |
|
</remotesteps> |
|
<GUI> |
|
<xloc>64</xloc> |
|
<yloc>48</yloc> |
|
<draw>Y</draw> |
|
</GUI> |
|
</step> |
|
<step> |
|
<name>saveSrc</name> |
|
<type>Update</type> |
|
<description/> |
|
<distribute>Y</distribute> |
|
<custom_distribution/> |
|
<copies>1</copies> |
|
<partitioning> |
|
<method>none</method> |
|
<schema_name/> |
|
</partitioning> |
|
<connection>eduetl</connection> |
|
<skip_lookup>N</skip_lookup> |
|
<commit>100</commit> |
|
<use_batch>N</use_batch> |
|
<error_ignored>N</error_ignored> |
|
<ignore_flag_field/> |
|
<lookup> |
|
<schema/> |
|
<table>kb_document_source</table> |
|
<key> |
|
<name>document_id</name> |
|
<field>document_id</field> |
|
<condition>=</condition> |
|
<name2/> |
|
</key> |
|
<value> |
|
<name>src_text</name> |
|
<rename>result_text</rename> |
|
</value> |
|
</lookup> |
|
<cluster_schema/> |
|
<remotesteps> |
|
<input/> |
|
<output/> |
|
</remotesteps> |
|
<GUI> |
|
<xloc>336</xloc> |
|
<yloc>208</yloc> |
|
<draw>Y</draw> |
|
</GUI> |
|
</step> |
|
<step> |
|
<name>wiki2html</name> |
|
<type>ScriptValueMod</type> |
|
<description/> |
|
<distribute>Y</distribute> |
|
<custom_distribution/> |
|
<copies>1</copies> |
|
<partitioning> |
|
<method>none</method> |
|
<schema_name/> |
|
</partitioning> |
|
<compatible>N</compatible> |
|
<optimizationLevel>9</optimizationLevel> |
|
<jsScripts> |
|
<jsScript> |
|
<jsScript_type>0</jsScript_type> |
|
<jsScript_name>Script 1</jsScript_name> |
|
<jsScript_script>//Script here |
|
// Load the helper script mwtools.js, which is expected to sit in the same
// directory as this transformation file.
var transformationPath = getVariable("Internal.Transformation.Filename.Directory", "");
var jsScriptPath = transformationPath + "/mwtools.js";
LoadScriptFile(jsScriptPath);

// Build the wiki model from the incoming field result_text.
var newWikiModel = new wikiModel(result_text);

// Copy the model's properties into the variables that this step exposes as output fields.
var templateOptionsStr = newWikiModel.templateOptionsStr;
var internalHyperlinksStr = newWikiModel.internalHyperlinksStr;
var headersLevelsStr = newWikiModel.headersLevelsStr;
var headersStr = newWikiModel.headersStr;
var contentHtml = newWikiModel.wikiHtml;
|
</jsScript_script> |
|
</jsScript> |
|
</jsScripts> |
|
<fields> |
|
<field> |
|
<name>contentHtml</name> |
|
<rename>contentHtml</rename> |
|
<type>String</type> |
|
<length>-1</length> |
|
<precision>-1</precision> |
|
<replace>N</replace> |
|
</field> |
|
<field> |
|
<name>headersStr</name> |
|
<rename>headersStr</rename> |
|
<type>String</type> |
|
<length>-1</length> |
|
<precision>-1</precision> |
|
<replace>N</replace> |
|
</field> |
|
<field> |
|
<name>headersLevelsStr</name> |
|
<rename>headersLevelsStr</rename> |
|
<type>String</type> |
|
<length>-1</length> |
|
<precision>-1</precision> |
|
<replace>N</replace> |
|
</field> |
|
<field> |
|
<name>internalHyperlinksStr</name> |
|
<rename>internalHyperlinksStr</rename> |
|
<type>String</type> |
|
<length>-1</length> |
|
<precision>-1</precision> |
|
<replace>N</replace> |
|
</field> |
|
<field> |
|
<name>templateOptionsStr</name> |
|
<rename>templateOptionsStr</rename> |
|
<type>String</type> |
|
<length>-1</length> |
|
<precision>-1</precision> |
|
<replace>N</replace> |
|
</field> |
|
</fields> |
|
<cluster_schema/> |
|
<remotesteps> |
|
<input/> |
|
<output/> |
|
</remotesteps> |
|
<GUI> |
|
<xloc>528</xloc> |
|
<yloc>288</yloc> |
|
<draw>Y</draw> |
|
</GUI> |
|
</step> |
|
<step> |
|
<name>save src_api_xml</name> |
|
<type>Update</type> |
|
<description/> |
|
<distribute>Y</distribute> |
|
<custom_distribution/> |
|
<copies>1</copies> |
|
<partitioning> |
|
<method>none</method> |
|
<schema_name/> |
|
</partitioning> |
|
<connection>eduetl</connection> |
|
<skip_lookup>N</skip_lookup> |
|
<commit>100</commit> |
|
<use_batch>N</use_batch> |
|
<error_ignored>N</error_ignored> |
|
<ignore_flag_field/> |
|
<lookup> |
|
<schema/> |
|
<table>kb_document_source</table> |
|
<key> |
|
<name>document_id</name> |
|
<field>document_id</field> |
|
<condition>=</condition> |
|
<name2/> |
|
</key> |
|
<value> |
|
<name>src_api_xml</name> |
|
<rename>src_api_xml</rename> |
|
</value> |
|
</lookup> |
|
<cluster_schema/> |
|
<remotesteps> |
|
<input/> |
|
<output/> |
|
</remotesteps> |
|
<GUI> |
|
<xloc>160</xloc> |
|
<yloc>176</yloc> |
|
<draw>Y</draw> |
|
</GUI> |
|
</step> |
|
<step_error_handling/> |
|
<slave-step-copy-partition-distribution/> |
|
<slave_transformation>N</slave_transformation> |
|
</transformation>
|
|
|