索引多个实体在一个核心Apache SOLR


Indexing Multiple Entities in one Core Apache SOLR

我在索引一个集合中的多个实体时遇到困难。当我试图索引只有实体定义最后得到索引。

下面是配置:

data-config.xml

<?xml version="1.0" encoding="UTF-8"?>
<dataConfig> <propertyWriter dateFormat="yyyy-MM-dd HH:mm:ss" type="SimplePropertiesWriter" filename="demo.properties" />
<script><![CDATA[
id = 1;
function GenerateId(row) {
row.put('doc_id', (id ++).toFixed());
return row;
}
]]>
</script>
<dataSource type="JdbcDataSource" zeroDateTimeBehavior="convertToNull" name="ds-1" driver="com.mysql.jdbc.Driver" url="jdbc:mysql://127.0.0.1/demo"   batchSize="-1" user="root" autoCommit="true" password="xxxxxxx">
</dataSource >
<document name="demo_doc">
   <entity name="table1" pk="table1_id" dataSource="ds-1"  transformer="HTMLStripTransformer,RegexTransformer,TemplateTransformer,DateFormatTransformer,script:GenerateId,LogTransformer"
        logTemplate="The demo is ${table1.table1_id}" logLevel="info"
        query="select table1_id,table1_desc,table1_flag,DATE_FORMAT(table1_date_updated,'%Y-%m-%dT%TZ') from table1 Where table1_flag=1 AND '${dih.request.clean}' != 'false' OR table1_date_updated &gt; '${dih.table1.last_index_time}'"
        >
        <field column="doc_id" name="singlekey" />
        <field column="doc_type" template="TABLE1" name="doc_type" />
        <field column="table1_desc" name="solr_table1_desc_en" stripHTML="true"/>
        <field column="table1_date_updated" name="solr_table1_date_updated_dt" dateTimeFormat="yyyy-MM-dd'T'HH:mm:ss" locale="en" />
</entity>
<entity name="table2"
    pk="table2_id" 
    dataSource="ds-1"
    transformer="HTMLStripTransformer,RegexTransformer,TemplateTransformer,DateFormatTransformer,script:GenerateId,LogTransformer"
    logTemplate="The table2 is ${table2.table2_id}" logLevel="info"
    query="select table2_id,table2_name,table2_flag,DATE_FORMAT(table2_date_updated,'%Y-%m-%dT%TZ') from table2 Where table2_flag=1 AND '${dih.request.clean}' != 'false' OR table2_date_updated &gt; '${dih.table2.last_index_time}'"
    >
    <field column="doc_id" name="singlekey" />
    <field column="doc_type" template="TABLE2" name="doc_type" />
    <field column="table2_name" name="solr_table2_name" />
        <entity name="table3" 
                pk="table3_id,table3_frid" 
                transformer="HTMLStripTransformer,RegexTransformer,DateFormatTransformer,script:GenerateId,LogTransformer"
                logTemplate="The table3 is ${table3.table3_id}" logLevel="info"
                query="select table3_id,table3_frid,table3_name,table3_desc,table3_subdesc,table3_keyword,table3_flag,DATE_FORMAT(table3_date_updated,'%Y-%m-%dT%TZ') from table3 Where  table3_frid=$table1.table1_id} AND table3_flag=1"
                >
            <field column="table3_name" name="solr_table3_name"/>
            <field column="table3_desc" name="solr_table3_desc" stripHTML="true"/>
            <field column="table3_subdesc" name="solr_table3_subdesc" stripHTML="true"/>
            <field column="table3_keyword" name="solr_table3_keyword"/>
            <field column="table3_date_updated" name="solr_table3_date_updated_dt" dateTimeFormat="yyyy-MM-dd'T'HH:mm:ss" locale="en"/>
       </entity>
    </entity>
 </document>

schema.xml

<field name="singlekey" type="string" required="true" multiValued="false" /> 
<field name="doc_type" type="string"  multiValued="false" />
<uniqueKey>singlekey</uniqueKey>
<field name="table1_desc_en"  type="text_auto" indexed="true" stored="true"  multiValued="false"  />
<field name="table1_date_updated_dt" type="date" indexed="true" stored="true"  multiValued="false"  />

<field name="table2_name" type="text_ws" indexed="true" stored="true" />
<field name="table3_name" type="text_ws" indexed="true" stored="true" multiValued="true" />
<field name="table3_desc"   type="text_en_splitting" indexed="true" stored="true" multiValued="true" />
<field name="table3_subdesc" type="text_en" indexed="true" stored="true" multiValued="true" />
<field name="table3_keyword" type="text_en" indexed="true" stored="true" multiValued="true" />
<field name="table3_date_updated_dt" type="date" indexed="true" multiValued="false" stored="true" />

我不能索引表1,而表2和表3(这是1到许多关系表)正在被索引,但表1没有被索引.

通过在表1中添加以下行来修复此问题:

<entity name="table1" 
pk="table1_id" 
dataSource="ds-1"   
transformer="HTMLStripTransformer,RegexTransformer,TemplateTransformer,DateForma    tTransformer,script:GenerateId,LogTransformer"            
logTemplate="The demo is ${table1.table1_id}" 
logLevel="info"
query="select table1_id,table1_desc,table1_flag,DATE_FORMAT(table1_date_updated,'%Y-%m-%dT%TZ') from table1 Where table1_flag=1 AND '${dih.request.clean}' != 'false' OR table1_date_updated &gt; '${dih.table1.last_index_time}'" 
deltaImportQuery="select table1_id,table1_desc,table1_flag,DATE_FORMAT(table1_date_updated,'%Y-%m-%dT%TZ') as table1_date_updated from table1 Where table1_id='${dih.delta.id}'"
deltaQuery="select  table1_id,table1_desc,table1_flag,DATE_FORMAT(table1_date_updated,'%Y-%m-%dT%TZ') as table1_date_updated from table1 Where table1_date_updated &gt; '${dih.table1 .last_index_time}'"
preImportDeleteQuery="select table1_id from table1  where table1_date_updated &gt; '${dih.table1 .last_index_time}'" 
>               
<field column="doc_id" name="singlekey" /> 
<field column="doc_type" template="TABLE1" name="doc_type" /> 
<field column="table1_desc" name="solr_table1_desc_en" stripHTML="true"/> 
<field column="table1_date_updated" name="solr_table1_date_updated_dt" dateTimeFormat="yyyy-MM-dd'T'HH:mm:ss" locale="en" /> 
</entity>