Search for Issues in Document Processing
Follow the steps below to identify document processor issues.
Step 1 - Add Log Information
To display detailed chunks in the log.log file, you must first
add a Debug Processor element to your document processors
list.
- From the Administration Console, go to Index > Data processing > Pipeline name.
- In the Document Processors tab, click Other in the Processor types menu.
- Drag the Debug Processor element to the end of the processors list.
Example: <DebugChunk> tags are displayed in the log:
[2013/09/23-09:54:11.584] [info] [AnalyzerThread-bg0-default_model-1] [analysis.debug] uri: C:\Users\E7G\Downloads\traffic.csv8 source: RATP did: 57 slice: 0: DebugProcessor: dumping C:\Users\E7G\Downloads\traffic.csv8DebugProcessor: dumping C:\Users\E7G\Downloads\traffic.csv8: <DebugChunk type="TextChunk" ctx="ville" deleted="false" part="null" value="Paris" score=0 language="xx"> </DebugChunk> <DebugChunk type="TextChunk" ctx="arrondissement" deleted="false" part="null" value="1" score=0 language="xx"> </DebugChunk> <DebugChunk type="TextChunk" ctx="source" deleted="false" part="null" value="RATP" score=0 language="xx"> </DebugChunk>
Step 2 - Submit a Document Using cvdebug
cvconsole cvdebug> analysis analyze path=<PATH_TO_DOCUMENT>
For example, submit a .CSV file.
cvconsole cvdebug> analysis analyze path=/tests/myfile.csv
The output is a mapping of contexts and chunk values.
<TestAnalysisPipelineOutput xmlns="com.exalead.indexing.analysis.v10" documentProcessorsTimeUS=1000
semanticAndMappingTimeUS=0>
<##default>
<DocumentProcessorsOutput xmlns="com.exalead.indexing.analysis.v10">
<Document>
<Document xmlns="com.exalead.ndoc.v10">
<element>
[
<Context xmlns="com.exalead.ndoc.v10" language="xx" name="source"/>,
<ScoreContext xmlns="com.exalead.ndoc.v10" value=0/>,
<Chunk xmlns="com.exalead.ndoc.v10" value="sourceTest"/>,
<Context xmlns="com.exalead.ndoc.v10" language="xx" name="uri"/>,
<ScoreContext xmlns="com.exalead.ndoc.v10" value=0/>,
<Chunk xmlns="com.exalead.ndoc.v10" value="C:\Users\E7G\Downloads\traffic2.csv"/>,
<Context xmlns="com.exalead.ndoc.v10" language="xx" name="extracted_mime"/>,
<ScoreContext xmlns="com.exalead.ndoc.v10" value=0/>,
<Chunk xmlns="com.exalead.ndoc.v10" value="text/plain"/>,
<Context xmlns="com.exalead.ndoc.v10" language="xx" name="mime"/>,
<ScoreContext xmlns="com.exalead.ndoc.v10" value=0/>,
<Chunk xmlns="com.exalead.ndoc.v10" value="text#plain"/>,
<Context xmlns="com.exalead.ndoc.v10" language="xx" name="docsrc"/>,
<ScoreContext xmlns="com.exalead.ndoc.v10" value=0/>,
<Chunk xmlns="com.exalead.ndoc.v10" value="txt"/>,
<Context xmlns="com.exalead.ndoc.v10" language="xx" name="text"/>,
<ScoreContext xmlns="com.exalead.ndoc.v10" value=0/>,
<Chunk xmlns="com.exalead.ndoc.v10" value="Rang,Reseau,Station,Trafic,Correspondances,
c1,c2,c3,c4,Ville,Arrondissement1,Métro,GARE DU NORD,"48,146,629",4,5,0,0,0,Paris,102,
Métro,SAINT-LAZARE,"46,790,941",3,9,12,13,14,Paris,8"/>,
</element>
</Document>
</Document>
</DocumentProcessorsOutput>
</##default>
<##default>
<UnmappedContexts xmlns="com.exalead.indexing.analysis.v10">
<StringValue>
[
<StringValue xmlns="exa.bee" value="docsrc"/>,
<StringValue xmlns="exa.bee" value="extracted_mime"/>,
<StringValue xmlns="exa.bee" value="source"/>
]
</StringValue>
</UnmappedContexts>
</##default>
</TestAnalysisPipelineOutput>
