diff options
author | Vort <vvort@yandex.ru> | 2014-01-21 09:27:46 +0200 |
---|---|---|
committer | Caolán McNamara <caolanm@redhat.com> | 2014-02-04 15:13:15 +0000 |
commit | 9db3b5585c5fa7fff633672fd32510c4066d035a (patch) | |
tree | 83be4d5dee638d00d1f1c69cbe051ac7f10e2b6a /sdext | |
parent | 5cfda4f4de8da82485e27b1f0f0d94211d3b4d95 (diff) |
fdo#35143 PDF import: Reimplementation of whitespace detection function
Change-Id: I5b230aaebf72b70bbb7e206414a5ac0e01f01f86
Reviewed-on: https://gerrit.libreoffice.org/7564
Reviewed-by: Caolán McNamara <caolanm@redhat.com>
Tested-by: Caolán McNamara <caolanm@redhat.com>
Diffstat (limited to 'sdext')
-rw-r--r-- | sdext/source/pdfimport/tree/pdfiprocessor.cxx | 245 | ||||
-rw-r--r-- | sdext/source/pdfimport/tree/pdfiprocessor.hxx | 29 |
2 files changed, 42 insertions, 232 deletions
diff --git a/sdext/source/pdfimport/tree/pdfiprocessor.cxx b/sdext/source/pdfimport/tree/pdfiprocessor.cxx index 83be259d3b69..72cb7532a004 100644 --- a/sdext/source/pdfimport/tree/pdfiprocessor.cxx +++ b/sdext/source/pdfimport/tree/pdfiprocessor.cxx @@ -75,7 +75,6 @@ namespace pdfi m_eTextDirection( LrTb ), m_nPages(0), m_nNextZOrder( 1 ), - m_bIsWhiteSpaceInLine( false ), m_xStatusIndicator( xStat ), m_bHaveTextOnDocLevel(false) { @@ -210,221 +209,66 @@ sal_Int32 PDFIProcessor::getFontId( const FontAttributes& rAttr ) const // line diagnose block - start void PDFIProcessor::processGlyphLine() { - if( m_GlyphsList.empty() ) + if (m_GlyphsList.empty()) return; - double fPreAvarageSpaceValue= 0.0; - double fAvarageDiffCharSpaceValue= 0.0; - double fMinPreSpaceValue= 0.0; - double fMaxPreSpaceValue= 0.0; - double fNullSpaceBreakerAvaregeSpaceValue = 0.0; + double spaceDetectBoundary = 0.0; - unsigned int nSpaceCount( 0 ); - unsigned int nDiffSpaceCount( 0 ); - unsigned int nNullSpaceBreakerCount=0; - bool preSpaceNull(true); - - for ( unsigned int i=0; i<m_GlyphsList.size()-1; i++ ) // i=1 because the first glyph doesn't have a prevGlyphSpace value + // Try to find space glyph and it's width + for (size_t i = 0; i < m_GlyphsList.size(); i++) { - if( m_GlyphsList[i].getPrevGlyphsSpace()>0.0 ) - { - if( fMinPreSpaceValue>m_GlyphsList[i].getPrevGlyphsSpace() ) - fMinPreSpaceValue=m_GlyphsList[i].getPrevGlyphsSpace(); + OUString& glyph = m_GlyphsList[i].getGlyph(); - if( fMaxPreSpaceValue<m_GlyphsList[i].getPrevGlyphsSpace() ) - fMaxPreSpaceValue=m_GlyphsList[i].getPrevGlyphsSpace(); + sal_Unicode ch = '\0'; + if (!glyph.isEmpty()) + ch = glyph[0]; - fPreAvarageSpaceValue+= m_GlyphsList[i].getPrevGlyphsSpace(); - nSpaceCount++; - } - } - - if( nSpaceCount!=0 ) - fPreAvarageSpaceValue= fPreAvarageSpaceValue/( nSpaceCount ); - - for ( unsigned int i=0; i<m_GlyphsList.size()-1; i++ ) // i=1 because the first glyph doesn't have a prevGlyphSpace value - { - if ( m_GlyphsList[i].getPrevGlyphsSpace()==0.0 ) - { - if ( - ( m_GlyphsList[i+1].getPrevGlyphsSpace()>0.0)&& - ( fPreAvarageSpaceValue>m_GlyphsList[i+1].getPrevGlyphsSpace()) - ) - { - fNullSpaceBreakerAvaregeSpaceValue+=m_GlyphsList[i+1].getPrevGlyphsSpace(); - nNullSpaceBreakerCount++; - } + if ((ch == 0x20) || (ch == 0xa0)) + { + double spaceWidth = + m_GlyphsList[i].getRect().X2 - + m_GlyphsList[i].getRect().X1; + spaceDetectBoundary = spaceWidth * 0.5; + break; } } - if( ( fNullSpaceBreakerAvaregeSpaceValue!= 0.0 )&& - ( fNullSpaceBreakerAvaregeSpaceValue < fPreAvarageSpaceValue ) - ) + // If space glyph is not found, use average glyph width instead + if (spaceDetectBoundary == 0.0) { - fPreAvarageSpaceValue = fNullSpaceBreakerAvaregeSpaceValue; - } - - for ( unsigned int i=0; i<m_GlyphsList.size()-1; i++ ) // i=1 cose the first Glypth dont have prevGlyphSpace value - { - if ( ( m_GlyphsList[i].getPrevGlyphsSpace()>0.0 ) - ) + double avgGlyphWidth = 0.0; + for (size_t i = 0; i < m_GlyphsList.size(); i++) { - if ( - ( m_GlyphsList[i].getPrevGlyphsSpace() <= fPreAvarageSpaceValue )&& - ( m_GlyphsList[i+1].getPrevGlyphsSpace()<= fPreAvarageSpaceValue ) - ) - { - double temp= m_GlyphsList[i].getPrevGlyphsSpace()-m_GlyphsList[i+1].getPrevGlyphsSpace(); - - if(temp!=0.0) - { - if( temp< 0.0) - temp= temp* -1.0; - - fAvarageDiffCharSpaceValue+=temp; - nDiffSpaceCount++; - } - } + avgGlyphWidth += + m_GlyphsList[i].getRect().X2 - + m_GlyphsList[i].getRect().X1; } - - } - - if ( - ( nNullSpaceBreakerCount>0 ) - ) - { - fNullSpaceBreakerAvaregeSpaceValue=fNullSpaceBreakerAvaregeSpaceValue/nNullSpaceBreakerCount; - } - - if ( - ( nDiffSpaceCount>0 )&&(fAvarageDiffCharSpaceValue>0) - ) - { - fAvarageDiffCharSpaceValue= fAvarageDiffCharSpaceValue/ nDiffSpaceCount; + avgGlyphWidth /= m_GlyphsList.size(); + spaceDetectBoundary = avgGlyphWidth * 0.2; } - ParagraphElement* pPara= NULL ; - FrameElement* pFrame= NULL ; + FrameElement* frame = m_pElFactory->createFrameElement(m_GlyphsList[0].getCurElement(), + getGCId(getTransformGlyphContext(m_GlyphsList[0]))); + frame->ZOrder = m_nNextZOrder++; + ParagraphElement* para = m_pElFactory->createParagraphElement(frame); - if(!m_GlyphsList.empty()) + for (size_t i = 0; i < m_GlyphsList.size(); i++) { - pFrame = m_pElFactory->createFrameElement( m_GlyphsList[0].getCurElement(), getGCId( getTransformGlyphContext( m_GlyphsList[0])) ); - pFrame->ZOrder = m_nNextZOrder++; - pPara = m_pElFactory->createParagraphElement( pFrame ); - - processGlyph( 0, - m_GlyphsList[0], - pPara, - pFrame, - m_bIsWhiteSpaceInLine ); + double spaceSize = 0.0; + if (i != 0) + spaceSize = m_GlyphsList[i].getRect().X1 - m_GlyphsList[i - 1].getRect().X2; + bool prependSpace = spaceSize > spaceDetectBoundary; + drawCharGlyphs(m_GlyphsList[i].getGlyph(), + m_GlyphsList[i].getRect(), + m_GlyphsList[i].getGC(), + para, + frame, + prependSpace); } - - preSpaceNull=false; - - for ( unsigned int i=1; i<m_GlyphsList.size()-1; i++ ) - { - double fPrevDiffCharSpace= m_GlyphsList[i].getPrevGlyphsSpace()-m_GlyphsList[i-1].getPrevGlyphsSpace(); - double fPostDiffCharSpace= m_GlyphsList[i].getPrevGlyphsSpace()-m_GlyphsList[i+1].getPrevGlyphsSpace(); - - - if( - preSpaceNull && (m_GlyphsList[i].getPrevGlyphsSpace()!= 0.0) - ) - { - preSpaceNull=false; - if( fNullSpaceBreakerAvaregeSpaceValue > m_GlyphsList[i].getPrevGlyphsSpace() ) - { - processGlyph( 0, - m_GlyphsList[i], - pPara, - pFrame, - m_bIsWhiteSpaceInLine ); - - } - else - { - processGlyph( 1, - m_GlyphsList[i], - pPara, - pFrame, - m_bIsWhiteSpaceInLine ); - - } - - } - else - { - if ( - ( ( m_GlyphsList[i].getPrevGlyphsSpace()<= fPreAvarageSpaceValue )&& - ( fPrevDiffCharSpace<=fAvarageDiffCharSpaceValue )&& - ( fPostDiffCharSpace<=fAvarageDiffCharSpaceValue ) - ) || - ( m_GlyphsList[i].getPrevGlyphsSpace() == 0.0 ) - ) - { - preSpaceNull=true; - - processGlyph( 0, - m_GlyphsList[i], - pPara, - pFrame, - m_bIsWhiteSpaceInLine ); - - } - else - { - processGlyph( 1, - m_GlyphsList[i], - pPara, - pFrame, - m_bIsWhiteSpaceInLine ); - - } - - } - - } - - if(m_GlyphsList.size()>1) - processGlyph( 0, - m_GlyphsList[m_GlyphsList.size()-1], - pPara, - pFrame, - m_bIsWhiteSpaceInLine ); - m_GlyphsList.clear(); } -void PDFIProcessor::processGlyph( double fPreAvarageSpaceValue, - CharGlyph& aGlyph, - ParagraphElement* pPara, - FrameElement* pFrame, - bool bIsWhiteSpaceInLine - ) -{ - if( !bIsWhiteSpaceInLine ) - { - bool flag=( 0 < fPreAvarageSpaceValue ); - - drawCharGlyphs( aGlyph.getGlyph(), - aGlyph.getRect(), - aGlyph.getGC(), - pPara, - pFrame, - flag); - } - else - { - drawCharGlyphs( aGlyph.getGlyph(), - aGlyph.getRect(), - aGlyph.getGC(), - pPara, - pFrame, - false ); - } -} - void PDFIProcessor::drawGlyphLine( const OUString& rGlyphs, const geometry::RealRectangle2D& rRect, const geometry::Matrix2D& rFontMatrix ) @@ -440,9 +284,7 @@ void PDFIProcessor::drawGlyphLine( const OUString& rGlyphs, processGlyphLine(); } - CharGlyph aGlyph(fXPrevTextPosition, fYPrevTextPosition, fPrevTextHeight, fPrevTextWidth, - m_pCurElement, getCurrentContext(), rFontMatrix, rRect, rGlyphs); - + CharGlyph aGlyph(m_pCurElement, getCurrentContext(), rFontMatrix, rRect, rGlyphs); getGCId(getCurrentContext()); @@ -452,13 +294,6 @@ void PDFIProcessor::drawGlyphLine( const OUString& rGlyphs, fXPrevTextPosition = rRect.X2; fPrevTextHeight = rRect.Y2-rRect.Y1; fPrevTextWidth = rRect.X2-rRect.X1; - - if( !m_bIsWhiteSpaceInLine ) - { - static OUString tempWhiteSpaceStr( 0x20 ); - static OUString tempWhiteSpaceNonBreakingStr( 0xa0 ); - m_bIsWhiteSpaceInLine=(rGlyphs.equals( tempWhiteSpaceStr ) || rGlyphs.equals( tempWhiteSpaceNonBreakingStr )); - } } GraphicsContext& PDFIProcessor::getTransformGlyphContext( CharGlyph& rGlyph ) diff --git a/sdext/source/pdfimport/tree/pdfiprocessor.hxx b/sdext/source/pdfimport/tree/pdfiprocessor.hxx index 97c58aa20b31..23861fae1577 100644 --- a/sdext/source/pdfimport/tree/pdfiprocessor.hxx +++ b/sdext/source/pdfimport/tree/pdfiprocessor.hxx @@ -103,11 +103,6 @@ namespace pdfi private: void processGlyphLine(); - void processGlyph( double fPreAvarageSpaceValue, - CharGlyph& rGlyph, - ParagraphElement* pPara, - FrameElement* pFrame, - bool bIsWhiteSpaceInLine ); void drawGlyphLine( const OUString& rGlyphs, const ::com::sun::star::geometry::RealRectangle2D& rRect, @@ -226,7 +221,6 @@ namespace pdfi sal_Int32 m_nPages; sal_Int32 m_nNextZOrder; - bool m_bIsWhiteSpaceInLine; com::sun::star::uno::Reference< com::sun::star::task::XStatusIndicator > m_xStatusIndicator; @@ -236,11 +230,9 @@ namespace pdfi class CharGlyph { public: - CharGlyph(double fXPrevGlyphPosition, double fYPrevGlyphPosition, double fPrevGlyphHeight, double fPrevGlyphWidth, - Element* pCurElement, const GraphicsContext& rCurrentContext, const com::sun::star::geometry::Matrix2D& rFontMatrix, + CharGlyph(Element* pCurElement, const GraphicsContext& rCurrentContext, const com::sun::star::geometry::Matrix2D& rFontMatrix, const com::sun::star::geometry::RealRectangle2D& rRect, const OUString& rGlyphs ) - : m_fXPrevGlyphPosition(fXPrevGlyphPosition), m_fYPrevGlyphPosition(fYPrevGlyphPosition), m_fPrevGlyphHeight(fPrevGlyphHeight), - m_fPrevGlyphWidth(fPrevGlyphWidth), m_pCurElement(pCurElement), m_rCurrentContext(rCurrentContext), + : m_pCurElement(pCurElement), m_rCurrentContext(rCurrentContext), m_rFontMatrix(rFontMatrix), m_rRect(rRect), m_rGlyphs(rGlyphs) {}; virtual ~CharGlyph(){}; @@ -250,24 +242,7 @@ namespace pdfi GraphicsContext& getGC(){ return m_rCurrentContext; } Element* getCurElement(){ return m_pCurElement; } - double getYPrevGlyphPosition() const { return m_fYPrevGlyphPosition; } - double getXPrevGlyphPosition() const { return m_fXPrevGlyphPosition; } - double getPrevGlyphHeight() const { return m_fPrevGlyphHeight; } - double getPrevGlyphWidth () const { return m_fPrevGlyphWidth; } - double getPrevGlyphsSpace() const - { - if( (m_rRect.X1-m_fXPrevGlyphPosition)<0 ) - return 0; - else - return m_rRect.X1-m_fXPrevGlyphPosition; - } - private: - - double m_fXPrevGlyphPosition ; - double m_fYPrevGlyphPosition ; - double m_fPrevGlyphHeight ; - double m_fPrevGlyphWidth ; Element* m_pCurElement ; GraphicsContext m_rCurrentContext ; com::sun::star::geometry::Matrix2D m_rFontMatrix ; |