Hi everyone, I am looking for a solution. I am working on a translation app, similar to smartcart.com, that extracts the text from a document, translates it into another language, and creates a new .docx file as the result while keeping the text formatting and style. My issue is this: I unzip the .docx and get a document.xml file that contains:
<w:p w14:paraId="69C7E71C" w14:textId="423B6516" w:rsidRDefault="00426B63">
<w:pPr>
<w:ind w:left="720"/>
<w:jc w:val="both"/>
</w:pPr>
<w:r>
<w:rPr>
<w:rFonts w:ascii="Times New Roman" w:hAnsi="Times New Roman" w:cs="Times New Roman"/>
<w:sz w:val="28"/>
</w:rPr>
<w:tab/>
<w:t>•</w:t>
</w:r>
<w:r>
<w:rPr>
<w:rFonts w:ascii="Times New Roman" w:hAnsi="Times New Roman" w:cs="Times New Roman"/>
<w:sz w:val="28"/>
</w:rPr>
<w:tab/>
</w:r>
<w:hyperlink r:id="rId5" w:history="1">
<w:r>
<w:rPr>
<w:rStyle w:val="Hyperlink"/>
<w:rFonts w:ascii="Times New Roman" w:hAnsi="Times New Roman" w:cs="Times New Roman"/>
<w:sz w:val="28"/>
</w:rPr>
<w:t>Vietnam's National Assembly has passed a cybersecurity law requiring companies such as Alphabet Inc.'s Google and Facebook Inc. to store all data of Vietnam-based users in the country and open local offices. The measure</w:t>
</w:r>
</w:hyperlink>
<w:r>
<w:rPr>
<w:rFonts w:ascii="Times New Roman" w:hAnsi="Times New Roman" w:cs="Times New Roman"/>
<w:sz w:val="28"/>
</w:rPr>
<w:t xml:space="preserve"> has drawn rare dissent from some lawmakers and government leaders as well as local tech groups, who sent a petition to the legislature that warned it would hurt the economy. </w:t>
</w:r>
</w:p>
I want to extract the text and split it on line breaks or ". " into 3 segments:
1 => •
2 => Vietnam's National Assembly has passed a cybersecurity law requiring companies such as Alphabet Inc.'s Google and Facebook Inc. to store all data of Vietnam-based users in the country and open local offices.
3 => The measure has drawn rare dissent from some lawmakers and government leaders as well as local tech groups, who sent a petition to the legislature that warned it would hurt the economy.
I see that the JSON API result from smartcart for segment 3 looks like this:
{
"id": 4079,
"prevSegmentId": 4078,
"nextSegmentId": 4080,
"number": 7,
"order": 7,
"documentId": "b2fbca4ecc49c3883d7afa77",
"subtitleId": [],
"topicId": "23d961eb-4423-433e-82cc-5f23b8f9abff",
"commentState": 0,
"localizationContext": [],
"wordsCount": 32,
"isSplitting": false,
"paragraphId": 6,
"targetTextLengthLimit": null,
"canModifyTextLengthLimit": true,
"segmentRepetitionState": 0,
"source": {
"segmentId": 4079,
"text": "The measure has drawn rare dissent from some lawmakers and government leaders as well as local tech groups, who sent a petition to the legislature that warned it would hurt the economy.",
"languageId": 6153,
"tags": [
{
"tagNumber": 1,
"tagType": 0,
"position": 0,
"isSubtitleTag": false,
"isVirtual": true,
"formatting": null,
"isRequired": true,
"visualization": null
},
{
"tagNumber": 1,
"tagType": 1,
"position": 11,
"isSubtitleTag": false,
"isVirtual": false,
"formatting": null,
"isRequired": true,
"visualization": null
}
],
"placeholders": []
},
"targets": [
{
"segmentId": 4079,
"languageId": 42,
"documentId": "b2fbca4ecc49c3883d7afa77",
"text": "",
"lockType": 0,
"tags": [],
"placeholders": [],
"revisions": [],
"stageNumber": 1,
"isConfirmed": false,
"errors": [],
"lockUserId": null,
"workflowRollbackStep": null,
"machineTranslations": [
{
"priority": 30,
"id": "61c919bbf07d744bf8632fba",
"segmentId": 4079,
"languageId": 42,
"resourceId": "engine:Yandex",
"resourceType": 2,
"resourceName": "Yandex Free",
"sourceText": "The measure has drawn rare dissent from some lawmakers and government leaders as well as local tech groups, who sent a petition to the legislature that warned it would hurt the economy.",
"targetText": "Các biện pháp đã rút ra hiếm bất đồng từ một nhà lập pháp và các lãnh đạo chính phủ cũng như công nghệ địa phương nhóm những người đã gửi đơn cơ quan lập pháp mà cảnh báo nó sẽ làm tổn thương các nền kinh tế.",
"targetTextAfterAutoReplacement": null,
"matchPercentage": 0,
"createdDate": "2021-12-27T01:41:15.904Z",
"lastUpdateDate": "2021-12-27T01:41:15.904Z",
"tagsTransferData": [
{
"position": 13,
"order": 0
}
],
"tagsTransferDataAfterAutoReplacement": null,
"dialectSourceLanguageId": null,
"dialectTargetLanguageId": null
}
],
"textForNonAlphabeticAutoFill": null,
"termEntries": [],
"insertResourceType": 0,
"matchPercentage": 0
}
],
"images": [],
"previewUrl": null,
"defaultFormatting": {
"hyperlink": null,
"bold": false,
"italic": false,
"strike": false,
"superscript": false,
"subscript": false,
"underline": false,
"fontSize": 14,
"fontFamily": "Times New Roman",
"backgroundColor": "0, 0, 0, 0",
"foregroundColor": "0, 0, 0, 0"
},
"lockType": 0
}