Replies: 4 comments
-
I know you mentioned in the Pangea dev chat the idea of not saving the text in the original sent representation if it matches the body of the message itself. I think that should work! We'd still have to keep the rest of that representation, though, because it includes important info like the langCode. The choreoRecord contains a ton of info. That would be the place to look for duplication, but nothing in there immediately stands out to me as something that can be removed. There's also the tokens part of it, but I don't think any of that data is duplicated. |
Beta Was this translation helpful? Give feedback.
-
Here's one that went through IGC: {
"body": "Here's a message with some grammar errors.",
"choreo_record": {
"mtchs": [],
"stps": [
{
"txt": "Heres a messag ewith some grammar error.",
"mtch": {
"match": {
"message": null,
"short_message": "Contractions",
"choices": [
{
"value": "Here's",
"type": "bestCorrection",
"selected": true,
"feedback": null,
"timestamp": "2024-07-31T15:10:30.877"
},
{
"value": "Hers",
"type": "distractor",
"selected": false,
"feedback": null,
"timestamp": null
}
],
"offset": 0,
"length": 5,
"context": null,
"full_text": "Heres a messag ewith some grammar error.",
"type": {
"type_name": "correction"
},
"rule": {
"id": "Contractions"
}
},
"status": "accepted"
},
"stp": null
},
{
"txt": "Here's a messag ewith some grammar error.",
"mtch": {
"match": {
"message": null,
"short_message": "Word Order",
"choices": [
{
"value": "message with",
"type": "bestCorrection",
"selected": true,
"feedback": null,
"timestamp": "2024-07-31T15:10:33.370"
},
{
"value": "messag ewithout",
"type": "distractor",
"selected": false,
"feedback": null,
"timestamp": null
}
],
"offset": 9,
"length": 12,
"context": null,
"full_text": "Here's a messag ewith some grammar error.",
"type": {
"type_name": "correction"
},
"rule": {
"id": "Word Order"
}
},
"status": "accepted"
},
"stp": null
},
{
"txt": "Here's a message with some grammar error.",
"mtch": {
"match": {
"message": null,
"short_message": "Subject-Verb Agreement",
"choices": [
{
"value": "errors.",
"type": "bestCorrection",
"selected": true,
"feedback": null,
"timestamp": "2024-07-31T15:10:35.735"
},
{
"value": "mistake",
"type": "distractor",
"selected": false,
"feedback": null,
"timestamp": null
},
{
"value": "mistakes",
"type": "distractor",
"selected": false,
"feedback": null,
"timestamp": null
}
],
"offset": 35,
"length": 6,
"context": null,
"full_text": "Here's a message with some grammar error.",
"type": {
"type_name": "correction"
},
"rule": {
"id": "Subject-Verb Agreement"
}
},
"status": "accepted"
},
"stp": null
}
]
},
"msgtype": "m.text",
"original_sent": {
"lang": "en",
"snt": true,
"txt": "Here's a message with some grammar errors.",
"wrttn": true
},
"original_written": null,
"tokens_sent": {
"tkns": [
{
"text": {
"offset": 0,
"content": "Heres",
"length": 5
},
"lemma": [
{
"text": "here",
"save_vocab": true,
"form": "Heres"
}
],
"pos": "NOUN",
"morph": {
"Number": "Plur"
}
},
{
"text": {
"offset": 6,
"content": "a",
"length": 1
},
"lemma": [
{
"text": "a",
"save_vocab": true,
"form": "a"
}
],
"pos": "DET",
"morph": {
"Definite": "Ind",
"PronType": "Art"
}
},
{
"text": {
"offset": 8,
"content": "messag",
"length": 6
},
"lemma": [
{
"text": "messag",
"save_vocab": true,
"form": "messag"
}
],
"pos": "NOUN",
"morph": {
"Number": "Sing"
}
},
{
"text": {
"offset": 15,
"content": "ewith",
"length": 5
},
"lemma": [
{
"text": "ewith",
"save_vocab": true,
"form": "ewith"
}
],
"pos": "VERB",
"morph": {
"VerbForm": "Inf"
}
},
{
"text": {
"offset": 21,
"content": "some",
"length": 4
},
"lemma": [
{
"text": "some",
"save_vocab": true,
"form": "some"
}
],
"pos": "DET",
"morph": {}
},
{
"text": {
"offset": 26,
"content": "grammar",
"length": 7
},
"lemma": [
{
"text": "grammar",
"save_vocab": true,
"form": "grammar"
}
],
"pos": "NOUN",
"morph": {
"Number": "Sing"
}
},
{
"text": {
"offset": 34,
"content": "error",
"length": 5
},
"lemma": [
{
"text": "error",
"save_vocab": true,
"form": "error"
}
],
"pos": "NOUN",
"morph": {
"Number": "Sing"
}
},
{
"text": {
"offset": 39,
"content": ".",
"length": 1
},
"lemma": [
{
"text": ".",
"save_vocab": false,
"form": "."
}
],
"pos": "PUNCT",
"morph": {
"PunctType": "Peri"
}
}
]
},
"tokens_written": null
} |
Beta Was this translation helpful? Give feedback.
-
Here's one that went through IT: {
"body": "Here is a message that needs translation.",
"choreo_record": {
"mtchs": [],
"stps": [
{
"txt": "Aqui esta un mesaje que necesita tranduccion.",
"mtch": {
"match": {
"message": null,
"short_message": null,
"choices": [
{
"value": "",
"type": "bestCorrection",
"selected": false,
"feedback": null,
"timestamp": null
}
],
"offset": 0,
"length": 45,
"context": null,
"full_text": "Aqui esta un mesaje que necesita tranduccion.",
"type": {
"type_name": "itStart"
},
"rule": {
"id": "interactive_translation"
}
},
"status": "accepted"
},
"stp": null
},
{
"txt": "",
"mtch": null,
"stp": {
"continuances": [
{
"probability": 1,
"level": 1,
"text": "Here is ",
"clkd": true,
"tokens": []
},
{
"probability": 0.5,
"level": 3,
"text": "This is ",
"clkd": false,
"tokens": []
},
{
"probability": 0.5,
"level": 3,
"text": "There is ",
"clkd": false,
"tokens": []
}
],
"chosen": 0,
"custom_input": null
}
},
{
"txt": "Here is ",
"mtch": null,
"stp": {
"continuances": [
{
"probability": 0.5,
"level": 3,
"text": "the message ",
"clkd": false,
"tokens": []
},
{
"probability": 0.5,
"level": 3,
"text": "an message ",
"clkd": false,
"tokens": []
},
{
"probability": 1,
"level": 1,
"text": "a message ",
"clkd": true,
"tokens": []
}
],
"chosen": 2,
"custom_input": null
}
},
{
"txt": "Here is a message ",
"mtch": null,
"stp": {
"continuances": [
{
"probability": 0.5,
"level": 3,
"text": "what needs ",
"clkd": false,
"tokens": []
},
{
"probability": 0.5,
"level": 3,
"text": "who needs ",
"clkd": false,
"tokens": []
},
{
"probability": 1,
"level": 1,
"text": "that needs ",
"clkd": true,
"tokens": []
}
],
"chosen": 2,
"custom_input": null
}
},
{
"txt": "Here is a message that needs ",
"mtch": null,
"stp": {
"continuances": [
{
"probability": 0.5,
"level": 3,
"text": "translating ",
"clkd": false,
"tokens": []
},
{
"probability": 1,
"level": 1,
"text": "translation. ",
"clkd": true,
"tokens": []
},
{
"probability": 0.5,
"level": 3,
"text": "translate ",
"clkd": false,
"tokens": []
}
],
"chosen": 1,
"custom_input": null
}
}
]
},
"format": "org.matrix.custom.html",
"formatted_body": "Here is a message that needs translation.",
"msgtype": "m.text",
"original_sent": {
"lang": "unk",
"snt": true,
"txt": "Here is a message that needs translation."
},
"original_written": null,
"tokens_sent": {
"tkns": [
{
"text": {
"offset": 0,
"content": "Here",
"length": 4
},
"lemma": [
{
"text": "here",
"save_vocab": true,
"form": "Here"
}
],
"pos": "ADV",
"morph": {
"PronType": "Dem"
}
},
{
"text": {
"offset": 5,
"content": "is",
"length": 2
},
"lemma": [
{
"text": "be",
"save_vocab": true,
"form": "is"
}
],
"pos": "AUX",
"morph": {
"Mood": "Ind",
"Number": "Sing",
"Person": "3",
"Tense": "Pres",
"VerbForm": "Fin"
}
},
{
"text": {
"offset": 8,
"content": "a",
"length": 1
},
"lemma": [
{
"text": "a",
"save_vocab": true,
"form": "a"
}
],
"pos": "DET",
"morph": {
"Definite": "Ind",
"PronType": "Art"
}
},
{
"text": {
"offset": 10,
"content": "message",
"length": 7
},
"lemma": [
{
"text": "message",
"save_vocab": true,
"form": "message"
}
],
"pos": "NOUN",
"morph": {
"Number": "Sing"
}
},
{
"text": {
"offset": 18,
"content": "that",
"length": 4
},
"lemma": [
{
"text": "that",
"save_vocab": true,
"form": "that"
}
],
"pos": "PRON",
"morph": {
"PronType": "Rel"
}
},
{
"text": {
"offset": 23,
"content": "needs",
"length": 5
},
"lemma": [
{
"text": "need",
"save_vocab": true,
"form": "needs"
}
],
"pos": "VERB",
"morph": {
"Number": "Sing",
"Person": "3",
"Tense": "Pres",
"VerbForm": "Fin"
}
},
{
"text": {
"offset": 29,
"content": "translation",
"length": 11
},
"lemma": [
{
"text": "translation",
"save_vocab": true,
"form": "translation"
}
],
"pos": "NOUN",
"morph": {
"Number": "Sing"
}
},
{
"text": {
"offset": 40,
"content": ".",
"length": 1
},
"lemma": [
{
"text": ".",
"save_vocab": false,
"form": "."
}
],
"pos": "PUNCT",
"morph": {
"PunctType": "Peri"
}
}
]
},
"tokens_written": null
} |
Beta Was this translation helpful? Give feedback.
-
@Kelrap This has been a good investigation. Let's hold off on frying this fish until we've eaten some others. |
Beta Was this translation helpful? Give feedback.
-
Message content may appear in multiple different places. It should only need to appear in one place.
Questions that may affect my implementation:
If a message is edited, will I want to keep the old version in a parameter somewhere?
A message's representation is needed if the original version isn't in its target language. Should the representation only be fetched and saved if immersion mode is on? Or as long as the text doesn't need to be translated, because it's already in the target language?
I'll look at representations and span card matches. Are there any more situations I should check for possible redundancies?
@wcjord Does this look good?
Beta Was this translation helpful? Give feedback.
All reactions